| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (C) 2016 foo86 | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <stdlib.h> | ||
| 22 | |||
| 23 | #include "dcadct.h" | ||
| 24 | #include "dcamath.h" | ||
| 25 | |||
| 26 | 270240 | static void sum_a(const int *input, int *output, int len) | |
| 27 | { | ||
| 28 | int i; | ||
| 29 | |||
| 30 |
2/2✓ Branch 0 taken 3434880 times.
✓ Branch 1 taken 270240 times.
|
3705120 | for (i = 0; i < len; i++) |
| 31 | 3434880 | output[i] = input[2 * i] + input[2 * i + 1]; | |
| 32 | 270240 | } | |
| 33 | |||
| 34 | 270240 | static void sum_b(const int *input, int *output, int len) | |
| 35 | { | ||
| 36 | int i; | ||
| 37 | |||
| 38 | 270240 | output[0] = input[0]; | |
| 39 |
2/2✓ Branch 0 taken 3164640 times.
✓ Branch 1 taken 270240 times.
|
3434880 | for (i = 1; i < len; i++) |
| 40 | 3164640 | output[i] = input[2 * i] + input[2 * i - 1]; | |
| 41 | 270240 | } | |
| 42 | |||
| 43 | 159120 | static void sum_c(const int *input, int *output, int len) | |
| 44 | { | ||
| 45 | int i; | ||
| 46 | |||
| 47 |
2/2✓ Branch 0 taken 1349760 times.
✓ Branch 1 taken 159120 times.
|
1508880 | for (i = 0; i < len; i++) |
| 48 | 1349760 | output[i] = input[2 * i]; | |
| 49 | 159120 | } | |
| 50 | |||
| 51 | 159120 | static void sum_d(const int *input, int *output, int len) | |
| 52 | { | ||
| 53 | int i; | ||
| 54 | |||
| 55 | 159120 | output[0] = input[1]; | |
| 56 |
2/2✓ Branch 0 taken 1190640 times.
✓ Branch 1 taken 159120 times.
|
1349760 | for (i = 1; i < len; i++) |
| 57 | 1190640 | output[i] = input[2 * i - 1] + input[2 * i + 1]; | |
| 58 | 159120 | } | |
| 59 | |||
| 60 | 130320 | static void dct_a(const int *input, int *output) | |
| 61 | { | ||
| 62 | static const int cos_mod[8][8] = { | ||
| 63 | { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 }, | ||
| 64 | { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 }, | ||
| 65 | { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 }, | ||
| 66 | { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 }, | ||
| 67 | { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 }, | ||
| 68 | { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 }, | ||
| 69 | { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 }, | ||
| 70 | { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 } | ||
| 71 | }; | ||
| 72 | |||
| 73 | int i, j; | ||
| 74 | |||
| 75 |
2/2✓ Branch 0 taken 1042560 times.
✓ Branch 1 taken 130320 times.
|
1172880 | for (i = 0; i < 8; i++) { |
| 76 | 1042560 | int64_t res = 0; | |
| 77 |
2/2✓ Branch 0 taken 8340480 times.
✓ Branch 1 taken 1042560 times.
|
9383040 | for (j = 0; j < 8; j++) |
| 78 | 8340480 | res += (int64_t)cos_mod[i][j] * input[j]; | |
| 79 | 1042560 | output[i] = norm23(res); | |
| 80 | } | ||
| 81 | 130320 | } | |
| 82 | |||
| 83 | 429360 | static void dct_b(const int *input, int *output) | |
| 84 | { | ||
| 85 | static const int cos_mod[8][7] = { | ||
| 86 | { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 }, | ||
| 87 | { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 }, | ||
| 88 | { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 }, | ||
| 89 | { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 }, | ||
| 90 | { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 }, | ||
| 91 | { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 }, | ||
| 92 | { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 }, | ||
| 93 | { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 } | ||
| 94 | }; | ||
| 95 | |||
| 96 | int i, j; | ||
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 3434880 times.
✓ Branch 1 taken 429360 times.
|
3864240 | for (i = 0; i < 8; i++) { |
| 99 | 3434880 | int64_t res = input[0] * (INT64_C(1) << 23); | |
| 100 |
2/2✓ Branch 0 taken 24044160 times.
✓ Branch 1 taken 3434880 times.
|
27479040 | for (j = 0; j < 7; j++) |
| 101 | 24044160 | res += (int64_t)cos_mod[i][j] * input[1 + j]; | |
| 102 | 3434880 | output[i] = norm23(res); | |
| 103 | } | ||
| 104 | 429360 | } | |
| 105 | |||
| 106 | 130320 | static void mod_a(const int *input, int *output) | |
| 107 | { | ||
| 108 | static const int cos_mod[16] = { | ||
| 109 | 4199362, 4240198, 4323885, 4454708, | ||
| 110 | 4639772, 4890013, 5221943, 5660703, | ||
| 111 | -6245623, -7040975, -8158494, -9809974, | ||
| 112 | -12450076, -17261920, -28585092, -85479984 | ||
| 113 | }; | ||
| 114 | |||
| 115 | int i, k; | ||
| 116 | |||
| 117 |
2/2✓ Branch 0 taken 1042560 times.
✓ Branch 1 taken 130320 times.
|
1172880 | for (i = 0; i < 8; i++) |
| 118 | 1042560 | output[i] = mul23(cos_mod[i], input[i] + input[8 + i]); | |
| 119 | |||
| 120 |
2/2✓ Branch 0 taken 1042560 times.
✓ Branch 1 taken 130320 times.
|
1172880 | for (i = 8, k = 7; i < 16; i++, k--) |
| 121 | 1042560 | output[i] = mul23(cos_mod[i], input[k] - input[8 + k]); | |
| 122 | 130320 | } | |
| 123 | |||
| 124 | 149520 | static void mod_b(int *input, int *output) | |
| 125 | { | ||
| 126 | static const int cos_mod[8] = { | ||
| 127 | 4214598, 4383036, 4755871, 5425934, | ||
| 128 | 6611520, 8897610, 14448934, 42791536 | ||
| 129 | }; | ||
| 130 | |||
| 131 | int i, k; | ||
| 132 | |||
| 133 |
2/2✓ Branch 0 taken 1196160 times.
✓ Branch 1 taken 149520 times.
|
1345680 | for (i = 0; i < 8; i++) |
| 134 | 1196160 | input[8 + i] = mul23(cos_mod[i], input[8 + i]); | |
| 135 | |||
| 136 |
2/2✓ Branch 0 taken 1196160 times.
✓ Branch 1 taken 149520 times.
|
1345680 | for (i = 0; i < 8; i++) |
| 137 | 1196160 | output[i] = input[i] + input[8 + i]; | |
| 138 | |||
| 139 |
2/2✓ Branch 0 taken 1196160 times.
✓ Branch 1 taken 149520 times.
|
1345680 | for (i = 8, k = 7; i < 16; i++, k--) |
| 140 | 1196160 | output[i] = input[k] - input[8 + k]; | |
| 141 | 149520 | } | |
| 142 | |||
| 143 | 120720 | static void mod_c(const int *input, int *output) | |
| 144 | { | ||
| 145 | static const int cos_mod[32] = { | ||
| 146 | 1048892, 1051425, 1056522, 1064244, | ||
| 147 | 1074689, 1087987, 1104313, 1123884, | ||
| 148 | 1146975, 1173922, 1205139, 1241133, | ||
| 149 | 1282529, 1330095, 1384791, 1447815, | ||
| 150 | -1520688, -1605358, -1704360, -1821051, | ||
| 151 | -1959964, -2127368, -2332183, -2587535, | ||
| 152 | -2913561, -3342802, -3931480, -4785806, | ||
| 153 | -6133390, -8566050, -14253820, -42727120 | ||
| 154 | }; | ||
| 155 | |||
| 156 | int i, k; | ||
| 157 | |||
| 158 |
2/2✓ Branch 0 taken 1931520 times.
✓ Branch 1 taken 120720 times.
|
2052240 | for (i = 0; i < 16; i++) |
| 159 | 1931520 | output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); | |
| 160 | |||
| 161 |
2/2✓ Branch 0 taken 1931520 times.
✓ Branch 1 taken 120720 times.
|
2052240 | for (i = 16, k = 15; i < 32; i++, k--) |
| 162 | 1931520 | output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); | |
| 163 | 120720 | } | |
| 164 | |||
| 165 | 540480 | static void clp_v(int *input, int len) | |
| 166 | { | ||
| 167 | int i; | ||
| 168 | |||
| 169 |
2/2✓ Branch 0 taken 19138560 times.
✓ Branch 1 taken 540480 times.
|
19679040 | for (i = 0; i < len; i++) |
| 170 | 19138560 | input[i] = clip23(input[i]); | |
| 171 | 540480 | } | |
| 172 | |||
| 173 | 120720 | static void imdct_half_32(int32_t *output, const int32_t *input) | |
| 174 | { | ||
| 175 | int buf_a[32], buf_b[32]; | ||
| 176 | int i, k, mag, shift, round; | ||
| 177 | |||
| 178 | 120720 | mag = 0; | |
| 179 |
2/2✓ Branch 0 taken 3863040 times.
✓ Branch 1 taken 120720 times.
|
3983760 | for (i = 0; i < 32; i++) |
| 180 | 3863040 | mag += abs(input[i]); | |
| 181 | |||
| 182 |
2/2✓ Branch 0 taken 6670 times.
✓ Branch 1 taken 114050 times.
|
120720 | shift = mag > 0x400000 ? 2 : 0; |
| 183 |
2/2✓ Branch 0 taken 6670 times.
✓ Branch 1 taken 114050 times.
|
120720 | round = shift > 0 ? 1 << (shift - 1) : 0; |
| 184 | |||
| 185 |
2/2✓ Branch 0 taken 3863040 times.
✓ Branch 1 taken 120720 times.
|
3983760 | for (i = 0; i < 32; i++) |
| 186 | 3863040 | buf_a[i] = (input[i] + round) >> shift; | |
| 187 | |||
| 188 | 120720 | sum_a(buf_a, buf_b + 0, 16); | |
| 189 | 120720 | sum_b(buf_a, buf_b + 16, 16); | |
| 190 | 120720 | clp_v(buf_b, 32); | |
| 191 | |||
| 192 | 120720 | sum_a(buf_b + 0, buf_a + 0, 8); | |
| 193 | 120720 | sum_b(buf_b + 0, buf_a + 8, 8); | |
| 194 | 120720 | sum_c(buf_b + 16, buf_a + 16, 8); | |
| 195 | 120720 | sum_d(buf_b + 16, buf_a + 24, 8); | |
| 196 | 120720 | clp_v(buf_a, 32); | |
| 197 | |||
| 198 | 120720 | dct_a(buf_a + 0, buf_b + 0); | |
| 199 | 120720 | dct_b(buf_a + 8, buf_b + 8); | |
| 200 | 120720 | dct_b(buf_a + 16, buf_b + 16); | |
| 201 | 120720 | dct_b(buf_a + 24, buf_b + 24); | |
| 202 | 120720 | clp_v(buf_b, 32); | |
| 203 | |||
| 204 | 120720 | mod_a(buf_b + 0, buf_a + 0); | |
| 205 | 120720 | mod_b(buf_b + 16, buf_a + 16); | |
| 206 | 120720 | clp_v(buf_a, 32); | |
| 207 | |||
| 208 | 120720 | mod_c(buf_a, buf_b); | |
| 209 | |||
| 210 |
2/2✓ Branch 0 taken 3863040 times.
✓ Branch 1 taken 120720 times.
|
3983760 | for (i = 0; i < 32; i++) |
| 211 | 3863040 | buf_b[i] = clip23(buf_b[i] * (1 << shift)); | |
| 212 | |||
| 213 |
2/2✓ Branch 0 taken 1931520 times.
✓ Branch 1 taken 120720 times.
|
2052240 | for (i = 0, k = 31; i < 16; i++, k--) { |
| 214 | 1931520 | output[ i] = clip23(buf_b[i] - buf_b[k]); | |
| 215 | 1931520 | output[16 + i] = clip23(buf_b[i] + buf_b[k]); | |
| 216 | } | ||
| 217 | 120720 | } | |
| 218 | |||
| 219 | 9600 | static void mod64_a(const int *input, int *output) | |
| 220 | { | ||
| 221 | static const int cos_mod[32] = { | ||
| 222 | 4195568, 4205700, 4226086, 4256977, | ||
| 223 | 4298755, 4351949, 4417251, 4495537, | ||
| 224 | 4587901, 4695690, 4820557, 4964534, | ||
| 225 | 5130115, 5320382, 5539164, 5791261, | ||
| 226 | -6082752, -6421430, -6817439, -7284203, | ||
| 227 | -7839855, -8509474, -9328732, -10350140, | ||
| 228 | -11654242, -13371208, -15725922, -19143224, | ||
| 229 | -24533560, -34264200, -57015280, -170908480 | ||
| 230 | }; | ||
| 231 | |||
| 232 | int i, k; | ||
| 233 | |||
| 234 |
2/2✓ Branch 0 taken 153600 times.
✓ Branch 1 taken 9600 times.
|
163200 | for (i = 0; i < 16; i++) |
| 235 | 153600 | output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); | |
| 236 | |||
| 237 |
2/2✓ Branch 0 taken 153600 times.
✓ Branch 1 taken 9600 times.
|
163200 | for (i = 16, k = 15; i < 32; i++, k--) |
| 238 | 153600 | output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); | |
| 239 | 9600 | } | |
| 240 | |||
| 241 | 9600 | static void mod64_b(int *input, int *output) | |
| 242 | { | ||
| 243 | static const int cos_mod[16] = { | ||
| 244 | 4199362, 4240198, 4323885, 4454708, | ||
| 245 | 4639772, 4890013, 5221943, 5660703, | ||
| 246 | 6245623, 7040975, 8158494, 9809974, | ||
| 247 | 12450076, 17261920, 28585092, 85479984 | ||
| 248 | }; | ||
| 249 | |||
| 250 | int i, k; | ||
| 251 | |||
| 252 |
2/2✓ Branch 0 taken 153600 times.
✓ Branch 1 taken 9600 times.
|
163200 | for (i = 0; i < 16; i++) |
| 253 | 153600 | input[16 + i] = mul23(cos_mod[i], input[16 + i]); | |
| 254 | |||
| 255 |
2/2✓ Branch 0 taken 153600 times.
✓ Branch 1 taken 9600 times.
|
163200 | for (i = 0; i < 16; i++) |
| 256 | 153600 | output[i] = input[i] + input[16 + i]; | |
| 257 | |||
| 258 |
2/2✓ Branch 0 taken 153600 times.
✓ Branch 1 taken 9600 times.
|
163200 | for (i = 16, k = 15; i < 32; i++, k--) |
| 259 | 153600 | output[i] = input[k] - input[16 + k]; | |
| 260 | 9600 | } | |
| 261 | |||
| 262 | 9600 | static void mod64_c(const int *input, int *output) | |
| 263 | { | ||
| 264 | static const int cos_mod[64] = { | ||
| 265 | 741511, 741958, 742853, 744199, | ||
| 266 | 746001, 748262, 750992, 754197, | ||
| 267 | 757888, 762077, 766777, 772003, | ||
| 268 | 777772, 784105, 791021, 798546, | ||
| 269 | 806707, 815532, 825054, 835311, | ||
| 270 | 846342, 858193, 870912, 884554, | ||
| 271 | 899181, 914860, 931667, 949686, | ||
| 272 | 969011, 989747, 1012012, 1035941, | ||
| 273 | -1061684, -1089412, -1119320, -1151629, | ||
| 274 | -1186595, -1224511, -1265719, -1310613, | ||
| 275 | -1359657, -1413400, -1472490, -1537703, | ||
| 276 | -1609974, -1690442, -1780506, -1881904, | ||
| 277 | -1996824, -2128058, -2279225, -2455101, | ||
| 278 | -2662128, -2909200, -3208956, -3579983, | ||
| 279 | -4050785, -4667404, -5509372, -6726913, | ||
| 280 | -8641940, -12091426, -20144284, -60420720 | ||
| 281 | }; | ||
| 282 | |||
| 283 | int i, k; | ||
| 284 | |||
| 285 |
2/2✓ Branch 0 taken 307200 times.
✓ Branch 1 taken 9600 times.
|
316800 | for (i = 0; i < 32; i++) |
| 286 | 307200 | output[i] = mul23(cos_mod[i], input[i] + input[32 + i]); | |
| 287 | |||
| 288 |
2/2✓ Branch 0 taken 307200 times.
✓ Branch 1 taken 9600 times.
|
316800 | for (i = 32, k = 31; i < 64; i++, k--) |
| 289 | 307200 | output[i] = mul23(cos_mod[i], input[k] - input[32 + k]); | |
| 290 | 9600 | } | |
| 291 | |||
| 292 | 9600 | static void imdct_half_64(int32_t *output, const int32_t *input) | |
| 293 | { | ||
| 294 | int buf_a[64], buf_b[64]; | ||
| 295 | int i, k, mag, shift, round; | ||
| 296 | |||
| 297 | 9600 | mag = 0; | |
| 298 |
2/2✓ Branch 0 taken 614400 times.
✓ Branch 1 taken 9600 times.
|
624000 | for (i = 0; i < 64; i++) |
| 299 | 614400 | mag += abs(input[i]); | |
| 300 | |||
| 301 |
2/2✓ Branch 0 taken 2454 times.
✓ Branch 1 taken 7146 times.
|
9600 | shift = mag > 0x400000 ? 2 : 0; |
| 302 |
2/2✓ Branch 0 taken 2454 times.
✓ Branch 1 taken 7146 times.
|
9600 | round = shift > 0 ? 1 << (shift - 1) : 0; |
| 303 | |||
| 304 |
2/2✓ Branch 0 taken 614400 times.
✓ Branch 1 taken 9600 times.
|
624000 | for (i = 0; i < 64; i++) |
| 305 | 614400 | buf_a[i] = (input[i] + round) >> shift; | |
| 306 | |||
| 307 | 9600 | sum_a(buf_a, buf_b + 0, 32); | |
| 308 | 9600 | sum_b(buf_a, buf_b + 32, 32); | |
| 309 | 9600 | clp_v(buf_b, 64); | |
| 310 | |||
| 311 | 9600 | sum_a(buf_b + 0, buf_a + 0, 16); | |
| 312 | 9600 | sum_b(buf_b + 0, buf_a + 16, 16); | |
| 313 | 9600 | sum_c(buf_b + 32, buf_a + 32, 16); | |
| 314 | 9600 | sum_d(buf_b + 32, buf_a + 48, 16); | |
| 315 | 9600 | clp_v(buf_a, 64); | |
| 316 | |||
| 317 | 9600 | sum_a(buf_a + 0, buf_b + 0, 8); | |
| 318 | 9600 | sum_b(buf_a + 0, buf_b + 8, 8); | |
| 319 | 9600 | sum_c(buf_a + 16, buf_b + 16, 8); | |
| 320 | 9600 | sum_d(buf_a + 16, buf_b + 24, 8); | |
| 321 | 9600 | sum_c(buf_a + 32, buf_b + 32, 8); | |
| 322 | 9600 | sum_d(buf_a + 32, buf_b + 40, 8); | |
| 323 | 9600 | sum_c(buf_a + 48, buf_b + 48, 8); | |
| 324 | 9600 | sum_d(buf_a + 48, buf_b + 56, 8); | |
| 325 | 9600 | clp_v(buf_b, 64); | |
| 326 | |||
| 327 | 9600 | dct_a(buf_b + 0, buf_a + 0); | |
| 328 | 9600 | dct_b(buf_b + 8, buf_a + 8); | |
| 329 | 9600 | dct_b(buf_b + 16, buf_a + 16); | |
| 330 | 9600 | dct_b(buf_b + 24, buf_a + 24); | |
| 331 | 9600 | dct_b(buf_b + 32, buf_a + 32); | |
| 332 | 9600 | dct_b(buf_b + 40, buf_a + 40); | |
| 333 | 9600 | dct_b(buf_b + 48, buf_a + 48); | |
| 334 | 9600 | dct_b(buf_b + 56, buf_a + 56); | |
| 335 | 9600 | clp_v(buf_a, 64); | |
| 336 | |||
| 337 | 9600 | mod_a(buf_a + 0, buf_b + 0); | |
| 338 | 9600 | mod_b(buf_a + 16, buf_b + 16); | |
| 339 | 9600 | mod_b(buf_a + 32, buf_b + 32); | |
| 340 | 9600 | mod_b(buf_a + 48, buf_b + 48); | |
| 341 | 9600 | clp_v(buf_b, 64); | |
| 342 | |||
| 343 | 9600 | mod64_a(buf_b + 0, buf_a + 0); | |
| 344 | 9600 | mod64_b(buf_b + 32, buf_a + 32); | |
| 345 | 9600 | clp_v(buf_a, 64); | |
| 346 | |||
| 347 | 9600 | mod64_c(buf_a, buf_b); | |
| 348 | |||
| 349 |
2/2✓ Branch 0 taken 614400 times.
✓ Branch 1 taken 9600 times.
|
624000 | for (i = 0; i < 64; i++) |
| 350 | 614400 | buf_b[i] = clip23(buf_b[i] * (1 << shift)); | |
| 351 | |||
| 352 |
2/2✓ Branch 0 taken 307200 times.
✓ Branch 1 taken 9600 times.
|
316800 | for (i = 0, k = 63; i < 32; i++, k--) { |
| 353 | 307200 | output[ i] = clip23(buf_b[i] - buf_b[k]); | |
| 354 | 307200 | output[32 + i] = clip23(buf_b[i] + buf_b[k]); | |
| 355 | } | ||
| 356 | 9600 | } | |
| 357 | |||
| 358 | 155 | av_cold void ff_dcadct_init(DCADCTContext *c) | |
| 359 | { | ||
| 360 | 155 | c->imdct_half[0] = imdct_half_32; | |
| 361 | 155 | c->imdct_half[1] = imdct_half_64; | |
| 362 | 155 | } | |
| 363 |