Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2016 foo86 | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include <stdlib.h> | ||
22 | |||
23 | #include "dcadct.h" | ||
24 | #include "dcamath.h" | ||
25 | |||
/**
 * Add neighbouring input pairs.
 *
 * For every n in [0, len): output[n] = input[2n] + input[2n + 1].
 * Reads 2 * len values from input and writes len values to output.
 */
static void sum_a(const int *input, int *output, int len)
{
    int n = 0;

    while (n < len) {
        const int *pair = input + 2 * n;

        output[n] = pair[0] + pair[1];
        n++;
    }
}
33 | |||
/**
 * Add each even-indexed input to the odd-indexed input just before it.
 *
 * output[0] is a plain copy of input[0] (it has no predecessor); for every
 * n in [1, len): output[n] = input[2n] + input[2n - 1].
 * output[0] is written unconditionally, regardless of len.
 */
static void sum_b(const int *input, int *output, int len)
{
    int n;

    output[0] = input[0];

    for (n = 1; n < len; n++) {
        const int *even = &input[2 * n];

        output[n] = even[0] + even[-1];
    }
}
42 | |||
/**
 * Pick out the even-indexed inputs.
 *
 * For every n in [0, len): output[n] = input[2n].
 */
static void sum_c(const int *input, int *output, int len)
{
    int n = 0;

    while (n < len) {
        output[n] = input[n + n];
        n++;
    }
}
50 | |||
/**
 * Add consecutive odd-indexed inputs.
 *
 * output[0] is a plain copy of input[1]; for every n in [1, len):
 * output[n] = input[2n - 1] + input[2n + 1].
 * output[0] is written unconditionally, regardless of len.
 */
static void sum_d(const int *input, int *output, int len)
{
    int n;

    output[0] = input[1];

    for (n = 1; n < len; n++) {
        const int *mid = &input[2 * n];

        output[n] = mid[-1] + mid[1];
    }
}
59 | |||
/**
 * Multiply an 8-element input vector by a fixed 8x8 integer matrix.
 *
 * Each output element is the dot product of one matrix row with input[0..7],
 * accumulated in 64 bits and then reduced with norm23().
 */
static void dct_a(const int *input, int *output)
{
    static const int matrix[8][8] = {
        {  8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  2435084,   822227 },
        {  8027397,  5321677,   822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
        {  7398092,   822227, -6484482, -8027397, -2435084,  5321677,  8348215,  3954362 },
        {  6484482, -3954362, -8027397,   822227,  8348215,  2435084, -7398092, -5321677 },
        {  5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  3954362,  6484482 },
        {  3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   822227, -7398092 },
        {  2435084, -6484482,  8348215, -7398092,  3954362,   822227, -5321677,  8027397 },
        {   822227, -2435084,  3954362, -5321677,  6484482, -7398092,  8027397, -8348215 }
    };

    int row, col;

    for (row = 0; row < 8; row++) {
        const int *coeff = matrix[row];
        int64_t acc = 0;

        for (col = 0; col < 8; col++)
            acc += (int64_t)coeff[col] * input[col];

        output[row] = norm23(acc);
    }
}
82 | |||
/**
 * Multiply an 8-element input vector by a fixed 8x7 integer matrix plus an
 * implicit first column.
 *
 * input[0] contributes to every output with a weight of 1 << 23; the
 * remaining seven inputs are weighted by the table row. The 64-bit sum is
 * reduced with norm23().
 */
static void dct_b(const int *input, int *output)
{
    static const int matrix[8][7] = {
        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  1636536 },
        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  6974873 },
        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, -8227423 },
        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  8227423 },
        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, -6974873 },
        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  4660461 },
        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, -1636536 }
    };

    const int *tail = input + 1;
    int row, col;

    for (row = 0; row < 8; row++) {
        const int *coeff = matrix[row];
        /* Multiply rather than shift so a negative input[0] stays well defined. */
        int64_t acc = (int64_t)input[0] * (1 << 23);

        for (col = 0; col < 7; col++)
            acc += (int64_t)coeff[col] * tail[col];

        output[row] = norm23(acc);
    }
}
105 | |||
/**
 * Combine the two 8-element halves of a 16-element input into 16 scaled
 * outputs.
 *
 * output[n]      = mul23(factor[n],      input[n] + input[8 + n])  for n = 0..7
 * output[15 - n] = mul23(factor[15 - n], input[n] - input[8 + n])  for n = 7..0
 */
static void mod_a(const int *input, int *output)
{
    static const int factor[16] = {
          4199362,   4240198,   4323885,   4454708,
          4639772,   4890013,   5221943,   5660703,
         -6245623,  -7040975,  -8158494,  -9809974,
        -12450076, -17261920, -28585092, -85479984
    };

    const int *lo = input;
    const int *hi = input + 8;
    int n;

    /* Sums fill the first half of the output in ascending order. */
    for (n = 0; n < 8; n++)
        output[n] = mul23(factor[n], lo[n] + hi[n]);

    /* Differences fill the second half, walking the input backwards. */
    for (n = 7; n >= 0; n--)
        output[15 - n] = mul23(factor[15 - n], lo[n] - hi[n]);
}
123 | |||
/**
 * Scale the upper half of a 16-element input in place, then form sums and
 * differences of the two halves.
 *
 * Note that input[8..15] is overwritten with its scaled value.
 *
 * output[n]      = input[n] + scaled input[8 + n]  for n = 0..7
 * output[15 - n] = input[n] - scaled input[8 + n]  for n = 7..0
 */
static void mod_b(int *input, int *output)
{
    static const int factor[8] = {
        4214598, 4383036,  4755871,  5425934,
        6611520, 8897610, 14448934, 42791536
    };

    int *lo = input;
    int *hi = input + 8;
    int n;

    /* Pass 1: scale the upper half in place. */
    for (n = 0; n < 8; n++)
        hi[n] = mul23(factor[n], hi[n]);

    /* Pass 2: sums go to the first half of the output. */
    for (n = 0; n < 8; n++)
        output[n] = lo[n] + hi[n];

    /* Pass 3: differences go to the second half, input walked backwards. */
    for (n = 7; n >= 0; n--)
        output[15 - n] = lo[n] - hi[n];
}
142 | |||
/**
 * Combine the two 16-element halves of a 32-element input into 32 scaled
 * outputs.
 *
 * output[n]      = mul23(factor[n],      input[n] + input[16 + n])  for n = 0..15
 * output[31 - n] = mul23(factor[31 - n], input[n] - input[16 + n])  for n = 15..0
 */
static void mod_c(const int *input, int *output)
{
    static const int factor[32] = {
         1048892,  1051425,   1056522,   1064244,
         1074689,  1087987,   1104313,   1123884,
         1146975,  1173922,   1205139,   1241133,
         1282529,  1330095,   1384791,   1447815,
        -1520688, -1605358,  -1704360,  -1821051,
        -1959964, -2127368,  -2332183,  -2587535,
        -2913561, -3342802,  -3931480,  -4785806,
        -6133390, -8566050, -14253820, -42727120
    };

    const int *lo = input;
    const int *hi = input + 16;
    int n;

    /* Sums fill the first half of the output in ascending order. */
    for (n = 0; n < 16; n++)
        output[n] = mul23(factor[n], lo[n] + hi[n]);

    /* Differences fill the second half, walking the input backwards. */
    for (n = 15; n >= 0; n--)
        output[31 - n] = mul23(factor[31 - n], lo[n] - hi[n]);
}
164 | |||
/**
 * Apply clip23() to every element of a vector, in place.
 *
 * @param input vector of len elements, overwritten with the clipped values
 * @param len   number of elements to process
 */
static void clp_v(int *input, int len)
{
    int n = 0;

    while (n < len) {
        input[n] = clip23(input[n]);
        n++;
    }
}
172 | |||
/**
 * 32-point fixed-point transform (a half IMDCT, going by the function name).
 *
 * Reads input[0..31] and writes output[0..31]. The work is done in two
 * ping-pong scratch buffers, with clip23() applied between stages.
 *
 * When the summed magnitude of the input exceeds 0x400000, the input is
 * pre-scaled down by 2 bits (with rounding) and the result is scaled back up
 * by the same amount before the final butterfly.
 *
 * @param output destination for 32 transformed values
 * @param input  32 source coefficients
 */
static void imdct_half_32(int32_t *output, const int32_t *input)
{
    int buf_a[32], buf_b[32];
    int i, k, shift, round;
    int64_t mag = 0;

    /*
     * Accumulate the magnitude in 64 bits. Summing 32 int32_t magnitudes in
     * a plain int can overflow (undefined behaviour), and abs(INT32_MIN) is
     * itself undefined. The result is unchanged for every input that did not
     * overflow before.
     */
    for (i = 0; i < 32; i++) {
        int64_t v = input[i];

        mag += v < 0 ? -v : v;
    }

    shift = mag > 0x400000 ? 2 : 0;
    round = shift > 0 ? 1 << (shift - 1) : 0;

    /*
     * Add the rounding term in 64 bits so that inputs close to INT32_MAX do
     * not overflow before the shift. The shifted value always fits in int.
     */
    for (i = 0; i < 32; i++)
        buf_a[i] = (int)(((int64_t)input[i] + round) >> shift);

    /* Stage 1: 32 -> 2 x 16 */
    sum_a(buf_a, buf_b +  0, 16);
    sum_b(buf_a, buf_b + 16, 16);
    clp_v(buf_b, 32);

    /* Stage 2: 2 x 16 -> 4 x 8 */
    sum_a(buf_b +  0, buf_a +  0, 8);
    sum_b(buf_b +  0, buf_a +  8, 8);
    sum_c(buf_b + 16, buf_a + 16, 8);
    sum_d(buf_b + 16, buf_a + 24, 8);
    clp_v(buf_a, 32);

    /* Stage 3: four 8-point transforms */
    dct_a(buf_a +  0, buf_b +  0);
    dct_b(buf_a +  8, buf_b +  8);
    dct_b(buf_a + 16, buf_b + 16);
    dct_b(buf_a + 24, buf_b + 24);
    clp_v(buf_b, 32);

    /* Stage 4: recombine into two 16-element groups */
    mod_a(buf_b +  0, buf_a +  0);
    mod_b(buf_b + 16, buf_a + 16);
    clp_v(buf_a, 32);

    /* Stage 5: recombine into one 32-element group */
    mod_c(buf_a, buf_b);

    /* Undo the pre-scaling; multiply so negative values stay well defined. */
    for (i = 0; i < 32; i++)
        buf_b[i] = clip23(buf_b[i] * (1 << shift));

    /* Final butterfly between mirrored elements. */
    for (i = 0, k = 31; i < 16; i++, k--) {
        output[     i] = clip23(buf_b[i] - buf_b[k]);
        output[16 + i] = clip23(buf_b[i] + buf_b[k]);
    }
}
218 | |||
/**
 * Combine the two 16-element halves of a 32-element input into 32 scaled
 * outputs (used by the 64-point transform).
 *
 * output[n]      = mul23(factor[n],      input[n] + input[16 + n])  for n = 0..15
 * output[31 - n] = mul23(factor[31 - n], input[n] - input[16 + n])  for n = 15..0
 */
static void mod64_a(const int *input, int *output)
{
    static const int factor[32] = {
          4195568,   4205700,   4226086,    4256977,
          4298755,   4351949,   4417251,    4495537,
          4587901,   4695690,   4820557,    4964534,
          5130115,   5320382,   5539164,    5791261,
         -6082752,  -6421430,  -6817439,   -7284203,
         -7839855,  -8509474,  -9328732,  -10350140,
        -11654242, -13371208, -15725922,  -19143224,
        -24533560, -34264200, -57015280, -170908480
    };

    const int *lo = input;
    const int *hi = input + 16;
    int n;

    /* Sums fill the first half of the output in ascending order. */
    for (n = 0; n < 16; n++)
        output[n] = mul23(factor[n], lo[n] + hi[n]);

    /* Differences fill the second half, walking the input backwards. */
    for (n = 15; n >= 0; n--)
        output[31 - n] = mul23(factor[31 - n], lo[n] - hi[n]);
}
240 | |||
/**
 * Scale the upper half of a 32-element input in place, then form sums and
 * differences of the two halves (used by the 64-point transform).
 *
 * Note that input[16..31] is overwritten with its scaled value.
 *
 * output[n]      = input[n] + scaled input[16 + n]  for n = 0..15
 * output[31 - n] = input[n] - scaled input[16 + n]  for n = 15..0
 */
static void mod64_b(int *input, int *output)
{
    static const int factor[16] = {
         4199362,  4240198,  4323885,  4454708,
         4639772,  4890013,  5221943,  5660703,
         6245623,  7040975,  8158494,  9809974,
        12450076, 17261920, 28585092, 85479984
    };

    int *lo = input;
    int *hi = input + 16;
    int n;

    /* Pass 1: scale the upper half in place. */
    for (n = 0; n < 16; n++)
        hi[n] = mul23(factor[n], hi[n]);

    /* Pass 2: sums go to the first half of the output. */
    for (n = 0; n < 16; n++)
        output[n] = lo[n] + hi[n];

    /* Pass 3: differences go to the second half, input walked backwards. */
    for (n = 15; n >= 0; n--)
        output[31 - n] = lo[n] - hi[n];
}
261 | |||
/**
 * Combine the two 32-element halves of a 64-element input into 64 scaled
 * outputs.
 *
 * output[n]      = mul23(factor[n],      input[n] + input[32 + n])  for n = 0..31
 * output[63 - n] = mul23(factor[63 - n], input[n] - input[32 + n])  for n = 31..0
 */
static void mod64_c(const int *input, int *output)
{
    static const int factor[64] = {
          741511,    741958,    742853,    744199,
          746001,    748262,    750992,    754197,
          757888,    762077,    766777,    772003,
          777772,    784105,    791021,    798546,
          806707,    815532,    825054,    835311,
          846342,    858193,    870912,    884554,
          899181,    914860,    931667,    949686,
          969011,    989747,   1012012,   1035941,
        -1061684,  -1089412,  -1119320,  -1151629,
        -1186595,  -1224511,  -1265719,  -1310613,
        -1359657,  -1413400,  -1472490,  -1537703,
        -1609974,  -1690442,  -1780506,  -1881904,
        -1996824,  -2128058,  -2279225,  -2455101,
        -2662128,  -2909200,  -3208956,  -3579983,
        -4050785,  -4667404,  -5509372,  -6726913,
        -8641940, -12091426, -20144284, -60420720
    };

    const int *lo = input;
    const int *hi = input + 32;
    int n;

    /* Sums fill the first half of the output in ascending order. */
    for (n = 0; n < 32; n++)
        output[n] = mul23(factor[n], lo[n] + hi[n]);

    /* Differences fill the second half, walking the input backwards. */
    for (n = 31; n >= 0; n--)
        output[63 - n] = mul23(factor[63 - n], lo[n] - hi[n]);
}
291 | |||
/**
 * 64-point fixed-point transform (a half IMDCT, going by the function name).
 *
 * Reads input[0..63] and writes output[0..63]. The work is done in two
 * ping-pong scratch buffers, with clip23() applied between stages.
 *
 * When the summed magnitude of the input exceeds 0x400000, the input is
 * pre-scaled down by 2 bits (with rounding) and the result is scaled back up
 * by the same amount before the final butterfly.
 *
 * @param output destination for 64 transformed values
 * @param input  64 source coefficients
 */
static void imdct_half_64(int32_t *output, const int32_t *input)
{
    int buf_a[64], buf_b[64];
    int i, k, shift, round;
    int64_t mag = 0;

    /*
     * Accumulate the magnitude in 64 bits. Summing 64 int32_t magnitudes in
     * a plain int can overflow (undefined behaviour), and abs(INT32_MIN) is
     * itself undefined. The result is unchanged for every input that did not
     * overflow before.
     */
    for (i = 0; i < 64; i++) {
        int64_t v = input[i];

        mag += v < 0 ? -v : v;
    }

    shift = mag > 0x400000 ? 2 : 0;
    round = shift > 0 ? 1 << (shift - 1) : 0;

    /*
     * Add the rounding term in 64 bits so that inputs close to INT32_MAX do
     * not overflow before the shift. The shifted value always fits in int.
     */
    for (i = 0; i < 64; i++)
        buf_a[i] = (int)(((int64_t)input[i] + round) >> shift);

    /* Stage 1: 64 -> 2 x 32 */
    sum_a(buf_a, buf_b +  0, 32);
    sum_b(buf_a, buf_b + 32, 32);
    clp_v(buf_b, 64);

    /* Stage 2: 2 x 32 -> 4 x 16 */
    sum_a(buf_b +  0, buf_a +  0, 16);
    sum_b(buf_b +  0, buf_a + 16, 16);
    sum_c(buf_b + 32, buf_a + 32, 16);
    sum_d(buf_b + 32, buf_a + 48, 16);
    clp_v(buf_a, 64);

    /* Stage 3: 4 x 16 -> 8 x 8 */
    sum_a(buf_a +  0, buf_b +  0, 8);
    sum_b(buf_a +  0, buf_b +  8, 8);
    sum_c(buf_a + 16, buf_b + 16, 8);
    sum_d(buf_a + 16, buf_b + 24, 8);
    sum_c(buf_a + 32, buf_b + 32, 8);
    sum_d(buf_a + 32, buf_b + 40, 8);
    sum_c(buf_a + 48, buf_b + 48, 8);
    sum_d(buf_a + 48, buf_b + 56, 8);
    clp_v(buf_b, 64);

    /* Stage 4: eight 8-point transforms */
    dct_a(buf_b +  0, buf_a +  0);
    dct_b(buf_b +  8, buf_a +  8);
    dct_b(buf_b + 16, buf_a + 16);
    dct_b(buf_b + 24, buf_a + 24);
    dct_b(buf_b + 32, buf_a + 32);
    dct_b(buf_b + 40, buf_a + 40);
    dct_b(buf_b + 48, buf_a + 48);
    dct_b(buf_b + 56, buf_a + 56);
    clp_v(buf_a, 64);

    /* Stage 5: recombine into four 16-element groups */
    mod_a(buf_a +  0, buf_b +  0);
    mod_b(buf_a + 16, buf_b + 16);
    mod_b(buf_a + 32, buf_b + 32);
    mod_b(buf_a + 48, buf_b + 48);
    clp_v(buf_b, 64);

    /* Stage 6: recombine into two 32-element groups */
    mod64_a(buf_b +  0, buf_a +  0);
    mod64_b(buf_b + 32, buf_a + 32);
    clp_v(buf_a, 64);

    /* Stage 7: recombine into one 64-element group */
    mod64_c(buf_a, buf_b);

    /* Undo the pre-scaling; multiply so negative values stay well defined. */
    for (i = 0; i < 64; i++)
        buf_b[i] = clip23(buf_b[i] * (1 << shift));

    /* Final butterfly between mirrored elements. */
    for (i = 0, k = 63; i < 32; i++, k--) {
        output[     i] = clip23(buf_b[i] - buf_b[k]);
        output[32 + i] = clip23(buf_b[i] + buf_b[k]);
    }
}
357 | |||
358 | 99 | av_cold void ff_dcadct_init(DCADCTContext *c) | |
359 | { | ||
360 | 99 | c->imdct_half[0] = imdct_half_32; | |
361 | 99 | c->imdct_half[1] = imdct_half_64; | |
362 | 99 | } | |
363 |