Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Simple IDCT | ||
3 | * | ||
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @file | ||
25 | * simpleidct in C. | ||
26 | */ | ||
27 | |||
28 | /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c | ||
29 | * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */ | ||
30 | |||
31 | #include "bit_depth_template.c" | ||
32 | |||
/*
 * Per-bit-depth fixed-point parameters for the IDCT defined below.
 *
 * Any earlier definitions are dropped first so that this section can be
 * evaluated again under a different BIT_DEPTH (presumably this template is
 * included more than once -- confirm against the including files).
 *
 *   W1..W7     cosine weights used by the row and column passes
 *   ROW_SHIFT  right shift applied (together with extra_shift) to the
 *              results of the row pass
 *   COL_SHIFT  right shift applied to the results of the column pass
 *   DC_SHIFT   scaling used by the DC-only shortcut of the row pass
 *   MUL / MAC  multiply and multiply-accumulate used for the odd terms
 */
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef ROW_SHIFT
#undef COL_SHIFT
#undef DC_SHIFT
#undef MUL
#undef MAC

#if BIT_DEPTH == 8

/* NOTE(review): W4 is 16383 here, while the formula in the trailing comments
 * yields 16384 for i = 4 (the value the 10-bit table uses). Presumably
 * deliberate -- confirm before "correcting" it. */
#define W1 22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W4 16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5

#define ROW_SHIFT 11
#define COL_SHIFT 20
#define DC_SHIFT 3

/* 8-bit builds delegate to MUL16/MAC16, which are defined outside this file. */
#define MUL(a, b)    MUL16(a, b)
#define MAC(a, b, c) MAC16(a, b, c)

#elif BIT_DEPTH == 10 || BIT_DEPTH == 12

# if BIT_DEPTH == 10
/* Same weights as the 8-bit table except W3 (19265 vs 19266) and
 * W4 (16384 vs 16383). The trailing numbers are kept from the original;
 * they look like the same weights at four times the scale -- TODO confirm. */
#define W1 22725 // 90901
#define W2 21407 // 85627
#define W3 19265 // 77062
#define W4 16384 // 65535
#define W5 12873 // 51491
#define W6 8867  // 35468
#define W7 4520  // 18081

/* The shift set depends on whether the extra-shift variant is being built
 * and on the width of the input coefficients (IN_IDCT_DEPTH). */
# ifdef EXTRA_SHIFT
#define ROW_SHIFT 13
#define COL_SHIFT 18
#define DC_SHIFT 1
# elif IN_IDCT_DEPTH == 32
#define ROW_SHIFT 13
#define COL_SHIFT 21
#define DC_SHIFT 2
# else
#define ROW_SHIFT 12
#define COL_SHIFT 19
#define DC_SHIFT 2
# endif

# else
/* BIT_DEPTH == 12: weights are roughly twice the 10-bit values. */
#define W1 45451
#define W2 42813
#define W3 38531
#define W4 32767
#define W5 25746
#define W6 17734
#define W7 9041

#define ROW_SHIFT 16
#define COL_SHIFT 17
/* Negative: with extra_shift == 0 the DC shortcut takes its rounding
 * right-shift branch instead of the multiply branch. */
#define DC_SHIFT -1
# endif

/* The product is formed after casting one operand to SUINT (defined outside
 * this file; presumably an unsigned type so that wraparound is well defined
 * -- confirm). MAC accumulates into its first argument. */
#define MUL(a, b)    ((int)((SUINT)(a) * (b)))
#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))

#else

#error "Unsupported bitdepth"

#endif
110 | |||
111 | #ifdef EXTRA_SHIFT | ||
112 | 21924096 | static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) | |
113 | #else | ||
114 | 1477603664 | static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) | |
115 | #endif | ||
116 | { | ||
117 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; | ||
118 | |||
119 | // TODO: Add DC-only support for int32_t input | ||
120 | #if IN_IDCT_DEPTH == 16 | ||
121 | #if HAVE_FAST_64BIT | ||
122 | #define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN) | ||
123 |
4/4✓ Branch 0 taken 547950713 times.
✓ Branch 1 taken 200593087 times.
✓ Branch 2 taken 13064919 times.
✓ Branch 3 taken 8859177 times.
|
1499442384 | if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { |
124 | uint64_t temp; | ||
125 |
3/4✓ Branch 0 taken 536224548 times.
✓ Branch 1 taken 11726165 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 13064919 times.
|
1098527950 | if (DC_SHIFT - extra_shift >= 0) { |
126 | 1072449096 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; | |
127 | } else { | ||
128 | 26078854 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; | |
129 | } | ||
130 | 1098527950 | temp += temp * (1 << 16); | |
131 | 1098527950 | temp += temp * ((uint64_t) 1 << 32); | |
132 | 1098527950 | AV_WN64A(row, temp); | |
133 | 1098527950 | AV_WN64A(row + 4, temp); | |
134 | 1098527950 | return; | |
135 | } | ||
136 | #else | ||
137 | if (!(AV_RN32A(row+2) | | ||
138 | AV_RN32A(row+4) | | ||
139 | AV_RN32A(row+6) | | ||
140 | row[1])) { | ||
141 | uint32_t temp; | ||
142 | if (DC_SHIFT - extra_shift >= 0) { | ||
143 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; | ||
144 | } else { | ||
145 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; | ||
146 | } | ||
147 | temp += temp * (1 << 16); | ||
148 | AV_WN32A(row, temp); | ||
149 | AV_WN32A(row+2, temp); | ||
150 | AV_WN32A(row+4, temp); | ||
151 | AV_WN32A(row+6, temp); | ||
152 | return; | ||
153 | } | ||
154 | #endif | ||
155 | #endif | ||
156 | |||
157 | 400999810 | a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); | |
158 | 400999810 | a1 = a0; | |
159 | 400999810 | a2 = a0; | |
160 | 400999810 | a3 = a0; | |
161 | |||
162 | 400999810 | a0 += (SUINT)W2 * row[2]; | |
163 | 400999810 | a1 += (SUINT)W6 * row[2]; | |
164 | 400999810 | a2 -= (SUINT)W6 * row[2]; | |
165 | 400999810 | a3 -= (SUINT)W2 * row[2]; | |
166 | |||
167 | 400999810 | b0 = MUL(W1, row[1]); | |
168 | 400999810 | MAC(b0, W3, row[3]); | |
169 | 400999810 | b1 = MUL(W3, row[1]); | |
170 | 400999810 | MAC(b1, -W7, row[3]); | |
171 | 400999810 | b2 = MUL(W5, row[1]); | |
172 | 400999810 | MAC(b2, -W1, row[3]); | |
173 | 400999810 | b3 = MUL(W7, row[1]); | |
174 | 400999810 | MAC(b3, -W5, row[3]); | |
175 | |||
176 | #if IN_IDCT_DEPTH == 32 | ||
177 |
2/2✓ Branch 0 taken 4223 times.
✓ Branch 1 taken 38465 times.
|
85376 | if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) { |
178 | #else | ||
179 |
4/4✓ Branch 0 taken 82280698 times.
✓ Branch 1 taken 118312389 times.
✓ Branch 2 taken 6710882 times.
✓ Branch 3 taken 2148295 times.
|
400914434 | if (AV_RN64A(row + 4)) { |
180 | #endif | ||
181 | 165760028 | a0 += (SUINT) W4*row[4] + (SUINT)W6*row[6]; | |
182 | 165760028 | a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6]; | |
183 | 165760028 | a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6]; | |
184 | 165760028 | a3 += (SUINT) W4*row[4] - (SUINT)W6*row[6]; | |
185 | |||
186 | 165760028 | MAC(b0, W5, row[5]); | |
187 | 165760028 | MAC(b0, W7, row[7]); | |
188 | |||
189 | 165760028 | MAC(b1, -W1, row[5]); | |
190 | 165760028 | MAC(b1, -W5, row[7]); | |
191 | |||
192 | 165760028 | MAC(b2, W7, row[5]); | |
193 | 165760028 | MAC(b2, W3, row[7]); | |
194 | |||
195 | 165760028 | MAC(b3, W3, row[5]); | |
196 | 165760028 | MAC(b3, -W1, row[7]); | |
197 | } | ||
198 | |||
199 | 400999810 | row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift); | |
200 | 400999810 | row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift); | |
201 | 400999810 | row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift); | |
202 | 400999810 | row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift); | |
203 | 400999810 | row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift); | |
204 | 400999810 | row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift); | |
205 | 400999810 | row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift); | |
206 | 400999810 | row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift); | |
207 | 85376 | } | |
208 | |||
/*
 * Shared body of the column pass.
 *
 * Expands in a scope that provides a pointer `col` and the accumulators
 * a0..a3 and b0..b3. It reads the eight coefficients of one column at
 * col[8*0] .. col[8*7] and leaves the even sums in a0..a3 and the odd sums
 * in b0..b3. It stores nothing to memory and applies no shift: each user
 * combines aN +/- bN and shifts right by COL_SHIFT itself.
 *
 * The rounding bias is added to col[8*0] before the multiplication by W4,
 * as (1 << (COL_SHIFT - 1)) / W4. Coefficients 4..7 are tested against zero
 * and their terms skipped when they do not contribute.
 */
#define IDCT_COLS do {                                          \
        a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));  \
        a1 = a0;                                                \
        a2 = a0;                                                \
        a3 = a0;                                                \
                                                                \
        a0 += (SUINT) W2*col[8*2];                              \
        a1 += (SUINT) W6*col[8*2];                              \
        a2 += (SUINT)-W6*col[8*2];                              \
        a3 += (SUINT)-W2*col[8*2];                              \
                                                                \
        b0 = MUL(W1, col[8*1]);                                 \
        b1 = MUL(W3, col[8*1]);                                 \
        b2 = MUL(W5, col[8*1]);                                 \
        b3 = MUL(W7, col[8*1]);                                 \
                                                                \
        MAC(b0,  W3, col[8*3]);                                 \
        MAC(b1, -W7, col[8*3]);                                 \
        MAC(b2, -W1, col[8*3]);                                 \
        MAC(b3, -W5, col[8*3]);                                 \
                                                                \
        if (col[8*4]) {                                         \
            a0 += (SUINT) W4*col[8*4];                          \
            a1 += (SUINT)-W4*col[8*4];                          \
            a2 += (SUINT)-W4*col[8*4];                          \
            a3 += (SUINT) W4*col[8*4];                          \
        }                                                       \
                                                                \
        if (col[8*5]) {                                         \
            MAC(b0,  W5, col[8*5]);                             \
            MAC(b1, -W1, col[8*5]);                             \
            MAC(b2,  W7, col[8*5]);                             \
            MAC(b3,  W3, col[8*5]);                             \
        }                                                       \
                                                                \
        if (col[8*6]) {                                         \
            a0 += (SUINT) W6*col[8*6];                          \
            a1 += (SUINT)-W2*col[8*6];                          \
            a2 += (SUINT) W2*col[8*6];                          \
            a3 += (SUINT)-W6*col[8*6];                          \
        }                                                       \
                                                                \
        if (col[8*7]) {                                         \
            MAC(b0,  W7, col[8*7]);                             \
            MAC(b1, -W5, col[8*7]);                             \
            MAC(b2,  W3, col[8*7]);                             \
            MAC(b3, -W1, col[8*7]);                             \
        }                                                       \
    } while (0)
258 | |||
259 | #ifdef EXTRA_SHIFT | ||
260 | 21924096 | static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) | |
261 | #else | ||
262 | 84597728 | static inline void FUNC6(idctSparseCol)(idctin *col) | |
263 | #endif | ||
264 | { | ||
265 | unsigned a0, a1, a2, a3, b0, b1, b2, b3; | ||
266 | |||
267 |
16/16✓ Branch 0 taken 17129675 times.
✓ Branch 1 taken 34953845 times.
✓ Branch 2 taken 15340586 times.
✓ Branch 3 taken 36742934 times.
✓ Branch 4 taken 12208678 times.
✓ Branch 5 taken 39874842 times.
✓ Branch 6 taken 10910348 times.
✓ Branch 7 taken 41173172 times.
✓ Branch 8 taken 8566104 times.
✓ Branch 9 taken 13357992 times.
✓ Branch 10 taken 8394116 times.
✓ Branch 11 taken 13529980 times.
✓ Branch 12 taken 6694259 times.
✓ Branch 13 taken 15229837 times.
✓ Branch 14 taken 5669425 times.
✓ Branch 15 taken 16254671 times.
|
106521824 | IDCT_COLS; |
268 | |||
269 | 106521824 | col[0 ] = ((int)(a0 + b0) >> COL_SHIFT); | |
270 | 106521824 | col[8 ] = ((int)(a1 + b1) >> COL_SHIFT); | |
271 | 106521824 | col[16] = ((int)(a2 + b2) >> COL_SHIFT); | |
272 | 106521824 | col[24] = ((int)(a3 + b3) >> COL_SHIFT); | |
273 | 106521824 | col[32] = ((int)(a3 - b3) >> COL_SHIFT); | |
274 | 106521824 | col[40] = ((int)(a2 - b2) >> COL_SHIFT); | |
275 | 106521824 | col[48] = ((int)(a1 - b1) >> COL_SHIFT); | |
276 | 106521824 | col[56] = ((int)(a0 - b0) >> COL_SHIFT); | |
277 | 106521824 | } | |
278 | |||
279 | #ifndef PRORES_ONLY | ||
280 | #ifndef EXTRA_SHIFT | ||
281 | 1117527008 | static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, | |
282 | idctin *col) | ||
283 | { | ||
284 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; | ||
285 | |||
286 |
8/8✓ Branch 0 taken 119008792 times.
✓ Branch 1 taken 439754712 times.
✓ Branch 2 taken 112751000 times.
✓ Branch 3 taken 446012504 times.
✓ Branch 4 taken 64421727 times.
✓ Branch 5 taken 494341777 times.
✓ Branch 6 taken 74543527 times.
✓ Branch 7 taken 484219977 times.
|
1117527008 | IDCT_COLS; |
287 | |||
288 | 1117527008 | dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT); | |
289 | 1117527008 | dest += line_size; | |
290 | 1117527008 | dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT); | |
291 | 1117527008 | dest += line_size; | |
292 | 1117527008 | dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT); | |
293 | 1117527008 | dest += line_size; | |
294 | 1117527008 | dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT); | |
295 | 1117527008 | dest += line_size; | |
296 | 1117527008 | dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT); | |
297 | 1117527008 | dest += line_size; | |
298 | 1117527008 | dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT); | |
299 | 1117527008 | dest += line_size; | |
300 | 1117527008 | dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT); | |
301 | 1117527008 | dest += line_size; | |
302 | 1117527008 | dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); | |
303 | 1117527008 | } | |
304 | |||
305 | 275013856 | static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, | |
306 | idctin *col) | ||
307 | { | ||
308 | unsigned a0, a1, a2, a3, b0, b1, b2, b3; | ||
309 | |||
310 |
8/8✓ Branch 0 taken 40263037 times.
✓ Branch 1 taken 97243891 times.
✓ Branch 2 taken 33919159 times.
✓ Branch 3 taken 103587769 times.
✓ Branch 4 taken 28101566 times.
✓ Branch 5 taken 109405362 times.
✓ Branch 6 taken 62707427 times.
✓ Branch 7 taken 74799501 times.
|
275013856 | IDCT_COLS; |
311 | |||
312 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT)); | |
313 | 275013856 | dest += line_size; | |
314 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT)); | |
315 | 275013856 | dest += line_size; | |
316 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT)); | |
317 | 275013856 | dest += line_size; | |
318 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT)); | |
319 | 275013856 | dest += line_size; | |
320 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT)); | |
321 | 275013856 | dest += line_size; | |
322 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT)); | |
323 | 275013856 | dest += line_size; | |
324 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT)); | |
325 | 275013856 | dest += line_size; | |
326 | 275013856 | dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT)); | |
327 | 275013856 | } | |
328 | |||
329 | 139690876 | void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_) | |
330 | { | ||
331 | 139690876 | idctin *block = (idctin *)block_; | |
332 | 139690876 | pixel *dest = (pixel *)dest_; | |
333 | int i; | ||
334 | |||
335 | 139690876 | line_size /= sizeof(pixel); | |
336 | |||
337 |
2/2✓ Branch 0 taken 558763504 times.
✓ Branch 1 taken 69845438 times.
|
1257217884 | for (i = 0; i < 8; i++) |
338 | 1117527008 | FUNC6(idctRowCondDC)(block + i*8, 0); | |
339 | |||
340 |
2/2✓ Branch 0 taken 558763504 times.
✓ Branch 1 taken 69845438 times.
|
1257217884 | for (i = 0; i < 8; i++) |
341 | 1117527008 | FUNC6(idctSparseColPut)(dest + i, line_size, block + i); | |
342 | 139690876 | } | |
343 | |||
344 | #if IN_IDCT_DEPTH == 16 | ||
345 | 34346776 | void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) | |
346 | { | ||
347 | 34346776 | pixel *dest = (pixel *)dest_; | |
348 | int i; | ||
349 | |||
350 | 34346776 | line_size /= sizeof(pixel); | |
351 | |||
352 |
2/2✓ Branch 0 taken 137387104 times.
✓ Branch 1 taken 17173388 times.
|
309120984 | for (i = 0; i < 8; i++) |
353 | 274774208 | FUNC6(idctRowCondDC)(block + i*8, 0); | |
354 | |||
355 |
2/2✓ Branch 0 taken 137387104 times.
✓ Branch 1 taken 17173388 times.
|
309120984 | for (i = 0; i < 8; i++) |
356 | 274774208 | FUNC6(idctSparseColAdd)(dest + i, line_size, block + i); | |
357 | 34346776 | } | |
358 | |||
359 | 8128552 | void FUNC6(ff_simple_idct)(int16_t *block) | |
360 | { | ||
361 | int i; | ||
362 | |||
363 |
2/2✓ Branch 0 taken 32514208 times.
✓ Branch 1 taken 4064276 times.
|
73156968 | for (i = 0; i < 8; i++) |
364 | 65028416 | FUNC6(idctRowCondDC)(block + i*8, 0); | |
365 | |||
366 |
2/2✓ Branch 0 taken 32514208 times.
✓ Branch 1 taken 4064276 times.
|
73156968 | for (i = 0; i < 8; i++) |
367 | 65028416 | FUNC6(idctSparseCol)(block + i); | |
368 | 8128552 | } | |
369 | #endif | ||
370 | #endif | ||
371 | #endif /* PRORES_ONLY */ | ||
372 |