Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Simple IDCT | ||
3 | * | ||
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @file | ||
25 | * simpleidct in C. | ||
26 | */ | ||
27 | |||
28 | /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c | ||
29 | * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */ | ||
30 | |||
31 | #include "simple_idct.h" | ||
32 | |||
33 | #include "bit_depth_template.c" | ||
34 | |||
35 | #undef W1 /* W1..W7, the shifts and MUL/MAC are cleared first so they can be redefined below for the current BIT_DEPTH */ | ||
36 | #undef W2 | ||
37 | #undef W3 | ||
38 | #undef W4 | ||
39 | #undef W5 | ||
40 | #undef W6 | ||
41 | #undef W7 | ||
42 | #undef ROW_SHIFT | ||
43 | #undef COL_SHIFT | ||
44 | #undef DC_SHIFT | ||
45 | #undef MUL | ||
46 | #undef MAC | ||
47 | |||
48 | #if BIT_DEPTH == 8 /* 8-bit output: coefficients at (1<<14) scale */ | ||
49 | |||
50 | #define W1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
51 | #define W2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
52 | #define W3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
53 | #define W4 16383 //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 evaluates to 16384; 16383 is the value defined here (reason not stated in this file) | ||
54 | #define W5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
55 | #define W6 8867 //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
56 | #define W7 4520 //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | ||
57 | |||
58 | #define ROW_SHIFT 11 /* right shift applied to row-pass results (extra_shift is added to it) */ | ||
59 | #define COL_SHIFT 20 /* right shift applied to column-pass results */ | ||
60 | #define DC_SHIFT 3 /* scaling used by the DC-only shortcut of the row pass */ | ||
61 | |||
62 | #define MUL(a, b) MUL16(a, b) /* MUL16 is not defined in this file */ | ||
63 | #define MAC(a, b, c) MAC16(a, b, c) /* MAC16 is not defined in this file */ | ||
64 | |||
65 | #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 | ||
66 | |||
67 | # if BIT_DEPTH == 10 /* trailing numbers are roughly four times the defined coefficient */ | ||
68 | #define W1 22725 // 90901 | ||
69 | #define W2 21407 // 85627 | ||
70 | #define W3 19265 // 77062 | ||
71 | #define W4 16384 // 65535 | ||
72 | #define W5 12873 // 51491 | ||
73 | #define W6 8867 // 35468 | ||
74 | #define W7 4520 // 18081 | ||
75 | |||
76 | # ifdef EXTRA_SHIFT /* the shift set depends on EXTRA_SHIFT and on the input coefficient width */ | ||
77 | #define ROW_SHIFT 13 | ||
78 | #define COL_SHIFT 18 | ||
79 | #define DC_SHIFT 1 | ||
80 | # elif IN_IDCT_DEPTH == 32 | ||
81 | #define ROW_SHIFT 13 | ||
82 | #define COL_SHIFT 21 | ||
83 | #define DC_SHIFT 2 | ||
84 | # else | ||
85 | #define ROW_SHIFT 12 | ||
86 | #define COL_SHIFT 19 | ||
87 | #define DC_SHIFT 2 | ||
88 | # endif | ||
89 | |||
90 | # else /* BIT_DEPTH == 12: coefficients are about twice the 8-bit ones */ | ||
91 | #define W1 45451 | ||
92 | #define W2 42813 | ||
93 | #define W3 38531 | ||
94 | #define W4 32767 | ||
95 | #define W5 25746 | ||
96 | #define W6 17734 | ||
97 | #define W7 9041 | ||
98 | |||
99 | #define ROW_SHIFT 16 | ||
100 | #define COL_SHIFT 17 | ||
101 | #define DC_SHIFT -1 /* negative: for extra_shift >= 0 the DC shortcut takes its rounding right-shift branch */ | ||
102 | # endif | ||
103 | |||
104 | #define MUL(a, b) ((int)((SUINT)(a) * (b))) /* product is formed in SUINT, then converted to int */ | ||
105 | #define MAC(a, b, c) ((a) += (SUINT)(b) * (c)) /* accumulates b*c into a, product formed in SUINT */ | ||
106 | |||
107 | #else | ||
108 | |||
109 | #error "Unsupported bitdepth" | ||
110 | |||
111 | #endif | ||
112 | |||
113 | #ifdef EXTRA_SHIFT /* only the signature differs between the two variants; the body below is shared */ | ||
114 | 21924096 | static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) | |
115 | #else | ||
116 | 1496646448 | static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) | |
117 | #endif | ||
118 | { /* Row pass of the IDCT: transforms the 8 coefficients row[0..7] in place; results are shifted right by ROW_SHIFT + extra_shift. */ | ||
119 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; /* a*: sums over even-index inputs, b*: sums over odd-index inputs */ | ||
120 | |||
121 | // TODO: Add DC-only support for int32_t input | ||
122 | #if IN_IDCT_DEPTH == 16 /* DC-only shortcut, compiled for 16-bit input only */ | ||
123 | #if HAVE_FAST_64BIT | ||
124 | #define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN) /* presumably the 16 bits of the first 64-bit word that hold row[0] - confirm against AV_RN64A */ | ||
125 |
2/2✓ Branch 0 taken 561035432 times.
✓ Branch 1 taken 209169200 times.
|
1518485168 | if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { /* every bit outside ROW0_MASK is zero in both words */ |
126 | uint64_t temp; | ||
127 |
2/2✓ Branch 0 taken 536244348 times.
✓ Branch 1 taken 24791084 times.
|
1109005945 | if (DC_SHIFT - extra_shift >= 0) { /* sign of the net shift picks scale-up or rounded shift-down */ |
128 | 1072488696 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; /* scale up by multiplication, keep low 16 bits */ | |
129 | } else { | ||
130 | 36517249 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; /* add half the divisor, then shift right */ | |
131 | } | ||
132 | 1109005945 | temp += temp * (1 << 16); /* copy the 16-bit value into bits 16..31 */ | |
133 | 1109005945 | temp += temp * ((uint64_t) 1 << 32); /* copy the low 32 bits into the high 32 bits */ | |
134 | 1109005945 | AV_WN64A(row, temp); /* both stores write the same replicated value */ | |
135 | 1109005945 | AV_WN64A(row + 4, temp); | |
136 | 1109005945 | return; | |
137 | } | ||
138 | #else | ||
139 | if (!(AV_RN32A(row+2) | /* same shortcut using 32-bit loads plus an explicit row[1] test */ | ||
140 | AV_RN32A(row+4) | | ||
141 | AV_RN32A(row+6) | | ||
142 | row[1])) { | ||
143 | uint32_t temp; | ||
144 | if (DC_SHIFT - extra_shift >= 0) { | ||
145 | temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; | ||
146 | } else { | ||
147 | temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; | ||
148 | } | ||
149 | temp += temp * (1 << 16); /* copy the 16-bit value into the upper half */ | ||
150 | AV_WN32A(row, temp); | ||
151 | AV_WN32A(row+2, temp); | ||
152 | AV_WN32A(row+4, temp); | ||
153 | AV_WN32A(row+6, temp); | ||
154 | return; | ||
155 | } | ||
156 | #endif | ||
157 | #endif | ||
158 | |||
159 | 409564599 | a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); /* W4*row[0] plus half of the final divisor, for rounding */ | |
160 | 409564599 | a1 = a0; | |
161 | 409564599 | a2 = a0; | |
162 | 409564599 | a3 = a0; | |
163 | |||
164 | 409564599 | a0 += (SUINT)W2 * row[2]; | |
165 | 409564599 | a1 += (SUINT)W6 * row[2]; | |
166 | 409564599 | a2 -= (SUINT)W6 * row[2]; | |
167 | 409564599 | a3 -= (SUINT)W2 * row[2]; | |
168 | |||
169 | 409564599 | b0 = MUL(W1, row[1]); | |
170 | 409564599 | MAC(b0, W3, row[3]); | |
171 | 409564599 | b1 = MUL(W3, row[1]); | |
172 | 409564599 | MAC(b1, -W7, row[3]); | |
173 | 409564599 | b2 = MUL(W5, row[1]); | |
174 | 409564599 | MAC(b2, -W1, row[3]); | |
175 | 409564599 | b3 = MUL(W7, row[1]); | |
176 | 409564599 | MAC(b3, -W5, row[3]); | |
177 | |||
178 | #if IN_IDCT_DEPTH == 32 /* the row[4..7] terms are added only when the loads below are non-zero */ | ||
179 |
2/2✓ Branch 0 taken 4223 times.
✓ Branch 1 taken 38465 times.
|
85376 | if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) { |
180 | #else | ||
181 |
2/2✓ Branch 0 taken 88828639 times.
✓ Branch 1 taken 120340561 times.
|
409479223 | if (AV_RN64A(row + 4)) { |
182 | #endif | ||
183 | 170954842 | a0 += (SUINT) W4*row[4] + (SUINT)W6*row[6]; | |
184 | 170954842 | a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6]; | |
185 | 170954842 | a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6]; | |
186 | 170954842 | a3 += (SUINT) W4*row[4] - (SUINT)W6*row[6]; | |
187 | |||
188 | 170954842 | MAC(b0, W5, row[5]); | |
189 | 170954842 | MAC(b0, W7, row[7]); | |
190 | |||
191 | 170954842 | MAC(b1, -W1, row[5]); | |
192 | 170954842 | MAC(b1, -W5, row[7]); | |
193 | |||
194 | 170954842 | MAC(b2, W7, row[5]); | |
195 | 170954842 | MAC(b2, W3, row[7]); | |
196 | |||
197 | 170954842 | MAC(b3, W3, row[5]); | |
198 | 170954842 | MAC(b3, -W1, row[7]); | |
199 | } | ||
200 | |||
201 | 409564599 | row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift); /* outputs k and 7-k are the sum and difference of the same a/b pair */ | |
202 | 409564599 | row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift); | |
203 | 409564599 | row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift); | |
204 | 409564599 | row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift); | |
205 | 409564599 | row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift); | |
206 | 409564599 | row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift); | |
207 | 409564599 | row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift); | |
208 | 409564599 | row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift); | |
209 | 85376 | } | |
210 | |||
211 | #define IDCT_COLS do { /* Column pass shared by the idctSparseCol functions: fills a0..a3 and b0..b3 from col[8*0]..col[8*7]; those names must be declared where the macro is expanded. */ \ | ||
212 | a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); /* rounding bias is folded into the first input before the W4 multiply */ \ | ||
213 | a1 = a0; \ | ||
214 | a2 = a0; \ | ||
215 | a3 = a0; \ | ||
216 | \ | ||
217 | a0 += (SUINT) W2*col[8*2]; \ | ||
218 | a1 += (SUINT) W6*col[8*2]; \ | ||
219 | a2 += (SUINT)-W6*col[8*2]; \ | ||
220 | a3 += (SUINT)-W2*col[8*2]; \ | ||
221 | \ | ||
222 | b0 = MUL(W1, col[8*1]); \ | ||
223 | b1 = MUL(W3, col[8*1]); \ | ||
224 | b2 = MUL(W5, col[8*1]); \ | ||
225 | b3 = MUL(W7, col[8*1]); \ | ||
226 | \ | ||
227 | MAC(b0, W3, col[8*3]); \ | ||
228 | MAC(b1, -W7, col[8*3]); \ | ||
229 | MAC(b2, -W1, col[8*3]); \ | ||
230 | MAC(b3, -W5, col[8*3]); \ | ||
231 | \ | ||
232 | if (col[8*4]) { /* inputs 4..7 are each tested, and a zero input contributes nothing */ \ | ||
233 | a0 += (SUINT) W4*col[8*4]; \ | ||
234 | a1 += (SUINT)-W4*col[8*4]; \ | ||
235 | a2 += (SUINT)-W4*col[8*4]; \ | ||
236 | a3 += (SUINT) W4*col[8*4]; \ | ||
237 | } \ | ||
238 | \ | ||
239 | if (col[8*5]) { \ | ||
240 | MAC(b0, W5, col[8*5]); \ | ||
241 | MAC(b1, -W1, col[8*5]); \ | ||
242 | MAC(b2, W7, col[8*5]); \ | ||
243 | MAC(b3, W3, col[8*5]); \ | ||
244 | } \ | ||
245 | \ | ||
246 | if (col[8*6]) { \ | ||
247 | a0 += (SUINT) W6*col[8*6]; \ | ||
248 | a1 += (SUINT)-W2*col[8*6]; \ | ||
249 | a2 += (SUINT) W2*col[8*6]; \ | ||
250 | a3 += (SUINT)-W6*col[8*6]; \ | ||
251 | } \ | ||
252 | \ | ||
253 | if (col[8*7]) { \ | ||
254 | MAC(b0, W7, col[8*7]); \ | ||
255 | MAC(b1, -W5, col[8*7]); \ | ||
256 | MAC(b2, W3, col[8*7]); \ | ||
257 | MAC(b3, -W1, col[8*7]); \ | ||
258 | } \ | ||
259 | } while (0) | ||
260 | |||
261 | #ifdef EXTRA_SHIFT | ||
262 | 21924096 | static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) | |
263 | #else | ||
264 | 1117693120 | static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, | |
265 | idctin *col) | ||
266 | { /* Column pass with store: transforms the 8 inputs col[8*k] and writes the 8 clipped results to dest, advancing dest by line_size elements after each one. */ | ||
267 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; /* filled in by IDCT_COLS */ | ||
268 | |||
269 |
8/8✓ Branch 0 taken 118996222 times.
✓ Branch 1 taken 439850338 times.
✓ Branch 2 taken 112744572 times.
✓ Branch 3 taken 446101988 times.
✓ Branch 4 taken 64429225 times.
✓ Branch 5 taken 494417335 times.
✓ Branch 6 taken 74332948 times.
✓ Branch 7 taken 484513612 times.
|
1117693120 | IDCT_COLS; |
270 | |||
271 | 1117693120 | dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT); /* outputs 0..3 use a+b, outputs 4..7 use a-b in reverse order; av_clip_pixel is defined outside this file */ | |
272 | 1117693120 | dest += line_size; | |
273 | 1117693120 | dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT); | |
274 | 1117693120 | dest += line_size; | |
275 | 1117693120 | dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT); | |
276 | 1117693120 | dest += line_size; | |
277 | 1117693120 | dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT); | |
278 | 1117693120 | dest += line_size; | |
279 | 1117693120 | dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT); | |
280 | 1117693120 | dest += line_size; | |
281 | 1117693120 | dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT); | |
282 | 1117693120 | dest += line_size; | |
283 | 1117693120 | dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT); | |
284 | 1117693120 | dest += line_size; | |
285 | 1117693120 | dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); | |
286 | 1117693120 | } | |
287 | |||
288 | 274321216 | static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, | |
289 | idctin *col) | ||
290 | { /* Column pass with accumulate: transforms the 8 inputs col[8*k], adds each shifted result to the value already in dest, clips, and stores it back; dest advances by line_size elements per output. */ | ||
291 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; /* SUINT, matching idctSparseColPut and the (SUINT) casts inside IDCT_COLS */ | ||
292 | |||
293 |
8/8✓ Branch 0 taken 40014846 times.
✓ Branch 1 taken 97145762 times.
✓ Branch 2 taken 33690674 times.
✓ Branch 3 taken 103469934 times.
✓ Branch 4 taken 27911094 times.
✓ Branch 5 taken 109249514 times.
✓ Branch 6 taken 62171417 times.
✓ Branch 7 taken 74989191 times.
|
274321216 | IDCT_COLS; |
294 | |||
295 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT)); /* outputs 0..3 use a+b, outputs 4..7 use a-b in reverse order */ | |
296 | 274321216 | dest += line_size; | |
297 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT)); | |
298 | 274321216 | dest += line_size; | |
299 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT)); | |
300 | 274321216 | dest += line_size; | |
301 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT)); | |
302 | 274321216 | dest += line_size; | |
303 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT)); | |
304 | 274321216 | dest += line_size; | |
305 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT)); | |
306 | 274321216 | dest += line_size; | |
307 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT)); | |
308 | 274321216 | dest += line_size; | |
309 | 274321216 | dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT)); | |
310 | 274321216 | } | |
311 | |||
312 | 104167040 | static inline void FUNC6(idctSparseCol)(idctin *col) | |
313 | #endif | ||
314 | { /* In-place column pass: transforms col[0], col[8], ..., col[56] and writes the shifted results back to the same positions without clipping. When EXTRA_SHIFT is defined this body belongs to idctSparseCol_extrashift instead. */ | ||
315 | SUINT a0, a1, a2, a3, b0, b1, b2, b3; /* SUINT, matching idctSparseColPut and the (SUINT) casts inside IDCT_COLS */ | ||
316 | |||
317 |
8/8✓ Branch 0 taken 25695779 times.
✓ Branch 1 taken 48311837 times.
✓ Branch 2 taken 23734702 times.
✓ Branch 3 taken 50272914 times.
✓ Branch 4 taken 18902937 times.
✓ Branch 5 taken 55104679 times.
✓ Branch 6 taken 16579773 times.
✓ Branch 7 taken 57427843 times.
|
126091136 | IDCT_COLS; |
318 | |||
319 | 126091136 | col[0 ] = ((int)(a0 + b0) >> COL_SHIFT); /* outputs 0..3 use a+b, outputs 4..7 use a-b in reverse order */ | |
320 | 126091136 | col[8 ] = ((int)(a1 + b1) >> COL_SHIFT); | |
321 | 126091136 | col[16] = ((int)(a2 + b2) >> COL_SHIFT); | |
322 | 126091136 | col[24] = ((int)(a3 + b3) >> COL_SHIFT); | |
323 | 126091136 | col[32] = ((int)(a3 - b3) >> COL_SHIFT); | |
324 | 126091136 | col[40] = ((int)(a2 - b2) >> COL_SHIFT); | |
325 | 126091136 | col[48] = ((int)(a1 - b1) >> COL_SHIFT); | |
326 | 126091136 | col[56] = ((int)(a0 - b0) >> COL_SHIFT); | |
327 | 126091136 | } | |
328 | |||
329 | #ifndef EXTRA_SHIFT | ||
330 | 139711640 | void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_) | |
331 | { /* Transforms an 8x8 coefficient block and stores clipped pixels to dest_: 8 row passes over block_, then 8 column passes that write to dest. */ | ||
332 | 139711640 | idctin *block = (idctin *)block_; /* coefficients are accessed as idctin, whatever width that type has */ | |
333 | 139711640 | pixel *dest = (pixel *)dest_; | |
334 | int i; | ||
335 | |||
336 | 139711640 | line_size /= sizeof(pixel); /* converts the stride to pixel units; assumes the caller passes it in bytes - TODO confirm */ | |
337 | |||
338 |
2/2✓ Branch 0 taken 558846560 times.
✓ Branch 1 taken 69855820 times.
|
1257404760 | for (i = 0; i < 8; i++) |
339 | 1117693120 | FUNC6(idctRowCondDC)(block + i*8, 0); /* row i starts at block + 8*i; no extra shift */ | |
340 | |||
341 |
2/2✓ Branch 0 taken 558846560 times.
✓ Branch 1 taken 69855820 times.
|
1257404760 | for (i = 0; i < 8; i++) |
342 | 1117693120 | FUNC6(idctSparseColPut)(dest + i, line_size, block + i); /* column i of the block goes to column i of dest */ | |
343 | 139711640 | } | |
344 | |||
345 | #if IN_IDCT_DEPTH == 16 | ||
346 | 34260196 | void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) | |
347 | { /* Transforms an 8x8 coefficient block and adds the result to the pixels already in dest_: 8 row passes over block, then 8 column passes that accumulate into dest with clipping. */ | ||
348 | 34260196 | pixel *dest = (pixel *)dest_; | |
349 | int i; | ||
350 | |||
351 | 34260196 | line_size /= sizeof(pixel); /* converts the stride to pixel units; assumes the caller passes it in bytes - TODO confirm */ | |
352 | |||
353 |
2/2✓ Branch 0 taken 137040784 times.
✓ Branch 1 taken 17130098 times.
|
308341764 | for (i = 0; i < 8; i++) |
354 | 274081568 | FUNC6(idctRowCondDC)(block + i*8, 0); /* row i starts at block + 8*i; no extra shift */ | |
355 | |||
356 |
2/2✓ Branch 0 taken 137040784 times.
✓ Branch 1 taken 17130098 times.
|
308341764 | for (i = 0; i < 8; i++) |
357 | 274081568 | FUNC6(idctSparseColAdd)(dest + i, line_size, block + i); /* column i of the block is added to column i of dest */ | |
358 | 34260196 | } | |
359 | |||
360 | 8128552 | void FUNC6(ff_simple_idct)(int16_t *block) | |
361 | { /* In-place transform of an 8x8 coefficient block: 8 row passes followed by 8 column passes, all results written back into block. */ | ||
362 | int i; | ||
363 | |||
364 |
2/2✓ Branch 0 taken 32514208 times.
✓ Branch 1 taken 4064276 times.
|
73156968 | for (i = 0; i < 8; i++) |
365 | 65028416 | FUNC6(idctRowCondDC)(block + i*8, 0); /* row i starts at block + 8*i; no extra shift */ | |
366 | |||
367 |
2/2✓ Branch 0 taken 32514208 times.
✓ Branch 1 taken 4064276 times.
|
73156968 | for (i = 0; i < 8; i++) |
368 | 65028416 | FUNC6(idctSparseCol)(block + i); /* column i starts at block + i, elements 8 apart */ | |
369 | 8128552 | } | |
370 | #endif | ||
371 | #endif | ||
372 |