Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Simple IDCT | ||
3 | * | ||
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @file | ||
25 | * simpleidct in C. | ||
26 | */ | ||
27 | |||
28 | #include "libavutil/intreadwrite.h" | ||
29 | #include "mathops.h" | ||
30 | #include "simple_idct.h" | ||
31 | |||
32 | #define IN_IDCT_DEPTH 16 /* first group: the template below is included three times with IN_IDCT_DEPTH 16, once per BIT_DEPTH */ | ||
33 | |||
34 | #define BIT_DEPTH 8 /* (IN_IDCT_DEPTH 16, BIT_DEPTH 8); idctRowCondDC_int16_8bit and idctSparseColAdd_int16_8bit, called further down, presumably come from this instantiation - confirm in simple_idct_template.c */ | ||
35 | #include "simple_idct_template.c" | ||
36 | #undef BIT_DEPTH /* cleared so the next instantiation can define a different value */ | ||
37 | |||
38 | #define BIT_DEPTH 10 /* (IN_IDCT_DEPTH 16, BIT_DEPTH 10) */ | ||
39 | #include "simple_idct_template.c" | ||
40 | #undef BIT_DEPTH | ||
41 | |||
42 | #define BIT_DEPTH 12 /* (IN_IDCT_DEPTH 16, BIT_DEPTH 12) */ | ||
43 | #include "simple_idct_template.c" | ||
44 | #undef BIT_DEPTH | ||
45 | #undef IN_IDCT_DEPTH | ||
46 | |||
47 | #define IN_IDCT_DEPTH 32 /* second group: a single instantiation with IN_IDCT_DEPTH 32 */ | ||
48 | #define BIT_DEPTH 10 /* (IN_IDCT_DEPTH 32, BIT_DEPTH 10) */ | ||
49 | #include "simple_idct_template.c" | ||
50 | #undef BIT_DEPTH | ||
51 | #undef IN_IDCT_DEPTH /* neither macro stays defined for the hand-written code that follows */ | ||
52 | |||
53 | /* 2x4x8 idct */ | ||
54 | |||
55 | #define CN_SHIFT 12 /* number of fractional bits carried by the C1 and C2 fixed-point constants */ | ||
56 | #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) /* scales x by 2^CN_SHIFT; adding 0.5 before the int cast rounds positive values to nearest */ | ||
57 | #define C1 C_FIX(0.6532814824) /* 0.6532814824 is approximately cos(pi/8) / sqrt(2) */ | ||
58 | #define C2 C_FIX(0.2705980501) /* 0.2705980501 is approximately sin(pi/8) / sqrt(2) */ | ||
59 | |||
60 | /* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, | ||
61 | and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ | ||
62 | #define C_SHIFT (4+1+12) /* 17: the right shift applied to every result in idct4col_put */ | ||
63 | |||
64 | 441216 | static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) /* 4-point column transform: reads col[0], col[16], col[32] and col[48], then stores four bytes at dest, dest + line_size, dest + 2*line_size and dest + 3*line_size, overwriting what was there */ | |
65 | { | ||
66 | int c0, c1, c2, c3, a0, a1, a2, a3; /* a0..a3: input coefficients; c0 and c2: even part including the rounding bias; c1 and c3: odd part */ | ||
67 | |||
68 | 441216 | a0 = col[8*0]; /* inputs are 16 elements apart, i.e. every second row when a row holds 8 coefficients */ | |
69 | 441216 | a1 = col[8*2]; | |
70 | 441216 | a2 = col[8*4]; | |
71 | 441216 | a3 = col[8*6]; | |
72 | 441216 | c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); /* "-" binds tighter than "<<", so this is 1 << (CN_SHIFT - 1), a multiply by 2^11; 1 << (C_SHIFT - 1) is half of the final divisor, so the later shift rounds to nearest */ | |
73 | 441216 | c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); /* same scaling and bias for the difference term */ | |
74 | 441216 | c1 = a1 * C1 + a3 * C2; /* odd part: a rotation of (a1, a3) by the C1/C2 pair */ | |
75 | 441216 | c3 = a1 * C2 - a3 * C1; | |
76 | 441216 | dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT); /* av_clip_uint8 is declared outside this file; presumably clamps to 0..255 - confirm in libavutil */ | |
77 | 441216 | dest += line_size; /* step to the next output line */ | |
78 | 441216 | dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT); | |
79 | 441216 | dest += line_size; | |
80 | 441216 | dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT); | |
81 | 441216 | dest += line_size; | |
82 | 441216 | dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT); /* outputs 2 and 3 mirror outputs 1 and 0 with the odd part negated */ | |
83 | 441216 | } | |
84 | |||
85 | #define BF(k) \ | ||
86 | {\ | ||
87 | int a0, a1; /* butterfly on element k of two adjacent 8-element rows; relies on a variable named ptr being in scope where the macro is expanded */\ | ||
88 | a0 = ptr[k];\ | ||
89 | a1 = ptr[8 + k];\ | ||
90 | ptr[k] = a0 + a1; /* first row receives the sum */\ | ||
91 | ptr[8 + k] = a0 - a1; /* second row receives the difference */\ | ||
92 | } | ||
93 | |||
94 | /* Only used by the DV codec. The input must be interlaced. 128 is added | ||
95 | to the pixels before clamping to avoid systematic error; a | ||
96 | (1024*sqrt(2)) offset would be needed otherwise. */ | ||
97 | /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | ||
98 | compensate for the extra butterfly stage - I don't have the full DV | ||
99 | specification */ | ||
100 | 27576 | void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* transforms the 64 coefficients in block (modified in place) and stores an 8x8 area of bytes at dest with a line stride of line_size; NOTE(review): the addition of 128 mentioned above is not visible in this function - confirm where it happens */ | |
101 | { | ||
102 | int i; | ||
103 | int16_t *ptr; | ||
104 | |||
105 | /* butterfly: replace each pair of adjacent rows (2i, 2i+1) by their element-wise sum and difference, in place */ | ||
106 | 27576 | ptr = block; | |
107 |
2/2✓ Branch 0 taken 110304 times.
✓ Branch 1 taken 27576 times.
|
137880 | for(i=0;i<4;i++) { /* one iteration per row pair; BF(0)..BF(7) cover the 8 elements of the pair */ |
108 | 110304 | BF(0); | |
109 | 110304 | BF(1); | |
110 | 110304 | BF(2); | |
111 | 110304 | BF(3); | |
112 | 110304 | BF(4); | |
113 | 110304 | BF(5); | |
114 | 110304 | BF(6); | |
115 | 110304 | BF(7); | |
116 | 110304 | ptr += 2 * 8; /* advance by two rows of 8 coefficients */ | |
117 | } | ||
118 | |||
119 | /* IDCT8 on each line: all 8 rows of block are passed to idctRowCondDC_int16_8bit, which is not defined in the visible part of this file (presumably generated by simple_idct_template.c); the meaning of its second argument 0 is not visible here */ | ||
120 |
2/2✓ Branch 0 taken 220608 times.
✓ Branch 1 taken 27576 times.
|
248184 | for(i=0; i<8; i++) { |
121 | 220608 | idctRowCondDC_int16_8bit(block + i*8, 0); | |
122 | } | ||
123 | |||
124 | /* IDCT4 and store: for each of the 8 columns, two 4-point column transforms write interleaved output lines, hence the doubled stride */ | ||
125 |
2/2✓ Branch 0 taken 220608 times.
✓ Branch 1 taken 27576 times.
|
248184 | for(i=0;i<8;i++) { |
126 | 220608 | idct4col_put(dest + i, 2 * line_size, block + i); /* block rows 0, 2, 4, 6 (the sums) produce output lines 0, 2, 4, 6 */ | |
127 | 220608 | idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i); /* block rows 1, 3, 5, 7 (the differences) produce output lines 1, 3, 5, 7 */ | |
128 | } | ||
129 | 27576 | } | |
130 | |||
131 | /* 8x4 & 4x8 WMV2 IDCT */ | ||
132 | #undef CN_SHIFT /* drop the constants of the 2x4x8 section before redefining them */ | ||
133 | #undef C_SHIFT | ||
134 | #undef C_FIX | ||
135 | #undef C1 | ||
136 | #undef C2 | ||
137 | #define CN_SHIFT 12 /* fractional bits of C1, C2 and C3 */ | ||
138 | #define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5)) /* unlike the C_FIX defined for the 2x4x8 idct, this one folds a factor of sqrt(2) into every constant */ | ||
139 | #define C1 C_FIX(0.6532814824) /* with the sqrt(2) factor this is approximately cos(pi/8) in fixed point */ | ||
140 | #define C2 C_FIX(0.2705980501) /* with the sqrt(2) factor this is approximately sin(pi/8) in fixed point */ | ||
141 | #define C3 C_FIX(0.5) /* approximately sqrt(2)/2 in fixed point; multiplies the even part in idct4col_add */ | ||
142 | #define C_SHIFT (4+1+12) /* 17: the right shift applied to every result in idct4col_add */ | ||
143 | 263504 | static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) /* 4-point column transform: reads col[0], col[8], col[16] and col[24], then adds the four results to the bytes already at dest, dest + line_size, dest + 2*line_size and dest + 3*line_size */ | |
144 | { | ||
145 | int c0, c1, c2, c3, a0, a1, a2, a3; /* a0..a3: input coefficients; c0 and c2: even part including the rounding bias; c1 and c3: odd part */ | ||
146 | |||
147 | 263504 | a0 = col[8*0]; /* inputs are 8 elements apart, i.e. four consecutive rows when a row holds 8 coefficients */ | |
148 | 263504 | a1 = col[8*1]; | |
149 | 263504 | a2 = col[8*2]; | |
150 | 263504 | a3 = col[8*3]; | |
151 | 263504 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); /* 1 << (C_SHIFT - 1) is half of the final divisor, so the later shift rounds to nearest */ | |
152 | 263504 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); | |
153 | 263504 | c1 = a1 * C1 + a3 * C2; /* odd part: a rotation of (a1, a3) by the C1/C2 pair */ | |
154 | 263504 | c3 = a1 * C2 - a3 * C1; | |
155 | 263504 | dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); /* residual is added to the existing byte; av_clip_uint8 is declared outside this file and presumably clamps to 0..255 - confirm in libavutil */ | |
156 | 263504 | dest += line_size; /* step to the next output line */ | |
157 | 263504 | dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); | |
158 | 263504 | dest += line_size; | |
159 | 263504 | dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); | |
160 | 263504 | dest += line_size; | |
161 | 263504 | dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); | |
162 | 263504 | } | |
163 | |||
164 | #define RN_SHIFT 15 /* fractional bits of R1, R2 and R3 */ | ||
165 | #define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5)) /* scales x by sqrt(2) * 2^RN_SHIFT; adding 0.5 before the int cast rounds positive values to nearest */ | ||
166 | #define R1 R_FIX(0.6532814824) /* with the sqrt(2) factor this is approximately cos(pi/8) in fixed point */ | ||
167 | #define R2 R_FIX(0.2705980501) /* with the sqrt(2) factor this is approximately sin(pi/8) in fixed point */ | ||
168 | #define R3 R_FIX(0.5) /* approximately sqrt(2)/2 in fixed point */ | ||
169 | #define R_SHIFT 11 /* right shift applied in idct4row; it is 4 less than RN_SHIFT, so the row results keep an extra factor of 2^4 */ | ||
170 | 239648 | static inline void idct4row(int16_t *row) /* in-place 4-point row transform: reads row[0]..row[3] and overwrites the same four elements */ | |
171 | { | ||
172 | unsigned c0, c1, c2, c3; /* unsigned accumulators: presumably chosen so that the sums and differences below wrap instead of overflowing a signed int - TODO confirm intent */ | ||
173 | int a0, a1, a2, a3; | ||
174 | |||
175 | 239648 | a0 = row[0]; | |
176 | 239648 | a1 = row[1]; | |
177 | 239648 | a2 = row[2]; | |
178 | 239648 | a3 = row[3]; | |
179 | 239648 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); /* even part; 1 << (R_SHIFT - 1) is the rounding bias for the final shift */ | |
180 | 239648 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); | |
181 | 239648 | c1 = a1 * R1 + a3 * R2; /* odd part: a rotation of (a1, a3) by the R1/R2 pair, computed in int and then stored as unsigned */ | |
182 | 239648 | c3 = a1 * R2 - a3 * R1; | |
183 | 239648 | row[0]= (c0 + c1) >> R_SHIFT; /* the shift operates on an unsigned value; the result is converted to int16_t by the assignment */ | |
184 | 239648 | row[1]= (c2 + c3) >> R_SHIFT; | |
185 | 239648 | row[2]= (c2 - c3) >> R_SHIFT; | |
186 | 239648 | row[3]= (c0 - c1) >> R_SHIFT; | |
187 | 239648 | } | |
188 | |||
189 | 32938 | void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* 8 wide by 4 high inverse transform: modifies the first 4 rows of block in place and adds the result to 8 columns by 4 lines of bytes at dest */ | |
190 | { | ||
191 | int i; | ||
192 | |||
193 | /* IDCT8 on each line: only the first 4 rows of block are processed; idctRowCondDC_int16_8bit is not defined in the visible part of this file (presumably generated by simple_idct_template.c) */ | ||
194 |
2/2✓ Branch 0 taken 131752 times.
✓ Branch 1 taken 32938 times.
|
164690 | for(i=0; i<4; i++) { |
195 | 131752 | idctRowCondDC_int16_8bit(block + i*8, 0); | |
196 | } | ||
197 | |||
198 | /* IDCT4 and store: one 4-point column transform per column, each adding to 4 lines of dest */ | ||
199 |
2/2✓ Branch 0 taken 263504 times.
✓ Branch 1 taken 32938 times.
|
296442 | for(i=0;i<8;i++) { |
200 | 263504 | idct4col_add(dest + i, line_size, block + i); | |
201 | } | ||
202 | 32938 | } | |
203 | |||
204 | 29956 | void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* 4 wide by 8 high inverse transform: modifies block in place and hands 4 columns to a column routine that receives dest and line_size */ | |
205 | { | ||
206 | int i; | ||
207 | |||
208 | /* IDCT4 on each line: the first 4 coefficients of each of the 8 rows are transformed in place */ | ||
209 |
2/2✓ Branch 0 taken 239648 times.
✓ Branch 1 taken 29956 times.
|
269604 | for(i=0; i<8; i++) { |
210 | 239648 | idct4row(block + i*8); | |
211 | } | ||
212 | |||
213 | /* IDCT8 and store: idctSparseColAdd_int16_8bit is not defined in the visible part of this file; presumably an 8-point column transform that adds into dest - confirm in simple_idct_template.c */ | ||
214 |
2/2✓ Branch 0 taken 119824 times.
✓ Branch 1 taken 29956 times.
|
149780 | for(i=0; i<4; i++){ |
215 | 119824 | idctSparseColAdd_int16_8bit(dest + i, line_size, block + i); | |
216 | } | ||
217 | 29956 | } | |
218 | |||
219 | ✗ | void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* 4 by 4 inverse transform: modifies the first 4 coefficients of the first 4 rows of block in place and adds the result to 4 columns by 4 lines of bytes at dest */ | |
220 | { | ||
221 | int i; | ||
222 | |||
223 | /* IDCT4 on each line: rows are 8 coefficients apart, only the first 4 rows are processed */ | ||
224 | ✗ | for(i=0; i<4; i++) { | |
225 | ✗ | idct4row(block + i*8); | |
226 | } | ||
227 | |||
228 | /* IDCT4 and store: one 4-point column transform per column, each adding to 4 lines of dest */ | ||
229 | ✗ | for(i=0; i<4; i++){ | |
230 | ✗ | idct4col_add(dest + i, line_size, block + i); | |
231 | } | ||
232 | ✗ | } | |
233 |