Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * (c) 2001 Fabrice Bellard | ||
3 | * 2007 Marc Hoffman <marc.hoffman@analog.com> | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | /** | ||
23 | * @file | ||
24 | * DCT test (c) 2001 Fabrice Bellard | ||
25 | * Started from sample code by Juan J. Sierralta P. | ||
26 | */ | ||
27 | |||
28 | #include "config.h" | ||
29 | #include "config_components.h" | ||
30 | #include <stdlib.h> | ||
31 | #include <stdio.h> | ||
32 | #include <string.h> | ||
33 | #if HAVE_UNISTD_H | ||
34 | #include <unistd.h> | ||
35 | #endif | ||
36 | #include <math.h> | ||
37 | |||
38 | #include "libavutil/cpu.h" | ||
39 | #include "libavutil/common.h" | ||
40 | #include "libavutil/emms.h" | ||
41 | #include "libavutil/internal.h" | ||
42 | #include "libavutil/lfg.h" | ||
43 | #include "libavutil/mem_internal.h" | ||
44 | #include "libavutil/time.h" | ||
45 | |||
46 | #include "libavcodec/dct.h" | ||
47 | #include "libavcodec/fdctdsp.h" | ||
48 | #include "libavcodec/idctdsp.h" | ||
49 | #include "libavcodec/simple_idct.h" | ||
50 | #include "libavcodec/xvididct.h" | ||
51 | #include "libavcodec/aandcttab.h" | ||
52 | #include "libavcodec/faandct.h" | ||
53 | #include "libavcodec/faanidct.h" | ||
54 | #include "libavcodec/dctref.h" | ||
55 | |||
/**
 * Description of one (I)DCT implementation under test.
 */
struct algo {
    const char *name;                     /* label printed with the results; dct_error() also special-cases some names */
    void (*func)(int16_t *block);         /* transform, applied in place to a 64-coefficient block */
    enum idct_permutation_type perm_type; /* coefficient ordering handed to permute() before func is called */
    int cpu_flag;                         /* CPU flag bits that must all be set in av_get_cpu_flags() for an arch entry to run */
    int nonspec;                          /* nonzero: exceeding the accuracy limits is not reported as a failure */
};
63 | |||
/*
 * Forward DCT implementations tested on every architecture.
 * The trailing fields left out of each initializer (cpu_flag, nonspec) are zero.
 */
static const struct algo fdct_tab[] = {
    { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
    { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
    { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
#if CONFIG_FAANDCT
    { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
#endif /* CONFIG_FAANDCT */
};
72 | |||
73 | 60000 | static void ff_prores_idct_wrap(int16_t *dst){ | |
74 | 60000 | LOCAL_ALIGNED(16, int16_t, qmat, [64]); | |
75 | int i; | ||
76 | |||
77 |
2/2✓ Branch 0 taken 3840000 times.
✓ Branch 1 taken 60000 times.
|
3900000 | for(i=0; i<64; i++){ |
78 | 3840000 | qmat[i]=4; | |
79 | } | ||
80 | 60000 | ff_prores_idct_10(dst, qmat); | |
81 |
2/2✓ Branch 0 taken 3840000 times.
✓ Branch 1 taken 60000 times.
|
3900000 | for(i=0; i<64; i++) { |
82 | 3840000 | dst[i] -= 512; | |
83 | } | ||
84 | 60000 | } | |
85 | |||
/*
 * Inverse DCT implementations tested on every architecture.
 * Entries whose last field (nonspec) is 1 are still measured and printed,
 * but dct_error() does not report them as failed when they exceed the
 * accuracy limits.
 */
static const struct algo idct_tab[] = {
    { "REF-DBL",    ff_ref_idct,                FF_IDCT_PERM_NONE },
    { "INT",        ff_j_rev_dct,               FF_IDCT_PERM_LIBMPEG2 },
    { "SIMPLE-C",   ff_simple_idct_int16_8bit,  FF_IDCT_PERM_NONE },
    { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE },
    { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 },
    { "PR-C",       ff_prores_idct_wrap,        FF_IDCT_PERM_NONE, 0, 1 },
#if CONFIG_FAANIDCT
    { "FAANI",      ff_faanidct,                FF_IDCT_PERM_NONE },
#endif /* CONFIG_FAANIDCT */
#if CONFIG_MPEG4_DECODER
    { "XVID",       ff_xvid_idct,               FF_IDCT_PERM_NONE, 0, 1 },
#endif /* CONFIG_MPEG4_DECODER */
};
100 | |||
/*
 * Architecture-specific tables. main() walks fdct_tab_arch / idct_tab_arch
 * until it reaches an entry whose name is NULL; the per-architecture files
 * included here are expected to provide both tables (not visible from this
 * file). Without a matching architecture each table is just the zeroed
 * terminator entry.
 */
#if ARCH_AARCH64
#include "aarch64/dct.c"
#elif ARCH_ARM
#include "arm/dct.c"
#elif ARCH_PPC
#include "ppc/dct.c"
#elif ARCH_X86
#include "x86/dct.c"
#else
static const struct algo fdct_tab_arch[] = { { 0 } };
static const struct algo idct_tab_arch[] = { { 0 } };
#endif
113 | |||
/* Fixed-point shift used by dct_error() when rescaling the "IJG-AAN-INT" output. */
#define AANSCALE_BITS 12

/* Number of generated blocks per accuracy test. */
#define NB_ITS 20000
/* Number of transform calls between two clock reads in the speed tests. */
#define NB_ITS_SPEED 50000

/* Shared 64-coefficient work buffers used by dct_error() and idct248_error(). */
DECLARE_ALIGNED(16, static int16_t, block)[64];
DECLARE_ALIGNED(8, static int16_t, block1)[64];
121 | |||
122 | 1120000 | static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals) | |
123 | { | ||
124 | int i, j; | ||
125 | |||
126 | 1120000 | memset(block, 0, 64 * sizeof(*block)); | |
127 | |||
128 |
3/4✓ Branch 0 taken 340000 times.
✓ Branch 1 taken 440000 times.
✓ Branch 2 taken 340000 times.
✗ Branch 3 not taken.
|
1120000 | switch (test) { |
129 | 340000 | case 0: | |
130 |
2/2✓ Branch 0 taken 21760000 times.
✓ Branch 1 taken 340000 times.
|
22100000 | for (i = 0; i < 64; i++) |
131 | 21760000 | block[i] = (av_lfg_get(prng) % (2*vals)) -vals; | |
132 |
1/2✓ Branch 0 taken 340000 times.
✗ Branch 1 not taken.
|
340000 | if (is_idct) { |
133 | 340000 | ff_ref_fdct(block); | |
134 |
2/2✓ Branch 0 taken 21760000 times.
✓ Branch 1 taken 340000 times.
|
22100000 | for (i = 0; i < 64; i++) |
135 | 21760000 | block[i] >>= 3; | |
136 | } | ||
137 | 340000 | break; | |
138 | 440000 | case 1: | |
139 | 440000 | j = av_lfg_get(prng) % 10 + 1; | |
140 |
2/2✓ Branch 0 taken 2413686 times.
✓ Branch 1 taken 440000 times.
|
2853686 | for (i = 0; i < j; i++) { |
141 | 2413686 | int idx = av_lfg_get(prng) % 64; | |
142 | 2413686 | block[idx] = av_lfg_get(prng) % (2*vals) -vals; | |
143 | } | ||
144 | 440000 | break; | |
145 | 340000 | case 2: | |
146 | 340000 | block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals); | |
147 | 340000 | block[63] = (block[0] & 1) ^ 1; | |
148 | 340000 | break; | |
149 | } | ||
150 | 1120000 | } | |
151 | |||
152 | 1120000 | static void permute(int16_t dst[64], const int16_t src[64], | |
153 | enum idct_permutation_type perm_type) | ||
154 | { | ||
155 | int i; | ||
156 | |||
157 | #if ARCH_X86 | ||
158 |
2/2✓ Branch 1 taken 60000 times.
✓ Branch 2 taken 1060000 times.
|
1120000 | if (permute_x86(dst, src, perm_type)) |
159 | 60000 | return; | |
160 | #endif | ||
161 | |||
162 |
3/4✓ Branch 0 taken 60000 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 480000 times.
✓ Branch 3 taken 520000 times.
|
1060000 | switch (perm_type) { |
163 | 60000 | case FF_IDCT_PERM_LIBMPEG2: | |
164 |
2/2✓ Branch 0 taken 3840000 times.
✓ Branch 1 taken 60000 times.
|
3900000 | for (i = 0; i < 64; i++) |
165 | 3840000 | dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; | |
166 | 60000 | break; | |
167 | ✗ | case FF_IDCT_PERM_PARTTRANS: | |
168 | ✗ | for (i = 0; i < 64; i++) | |
169 | ✗ | dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; | |
170 | ✗ | break; | |
171 | 480000 | case FF_IDCT_PERM_TRANSPOSE: | |
172 |
2/2✓ Branch 0 taken 30720000 times.
✓ Branch 1 taken 480000 times.
|
31200000 | for (i = 0; i < 64; i++) |
173 | 30720000 | dst[(i>>3) | ((i<<3)&0x38)] = src[i]; | |
174 | 480000 | break; | |
175 | 520000 | default: | |
176 |
2/2✓ Branch 0 taken 33280000 times.
✓ Branch 1 taken 520000 times.
|
33800000 | for (i = 0; i < 64; i++) |
177 | 33280000 | dst[i] = src[i]; | |
178 | 520000 | break; | |
179 | } | ||
180 | } | ||
181 | |||
182 | 56 | static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits) | |
183 | { | ||
184 |
2/2✓ Branch 0 taken 51 times.
✓ Branch 1 taken 5 times.
|
56 | void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct; |
185 | int it, i, scale; | ||
186 | int err_inf, v; | ||
187 | 56 | int64_t err2, ti, ti1, it1, err_sum = 0; | |
188 | 56 | int64_t sysErr[64], sysErrMax = 0; | |
189 | 56 | int64_t err2_matrix[64], err2_max = 0; | |
190 | 56 | int maxout = 0; | |
191 | 56 | int blockSumErrMax = 0, blockSumErr; | |
192 | AVLFG prng; | ||
193 | 56 | const int vals=1<<bits; | |
194 | double omse, ome; | ||
195 | int spec_err; | ||
196 | |||
197 | 56 | av_lfg_init(&prng, 1); | |
198 | |||
199 | 56 | err_inf = 0; | |
200 | 56 | err2 = 0; | |
201 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 56 times.
|
3640 | for (i = 0; i < 64; i++) |
202 | 3584 | err2_matrix[i] = sysErr[i] = 0; | |
203 |
2/2✓ Branch 0 taken 1120000 times.
✓ Branch 1 taken 56 times.
|
1120056 | for (it = 0; it < NB_ITS; it++) { |
204 | 1120000 | init_block(block1, test, is_idct, &prng, vals); | |
205 | 1120000 | permute(block, block1, dct->perm_type); | |
206 | |||
207 | 1120000 | dct->func(block); | |
208 | 1120000 | emms_c(); | |
209 | |||
210 |
2/2✓ Branch 0 taken 20000 times.
✓ Branch 1 taken 1100000 times.
|
1120000 | if (!strcmp(dct->name, "IJG-AAN-INT")) { |
211 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 20000 times.
|
1300000 | for (i = 0; i < 64; i++) { |
212 | 1280000 | scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; | |
213 | 1280000 | block[i] = (block[i] * scale) >> AANSCALE_BITS; | |
214 | } | ||
215 | } | ||
216 | |||
217 | 1120000 | ref(block1); | |
218 |
2/2✓ Branch 0 taken 60000 times.
✓ Branch 1 taken 1060000 times.
|
1120000 | if (!strcmp(dct->name, "PR-SSE2")) |
219 |
2/2✓ Branch 0 taken 3840000 times.
✓ Branch 1 taken 60000 times.
|
3900000 | for (i = 0; i < 64; i++) |
220 | 3840000 | block1[i] = av_clip(block1[i], 4-512, 1019-512); | |
221 | |||
222 | 1120000 | blockSumErr = 0; | |
223 |
2/2✓ Branch 0 taken 71680000 times.
✓ Branch 1 taken 1120000 times.
|
72800000 | for (i = 0; i < 64; i++) { |
224 | 71680000 | int err = block[i] - block1[i]; | |
225 | 71680000 | err_sum += err; | |
226 | 71680000 | v = abs(err); | |
227 |
2/2✓ Branch 0 taken 61 times.
✓ Branch 1 taken 71679939 times.
|
71680000 | if (v > err_inf) |
228 | 61 | err_inf = v; | |
229 | 71680000 | err2_matrix[i] += v * (int64_t)v; | |
230 | 71680000 | err2 += v * (int64_t)v; | |
231 | 71680000 | sysErr[i] += block[i] - block1[i]; | |
232 | 71680000 | blockSumErr += v; | |
233 |
2/2✓ Branch 0 taken 756 times.
✓ Branch 1 taken 71679244 times.
|
71680000 | if (abs(block[i]) > maxout) |
234 | 756 | maxout = abs(block[i]); | |
235 | } | ||
236 |
2/2✓ Branch 0 taken 246 times.
✓ Branch 1 taken 1119754 times.
|
1120000 | if (blockSumErrMax < blockSumErr) |
237 | 246 | blockSumErrMax = blockSumErr; | |
238 | } | ||
239 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 56 times.
|
3640 | for (i = 0; i < 64; i++) { |
240 | 3584 | sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i])); | |
241 | 3584 | err2_max = FFMAX(err2_max , FFABS(err2_matrix[i])); | |
242 | } | ||
243 | |||
244 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 56 times.
|
3640 | for (i = 0; i < 64; i++) { |
245 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 3136 times.
|
3584 | if (i % 8 == 0) |
246 | 448 | printf("\n"); | |
247 | 3584 | printf("%7d ", (int) sysErr[i]); | |
248 | } | ||
249 | 56 | printf("\n"); | |
250 | |||
251 | 56 | omse = (double) err2 / NB_ITS / 64; | |
252 | 56 | ome = (double) err_sum / NB_ITS / 64; | |
253 | |||
254 |
7/8✓ Branch 0 taken 51 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 51 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 36 times.
✓ Branch 5 taken 15 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 32 times.
|
56 | spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); |
255 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 17 times.
|
56 | if (test < 2) |
256 |
6/6✓ Branch 0 taken 34 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 26 times.
|
39 | spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015); |
257 | |||
258 | 56 | printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", | |
259 | 56 | is_idct ? "IDCT" : "DCT", dct->name, err_inf, | |
260 |
2/2✓ Branch 0 taken 51 times.
✓ Branch 1 taken 5 times.
|
56 | omse, ome, (double) sysErrMax / NB_ITS, |
261 | maxout, blockSumErrMax); | ||
262 | |||
263 |
3/4✓ Branch 0 taken 13 times.
✓ Branch 1 taken 43 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 13 times.
|
56 | if (spec_err && !dct->nonspec) { |
264 | ✗ | printf("Failed!\n"); | |
265 | ✗ | return 1; | |
266 | } | ||
267 | |||
268 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (!speed) |
269 | 56 | return 0; | |
270 | |||
271 | /* speed test */ | ||
272 | |||
273 | ✗ | init_block(block, test, is_idct, &prng, vals); | |
274 | ✗ | permute(block1, block, dct->perm_type); | |
275 | |||
276 | ✗ | ti = av_gettime_relative(); | |
277 | ✗ | it1 = 0; | |
278 | do { | ||
279 | ✗ | for (it = 0; it < NB_ITS_SPEED; it++) { | |
280 | ✗ | memcpy(block, block1, sizeof(block)); | |
281 | ✗ | dct->func(block); | |
282 | } | ||
283 | ✗ | emms_c(); | |
284 | ✗ | it1 += NB_ITS_SPEED; | |
285 | ✗ | ti1 = av_gettime_relative() - ti; | |
286 | ✗ | } while (ti1 < 1000000); | |
287 | |||
288 | ✗ | printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, | |
289 | ✗ | (double) it1 * 1000.0 / (double) ti1); | |
290 | |||
291 | ✗ | return 0; | |
292 | } | ||
293 | |||
/*
 * 8x8 pixel outputs compared by idct248_error(): img_dest receives the
 * result of the routine under test, img_dest1 the result of idct248_ref().
 */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
296 | |||
297 | 20000 | static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block) | |
298 | { | ||
299 | static int init; | ||
300 | static double c8[8][8]; | ||
301 | static double c4[4][4]; | ||
302 | double block1[64], block2[64], block3[64]; | ||
303 | double s, sum, v; | ||
304 | int i, j, k; | ||
305 | |||
306 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19999 times.
|
20000 | if (!init) { |
307 | 1 | init = 1; | |
308 | |||
309 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
|
9 | for (i = 0; i < 8; i++) { |
310 | 8 | sum = 0; | |
311 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 8 times.
|
72 | for (j = 0; j < 8; j++) { |
312 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 56 times.
|
64 | s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0); |
313 | 64 | c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
314 | 64 | sum += c8[i][j] * c8[i][j]; | |
315 | } | ||
316 | } | ||
317 | |||
318 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | for (i = 0; i < 4; i++) { |
319 | 4 | sum = 0; | |
320 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 4 times.
|
20 | for (j = 0; j < 4; j++) { |
321 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 12 times.
|
16 | s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0); |
322 | 16 | c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
323 | 16 | sum += c4[i][j] * c4[i][j]; | |
324 | } | ||
325 | } | ||
326 | } | ||
327 | |||
328 | /* butterfly */ | ||
329 | 20000 | s = 0.5 * sqrt(2.0); | |
330 |
2/2✓ Branch 0 taken 80000 times.
✓ Branch 1 taken 20000 times.
|
100000 | for (i = 0; i < 4; i++) { |
331 |
2/2✓ Branch 0 taken 640000 times.
✓ Branch 1 taken 80000 times.
|
720000 | for (j = 0; j < 8; j++) { |
332 | 640000 | block1[8 * (2 * i) + j] = | |
333 | 640000 | (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s; | |
334 | 640000 | block1[8 * (2 * i + 1) + j] = | |
335 | 640000 | (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s; | |
336 | } | ||
337 | } | ||
338 | |||
339 | /* idct8 on lines */ | ||
340 |
2/2✓ Branch 0 taken 160000 times.
✓ Branch 1 taken 20000 times.
|
180000 | for (i = 0; i < 8; i++) { |
341 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 160000 times.
|
1440000 | for (j = 0; j < 8; j++) { |
342 | 1280000 | sum = 0; | |
343 |
2/2✓ Branch 0 taken 10240000 times.
✓ Branch 1 taken 1280000 times.
|
11520000 | for (k = 0; k < 8; k++) |
344 | 10240000 | sum += c8[k][j] * block1[8 * i + k]; | |
345 | 1280000 | block2[8 * i + j] = sum; | |
346 | } | ||
347 | } | ||
348 | |||
349 | /* idct4 */ | ||
350 |
2/2✓ Branch 0 taken 160000 times.
✓ Branch 1 taken 20000 times.
|
180000 | for (i = 0; i < 8; i++) { |
351 |
2/2✓ Branch 0 taken 640000 times.
✓ Branch 1 taken 160000 times.
|
800000 | for (j = 0; j < 4; j++) { |
352 | /* top */ | ||
353 | 640000 | sum = 0; | |
354 |
2/2✓ Branch 0 taken 2560000 times.
✓ Branch 1 taken 640000 times.
|
3200000 | for (k = 0; k < 4; k++) |
355 | 2560000 | sum += c4[k][j] * block2[8 * (2 * k) + i]; | |
356 | 640000 | block3[8 * (2 * j) + i] = sum; | |
357 | |||
358 | /* bottom */ | ||
359 | 640000 | sum = 0; | |
360 |
2/2✓ Branch 0 taken 2560000 times.
✓ Branch 1 taken 640000 times.
|
3200000 | for (k = 0; k < 4; k++) |
361 | 2560000 | sum += c4[k][j] * block2[8 * (2 * k + 1) + i]; | |
362 | 640000 | block3[8 * (2 * j + 1) + i] = sum; | |
363 | } | ||
364 | } | ||
365 | |||
366 | /* clamp and store the result */ | ||
367 |
2/2✓ Branch 0 taken 160000 times.
✓ Branch 1 taken 20000 times.
|
180000 | for (i = 0; i < 8; i++) { |
368 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 160000 times.
|
1440000 | for (j = 0; j < 8; j++) { |
369 | 1280000 | v = block3[8 * i + j]; | |
370 |
2/2✓ Branch 0 taken 53473 times.
✓ Branch 1 taken 1226527 times.
|
1280000 | if (v < 0) v = 0; |
371 |
2/2✓ Branch 0 taken 54705 times.
✓ Branch 1 taken 1171822 times.
|
1226527 | else if (v > 255) v = 255; |
372 | 1280000 | dest[i * linesize + j] = (int) rint(v); | |
373 | } | ||
374 | } | ||
375 | 20000 | } | |
376 | |||
377 | 1 | static void idct248_error(const char *name, | |
378 | void (*idct248_put)(uint8_t *dest, | ||
379 | ptrdiff_t line_size, | ||
380 | int16_t *block), | ||
381 | int speed) | ||
382 | { | ||
383 | int it, i, it1, ti, ti1, err_max, v; | ||
384 | AVLFG prng; | ||
385 | |||
386 | 1 | av_lfg_init(&prng, 1); | |
387 | |||
388 | /* just one test to see if code is correct (precision is less | ||
389 | important here) */ | ||
390 | 1 | err_max = 0; | |
391 |
2/2✓ Branch 0 taken 20000 times.
✓ Branch 1 taken 1 times.
|
20001 | for (it = 0; it < NB_ITS; it++) { |
392 | /* XXX: use forward transform to generate values */ | ||
393 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 20000 times.
|
1300000 | for (i = 0; i < 64; i++) |
394 | 1280000 | block1[i] = av_lfg_get(&prng) % 256 - 128; | |
395 | 20000 | block1[0] += 1024; | |
396 | |||
397 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 20000 times.
|
1300000 | for (i = 0; i < 64; i++) |
398 | 1280000 | block[i] = block1[i]; | |
399 | 20000 | idct248_ref(img_dest1, 8, block); | |
400 | |||
401 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 20000 times.
|
1300000 | for (i = 0; i < 64; i++) |
402 | 1280000 | block[i] = block1[i]; | |
403 | 20000 | idct248_put(img_dest, 8, block); | |
404 | |||
405 |
2/2✓ Branch 0 taken 1280000 times.
✓ Branch 1 taken 20000 times.
|
1300000 | for (i = 0; i < 64; i++) { |
406 | 1280000 | v = abs((int) img_dest[i] - (int) img_dest1[i]); | |
407 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1280000 times.
|
1280000 | if (v == 255) |
408 | ✗ | printf("%d %d\n", img_dest[i], img_dest1[i]); | |
409 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1279999 times.
|
1280000 | if (v > err_max) |
410 | 1 | err_max = v; | |
411 | } | ||
412 | #if 0 | ||
413 | printf("ref=\n"); | ||
414 | for(i=0;i<8;i++) { | ||
415 | int j; | ||
416 | for(j=0;j<8;j++) { | ||
417 | printf(" %3d", img_dest1[i*8+j]); | ||
418 | } | ||
419 | printf("\n"); | ||
420 | } | ||
421 | |||
422 | printf("out=\n"); | ||
423 | for(i=0;i<8;i++) { | ||
424 | int j; | ||
425 | for(j=0;j<8;j++) { | ||
426 | printf(" %3d", img_dest[i*8+j]); | ||
427 | } | ||
428 | printf("\n"); | ||
429 | } | ||
430 | #endif | ||
431 | } | ||
432 | 1 | printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); | |
433 | |||
434 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!speed) |
435 | 1 | return; | |
436 | |||
437 | ✗ | ti = av_gettime_relative(); | |
438 | ✗ | it1 = 0; | |
439 | do { | ||
440 | ✗ | for (it = 0; it < NB_ITS_SPEED; it++) { | |
441 | ✗ | for (i = 0; i < 64; i++) | |
442 | ✗ | block[i] = block1[i]; | |
443 | ✗ | idct248_put(img_dest, 8, block); | |
444 | } | ||
445 | ✗ | emms_c(); | |
446 | ✗ | it1 += NB_ITS_SPEED; | |
447 | ✗ | ti1 = av_gettime_relative() - ti; | |
448 | ✗ | } while (ti1 < 1000000); | |
449 | |||
450 | ✗ | printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, | |
451 | ✗ | (double) it1 * 1000.0 / (double) ti1); | |
452 | } | ||
453 | |||
/**
 * Print the command line usage to stdout.
 *
 * The synopsis lists every option accepted by the getopt() string in main().
 */
static void help(void)
{
    printf("dct-test [-i] [-4] [-t] [-h] [<test-number>] [<bits>]\n"
           "test-number 0 -> test with random matrixes\n"
           " 1 -> test with random sparse matrixes\n"
           " 2 -> do 3. test from MPEG-4 std\n"
           "bits Number of time domain bits to use, 8 is default\n"
           "-i test IDCT implementations\n"
           "-4 test IDCT248 implementations\n"
           "-t speed test\n"
           "-h show this help\n");
}
465 | |||
466 | #if !HAVE_GETOPT | ||
467 | #include "compat/getopt.c" | ||
468 | #endif | ||
469 | |||
470 | 5 | int main(int argc, char **argv) | |
471 | { | ||
472 | 5 | int test_idct = 0, test_248_dct = 0; | |
473 | int c, i; | ||
474 | 5 | int test = 1; | |
475 | 5 | int speed = 0; | |
476 | 5 | int err = 0; | |
477 | 5 | int bits=8; | |
478 | |||
479 | 5 | ff_ref_dct_init(); | |
480 | |||
481 | for (;;) { | ||
482 | 9 | c = getopt(argc, argv, "ih4t"); | |
483 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 4 times.
|
9 | if (c == -1) |
484 | 5 | break; | |
485 |
2/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4 | switch (c) { |
486 | 3 | case 'i': | |
487 | 3 | test_idct = 1; | |
488 | 3 | break; | |
489 | 1 | case '4': | |
490 | 1 | test_248_dct = 1; | |
491 | 1 | break; | |
492 | ✗ | case 't': | |
493 | ✗ | speed = 1; | |
494 | ✗ | break; | |
495 | ✗ | default: | |
496 | case 'h': | ||
497 | ✗ | help(); | |
498 | ✗ | return 0; | |
499 | } | ||
500 | } | ||
501 | |||
502 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
|
5 | if (optind < argc) |
503 | 3 | test = atoi(argv[optind]); | |
504 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if(optind+1 < argc) bits= atoi(argv[optind+1]); |
505 | |||
506 | 5 | printf("ffmpeg DCT/IDCT test\n"); | |
507 | |||
508 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
|
5 | if (test_248_dct) { |
509 | 1 | idct248_error("SIMPLE-C", ff_simple_idct248_put, speed); | |
510 | } else { | ||
511 | 4 | const int cpu_flags = av_get_cpu_flags(); | |
512 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (test_idct) { |
513 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 3 times.
|
27 | for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++) |
514 | 24 | err |= dct_error(&idct_tab[i], test, test_idct, speed, bits); | |
515 | |||
516 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 3 times.
|
30 | for (i = 0; idct_tab_arch[i].name; i++) |
517 |
1/2✓ Branch 0 taken 27 times.
✗ Branch 1 not taken.
|
27 | if (!(~cpu_flags & idct_tab_arch[i].cpu_flag)) |
518 | 27 | err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits); | |
519 | } | ||
520 | #if CONFIG_FDCTDSP | ||
521 | else { | ||
522 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++) |
523 | 4 | err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits); | |
524 | |||
525 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (i = 0; fdct_tab_arch[i].name; i++) |
526 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag)) |
527 | 1 | err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits); | |
528 | } | ||
529 | #endif /* CONFIG_FDCTDSP */ | ||
530 | } | ||
531 | |||
532 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (err) |
533 | ✗ | printf("Error: %d.\n", err); | |
534 | |||
535 | 5 | return !!err; | |
536 | } | ||
537 |