Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2022 Ben Avison | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along | ||
17 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
19 | */ | ||
20 | |||
21 | #include <string.h> | ||
22 | |||
23 | #include "checkasm.h" | ||
24 | |||
25 | #include "libavcodec/vc1dsp.h" | ||
26 | |||
27 | #include "libavutil/common.h" | ||
28 | #include "libavutil/internal.h" | ||
29 | #include "libavutil/intreadwrite.h" | ||
30 | #include "libavutil/mem.h" | ||
31 | #include "libavutil/mem_internal.h" | ||
32 | |||
/* Builders for entries of a `test` table. Each records the spelled-out name
 * of a VC1DSPContext member and its byte offset inside the context, so the
 * function pointer can be fetched generically at run time. The sized variant
 * also records the block dimensions; the plain variant leaves them zero.
 * The trailing comma lets entries be listed back to back in an initialiser. */
#define VC1DSP_TEST(func) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func) },
#define VC1DSP_SIZED_TEST(func, w, h) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func), .width = (w), .height = (h) },
35 | |||
/* One entry in a table of DSP functions to exercise. */
typedef struct {
    const char *name;   /* member name as spelled in the source; used in the checkasm label */
    size_t      offset; /* byte offset of the function pointer inside VC1DSPContext */
    int         width;  /* block width handled by the function (zero when not recorded) */
    int         height; /* block height handled by the function (zero when not recorded) */
} test;
42 | |||
/* Row-major matrix of floats whose dimensions are stored ahead of the data. */
typedef struct matrix {
    size_t width;   /* number of columns */
    size_t height;  /* number of rows */
    float  d[];     /* width * height elements; (row j, column i) is d[j * width + i] */
} matrix;
48 | |||
49 | static const matrix T8 = { 8, 8, { | ||
50 | 12, 12, 12, 12, 12, 12, 12, 12, | ||
51 | 16, 15, 9, 4, -4, -9, -15, -16, | ||
52 | 16, 6, -6, -16, -16, -6, 6, 16, | ||
53 | 15, -4, -16, -9, 9, 16, 4, -15, | ||
54 | 12, -12, -12, 12, 12, -12, -12, 12, | ||
55 | 9, -16, 4, 15, -15, -4, 16, -9, | ||
56 | 6, -16, 16, -6, -6, 16, -16, 6, | ||
57 | 4, -9, 15, -16, 16, -15, 9, -4 | ||
58 | } }; | ||
59 | |||
60 | static const matrix T4 = { 4, 4, { | ||
61 | 17, 17, 17, 17, | ||
62 | 22, 10, -10, -22, | ||
63 | 17, -17, -17, 17, | ||
64 | 10, -22, 22, -10 | ||
65 | } }; | ||
66 | |||
67 | static const matrix T8t = { 8, 8, { | ||
68 | 12, 16, 16, 15, 12, 9, 6, 4, | ||
69 | 12, 15, 6, -4, -12, -16, -16, -9, | ||
70 | 12, 9, -6, -16, -12, 4, 16, 15, | ||
71 | 12, 4, -16, -9, 12, 15, -6, -16, | ||
72 | 12, -4, -16, 9, 12, -15, -6, 16, | ||
73 | 12, -9, -6, 16, -12, -4, 16, -15, | ||
74 | 12, -15, 6, 4, -12, 16, -16, 9, | ||
75 | 12, -16, 16, -15, 12, -9, 6, -4 | ||
76 | } }; | ||
77 | |||
78 | static const matrix T4t = { 4, 4, { | ||
79 | 17, 22, 17, 10, | ||
80 | 17, 10, -17, -22, | ||
81 | 17, -10, -17, 22, | ||
82 | 17, -22, 17, -10 | ||
83 | } }; | ||
84 | |||
85 | 60 | static matrix *new_matrix(size_t width, size_t height) | |
86 | { | ||
87 | 60 | matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float)); | |
88 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 60 times.
|
60 | if (out == NULL) { |
89 | ✗ | fprintf(stderr, "Memory allocation failure\n"); | |
90 | ✗ | exit(EXIT_FAILURE); | |
91 | } | ||
92 | 60 | out->width = width; | |
93 | 60 | out->height = height; | |
94 | 60 | return out; | |
95 | } | ||
96 | |||
97 | 48 | static matrix *multiply(const matrix *a, const matrix *b) | |
98 | { | ||
99 | matrix *out; | ||
100 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | if (a->width != b->height) { |
101 | ✗ | fprintf(stderr, "Incompatible multiplication\n"); | |
102 | ✗ | exit(EXIT_FAILURE); | |
103 | } | ||
104 | 48 | out = new_matrix(b->width, a->height); | |
105 |
2/2✓ Branch 0 taken 288 times.
✓ Branch 1 taken 48 times.
|
336 | for (int j = 0; j < out->height; ++j) |
106 |
2/2✓ Branch 0 taken 1728 times.
✓ Branch 1 taken 288 times.
|
2016 | for (int i = 0; i < out->width; ++i) { |
107 | 1728 | float sum = 0; | |
108 |
2/2✓ Branch 0 taken 11520 times.
✓ Branch 1 taken 1728 times.
|
13248 | for (int k = 0; k < a->width; ++k) |
109 | 11520 | sum += a->d[j * a->width + k] * b->d[k * b->width + i]; | |
110 | 1728 | out->d[j * out->width + i] = sum; | |
111 | } | ||
112 | 48 | return out; | |
113 | } | ||
114 | |||
115 | 12 | static void normalise(matrix *a) | |
116 | { | ||
117 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 12 times.
|
84 | for (int j = 0; j < a->height; ++j) |
118 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 72 times.
|
504 | for (int i = 0; i < a->width; ++i) { |
119 | 432 | float *p = a->d + j * a->width + i; | |
120 | 432 | *p *= 64; | |
121 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 288 times.
|
432 | if (a->height == 4) |
122 | 144 | *p /= (const unsigned[]) { 289, 292, 289, 292 } [j]; | |
123 | else | ||
124 | 288 | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j]; | |
125 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 288 times.
|
432 | if (a->width == 4) |
126 | 144 | *p /= (const unsigned[]) { 289, 292, 289, 292 } [i]; | |
127 | else | ||
128 | 288 | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i]; | |
129 | } | ||
130 | 12 | } | |
131 | |||
132 | 36 | static void divide_and_round_nearest(matrix *a, float by) | |
133 | { | ||
134 |
2/2✓ Branch 0 taken 216 times.
✓ Branch 1 taken 36 times.
|
252 | for (int j = 0; j < a->height; ++j) |
135 |
2/2✓ Branch 0 taken 1296 times.
✓ Branch 1 taken 216 times.
|
1512 | for (int i = 0; i < a->width; ++i) { |
136 | 1296 | float *p = a->d + j * a->width + i; | |
137 | 1296 | *p = rintf(*p / by); | |
138 | } | ||
139 | 36 | } | |
140 | |||
141 | 12 | static void tweak(matrix *a) | |
142 | { | ||
143 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
|
36 | for (int j = 4; j < a->height; ++j) |
144 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
|
168 | for (int i = 0; i < a->width; ++i) { |
145 | 144 | float *p = a->d + j * a->width + i; | |
146 | 144 | *p += 1; | |
147 | } | ||
148 | 12 | } | |
149 | |||
/* The VC-1 spec restricts the values permitted at three different stages:
 *  - D: the input coefficients in the frequency domain
 *  - E: the intermediate coefficients, inverse-transformed only horizontally
 *  - R: the fully inverse-transformed coefficients
 *
 * Catering fully for the specified ranges requires various intermediate
 * values to be held to 17-bit precision, yet these conditions do not appear
 * to be used by real-world streams. At least some assembly implementations
 * restrict these values to 16-bit precision, accelerating the decoding of
 * real-world streams at the cost of strict adherence to the spec. To keep
 * the test from flagging those implementations as failures, the random
 * inputs and the accepted ranges are scaled down by this factor.
 */
#define ATTENUATION 4
165 | |||
166 | 12 | static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height) | |
167 | { | ||
168 | matrix *raw, *tmp, *D, *E, *R; | ||
169 | 12 | raw = new_matrix(width, height); | |
170 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 12 times.
|
444 | for (int i = 0; i < width * height; ++i) |
171 | 432 | raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; | |
172 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | tmp = multiply(height == 8 ? &T8 : &T4, raw); |
173 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | D = multiply(tmp, width == 8 ? &T8t : &T4t); |
174 | 12 | normalise(D); | |
175 | 12 | divide_and_round_nearest(D, 1); | |
176 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 12 times.
|
444 | for (int i = 0; i < width * height; ++i) { |
177 |
2/4✓ Branch 0 taken 432 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 432 times.
|
432 | if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) { |
178 | /* Rare, so simply try again */ | ||
179 | ✗ | av_free(raw); | |
180 | ✗ | av_free(tmp); | |
181 | ✗ | av_free(D); | |
182 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
183 | } | ||
184 | } | ||
185 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | E = multiply(D, width == 8 ? &T8 : &T4); |
186 | 12 | divide_and_round_nearest(E, 8); | |
187 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 12 times.
|
444 | for (int i = 0; i < width * height; ++i) |
188 |
2/4✓ Branch 0 taken 432 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 432 times.
|
432 | if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) { |
189 | /* Rare, so simply try again */ | ||
190 | ✗ | av_free(raw); | |
191 | ✗ | av_free(tmp); | |
192 | ✗ | av_free(D); | |
193 | ✗ | av_free(E); | |
194 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
195 | } | ||
196 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | R = multiply(height == 8 ? &T8t : &T4t, E); |
197 | 12 | tweak(R); | |
198 | 12 | divide_and_round_nearest(R, 128); | |
199 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 12 times.
|
444 | for (int i = 0; i < width * height; ++i) |
200 |
2/4✓ Branch 0 taken 432 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 432 times.
|
432 | if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) { |
201 | /* Rare, so simply try again */ | ||
202 | ✗ | av_free(raw); | |
203 | ✗ | av_free(tmp); | |
204 | ✗ | av_free(D); | |
205 | ✗ | av_free(E); | |
206 | ✗ | av_free(R); | |
207 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
208 | } | ||
209 | 12 | av_free(raw); | |
210 | 12 | av_free(tmp); | |
211 | 12 | av_free(E); | |
212 | 12 | av_free(R); | |
213 | 12 | return D; | |
214 | } | ||
215 | |||
/* Fill the first `size` 16-bit elements of the twin buffers name0 and name1
 * with the same stream of random values, so both start out identical. */
#define RANDOMIZE_BUFFER16(name, size)              \
    do {                                            \
        for (int k = 0; k < (size); ++k) {          \
            const uint16_t r = rnd();               \
            AV_WN16A(&name##0[k], r);               \
            AV_WN16A(&name##1[k], r);               \
        }                                           \
    } while (0)
225 | |||
/* Fill the first `size` bytes of the twin buffers name0 and name1 with the
 * same stream of random bytes, so both start out identical. */
#define RANDOMIZE_BUFFER8(name, size)                       \
    do {                                                    \
        for (int k = 0; k < (size); ++k)                    \
            name##1[k] = name##0[k] = (uint8_t) rnd();      \
    } while (0)
235 | |||
/* Fill the twin byte buffers name0 and name1 with identical random values
 * clustered around 0x80. A magnitude in 0x80..0xFF is shifted right by a
 * random 0..8 bits, given a random sign, then added to 0x80 and truncated to
 * a byte. Each byte consumes three rnd() calls in that order. */
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)  \
    do {                                            \
        const int total = (size);                   \
        for (int k = 0; k < total; ++k) {           \
            int x = 0x80 | (rnd() & 0x7F);          \
            uint8_t v;                              \
            x >>= rnd() % 9;                        \
            if (rnd() & 1)                          \
                x = -x;                             \
            v = 0x80 + x;                           \
            name##0[k] = v;                         \
            name##1[k] = v;                         \
        }                                           \
    } while (0)
248 | |||
249 | 13 | static void check_inv_trans_inplace(void) | |
250 | { | ||
251 | /* Inverse transform input coefficients are stored in a 16-bit buffer | ||
252 | * with row stride of 8 coefficients irrespective of transform size. | ||
253 | * vc1_inv_trans_8x8 differs from the others in two ways: coefficients | ||
254 | * are stored in column-major order, and the outputs are written back | ||
255 | * to the input buffer, so we oversize it slightly to catch overruns. */ | ||
256 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]); | |
257 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]); | |
258 | |||
259 | VC1DSPContext h; | ||
260 | |||
261 | 13 | ff_vc1dsp_init(&h); | |
262 | |||
263 |
2/2✓ Branch 3 taken 1 times.
✓ Branch 4 taken 12 times.
|
13 | if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) { |
264 | matrix *coeffs; | ||
265 | 1 | declare_func(void, int16_t *); | |
266 |
2/2✓ Branch 1 taken 80 times.
✓ Branch 2 taken 1 times.
|
81 | RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8); |
267 | 1 | coeffs = generate_inverse_quantized_transform_coefficients(8, 8); | |
268 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
|
9 | for (int j = 0; j < 8; ++j) |
269 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 8 times.
|
72 | for (int i = 0; i < 8; ++i) { |
270 | 64 | int idx = 8 + i * 8 + j; | |
271 | 64 | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i]; | |
272 | } | ||
273 | 1 | call_ref(inv_trans_in0 + 8); | |
274 | 1 | call_new(inv_trans_in1 + 8); | |
275 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t))) |
276 | ✗ | fail(); | |
277 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
1 | bench_new(inv_trans_in1 + 8); |
278 | 1 | av_free(coeffs); | |
279 | } | ||
280 | 13 | } | |
281 | |||
282 | 13 | static void check_inv_trans_adding(void) | |
283 | { | ||
284 | /* Inverse transform input coefficients are stored in a 16-bit buffer | ||
285 | * with row stride of 8 coefficients irrespective of transform size. */ | ||
286 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]); | |
287 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]); | |
288 | |||
289 | /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and | ||
290 | * added with saturation to an array of unsigned 8-bit values. Oversize | ||
291 | * this by 8 samples left and right and one row above and below. */ | ||
292 | 13 | LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]); | |
293 | 13 | LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]); | |
294 | |||
295 | VC1DSPContext h; | ||
296 | |||
297 | 13 | const test tests[] = { | |
298 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4) | ||
299 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8) | ||
300 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4) | ||
301 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8) | ||
302 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4) | ||
303 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8) | ||
304 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4) | ||
305 | }; | ||
306 | |||
307 | 13 | ff_vc1dsp_init(&h); | |
308 | |||
309 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 13 times.
|
104 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { |
310 | 91 | void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset); | |
311 |
2/2✓ Branch 3 taken 11 times.
✓ Branch 4 taken 80 times.
|
91 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { |
312 | matrix *coeffs; | ||
313 |
2/2✓ Branch 1 taken 4 times.
✓ Branch 2 taken 7 times.
|
11 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *); |
314 |
2/2✓ Branch 1 taken 704 times.
✓ Branch 2 taken 11 times.
|
715 | RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8); |
315 |
2/2✓ Branch 1 taken 2640 times.
✓ Branch 2 taken 11 times.
|
2651 | RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24); |
316 | 11 | coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height); | |
317 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 11 times.
|
75 | for (int j = 0; j < tests[t].height; ++j) |
318 |
2/2✓ Branch 0 taken 368 times.
✓ Branch 1 taken 64 times.
|
432 | for (int i = 0; i < tests[t].width; ++i) { |
319 | 368 | int idx = j * 8 + i; | |
320 | 368 | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i]; | |
321 | } | ||
322 | 11 | call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0); | |
323 | 11 | call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); | |
324 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) |
325 | ✗ | fail(); | |
326 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 11 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
11 | bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8); |
327 | 11 | av_free(coeffs); | |
328 | } | ||
329 | } | ||
330 | 13 | } | |
331 | |||
332 | 13 | static void check_loop_filter(void) | |
333 | { | ||
334 | /* Deblocking filter buffers are big enough to hold a 16x16 block, | ||
335 | * plus 16 columns left and 4 rows above to hold filter inputs | ||
336 | * (depending on whether v or h neighbouring block edge, oversized | ||
337 | * horizontally to maintain 16-byte alignment) plus 16 columns and | ||
338 | * 4 rows below to catch write overflows */ | ||
339 | 13 | LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]); | |
340 | 13 | LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]); | |
341 | |||
342 | VC1DSPContext h; | ||
343 | |||
344 | 13 | const test tests[] = { | |
345 | VC1DSP_TEST(vc1_v_loop_filter4) | ||
346 | VC1DSP_TEST(vc1_h_loop_filter4) | ||
347 | VC1DSP_TEST(vc1_v_loop_filter8) | ||
348 | VC1DSP_TEST(vc1_h_loop_filter8) | ||
349 | VC1DSP_TEST(vc1_v_loop_filter16) | ||
350 | VC1DSP_TEST(vc1_h_loop_filter16) | ||
351 | }; | ||
352 | |||
353 | 13 | ff_vc1dsp_init(&h); | |
354 | |||
355 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 13 times.
|
91 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { |
356 | 78 | void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); | |
357 |
2/2✓ Branch 1 taken 72 times.
✓ Branch 2 taken 6 times.
|
78 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int); |
358 |
2/2✓ Branch 3 taken 20 times.
✓ Branch 4 taken 58 times.
|
78 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { |
359 |
2/2✓ Branch 0 taken 20000 times.
✓ Branch 1 taken 20 times.
|
20020 | for (int count = 1000; count > 0; --count) { |
360 | 20000 | int pq = rnd() % 31 + 1; | |
361 |
4/4✓ Branch 3 taken 11521861 times.
✓ Branch 4 taken 11518139 times.
✓ Branch 5 taken 23040000 times.
✓ Branch 6 taken 20000 times.
|
23060000 | RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48); |
362 | 20000 | call_ref(filter_buf0 + 4 * 48 + 16, 48, pq); | |
363 | 20000 | call_new(filter_buf1 + 4 * 48 + 16, 48, pq); | |
364 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20000 times.
|
20000 | if (memcmp(filter_buf0, filter_buf1, 24 * 48)) |
365 | ✗ | fail(); | |
366 | } | ||
367 | } | ||
368 |
2/2✓ Branch 0 taken 1872 times.
✓ Branch 1 taken 78 times.
|
1950 | for (int j = 0; j < 24; ++j) |
369 |
2/2✓ Branch 0 taken 89856 times.
✓ Branch 1 taken 1872 times.
|
91728 | for (int i = 0; i < 48; ++i) |
370 |
4/4✓ Branch 0 taken 59904 times.
✓ Branch 1 taken 29952 times.
✓ Branch 2 taken 49920 times.
✓ Branch 3 taken 9984 times.
|
89856 | filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4); |
371 |
2/2✓ Branch 3 taken 20 times.
✓ Branch 4 taken 58 times.
|
78 | if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name)) |
372 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 20 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
20 | bench_new(filter_buf1 + 4 * 48 + 16, 48, 1); |
373 |
2/2✓ Branch 3 taken 20 times.
✓ Branch 4 taken 58 times.
|
78 | if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name)) |
374 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 20 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
20 | bench_new(filter_buf1 + 4 * 48 + 16, 48, 31); |
375 | } | ||
376 | 13 | } | |
377 | |||
/* Run 100 reference-versus-optimised comparisons of the unescape function on
 * the current contents of escaped0/escaped1. Each round picks random source
 * and destination offsets in 0..7 and a length of (8 << 0..7) minus 0..7,
 * refills both destination buffers with identical random bytes, and fails
 * if the returned lengths or any destination byte differ. Expects the
 * locals declared in check_unescape() to be in scope. */
#define TEST_UNESCAPE                                                                               \
    do {                                                                                            \
        for (int round = 0; round < 100; ++round) {                                                 \
            escaped_offset   = rnd() & 7;                                                           \
            unescaped_offset = rnd() & 7;                                                           \
            escaped_len      = (1u << ((rnd() % 8) + 3)) - (rnd() & 7);                             \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
                fail();                                                                             \
        }                                                                                           \
    } while (0)
391 | |||
392 | 13 | static void check_unescape(void) | |
393 | { | ||
394 | /* This appears to be a typical length of buffer in use */ | ||
395 | #define LOG2_UNESCAPE_BUF_SIZE 17 | ||
396 | #define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) | ||
397 | 13 | LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); | |
398 | 13 | LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); | |
399 | 13 | LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); | |
400 | 13 | LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); | |
401 | |||
402 | VC1DSPContext h; | ||
403 | |||
404 | 13 | ff_vc1dsp_init(&h); | |
405 | |||
406 |
2/2✓ Branch 3 taken 1 times.
✓ Branch 4 taken 12 times.
|
13 | if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { |
407 | int len0, len1, escaped_offset, unescaped_offset, escaped_len; | ||
408 | 1 | declare_func(int, const uint8_t *, int, uint8_t *); | |
409 | |||
410 | /* Test data which consists of escapes sequences packed as tightly as possible */ | ||
411 |
2/2✓ Branch 0 taken 131072 times.
✓ Branch 1 taken 1 times.
|
131073 | for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) |
412 |
2/2✓ Branch 0 taken 43691 times.
✓ Branch 1 taken 87381 times.
|
131072 | escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); |
413 |
6/8✓ Branch 5 taken 13107200 times.
✓ Branch 6 taken 100 times.
✓ Branch 14 taken 100 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 100 times.
✓ Branch 20 taken 100 times.
✓ Branch 21 taken 1 times.
|
13107301 | TEST_UNESCAPE; |
414 | |||
415 | /* Test random data */ | ||
416 |
2/2✓ Branch 1 taken 131072 times.
✓ Branch 2 taken 1 times.
|
131073 | RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); |
417 |
6/8✓ Branch 5 taken 13107200 times.
✓ Branch 6 taken 100 times.
✓ Branch 14 taken 100 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 100 times.
✓ Branch 20 taken 100 times.
✓ Branch 21 taken 1 times.
|
13107301 | TEST_UNESCAPE; |
418 | |||
419 | /* Test data with escape sequences at random intervals */ | ||
420 |
2/2✓ Branch 0 taken 1409 times.
✓ Branch 1 taken 1 times.
|
1410 | for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { |
421 | int gap, gap_msb; | ||
422 | 1409 | escaped1[x+0] = escaped0[x+0] = 0; | |
423 | 1409 | escaped1[x+1] = escaped0[x+1] = 0; | |
424 | 1409 | escaped1[x+2] = escaped0[x+2] = 3; | |
425 | 1409 | escaped1[x+3] = escaped0[x+3] = rnd() & 3; | |
426 | 1409 | gap_msb = 2u << (rnd() % 8); | |
427 | 1409 | gap = (rnd() &~ -gap_msb) | gap_msb; | |
428 | 1409 | x += gap; | |
429 | } | ||
430 |
6/8✓ Branch 5 taken 13107200 times.
✓ Branch 6 taken 100 times.
✓ Branch 14 taken 100 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 100 times.
✓ Branch 20 taken 100 times.
✓ Branch 21 taken 1 times.
|
13107301 | TEST_UNESCAPE; |
431 | |||
432 | /* Test data which is known to contain no escape sequences */ | ||
433 | 1 | memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); | |
434 | 1 | memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); | |
435 |
6/8✓ Branch 5 taken 13107200 times.
✓ Branch 6 taken 100 times.
✓ Branch 14 taken 100 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 100 times.
✓ Branch 20 taken 100 times.
✓ Branch 21 taken 1 times.
|
13107301 | TEST_UNESCAPE; |
436 | |||
437 | /* Benchmark the no-escape-sequences case */ | ||
438 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
1 | bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); |
439 | } | ||
440 | 13 | } | |
441 | |||
442 | 13 | static void check_mspel_pixels(void) | |
443 | { | ||
444 | 13 | LOCAL_ALIGNED_16(uint8_t, src0, [32 * 32]); | |
445 | 13 | LOCAL_ALIGNED_16(uint8_t, src1, [32 * 32]); | |
446 | 13 | LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 32]); | |
447 | 13 | LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 32]); | |
448 | |||
449 | VC1DSPContext h; | ||
450 | |||
451 | 13 | const test tests[] = { | |
452 | VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[0][0], 16, 16) | ||
453 | VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[1][0], 8, 8) | ||
454 | VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[0][0], 16, 16) | ||
455 | VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[1][0], 8, 8) | ||
456 | }; | ||
457 | |||
458 | 13 | ff_vc1dsp_init(&h); | |
459 | |||
460 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { |
461 | 52 | void (*func)(uint8_t *, const uint8_t*, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); | |
462 |
2/2✓ Branch 3 taken 8 times.
✓ Branch 4 taken 44 times.
|
52 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { |
463 |
2/2✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
|
8 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, const uint8_t*, ptrdiff_t, int); |
464 |
2/2✓ Branch 1 taken 8192 times.
✓ Branch 2 taken 8 times.
|
8200 | RANDOMIZE_BUFFER8(dst, 32 * 32); |
465 |
2/2✓ Branch 1 taken 8192 times.
✓ Branch 2 taken 8 times.
|
8200 | RANDOMIZE_BUFFER8(src, 32 * 32); |
466 | 8 | call_ref(dst0, src0, 32, 0); | |
467 | 8 | call_new(dst1, src1, 32, 0); | |
468 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (memcmp(dst0, dst1, 32 * 32)) { |
469 | ✗ | fail(); | |
470 | } | ||
471 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
8 | bench_new(dst1, src0, 32, 0); |
472 | } | ||
473 | } | ||
474 | 13 | } | |
475 | |||
/**
 * Entry point for the VC-1 DSP checks. Runs each group of checks and then
 * reports it under its own label.
 */
void checkasm_check_vc1dsp(void)
{
    /* Both inverse-transform checks are reported as a single group. */
    check_inv_trans_inplace();
    check_inv_trans_adding();
    report("inv_trans");

    check_loop_filter();
    report("loop_filter");

    check_unescape();
    report("unescape_buffer");

    check_mspel_pixels();
    report("mspel_pixels");
}
491 |