FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/tests/checkasm/sw_scale.c
Date: 2025-08-19 23:55:23
Exec Total Coverage
Lines: 216 240 90.0%
Functions: 8 12 66.7%
Branches: 120 182 65.9%

Line Branch Exec Source
1 /*
2 *
3 * This file is part of FFmpeg.
4 *
5 * FFmpeg is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * FFmpeg is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <string.h>
21
22 #include "libavutil/common.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26
27 #include "libswscale/swscale.h"
28 #include "libswscale/swscale_internal.h"
29
30 #include "checkasm.h"
31
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), stepping four
 * bytes at a time through AV_WN32().  The step is always a whole 4-byte
 * store, so the buffer must have room for `size` rounded up to a multiple
 * of 4.
 *
 * Both arguments are parenthesized in the expansion so that casts, sums and
 * conditional expressions can be passed safely, and the loop counter has a
 * macro-specific name so it cannot capture an identifier used by the caller.
 */
#define randomize_buffers(buf, size)                            \
    do {                                                        \
        int rb_pos;                                             \
        for (rb_pos = 0; rb_pos < (size); rb_pos += 4)          \
            AV_WN32((buf) + rb_pos, rnd());                     \
    } while (0)
38
/*
 * Plain C reference for the 8-bit vertical filter; it mirrors the
 * yuv2planeX_8_c function.
 *
 * For every output column the accumulator starts at the dither byte for
 * that column (index (column + offset) & 7) shifted left by 12, each of the
 * filterSize source rows contributes src[row][column] * filter[row], and the
 * sum shifted right by 19 is clipped to 0..255 and stored in dest.
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        int acc = dither[(x + offset) & 7] << 12;

        for (int tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
54
/*
 * CMP_FUNC(bits) defines cmp_off_by_n_<bits>(ref, test, n, accuracy), which
 * walks n elements of two uint<bits>_t arrays in lockstep and returns 1 as
 * soon as a pair differs by more than `accuracy` in absolute value, or 0
 * when every pair is within tolerance (including the n == 0 case).
 */
#define CMP_FUNC(bits)                                                          \
static int cmp_off_by_n_##bits(const uint##bits##_t *ref,                       \
                               const uint##bits##_t *test,                      \
                               size_t n, int accuracy)                          \
{                                                                               \
    const uint##bits##_t *const end = ref + n;                                  \
    while (ref < end) {                                                         \
        const int delta = (int)*ref++ - (int)*test++;                           \
        if (abs(delta) > accuracy)                                              \
            return 1;                                                           \
    }                                                                           \
    return 0;                                                                   \
}

CMP_FUNC(8)

CMP_FUNC(16)
68
/*
 * SHOW_DIFF_FUNC(bits) defines two diagnostic helpers for uint<bits>_t data:
 *
 * print_data_<bits>(p, len, offset)
 *     Prints len elements of p as hex, eight per row; each row is prefixed
 *     with the index of its first element plus `offset`.
 *
 * show_differences_<bits>(a, b, len)
 *     Finds the first index where a and b differ, prints a window of both
 *     arrays around it and returns that index.  Returns len (and prints
 *     nothing) when the arrays are identical.
 *
 * The window starts on the 8-aligned row before the mismatch (or on the
 * mismatch's own row if it lies in the first eight elements) and spans at
 * most 32 elements.  It is clamped to `len` so that a mismatch close to the
 * end of the arrays never causes a read past the end of either buffer.
 */
#define SHOW_DIFF_FUNC(bits)                                                    \
static void print_data_##bits(const uint##bits##_t *p, size_t len, size_t offset) \
{                                                                               \
    size_t i = 0;                                                               \
    for (; i < len; i++) {                                                      \
        if (i % 8 == 0) {                                                       \
            printf("0x%04zx: ", i+offset);                                      \
        }                                                                       \
        printf("0x%02x ", (uint32_t) p[i]);                                     \
        if (i % 8 == 7) {                                                       \
            printf("\n");                                                       \
        }                                                                       \
    }                                                                           \
    /* terminate a partially filled last row */                                 \
    if (i % 8 != 0) {                                                           \
        printf("\n");                                                           \
    }                                                                           \
}                                                                               \
static size_t show_differences_##bits(const uint##bits##_t *a, const uint##bits##_t *b, \
                                      size_t len)                               \
{                                                                               \
    for (size_t i = 0; i < len; i++) {                                          \
        if (a[i] != b[i]) {                                                     \
            /* step back one row for context, then align down to a row start */ \
            const size_t offset = (i >= 8 ? i - 8 : i) & ~(size_t)7;            \
            /* offset <= i < len, so at least one element is available */       \
            size_t count = len - offset;                                        \
            if (count > 32)                                                     \
                count = 32;                                                     \
            printf("test a:\n");                                                \
            print_data_##bits(&a[offset], count, offset);                       \
            printf("\ntest b:\n");                                              \
            print_data_##bits(&b[offset], count, offset);                       \
            printf("\n");                                                       \
            return i;                                                           \
        }                                                                       \
    }                                                                           \
    return len;                                                                 \
}

SHOW_DIFF_FUNC(8)
SHOW_DIFF_FUNC(16)
108
109 26 static void check_yuv2yuv1(int accurate)
110 {
111 SwsContext *sws;
112 SwsInternal *c;
113 int osi, isi;
114 int dstW, offset;
115 size_t fail_offset;
116 26 const int input_sizes[] = {8, 24, 128, 144, 256, 512};
117 #define LARGEST_INPUT_SIZE 512
118
119 26 const int offsets[] = {0, 3, 8, 11, 16, 19};
120 26 const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);
121
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 const char *accurate_str = (accurate) ? "accurate" : "approximate";
122
123 26 declare_func(void,
124 const int16_t *src, uint8_t *dest,
125 int dstW, const uint8_t *dither, int offset);
126
127 26 LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);
128 26 LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
129 26 LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
130 26 LOCAL_ALIGNED_8(uint8_t, dither, [8]);
131
132
2/2
✓ Branch 1 taken 52 times.
✓ Branch 2 taken 26 times.
78 randomize_buffers((uint8_t*)dither, 8);
133
2/2
✓ Branch 1 taken 6656 times.
✓ Branch 2 taken 26 times.
6682 randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
134 26 sws = sws_alloc_context();
135
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 if (accurate)
136 13 sws->flags |= SWS_ACCURATE_RND;
137
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
26 if (sws_init_context(sws, NULL, NULL) < 0)
138 fail();
139
140 26 c = sws_internal(sws);
141 26 ff_sws_init_scale(c);
142
2/2
✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
182 for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi) {
143 156 dstW = input_sizes[isi];
144
2/2
✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
1092 for (osi = 0; osi < OFFSET_SIZES; osi++) {
145 936 offset = offsets[osi];
146
2/2
✓ Branch 3 taken 144 times.
✓ Branch 4 taken 792 times.
936 if (check_func(c->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){
147 144 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
148 144 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
149
150 144 call_ref(src_pixels, dst0, dstW, dither, offset);
151 144 call_new(src_pixels, dst1, dstW, dither, offset);
152
3/4
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 108 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 144 times.
144 if (cmp_off_by_n_8(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {
153 fail();
154 printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str);
155 fail_offset = show_differences_8(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
156 printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
157 (int) src_pixels[fail_offset],
158 (int) dither[(fail_offset + fail_offset) & 7],
159 (int) dst0[fail_offset],
160 (int) dst1[fail_offset]);
161 }
162
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 120 times.
144 if (dstW == LARGEST_INPUT_SIZE)
163
1/8
✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
24 bench_new(src_pixels, dst1, dstW, dither, offset);
164 }
165 }
166 }
167 26 sws_freeContext(sws);
168 26 }
169
170 234 static void check_yuv2yuvX(int accurate, int bit_depth, int dst_pix_format)
171 {
172 SwsContext *sws;
173 SwsInternal *c;
174 int fsi, osi, isi, i, j;
175 int dstW;
176 #define LARGEST_FILTER 16
177 // ff_yuv2planeX_8_sse2 can't handle odd filter sizes
178 234 const int filter_sizes[] = {2, 4, 8, 16};
179 234 const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
180 #define LARGEST_INPUT_SIZE 512
181 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
182
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 117 times.
234 const char *accurate_str = (accurate) ? "accurate" : "approximate";
183
184
2/2
✓ Branch 1 taken 216 times.
✓ Branch 2 taken 18 times.
234 declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
185 int filterSize, const int16_t **src, uint8_t *dest,
186 int dstW, const uint8_t *dither, int offset);
187
188 const int16_t **src;
189 234 LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
190 234 LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
191 234 LOCAL_ALIGNED_16(uint16_t, dst0, [LARGEST_INPUT_SIZE]);
192 234 LOCAL_ALIGNED_16(uint16_t, dst1, [LARGEST_INPUT_SIZE]);
193 234 LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]);
194 union VFilterData{
195 const int16_t *src;
196 uint16_t coeff[8];
197 } *vFilterData;
198 234 uint8_t d_val = rnd();
199 234 memset(dither, d_val, LARGEST_INPUT_SIZE);
200
2/2
✓ Branch 1 taken 958464 times.
✓ Branch 2 taken 234 times.
958698 randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
201 234 sws = sws_alloc_context();
202 234 sws->dst_format = dst_pix_format;
203
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 117 times.
234 if (accurate)
204 117 sws->flags |= SWS_ACCURATE_RND;
205
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 234 times.
234 if (sws_init_context(sws, NULL, NULL) < 0)
206 fail();
207
208 234 c = sws_internal(sws);
209 234 c->dstBpc = bit_depth;
210 234 ff_sws_init_scale(c);
211
2/2
✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 234 times.
1638 for(isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi){
212 1404 dstW = input_sizes[isi];
213
2/2
✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 1404 times.
7020 for(osi = 0; osi < 64; osi += 16){
214
2/2
✓ Branch 0 taken 1170 times.
✓ Branch 1 taken 4446 times.
5616 if (dstW <= osi)
215 1170 continue;
216
2/2
✓ Branch 0 taken 17784 times.
✓ Branch 1 taken 4446 times.
22230 for (fsi = 0; fsi < FILTER_SIZES; ++fsi) {
217 // Generate filter coefficients for the given filter size,
218 // with some properties:
219 // - The coefficients add up to the intended sum (4096, 1<<12)
220 // - The coefficients contain negative values
221 // - The filter intermediates don't overflow for worst case
222 // inputs (all positive coefficients are coupled with
223 // input_max and all negative coefficients with input_min,
224 // or vice versa).
225 // Produce a filter with all coefficients set to
226 // -((1<<12)/(filter_size-1)) except for one (randomly chosen)
227 // which is set to ((1<<13)-1).
228
2/2
✓ Branch 0 taken 133380 times.
✓ Branch 1 taken 17784 times.
151164 for (i = 0; i < filter_sizes[fsi]; ++i)
229 133380 filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1));
230 17784 filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1;
231
232 17784 src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
233 17784 vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
234 17784 memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
235
2/2
✓ Branch 0 taken 133380 times.
✓ Branch 1 taken 17784 times.
151164 for (i = 0; i < filter_sizes[fsi]; ++i) {
236 133380 src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
237 133380 vFilterData[i].src = src[i] - osi;
238
2/2
✓ Branch 0 taken 533520 times.
✓ Branch 1 taken 133380 times.
666900 for(j = 0; j < 4; ++j)
239 533520 vFilterData[i].coeff[j + 4] = filter_coeff[i];
240 }
241
6/6
✓ Branch 2 taken 15808 times.
✓ Branch 3 taken 1976 times.
✓ Branch 5 taken 7904 times.
✓ Branch 6 taken 7904 times.
✓ Branch 8 taken 2736 times.
✓ Branch 9 taken 15048 times.
17784 if (check_func(c->yuv2planeX, "yuv2yuvX_%d%s_%d_%d_%d_%s", bit_depth, (bit_depth == 8) ? "" : (isBE(dst_pix_format) ? "BE" : "LE"), filter_sizes[fsi], osi, dstW, accurate_str)) {
242 // use vFilterData for the mmx function
243
2/2
✓ Branch 0 taken 228 times.
✓ Branch 1 taken 2508 times.
2736 const int16_t *filter = c->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];
244 2736 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
245 2736 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
246
247
2/2
✓ Branch 0 taken 608 times.
✓ Branch 1 taken 2128 times.
2736 if (c->dstBpc == 8) {
248 // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that
249 // function or not, so we can't pass it the parameters correctly.
250
251 608 yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst0, dstW - osi, dither, osi);
252 608 call_new(filter, filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi);
253
254
3/4
✓ Branch 0 taken 304 times.
✓ Branch 1 taken 304 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 608 times.
608 if (cmp_off_by_n_8((uint8_t*)dst0, (uint8_t*)dst1, LARGEST_INPUT_SIZE, accurate ? 0 : 2)) {
255 fail();
256 printf("failed: yuv2yuvX_%d_%d_%d_%d_%s\n", bit_depth, filter_sizes[fsi], osi, dstW, accurate_str);
257 show_differences_8((uint8_t*)dst0, (uint8_t*)dst1, LARGEST_INPUT_SIZE);
258 }
259 } else {
260 2128 call_ref(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst0, dstW - osi, dither, osi);
261 2128 call_new(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi);
262
263
3/4
✓ Branch 0 taken 1064 times.
✓ Branch 1 taken 1064 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2128 times.
2128 if (cmp_off_by_n_16(dst0, dst1, LARGEST_INPUT_SIZE, accurate ? 0 : 2)) {
264 fail();
265 printf("failed: yuv2yuvX_%d%s_%d_%d_%d_%s\n", bit_depth, isBE(dst_pix_format) ? "BE" : "LE", filter_sizes[fsi], osi, dstW, accurate_str);
266 show_differences_16(dst0, dst1, LARGEST_INPUT_SIZE);
267 }
268 }
269
2/2
✓ Branch 0 taken 576 times.
✓ Branch 1 taken 2160 times.
2736 if (dstW == LARGEST_INPUT_SIZE)
270
1/8
✗ Branch 1 not taken.
✓ Branch 2 taken 576 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
576 bench_new(filter, filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi);
271
272 }
273 17784 av_freep(&src);
274 17784 av_freep(&vFilterData);
275 }
276 }
277 }
278 234 sws_freeContext(sws);
279 #undef FILTER_SIZES
280 234 }
281
282 26 static void check_yuv2nv12cX(int accurate)
283 {
284 SwsContext *sws;
285 SwsInternal *c;
286 #define LARGEST_FILTER 16
287 26 const int filter_sizes[] = {2, 4, 8, 16};
288 #define LARGEST_INPUT_SIZE 512
289 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
290
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 const char *accurate_str = (accurate) ? "accurate" : "approximate";
291
292
2/2
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 2 times.
26 declare_func_emms(AV_CPU_FLAG_MMX, void, enum AVPixelFormat dstFormat,
293 const uint8_t *chrDither, const int16_t *chrFilter,
294 int chrFilterSize, const int16_t **chrUSrc,
295 const int16_t **chrVSrc, uint8_t *dest, int dstW);
296
297 const int16_t *srcU[LARGEST_FILTER], *srcV[LARGEST_FILTER];
298 26 LOCAL_ALIGNED_16(int16_t, srcU_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
299 26 LOCAL_ALIGNED_16(int16_t, srcV_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
300 26 LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
301 26 LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE * 2]);
302 26 LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE * 2]);
303 26 LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]);
304 26 uint8_t d_val = rnd();
305 26 memset(dither, d_val, LARGEST_INPUT_SIZE);
306
2/2
✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 26 times.
106522 randomize_buffers((uint8_t*)srcU_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
307
2/2
✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 26 times.
106522 randomize_buffers((uint8_t*)srcV_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
308
2/2
✓ Branch 0 taken 416 times.
✓ Branch 1 taken 26 times.
442 for (int i = 0; i < LARGEST_FILTER; i++) {
309 416 srcU[i] = &srcU_pixels[i * LARGEST_INPUT_SIZE];
310 416 srcV[i] = &srcV_pixels[i * LARGEST_INPUT_SIZE];
311 }
312
313 26 sws = sws_alloc_context();
314 26 sws->dst_format = AV_PIX_FMT_NV12;
315
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 if (accurate)
316 13 sws->flags |= SWS_ACCURATE_RND;
317
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
26 if (sws_init_context(sws, NULL, NULL) < 0)
318 fail();
319
320 26 c = sws_internal(sws);
321 26 ff_sws_init_scale(c);
322
2/2
✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
182 for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++){
323 156 const int dstW = input_sizes[isi];
324
2/2
✓ Branch 0 taken 624 times.
✓ Branch 1 taken 156 times.
780 for (int fsi = 0; fsi < FF_ARRAY_ELEMS(filter_sizes); fsi++) {
325 624 const int filter_size = filter_sizes[fsi];
326
2/2
✓ Branch 0 taken 4680 times.
✓ Branch 1 taken 624 times.
5304 for (int i = 0; i < filter_size; i++)
327 4680 filter_coeff[i] = -((1 << 12) / (filter_size - 1));
328 624 filter_coeff[rnd() % filter_size] = (1 << 13) - 1;
329
330
2/2
✓ Branch 3 taken 72 times.
✓ Branch 4 taken 552 times.
624 if (check_func(c->yuv2nv12cX, "yuv2nv12cX_%d_%d_%s", filter_size, dstW, accurate_str)){
331 72 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
332 72 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
333
334 72 call_ref(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst0, dstW);
335 72 call_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW);
336
337
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 72 times.
72 if (cmp_off_by_n_8(dst0, dst1, dstW * 2 * sizeof(dst0[0]), accurate ? 0 : 2)) {
338 fail();
339 printf("failed: yuv2nv12wX_%d_%d_%s\n", filter_size, dstW, accurate_str);
340 show_differences_8(dst0, dst1, dstW * 2 * sizeof(dst0[0]));
341 }
342
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 60 times.
72 if (dstW == LARGEST_INPUT_SIZE)
343
1/8
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
12 bench_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW);
344
345 }
346 }
347 }
348 26 sws_freeContext(sws);
349 26 }
350 #undef LARGEST_FILTER
351 #undef LARGEST_INPUT_SIZE
352
353 #undef SRC_PIXELS
354 #define SRC_PIXELS 512
355
356 13 static void check_hscale(void)
357 {
358 #define MAX_FILTER_WIDTH 40
359 #define FILTER_SIZES 6
360 static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
361
362 #define HSCALE_PAIRS 2
363 static const int hscale_pairs[HSCALE_PAIRS][2] = {
364 { 8, 14 },
365 { 8, 18 },
366 };
367
368 #define LARGEST_INPUT_SIZE 512
369 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
370
371 int i, j, fsi, hpi, width, dstWi;
372 SwsContext *sws;
373 SwsInternal *c;
374
375 // padded
376 13 LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
377 13 LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
378 13 LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
379
380 // padded
381 13 LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
382 13 LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
383 13 LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
384 13 LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
385
386 // The dst parameter here is either int16_t or int32_t but we use void* to
387 // just cover both cases.
388 13 declare_func(void, SwsInternal *c, int16_t *dst, int dstW,
389 const uint8_t *src, const int16_t *filter,
390 const int32_t *filterPos, int filterSize);
391
392 13 sws = sws_alloc_context();
393
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
13 if (sws_init_context(sws, NULL, NULL) < 0)
394 fail();
395
396 13 c = sws_internal(sws);
397
2/2
✓ Branch 1 taken 1794 times.
✓ Branch 2 taken 13 times.
1807 randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);
398
399
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 13 times.
39 for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
400
2/2
✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
182 for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
401
2/2
✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
1092 for (dstWi = 0; dstWi < FF_ARRAY_ELEMS(input_sizes); dstWi++) {
402 936 width = filter_sizes[fsi];
403
404 936 c->srcBpc = hscale_pairs[hpi][0];
405 936 c->dstBpc = hscale_pairs[hpi][1];
406 936 c->hLumFilterSize = c->hChrFilterSize = width;
407
408
2/2
✓ Branch 0 taken 479232 times.
✓ Branch 1 taken 936 times.
480168 for (i = 0; i < SRC_PIXELS; i++) {
409 479232 filterPos[i] = i;
410 479232 filterPosAvx[i] = i;
411
412 // These filter coefficients are chosen to try break two corner
413 // cases, namely:
414 //
415 // - Negative filter coefficients. The filters output signed
416 // values, and it should be possible to end up with negative
417 // output values.
418 //
419 // - Positive clipping. The hscale filter function has clipping
420 // at (1<<15) - 1
421 //
422 // The coefficients sum to the 1.0 point for the hscale
423 // functions (1 << 14).
424
425
2/2
✓ Branch 0 taken 8945664 times.
✓ Branch 1 taken 479232 times.
9424896 for (j = 0; j < width; j++) {
426 8945664 filter[i * width + j] = -((1 << 14) / (width - 1));
427 }
428 479232 filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
429 }
430
431
2/2
✓ Branch 0 taken 37440 times.
✓ Branch 1 taken 936 times.
38376 for (i = 0; i < MAX_FILTER_WIDTH; i++) {
432 // These values should be unused in SIMD implementations but
433 // may still be read, random coefficients here should help show
434 // issues where they are used in error.
435
436 37440 filter[SRC_PIXELS * width + i] = rnd();
437 }
438 936 sws->dst_w = c->chrDstW = input_sizes[dstWi];
439 936 ff_sws_init_scale(c);
440 936 memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
441 936 ff_shuffle_filter_coefficients(c, filterPosAvx, width, filterAvx2, sws->dst_w);
442
443
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
936 av_assert0(c->hyScale == c->hcScale);
444
2/2
✓ Branch 3 taken 288 times.
✓ Branch 4 taken 648 times.
936 if (check_func(c->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", c->srcBpc, c->dstBpc + 1, width, sws->dst_w)) {
445 288 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
446 288 memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
447
448 288 call_ref(NULL, (int16_t *)dst0, sws->dst_w, src, filter, filterPos, width);
449 288 call_new(NULL, (int16_t *)dst1, sws->dst_w, src, filterAvx2, filterPosAvx, width);
450
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
288 if (memcmp(dst0, dst1, sws->dst_w * sizeof(dst0[0])))
451 fail();
452
1/8
✗ Branch 1 not taken.
✓ Branch 2 taken 288 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
288 bench_new(NULL, (int16_t *)dst0, sws->dst_w, src, filter, filterPosAvx, width);
453 }
454 }
455 }
456 }
457 13 sws_freeContext(sws);
458 13 }
459
460 13 void checkasm_check_sw_scale(void)
461 {
462 13 check_hscale();
463 13 report("hscale");
464 13 check_yuv2yuv1(0);
465 13 check_yuv2yuv1(1);
466 13 report("yuv2yuv1");
467 13 check_yuv2yuvX(0, 8, AV_PIX_FMT_YUV420P);
468 13 check_yuv2yuvX(1, 8, AV_PIX_FMT_YUV420P);
469 13 report("yuv2yuvX_8");
470 13 check_yuv2yuvX(0, 9, AV_PIX_FMT_YUV420P9LE);
471 13 check_yuv2yuvX(1, 9, AV_PIX_FMT_YUV420P9LE);
472 13 report("yuv2yuvX_9LE");
473 13 check_yuv2yuvX(0, 9, AV_PIX_FMT_YUV420P9BE);
474 13 check_yuv2yuvX(1, 9, AV_PIX_FMT_YUV420P9BE);
475 13 report("yuv2yuvX_9BE");
476 13 check_yuv2yuvX(0, 10, AV_PIX_FMT_YUV420P10LE);
477 13 check_yuv2yuvX(1, 10, AV_PIX_FMT_YUV420P10LE);
478 13 report("yuv2yuvX_10LE");
479 13 check_yuv2yuvX(0, 10, AV_PIX_FMT_YUV420P10BE);
480 13 check_yuv2yuvX(1, 10, AV_PIX_FMT_YUV420P10BE);
481 13 report("yuv2yuvX_10BE");
482 13 check_yuv2yuvX(0, 12, AV_PIX_FMT_YUV420P12LE);
483 13 check_yuv2yuvX(1, 12, AV_PIX_FMT_YUV420P12LE);
484 13 report("yuv2yuvX_12LE");
485 13 check_yuv2yuvX(0, 12, AV_PIX_FMT_YUV420P12BE);
486 13 check_yuv2yuvX(1, 12, AV_PIX_FMT_YUV420P12BE);
487 13 report("yuv2yuvX_12BE");
488 13 check_yuv2yuvX(0, 14, AV_PIX_FMT_YUV420P14LE);
489 13 check_yuv2yuvX(1, 14, AV_PIX_FMT_YUV420P14LE);
490 13 report("yuv2yuvX_14LE");
491 13 check_yuv2yuvX(0, 14, AV_PIX_FMT_YUV420P14BE);
492 13 check_yuv2yuvX(1, 14, AV_PIX_FMT_YUV420P14BE);
493 13 report("yuv2yuvX_14BE");
494 13 check_yuv2nv12cX(0);
495 13 check_yuv2nv12cX(1);
496 13 report("yuv2nv12cX");
497 13 }
498