Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along | ||
17 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
19 | */ | ||
20 | |||
21 | #include <string.h> | ||
22 | |||
23 | #include "libavutil/avassert.h" | ||
24 | #include "libavutil/mem_internal.h" | ||
25 | #include "libavutil/refstruct.h" | ||
26 | |||
27 | #include "libswscale/ops.h" | ||
28 | #include "libswscale/ops_internal.h" | ||
29 | |||
30 | #include "checkasm.h" | ||
31 | |||
32 | enum { | ||
33 | LINES = 2, | ||
34 | NB_PLANES = 4, | ||
35 | PIXELS = 64, | ||
36 | }; | ||
37 | |||
38 | enum { | ||
39 | U8 = SWS_PIXEL_U8, | ||
40 | U16 = SWS_PIXEL_U16, | ||
41 | U32 = SWS_PIXEL_U32, | ||
42 | F32 = SWS_PIXEL_F32, | ||
43 | }; | ||
44 | |||
45 | #define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__) | ||
46 | 12465 | static const char *tprintf(char buf[], size_t size, const char *fmt, ...) | |
47 | { | ||
48 | va_list ap; | ||
49 | 12465 | va_start(ap, fmt); | |
50 | 12465 | vsnprintf(buf, size, fmt, ap); | |
51 | 12465 | va_end(ap); | |
52 | 12465 | return buf; | |
53 | } | ||
54 | |||
55 | 16056 | static int rw_pixel_bits(const SwsOp *op) | |
56 | { | ||
57 |
2/2✓ Branch 0 taken 420 times.
✓ Branch 1 taken 15636 times.
|
16056 | const int elems = op->rw.packed ? op->rw.elems : 1; |
58 | 16056 | const int size = ff_sws_pixel_type_size(op->type); | |
59 | 16056 | const int bits = 8 >> op->rw.frac; | |
60 | av_assert1(bits >= 1); | ||
61 | 16056 | return elems * size * bits; | |
62 | } | ||
63 | |||
64 | 2981888 | static float rndf(void) | |
65 | { | ||
66 | union { uint32_t u; float f; } x; | ||
67 | do { | ||
68 | 3005531 | x.u = rnd(); | |
69 |
2/2✓ Branch 0 taken 23643 times.
✓ Branch 1 taken 2981888 times.
|
3005531 | } while (!isnormal(x.f)); |
70 | 2981888 | return x.f; | |
71 | } | ||
72 | |||
73 | 6240 | static void fill32f(float *line, int num, unsigned range) | |
74 | { | ||
75 | 6240 | const float scale = (float) range / UINT32_MAX; | |
76 |
2/2✓ Branch 0 taken 3194880 times.
✓ Branch 1 taken 6240 times.
|
3201120 | for (int i = 0; i < num; i++) |
77 |
2/2✓ Branch 0 taken 212992 times.
✓ Branch 1 taken 2981888 times.
|
3194880 | line[i] = range ? scale * rnd() : rndf(); |
78 | 6240 | } | |
79 | |||
80 | 18590 | static void fill32(uint32_t *line, int num, unsigned range) | |
81 | { | ||
82 |
2/2✓ Branch 0 taken 9518080 times.
✓ Branch 1 taken 18590 times.
|
9536670 | for (int i = 0; i < num; i++) |
83 |
3/4✓ Branch 0 taken 266240 times.
✓ Branch 1 taken 9251840 times.
✓ Branch 2 taken 266240 times.
✗ Branch 3 not taken.
|
9518080 | line[i] = (range && range < UINT_MAX) ? rnd() % (range + 1) : rnd(); |
84 | 18590 | } | |
85 | |||
86 | 7800 | static void fill16(uint16_t *line, int num, unsigned range) | |
87 | { | ||
88 |
2/2✓ Branch 0 taken 7319 times.
✓ Branch 1 taken 481 times.
|
7800 | if (!range) { |
89 | 7319 | fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 1), 0); | |
90 | } else { | ||
91 |
2/2✓ Branch 0 taken 492544 times.
✓ Branch 1 taken 481 times.
|
493025 | for (int i = 0; i < num; i++) |
92 | 492544 | line[i] = rnd() % (range + 1); | |
93 | } | ||
94 | 7800 | } | |
95 | |||
96 | 4888 | static void fill8(uint8_t *line, int num, unsigned range) | |
97 | { | ||
98 |
2/2✓ Branch 0 taken 4511 times.
✓ Branch 1 taken 377 times.
|
4888 | if (!range) { |
99 | 4511 | fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 2), 0); | |
100 | } else { | ||
101 |
2/2✓ Branch 0 taken 772096 times.
✓ Branch 1 taken 377 times.
|
772473 | for (int i = 0; i < num; i++) |
102 | 772096 | line[i] = rnd() % (range + 1); | |
103 | } | ||
104 | 4888 | } | |
105 | |||
106 | 6422 | static void check_ops(const char *report, const unsigned ranges[NB_PLANES], | |
107 | const SwsOp *ops) | ||
108 | { | ||
109 | 6422 | SwsContext *ctx = sws_alloc_context(); | |
110 | 6422 | SwsCompiledOp comp_ref = {0}, comp_new = {0}; | |
111 | 6422 | const SwsOpBackend *backend_new = NULL; | |
112 | 6422 | SwsOpList oplist = { .ops = (SwsOp *) ops }; | |
113 | const SwsOp *read_op, *write_op; | ||
114 | static const unsigned def_ranges[4] = {0}; | ||
115 |
2/2✓ Branch 0 taken 858 times.
✓ Branch 1 taken 5564 times.
|
6422 | if (!ranges) |
116 | 858 | ranges = def_ranges; | |
117 | |||
118 | 6422 | declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end); | |
119 | |||
120 | DECLARE_ALIGNED_64(char, src0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])]; | ||
121 | DECLARE_ALIGNED_64(char, src1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])]; | ||
122 | DECLARE_ALIGNED_64(char, dst0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])]; | ||
123 | DECLARE_ALIGNED_64(char, dst1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])]; | ||
124 | |||
125 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6422 times.
|
6422 | if (!ctx) |
126 | ✗ | return; | |
127 | 6422 | ctx->flags = SWS_BITEXACT; | |
128 | |||
129 | 6422 | read_op = &ops[0]; | |
130 |
2/2✓ Branch 0 taken 19240 times.
✓ Branch 1 taken 6422 times.
|
25662 | for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++) |
131 | 19240 | write_op = &ops[oplist.num_ops]; | |
132 | |||
133 | 6422 | const int read_size = PIXELS * rw_pixel_bits(read_op) >> 3; | |
134 | 6422 | const int write_size = PIXELS * rw_pixel_bits(write_op) >> 3; | |
135 | |||
136 |
2/2✓ Branch 0 taken 25688 times.
✓ Branch 1 taken 6422 times.
|
32110 | for (int p = 0; p < NB_PLANES; p++) { |
137 | 25688 | void *plane = src0[p]; | |
138 |
4/5✓ Branch 0 taken 4888 times.
✓ Branch 1 taken 7800 times.
✓ Branch 2 taken 6760 times.
✓ Branch 3 taken 6240 times.
✗ Branch 4 not taken.
|
25688 | switch (read_op->type) { |
139 | 4888 | case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), ranges[p]); break; | |
140 | 7800 | case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), ranges[p]); break; | |
141 | 6760 | case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break; | |
142 | 6240 | case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break; | |
143 | } | ||
144 | } | ||
145 | |||
146 | 6422 | memcpy(src1, src0, sizeof(src0)); | |
147 | 6422 | memset(dst0, 0, sizeof(dst0)); | |
148 | 6422 | memset(dst1, 0, sizeof(dst1)); | |
149 | |||
150 | /* Compile `ops` using both the asm and c backends */ | ||
151 |
2/2✓ Branch 0 taken 19266 times.
✓ Branch 1 taken 6422 times.
|
25688 | for (int n = 0; ff_sws_op_backends[n]; n++) { |
152 | 19266 | const SwsOpBackend *backend = ff_sws_op_backends[n]; | |
153 | 19266 | const bool is_ref = !strcmp(backend->name, "c"); | |
154 |
4/4✓ Branch 0 taken 12844 times.
✓ Branch 1 taken 6422 times.
✓ Branch 2 taken 11089 times.
✓ Branch 3 taken 1755 times.
|
19266 | if (is_ref || !comp_new.func) { |
155 | SwsCompiledOp comp; | ||
156 | 17511 | int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp); | |
157 |
2/2✓ Branch 0 taken 8827 times.
✓ Branch 1 taken 8684 times.
|
17511 | if (ret == AVERROR(ENOTSUP)) |
158 | 8827 | continue; | |
159 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8684 times.
|
8684 | else if (ret < 0) |
160 | ✗ | fail(); | |
161 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8684 times.
|
8684 | else if (PIXELS % comp.block_size != 0) |
162 | ✗ | fail(); | |
163 | |||
164 |
2/2✓ Branch 0 taken 6422 times.
✓ Branch 1 taken 2262 times.
|
8684 | if (is_ref) |
165 | 6422 | comp_ref = comp; | |
166 |
2/2✓ Branch 0 taken 6422 times.
✓ Branch 1 taken 2262 times.
|
8684 | if (!comp_new.func) { |
167 | 6422 | comp_new = comp; | |
168 | 6422 | backend_new = backend; | |
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
173 |
2/4✓ Branch 0 taken 6422 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6422 times.
|
6422 | av_assert0(comp_ref.func && comp_new.func); |
174 | |||
175 | 6422 | SwsOpExec exec = {0}; | |
176 | 6422 | exec.width = PIXELS; | |
177 | 6422 | exec.height = exec.slice_h = 1; | |
178 |
2/2✓ Branch 0 taken 25688 times.
✓ Branch 1 taken 6422 times.
|
32110 | for (int i = 0; i < NB_PLANES; i++) { |
179 | 25688 | exec.in_stride[i] = sizeof(src0[i][0]); | |
180 | 25688 | exec.out_stride[i] = sizeof(dst0[i][0]); | |
181 | 25688 | exec.in_bump[i] = exec.in_stride[i] - read_size; | |
182 | 25688 | exec.out_bump[i] = exec.out_stride[i] - write_size; | |
183 | } | ||
184 | |||
185 | /** | ||
186 | * Don't use check_func() because the actual function pointer may be a | ||
187 | * wrapper shared by multiple implementations. Instead, take a hash of both | ||
188 | * the backend pointer and the active CPU flags. | ||
189 | */ | ||
190 | 6422 | uintptr_t id = (uintptr_t) backend_new; | |
191 | 6422 | id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new.cpu_flags; | |
192 | |||
193 | 6422 | checkasm_save_context(); | |
194 |
2/2✓ Branch 1 taken 803 times.
✓ Branch 2 taken 5619 times.
|
6422 | if (checkasm_check_func((void *) id, "%s", report)) { |
195 | 803 | func_new = comp_new.func; | |
196 | 803 | func_ref = comp_ref.func; | |
197 | |||
198 | 803 | exec.block_size_in = comp_ref.block_size * rw_pixel_bits(read_op) >> 3; | |
199 | 803 | exec.block_size_out = comp_ref.block_size * rw_pixel_bits(write_op) >> 3; | |
200 |
2/2✓ Branch 0 taken 3212 times.
✓ Branch 1 taken 803 times.
|
4015 | for (int i = 0; i < NB_PLANES; i++) { |
201 | 3212 | exec.in[i] = (void *) src0[i]; | |
202 | 3212 | exec.out[i] = (void *) dst0[i]; | |
203 | } | ||
204 | 803 | call_ref(&exec, comp_ref.priv, 0, 0, PIXELS / comp_ref.block_size, LINES); | |
205 | |||
206 | 803 | exec.block_size_in = comp_new.block_size * rw_pixel_bits(read_op) >> 3; | |
207 | 803 | exec.block_size_out = comp_new.block_size * rw_pixel_bits(write_op) >> 3; | |
208 |
2/2✓ Branch 0 taken 3212 times.
✓ Branch 1 taken 803 times.
|
4015 | for (int i = 0; i < NB_PLANES; i++) { |
209 | 3212 | exec.in[i] = (void *) src1[i]; | |
210 | 3212 | exec.out[i] = (void *) dst1[i]; | |
211 | } | ||
212 | 803 | call_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES); | |
213 | |||
214 |
2/2✓ Branch 0 taken 3131 times.
✓ Branch 1 taken 776 times.
|
3907 | for (int i = 0; i < NB_PLANES; i++) { |
215 | 3131 | const char *name = FMT("%s[%d]", report, i); | |
216 | 3131 | const int stride = sizeof(dst0[i][0]); | |
217 | |||
218 |
4/5✓ Branch 0 taken 517 times.
✓ Branch 1 taken 1130 times.
✓ Branch 2 taken 658 times.
✓ Branch 3 taken 826 times.
✗ Branch 4 not taken.
|
3131 | switch (write_op->type) { |
219 | 517 | case U8: | |
220 | 517 | checkasm_check(uint8_t, (void *) dst0[i], stride, | |
221 | (void *) dst1[i], stride, | ||
222 | write_size, LINES, name); | ||
223 | 517 | break; | |
224 | 1130 | case U16: | |
225 | 1130 | checkasm_check(uint16_t, (void *) dst0[i], stride, | |
226 | (void *) dst1[i], stride, | ||
227 | write_size >> 1, LINES, name); | ||
228 | 1130 | break; | |
229 | 658 | case U32: | |
230 | 658 | checkasm_check(uint32_t, (void *) dst0[i], stride, | |
231 | (void *) dst1[i], stride, | ||
232 | write_size >> 2, LINES, name); | ||
233 | 658 | break; | |
234 | 826 | case F32: | |
235 | 826 | checkasm_check(float_ulp, (void *) dst0[i], stride, | |
236 | (void *) dst1[i], stride, | ||
237 | write_size >> 2, LINES, name, 0); | ||
238 | 826 | break; | |
239 | } | ||
240 | |||
241 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 3104 times.
|
3131 | if (write_op->rw.packed) |
242 | 27 | break; | |
243 | } | ||
244 | |||
245 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 803 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
803 | bench_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES); |
246 | } | ||
247 | |||
248 |
3/4✓ Branch 0 taken 2262 times.
✓ Branch 1 taken 4160 times.
✓ Branch 2 taken 2262 times.
✗ Branch 3 not taken.
|
6422 | if (comp_new.func != comp_ref.func && comp_new.free) |
249 | 2262 | comp_new.free(comp_new.priv); | |
250 |
1/2✓ Branch 0 taken 6422 times.
✗ Branch 1 not taken.
|
6422 | if (comp_ref.free) |
251 | 6422 | comp_ref.free(comp_ref.priv); | |
252 | 6422 | sws_free_context(&ctx); | |
253 | } | ||
254 | |||
255 | #define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...) \ | ||
256 | do { \ | ||
257 | check_ops(NAME, RANGES, (SwsOp[]) { \ | ||
258 | { \ | ||
259 | .op = SWS_OP_READ, \ | ||
260 | .type = IN, \ | ||
261 | .rw.elems = N_IN, \ | ||
262 | }, \ | ||
263 | __VA_ARGS__, \ | ||
264 | { \ | ||
265 | .op = SWS_OP_WRITE, \ | ||
266 | .type = OUT, \ | ||
267 | .rw.elems = N_OUT, \ | ||
268 | }, {0} \ | ||
269 | }); \ | ||
270 | } while (0) | ||
271 | |||
272 | #define MK_RANGES(R) ((const unsigned[]) { R, R, R, R }) | ||
273 | #define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...) \ | ||
274 | CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__) | ||
275 | |||
276 | #define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...) \ | ||
277 | CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__); \ | ||
278 | CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__); \ | ||
279 | CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__); \ | ||
280 | CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, { \ | ||
281 | .op = SWS_OP_SWIZZLE, \ | ||
282 | .type = OUT, \ | ||
283 | .swizzle = SWS_SWIZZLE(0, 3, 1, 2), \ | ||
284 | }) | ||
285 | |||
286 | #define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \ | ||
287 | CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__) | ||
288 | |||
289 | #define CHECK_COMMON(NAME, IN, OUT, ...) \ | ||
290 | CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__) | ||
291 | |||
292 | 13 | static void check_read_write(void) | |
293 | { | ||
294 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { |
295 | 52 | const char *type = ff_sws_pixel_type_name(t); | |
296 |
2/2✓ Branch 0 taken 208 times.
✓ Branch 1 taken 52 times.
|
260 | for (int i = 1; i <= 4; i++) { |
297 | /* Test N->N planar read/write */ | ||
298 |
2/2✓ Branch 0 taken 520 times.
✓ Branch 1 taken 208 times.
|
728 | for (int o = 1; o <= i; o++) { |
299 | 520 | check_ops(FMT("rw_%d_%d_%s", i, o, type), NULL, (SwsOp[]) { | |
300 | { | ||
301 | .op = SWS_OP_READ, | ||
302 | .type = t, | ||
303 | .rw.elems = i, | ||
304 | }, { | ||
305 | .op = SWS_OP_WRITE, | ||
306 | .type = t, | ||
307 | .rw.elems = o, | ||
308 | }, {0} | ||
309 | }); | ||
310 | } | ||
311 | |||
312 | /* Test packed read/write */ | ||
313 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 156 times.
|
208 | if (i == 1) |
314 | 52 | continue; | |
315 | |||
316 | 156 | check_ops(FMT("read_packed%d_%s", i, type), NULL, (SwsOp[]) { | |
317 | { | ||
318 | .op = SWS_OP_READ, | ||
319 | .type = t, | ||
320 | .rw.elems = i, | ||
321 | .rw.packed = true, | ||
322 | }, { | ||
323 | .op = SWS_OP_WRITE, | ||
324 | .type = t, | ||
325 | .rw.elems = i, | ||
326 | }, {0} | ||
327 | }); | ||
328 | |||
329 | 156 | check_ops(FMT("write_packed%d_%s", i, type), NULL, (SwsOp[]) { | |
330 | { | ||
331 | .op = SWS_OP_READ, | ||
332 | .type = t, | ||
333 | .rw.elems = i, | ||
334 | }, { | ||
335 | .op = SWS_OP_WRITE, | ||
336 | .type = t, | ||
337 | .rw.elems = i, | ||
338 | .rw.packed = true, | ||
339 | }, {0} | ||
340 | }); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | /* Test fractional reads/writes */ | ||
345 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int frac = 1; frac <= 3; frac++) { |
346 | 39 | const int bits = 8 >> frac; | |
347 | 39 | const int range = (1 << bits) - 1; | |
348 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 26 times.
|
39 | if (bits == 2) |
349 | 13 | continue; /* no 2 bit packed formats currently exist */ | |
350 | |||
351 | 26 | check_ops(FMT("read_frac%d", frac), NULL, (SwsOp[]) { | |
352 | { | ||
353 | .op = SWS_OP_READ, | ||
354 | .type = U8, | ||
355 | .rw.elems = 1, | ||
356 | .rw.frac = frac, | ||
357 | }, { | ||
358 | .op = SWS_OP_WRITE, | ||
359 | .type = U8, | ||
360 | .rw.elems = 1, | ||
361 | }, {0} | ||
362 | }); | ||
363 | |||
364 | 26 | check_ops(FMT("write_frac%d", frac), MK_RANGES(range), (SwsOp[]) { | |
365 | { | ||
366 | .op = SWS_OP_READ, | ||
367 | .type = U8, | ||
368 | .rw.elems = 1, | ||
369 | }, { | ||
370 | .op = SWS_OP_WRITE, | ||
371 | .type = U8, | ||
372 | .rw.elems = 1, | ||
373 | .rw.frac = frac, | ||
374 | }, {0} | ||
375 | }); | ||
376 | } | ||
377 | 13 | } | |
378 | |||
379 | 13 | static void check_swap_bytes(void) | |
380 | { | ||
381 | 13 | CHECK_COMMON("swap_bytes_16", U16, U16, { | |
382 | .op = SWS_OP_SWAP_BYTES, | ||
383 | .type = U16, | ||
384 | }); | ||
385 | |||
386 | 13 | CHECK_COMMON("swap_bytes_32", U32, U32, { | |
387 | .op = SWS_OP_SWAP_BYTES, | ||
388 | .type = U32, | ||
389 | }); | ||
390 | 13 | } | |
391 | |||
392 | 13 | static void check_pack_unpack(void) | |
393 | { | ||
394 | const struct { | ||
395 | SwsPixelType type; | ||
396 | SwsPackOp op; | ||
397 | 13 | } patterns[] = { | |
398 | { U8, {{ 3, 3, 2 }}}, | ||
399 | { U8, {{ 2, 3, 3 }}}, | ||
400 | { U8, {{ 1, 2, 1 }}}, | ||
401 | {U16, {{ 5, 6, 5 }}}, | ||
402 | {U16, {{ 5, 5, 5 }}}, | ||
403 | {U16, {{ 4, 4, 4 }}}, | ||
404 | {U32, {{ 2, 10, 10, 10 }}}, | ||
405 | {U32, {{10, 10, 10, 2 }}}, | ||
406 | }; | ||
407 | |||
408 |
2/2✓ Branch 0 taken 104 times.
✓ Branch 1 taken 13 times.
|
117 | for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { |
409 | 104 | const SwsPixelType type = patterns[i].type; | |
410 | 104 | const SwsPackOp pack = patterns[i].op; | |
411 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 78 times.
|
104 | const int num = pack.pattern[3] ? 4 : 3; |
412 | 104 | const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1], | |
413 | pack.pattern[2], pack.pattern[3]); | ||
414 | 104 | const int total = pack.pattern[0] + pack.pattern[1] + | |
415 | 104 | pack.pattern[2] + pack.pattern[3]; | |
416 | 104 | const unsigned ranges[4] = { | |
417 | 104 | (1 << pack.pattern[0]) - 1, | |
418 | 104 | (1 << pack.pattern[1]) - 1, | |
419 | 104 | (1 << pack.pattern[2]) - 1, | |
420 | 104 | (1 << pack.pattern[3]) - 1, | |
421 | }; | ||
422 | |||
423 | 104 | CHECK_RANGES(FMT("pack_%s", pat), ranges, num, 1, type, type, { | |
424 | .op = SWS_OP_PACK, | ||
425 | .type = type, | ||
426 | .pack = pack, | ||
427 | }); | ||
428 | |||
429 | 104 | CHECK_RANGE(FMT("unpack_%s", pat), (1 << total) - 1, 1, num, type, type, { | |
430 | .op = SWS_OP_UNPACK, | ||
431 | .type = type, | ||
432 | .pack = pack, | ||
433 | }); | ||
434 | } | ||
435 | 13 | } | |
436 | |||
437 | 1139788 | static AVRational rndq(SwsPixelType t) | |
438 | { | ||
439 | 1139788 | const unsigned num = rnd(); | |
440 |
2/2✓ Branch 0 taken 1560 times.
✓ Branch 1 taken 1138228 times.
|
1139788 | if (ff_sws_pixel_type_is_int(t)) { |
441 | 1560 | const unsigned mask = (1 << (ff_sws_pixel_type_size(t) * 8)) - 1; | |
442 | 1560 | return (AVRational) { num & mask, 1 }; | |
443 | } else { | ||
444 | 1138228 | const unsigned den = rnd(); | |
445 |
1/2✓ Branch 0 taken 1138228 times.
✗ Branch 1 not taken.
|
1138228 | return (AVRational) { num, den ? den : 1 }; |
446 | } | ||
447 | } | ||
448 | |||
449 | 13 | static void check_clear(void) | |
450 | { | ||
451 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { |
452 | 52 | const char *type = ff_sws_pixel_type_name(t); | |
453 | 52 | const int bits = ff_sws_pixel_type_size(t) * 8; | |
454 | |||
455 | /* TODO: AVRational can't fit 32 bit constants */ | ||
456 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 26 times.
|
52 | if (bits < 32) { |
457 | 26 | const AVRational chroma = (AVRational) { 1 << (bits - 1), 1}; | |
458 | 26 | const AVRational alpha = (AVRational) { (1 << bits) - 1, 1}; | |
459 | 26 | const AVRational zero = (AVRational) { 0, 1}; | |
460 | 26 | const AVRational none = {0}; | |
461 | |||
462 | 104 | const SwsConst patterns[] = { | |
463 | /* Zero only */ | ||
464 | {.q4 = { none, none, none, zero }}, | ||
465 | {.q4 = { zero, none, none, none }}, | ||
466 | /* Alpha only */ | ||
467 | {.q4 = { none, none, none, alpha }}, | ||
468 | {.q4 = { alpha, none, none, none }}, | ||
469 | /* Chroma only */ | ||
470 | {.q4 = { chroma, chroma, none, none }}, | ||
471 | {.q4 = { none, chroma, chroma, none }}, | ||
472 | {.q4 = { none, none, chroma, chroma }}, | ||
473 | {.q4 = { chroma, none, chroma, none }}, | ||
474 | {.q4 = { none, chroma, none, chroma }}, | ||
475 | /* Alpha+chroma */ | ||
476 | {.q4 = { chroma, chroma, none, alpha }}, | ||
477 | {.q4 = { none, chroma, chroma, alpha }}, | ||
478 | {.q4 = { alpha, none, chroma, chroma }}, | ||
479 | {.q4 = { chroma, none, chroma, alpha }}, | ||
480 | {.q4 = { alpha, chroma, none, chroma }}, | ||
481 | /* Random values */ | ||
482 | 26 | {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, | |
483 | 26 | {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, | |
484 | 26 | {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, | |
485 | 26 | {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, | |
486 | }; | ||
487 | |||
488 |
2/2✓ Branch 0 taken 468 times.
✓ Branch 1 taken 26 times.
|
494 | for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { |
489 | 468 | CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, { | |
490 | .op = SWS_OP_CLEAR, | ||
491 | .type = t, | ||
492 | .c = patterns[i], | ||
493 | }); | ||
494 | } | ||
495 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | } else if (!ff_sws_pixel_type_is_int(t)) { |
496 | /* Floating point YUV doesn't exist, only alpha needs to be cleared */ | ||
497 | 13 | CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, { | |
498 | .op = SWS_OP_CLEAR, | ||
499 | .type = t, | ||
500 | .c.q4[3] = { 0, 1 }, | ||
501 | }); | ||
502 | } | ||
503 | } | ||
504 | 13 | } | |
505 | |||
506 | 13 | static void check_shift(void) | |
507 | { | ||
508 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) { |
509 | 39 | const char *type = ff_sws_pixel_type_name(t); | |
510 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 26 times.
|
39 | if (!ff_sws_pixel_type_is_int(t)) |
511 | 13 | continue; | |
512 | |||
513 |
2/2✓ Branch 0 taken 208 times.
✓ Branch 1 taken 26 times.
|
234 | for (int shift = 1; shift <= 8; shift++) { |
514 | 208 | CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, { | |
515 | .op = SWS_OP_LSHIFT, | ||
516 | .type = t, | ||
517 | .c.u = shift, | ||
518 | }); | ||
519 | |||
520 | 208 | CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, { | |
521 | .op = SWS_OP_RSHIFT, | ||
522 | .type = t, | ||
523 | .c.u = shift, | ||
524 | }); | ||
525 | } | ||
526 | } | ||
527 | 13 | } | |
528 | |||
529 | 13 | static void check_swizzle(void) | |
530 | { | ||
531 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { |
532 | 52 | const char *type = ff_sws_pixel_type_name(t); | |
533 | static const int patterns[][4] = { | ||
534 | /* Pure swizzle */ | ||
535 | {3, 0, 1, 2}, | ||
536 | {3, 0, 2, 1}, | ||
537 | {2, 1, 0, 3}, | ||
538 | {3, 2, 1, 0}, | ||
539 | {3, 1, 0, 2}, | ||
540 | {3, 2, 0, 1}, | ||
541 | {1, 2, 0, 3}, | ||
542 | {1, 0, 2, 3}, | ||
543 | {2, 0, 1, 3}, | ||
544 | {2, 3, 1, 0}, | ||
545 | {2, 1, 3, 0}, | ||
546 | {1, 2, 3, 0}, | ||
547 | {1, 3, 2, 0}, | ||
548 | {0, 2, 1, 3}, | ||
549 | {0, 2, 3, 1}, | ||
550 | {0, 3, 1, 2}, | ||
551 | {3, 1, 2, 0}, | ||
552 | {0, 3, 2, 1}, | ||
553 | /* Luma expansion */ | ||
554 | {0, 0, 0, 3}, | ||
555 | {3, 0, 0, 0}, | ||
556 | {0, 0, 0, 1}, | ||
557 | {1, 0, 0, 0}, | ||
558 | }; | ||
559 | |||
560 |
2/2✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 52 times.
|
1196 | for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { |
561 | 1144 | const int x = patterns[i][0], y = patterns[i][1], | |
562 | 1144 | z = patterns[i][2], w = patterns[i][3]; | |
563 | 1144 | CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, { | |
564 | .op = SWS_OP_SWIZZLE, | ||
565 | .type = t, | ||
566 | .swizzle = SWS_SWIZZLE(x, y, z, w), | ||
567 | }); | ||
568 | } | ||
569 | } | ||
570 | 13 | } | |
571 | |||
572 | 13 | static void check_convert(void) | |
573 | { | ||
574 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) { |
575 | 52 | const char *itype = ff_sws_pixel_type_name(i); | |
576 | 52 | const int isize = ff_sws_pixel_type_size(i); | |
577 |
2/2✓ Branch 0 taken 208 times.
✓ Branch 1 taken 52 times.
|
260 | for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) { |
578 | 208 | const char *otype = ff_sws_pixel_type_name(o); | |
579 | 208 | const int osize = ff_sws_pixel_type_size(o); | |
580 | 208 | const char *name = FMT("convert_%s_%s", itype, otype); | |
581 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 156 times.
|
208 | if (i == o) |
582 | 52 | continue; | |
583 | |||
584 |
4/4✓ Branch 0 taken 91 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 13 times.
✓ Branch 3 taken 78 times.
|
156 | if (isize < osize || !ff_sws_pixel_type_is_int(o)) { |
585 | 78 | CHECK_COMMON(name, i, o, { | |
586 | .op = SWS_OP_CONVERT, | ||
587 | .type = i, | ||
588 | .convert.to = o, | ||
589 | }); | ||
590 |
3/4✓ Branch 0 taken 13 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
78 | } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) { |
591 | 78 | uint32_t range = (1 << osize * 8) - 1; | |
592 | 78 | CHECK_COMMON_RANGE(name, range, i, o, { | |
593 | .op = SWS_OP_CONVERT, | ||
594 | .type = i, | ||
595 | .convert.to = o, | ||
596 | }); | ||
597 | } | ||
598 | } | ||
599 | } | ||
600 | |||
601 | /* Check expanding conversions */ | ||
602 | 13 | CHECK_COMMON("expand16", U8, U16, { | |
603 | .op = SWS_OP_CONVERT, | ||
604 | .type = U8, | ||
605 | .convert.to = U16, | ||
606 | .convert.expand = true, | ||
607 | }); | ||
608 | |||
609 | 13 | CHECK_COMMON("expand32", U8, U32, { | |
610 | .op = SWS_OP_CONVERT, | ||
611 | .type = U8, | ||
612 | .convert.to = U32, | ||
613 | .convert.expand = true, | ||
614 | }); | ||
615 | 13 | } | |
616 | |||
617 | 13 | static void check_dither(void) | |
618 | { | ||
619 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { |
620 | 13 | const char *type = ff_sws_pixel_type_name(t); | |
621 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
|
13 | if (ff_sws_pixel_type_is_int(t)) |
622 | ✗ | continue; | |
623 | |||
624 | /* Test all sizes up to 256x256 */ | ||
625 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 13 times.
|
130 | for (int size_log2 = 0; size_log2 <= 8; size_log2++) { |
626 | 117 | const int size = 1 << size_log2; | |
627 | 117 | AVRational *matrix = av_refstruct_allocz(size * size * sizeof(*matrix)); | |
628 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 117 times.
|
117 | if (!matrix) { |
629 | ✗ | fail(); | |
630 | ✗ | return; | |
631 | } | ||
632 | |||
633 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 104 times.
|
117 | if (size == 1) { |
634 | 13 | matrix[0] = (AVRational) { 1, 2 }; | |
635 | } else { | ||
636 |
2/2✓ Branch 0 taken 1135940 times.
✓ Branch 1 taken 104 times.
|
1136044 | for (int i = 0; i < size * size; i++) |
637 | 1135940 | matrix[i] = rndq(t); | |
638 | } | ||
639 | |||
640 | 117 | CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, { | |
641 | .op = SWS_OP_DITHER, | ||
642 | .type = t, | ||
643 | .dither.size_log2 = size_log2, | ||
644 | .dither.matrix = matrix, | ||
645 | }); | ||
646 | |||
647 | 117 | av_refstruct_unref(&matrix); | |
648 | } | ||
649 | } | ||
650 | } | ||
651 | |||
652 | 13 | static void check_min_max(void) | |
653 | { | ||
654 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 13 times.
|
65 | for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { |
655 | 52 | const char *type = ff_sws_pixel_type_name(t); | |
656 | 52 | CHECK_COMMON(FMT("min_%s", type), t, t, { | |
657 | .op = SWS_OP_MIN, | ||
658 | .type = t, | ||
659 | .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) }, | ||
660 | }); | ||
661 | |||
662 | 52 | CHECK_COMMON(FMT("max_%s", type), t, t, { | |
663 | .op = SWS_OP_MAX, | ||
664 | .type = t, | ||
665 | .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) }, | ||
666 | }); | ||
667 | } | ||
668 | 13 | } | |
669 | |||
670 | 13 | static void check_linear(void) | |
671 | { | ||
672 | static const struct { | ||
673 | const char *name; | ||
674 | uint32_t mask; | ||
675 | } patterns[] = { | ||
676 | { "noop", 0 }, | ||
677 | { "luma", SWS_MASK_LUMA }, | ||
678 | { "alpha", SWS_MASK_ALPHA }, | ||
679 | { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA }, | ||
680 | { "dot3", 0x7 }, | ||
681 | { "dot4", 0xF }, | ||
682 | { "row0", SWS_MASK_ROW(0) }, | ||
683 | { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA }, | ||
684 | { "off3", SWS_MASK_OFF3 }, | ||
685 | { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
686 | { "diag3", SWS_MASK_DIAG3 }, | ||
687 | { "diag4", SWS_MASK_DIAG4 }, | ||
688 | { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA }, | ||
689 | { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 }, | ||
690 | { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
691 | { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 }, | ||
692 | { "matrix3", SWS_MASK_MAT3 }, | ||
693 | { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 }, | ||
694 | { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
695 | { "matrix4", SWS_MASK_MAT4 }, | ||
696 | { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 }, | ||
697 | }; | ||
698 | |||
699 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { |
700 | 13 | const char *type = ff_sws_pixel_type_name(t); | |
701 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
|
13 | if (ff_sws_pixel_type_is_int(t)) |
702 | ✗ | continue; | |
703 | |||
704 |
2/2✓ Branch 0 taken 273 times.
✓ Branch 1 taken 13 times.
|
286 | for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) { |
705 | 273 | const uint32_t mask = patterns[p].mask; | |
706 | 273 | SwsLinearOp lin = { .mask = mask }; | |
707 | |||
708 |
2/2✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 273 times.
|
1365 | for (int i = 0; i < 4; i++) { |
709 |
2/2✓ Branch 0 taken 5460 times.
✓ Branch 1 taken 1092 times.
|
6552 | for (int j = 0; j < 5; j++) { |
710 |
2/2✓ Branch 0 taken 1820 times.
✓ Branch 1 taken 3640 times.
|
5460 | if (mask & SWS_MASK(i, j)) { |
711 | 1820 | lin.m[i][j] = rndq(t); | |
712 | } else { | ||
713 | 3640 | lin.m[i][j] = (AVRational) { i == j, 1 }; | |
714 | } | ||
715 | } | ||
716 | } | ||
717 | |||
718 | 273 | CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, { | |
719 | .op = SWS_OP_LINEAR, | ||
720 | .type = t, | ||
721 | .lin = lin, | ||
722 | }); | ||
723 | } | ||
724 | } | ||
725 | 13 | } | |
726 | |||
727 | 13 | static void check_scale(void) | |
728 | { | ||
729 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { |
730 | 13 | const char *type = ff_sws_pixel_type_name(t); | |
731 | 13 | const int bits = ff_sws_pixel_type_size(t) * 8; | |
732 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
|
13 | if (ff_sws_pixel_type_is_int(t)) { |
733 | /* Ensure the result won't exceed the value range */ | ||
734 | ✗ | const unsigned max = (1 << bits) - 1; | |
735 | ✗ | const unsigned scale = rnd() & max; | |
736 | ✗ | const unsigned range = max / (scale ? scale : 1); | |
737 | ✗ | CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, { | |
738 | .op = SWS_OP_SCALE, | ||
739 | .type = t, | ||
740 | .c.q = { scale, 1 }, | ||
741 | }); | ||
742 | } else { | ||
743 | 13 | CHECK_COMMON(FMT("scale_%s", type), t, t, { | |
744 | .op = SWS_OP_SCALE, | ||
745 | .type = t, | ||
746 | .c.q = rndq(t), | ||
747 | }); | ||
748 | } | ||
749 | } | ||
750 | 13 | } | |
751 | |||
752 | 13 | void checkasm_check_sw_ops(void) | |
753 | { | ||
754 | 13 | check_read_write(); | |
755 | 13 | report("read_write"); | |
756 | 13 | check_swap_bytes(); | |
757 | 13 | report("swap_bytes"); | |
758 | 13 | check_pack_unpack(); | |
759 | 13 | report("pack_unpack"); | |
760 | 13 | check_clear(); | |
761 | 13 | report("clear"); | |
762 | 13 | check_shift(); | |
763 | 13 | report("shift"); | |
764 | 13 | check_swizzle(); | |
765 | 13 | report("swizzle"); | |
766 | 13 | check_convert(); | |
767 | 13 | report("convert"); | |
768 | 13 | check_dither(); | |
769 | 13 | report("dither"); | |
770 | 13 | check_min_max(); | |
771 | 13 | report("min_max"); | |
772 | 13 | check_linear(); | |
773 | 13 | report("linear"); | |
774 | 13 | check_scale(); | |
775 | 13 | report("scale"); | |
776 | 13 | } | |
777 |