FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops.c
Date: 2025-10-10 03:51:19
Exec Total Coverage
Lines: 255 500 51.0%
Functions: 15 29 51.7%
Branches: 151 310 48.7%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/mem.h"
24 #include "libavutil/rational.h"
25 #include "libavutil/refstruct.h"
26
27 #include "ops.h"
28 #include "ops_internal.h"
29
/* Backend descriptors; their definitions are not part of this file. */
30 extern const SwsOpBackend backend_c;
31 extern const SwsOpBackend backend_murder;
32 extern const SwsOpBackend backend_x86;
33
/*
 * NULL-terminated list of available backends. ff_sws_ops_compile() walks it
 * in array order and stops at the first backend that compiles successfully,
 * so the order here is the order of preference. backend_x86 is only listed
 * when both ARCH_X86_64 and HAVE_X86ASM are set.
 */
34 const SwsOpBackend * const ff_sws_op_backends[] = {
35 &backend_murder,
36 #if ARCH_X86_64 && HAVE_X86ASM
37 &backend_x86,
38 #endif
39 &backend_c,
40 NULL
41 };
42
/*
 * Evaluate `x`, store the result in the caller's local variable `ret`, and
 * return it from the enclosing function if it is negative. The enclosing
 * function must declare `ret` itself.
 */
43 #define RET(x) \
44 do { \
45 if ((ret = (x)) < 0) \
46 return ret; \
47 } while (0)
48
49 4374 const char *ff_sws_pixel_type_name(SwsPixelType type)
50 {
51
4/7
✓ Branch 0 taken 436 times.
✓ Branch 1 taken 1358 times.
✓ Branch 2 taken 1419 times.
✓ Branch 3 taken 1161 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
4374 switch (type) {
52 436 case SWS_PIXEL_U8: return "u8";
53 1358 case SWS_PIXEL_U16: return "u16";
54 1419 case SWS_PIXEL_U32: return "u32";
55 1161 case SWS_PIXEL_F32: return "f32";
56 case SWS_PIXEL_NONE: return "none";
57 case SWS_PIXEL_TYPE_NB: break;
58 }
59
60 av_unreachable("Invalid pixel type!");
61 return "ERR";
62 }
63
64 989489 int ff_sws_pixel_type_size(SwsPixelType type)
65 {
66
4/7
✓ Branch 0 taken 305978 times.
✓ Branch 1 taken 294776 times.
✓ Branch 2 taken 277230 times.
✓ Branch 3 taken 111505 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
989489 switch (type) {
67 305978 case SWS_PIXEL_U8: return sizeof(uint8_t);
68 294776 case SWS_PIXEL_U16: return sizeof(uint16_t);
69 277230 case SWS_PIXEL_U32: return sizeof(uint32_t);
70 111505 case SWS_PIXEL_F32: return sizeof(float);
71 case SWS_PIXEL_NONE: break;
72 case SWS_PIXEL_TYPE_NB: break;
73 }
74
75 av_unreachable("Invalid pixel type!");
76 return 0;
77 }
78
79 1209234 bool ff_sws_pixel_type_is_int(SwsPixelType type)
80 {
81
2/4
✓ Branch 0 taken 53729 times.
✓ Branch 1 taken 1155505 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1209234 switch (type) {
82 53729 case SWS_PIXEL_U8:
83 case SWS_PIXEL_U16:
84 case SWS_PIXEL_U32:
85 53729 return true;
86 1155505 case SWS_PIXEL_F32:
87 1155505 return false;
88 case SWS_PIXEL_NONE:
89 case SWS_PIXEL_TYPE_NB: break;
90 }
91
92 av_unreachable("Invalid pixel type!");
93 return false;
94 }
95
96 SwsPixelType ff_sws_pixel_type_to_uint(SwsPixelType type)
97 {
98 if (!type)
99 return type;
100
101 switch (ff_sws_pixel_type_size(type)) {
102 case 8: return SWS_PIXEL_U8;
103 case 16: return SWS_PIXEL_U16;
104 case 32: return SWS_PIXEL_U32;
105 }
106
107 av_unreachable("Invalid pixel type!");
108 return SWS_PIXEL_NONE;
109 }
110
111 /* biased towards `a` */
112 4992 static AVRational av_min_q(AVRational a, AVRational b)
113 {
114
2/2
✓ Branch 1 taken 933 times.
✓ Branch 2 taken 4059 times.
4992 return av_cmp_q(a, b) == 1 ? b : a;
115 }
116
117 4992 static AVRational av_max_q(AVRational a, AVRational b)
118 {
119
2/2
✓ Branch 1 taken 1398 times.
✓ Branch 2 taken 3594 times.
4992 return av_cmp_q(a, b) == -1 ? b : a;
120 }
121
122 105326 void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
123 {
124 uint64_t mask[4];
125 int shift[4];
126
127
13/15
✓ Branch 0 taken 70044 times.
✓ Branch 1 taken 624 times.
✓ Branch 2 taken 624 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2288 times.
✓ Branch 5 taken 4992 times.
✓ Branch 6 taken 4992 times.
✓ Branch 7 taken 10140 times.
✓ Branch 8 taken 4368 times.
✓ Branch 9 taken 2808 times.
✓ Branch 10 taken 1248 times.
✓ Branch 11 taken 1248 times.
✓ Branch 12 taken 1638 times.
✓ Branch 13 taken 312 times.
✗ Branch 14 not taken.
105326 switch (op->op) {
128 70044 case SWS_OP_READ:
129 case SWS_OP_WRITE:
130 70044 return;
131 624 case SWS_OP_UNPACK: {
132 624 unsigned val = x[0].num;
133 624 ff_sws_pack_op_decode(op, mask, shift);
134
2/2
✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 624 times.
3120 for (int i = 0; i < 4; i++)
135 2496 x[i] = Q((val >> shift[i]) & mask[i]);
136 624 return;
137 }
138 624 case SWS_OP_PACK: {
139 624 unsigned val = 0;
140 624 ff_sws_pack_op_decode(op, mask, shift);
141
2/2
✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 624 times.
3120 for (int i = 0; i < 4; i++)
142 2496 val |= (x[i].num & mask[i]) << shift[i];
143 624 x[0] = Q(val);
144 624 return;
145 }
146 case SWS_OP_SWAP_BYTES:
147 switch (ff_sws_pixel_type_size(op->type)) {
148 case 2:
149 for (int i = 0; i < 4; i++)
150 x[i].num = av_bswap16(x[i].num);
151 break;
152 case 4:
153 for (int i = 0; i < 4; i++)
154 x[i].num = av_bswap32(x[i].num);
155 break;
156 }
157 return;
158 2288 case SWS_OP_CLEAR:
159
2/2
✓ Branch 0 taken 9152 times.
✓ Branch 1 taken 2288 times.
11440 for (int i = 0; i < 4; i++) {
160
2/2
✓ Branch 0 taken 5278 times.
✓ Branch 1 taken 3874 times.
9152 if (op->c.q4[i].den)
161 5278 x[i] = op->c.q4[i];
162 }
163 2288 return;
164 4992 case SWS_OP_LSHIFT: {
165 4992 AVRational mult = Q(1 << op->c.u);
166
2/2
✓ Branch 0 taken 19968 times.
✓ Branch 1 taken 4992 times.
24960 for (int i = 0; i < 4; i++)
167
2/2
✓ Branch 0 taken 14976 times.
✓ Branch 1 taken 4992 times.
19968 x[i] = x[i].den ? av_mul_q(x[i], mult) : x[i];
168 4992 return;
169 }
170 4992 case SWS_OP_RSHIFT: {
171 4992 AVRational mult = Q(1 << op->c.u);
172
2/2
✓ Branch 0 taken 19968 times.
✓ Branch 1 taken 4992 times.
24960 for (int i = 0; i < 4; i++)
173
2/2
✓ Branch 0 taken 14976 times.
✓ Branch 1 taken 4992 times.
19968 x[i] = x[i].den ? av_div_q(x[i], mult) : x[i];
174 4992 return;
175 }
176 10140 case SWS_OP_SWIZZLE: {
177 10140 const AVRational orig[4] = { x[0], x[1], x[2], x[3] };
178
2/2
✓ Branch 0 taken 40560 times.
✓ Branch 1 taken 10140 times.
50700 for (int i = 0; i < 4; i++)
179 40560 x[i] = orig[op->swizzle.in[i]];
180 10140 return;
181 }
182 4368 case SWS_OP_CONVERT:
183
2/2
✓ Branch 0 taken 3432 times.
✓ Branch 1 taken 936 times.
4368 if (ff_sws_pixel_type_is_int(op->convert.to)) {
184 3432 const AVRational scale = ff_sws_pixel_expand(op->type, op->convert.to);
185
2/2
✓ Branch 0 taken 13728 times.
✓ Branch 1 taken 3432 times.
17160 for (int i = 0; i < 4; i++) {
186
2/2
✓ Branch 0 taken 7488 times.
✓ Branch 1 taken 6240 times.
13728 x[i] = x[i].den ? Q(x[i].num / x[i].den) : x[i];
187
2/2
✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 11232 times.
13728 if (op->convert.expand)
188 2496 x[i] = av_mul_q(x[i], scale);
189 }
190 }
191 4368 return;
192 2808 case SWS_OP_DITHER:
193
2/2
✓ Branch 0 taken 11232 times.
✓ Branch 1 taken 2808 times.
14040 for (int i = 0; i < 4; i++)
194
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11232 times.
11232 x[i] = x[i].den ? av_add_q(x[i], av_make_q(1, 2)) : x[i];
195 2808 return;
196 1248 case SWS_OP_MIN:
197
2/2
✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 1248 times.
6240 for (int i = 0; i < 4; i++)
198 4992 x[i] = av_min_q(x[i], op->c.q4[i]);
199 1248 return;
200 1248 case SWS_OP_MAX:
201
2/2
✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 1248 times.
6240 for (int i = 0; i < 4; i++)
202 4992 x[i] = av_max_q(x[i], op->c.q4[i]);
203 1248 return;
204 1638 case SWS_OP_LINEAR: {
205 1638 const AVRational orig[4] = { x[0], x[1], x[2], x[3] };
206
2/2
✓ Branch 0 taken 6552 times.
✓ Branch 1 taken 1638 times.
8190 for (int i = 0; i < 4; i++) {
207 6552 AVRational sum = op->lin.m[i][4];
208
2/2
✓ Branch 0 taken 26208 times.
✓ Branch 1 taken 6552 times.
32760 for (int j = 0; j < 4; j++)
209 26208 sum = av_add_q(sum, av_mul_q(orig[j], op->lin.m[i][j]));
210 6552 x[i] = sum;
211 }
212 1638 return;
213 }
214 312 case SWS_OP_SCALE:
215
2/2
✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
1560 for (int i = 0; i < 4; i++)
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1248 times.
1248 x[i] = x[i].den ? av_mul_q(x[i], op->c.q) : x[i];
217 312 return;
218 }
219
220 av_unreachable("Invalid operation type!");
221 }
222
223 52975 static void op_uninit(SwsOp *op)
224 {
225
2/2
✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 51571 times.
52975 switch (op->op) {
226 1404 case SWS_OP_DITHER:
227 1404 av_refstruct_unref(&op->dither.matrix);
228 1404 break;
229 }
230
231 52975 *op = (SwsOp) {0};
232 52975 }
233
234 SwsOpList *ff_sws_op_list_alloc(void)
235 {
236 SwsOpList *ops = av_mallocz(sizeof(SwsOpList));
237 if (!ops)
238 return NULL;
239
240 ff_fmt_clear(&ops->src);
241 ff_fmt_clear(&ops->dst);
242 return ops;
243 }
244
245 23788 void ff_sws_op_list_free(SwsOpList **p_ops)
246 {
247 23788 SwsOpList *ops = *p_ops;
248
2/2
✓ Branch 0 taken 6277 times.
✓ Branch 1 taken 17511 times.
23788 if (!ops)
249 6277 return;
250
251
2/2
✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
70486 for (int i = 0; i < ops->num_ops; i++)
252 52975 op_uninit(&ops->ops[i]);
253
254 17511 av_freep(&ops->ops);
255 17511 av_free(ops);
256 17511 *p_ops = NULL;
257 }
258
259 17511 SwsOpList *ff_sws_op_list_duplicate(const SwsOpList *ops)
260 {
261 17511 SwsOpList *copy = av_malloc(sizeof(*copy));
262
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
17511 if (!copy)
263 return NULL;
264
265 17511 int num = ops->num_ops;
266
1/2
✓ Branch 0 taken 17511 times.
✗ Branch 1 not taken.
17511 if (num)
267 17511 num = 1 << av_ceil_log2(num);
268
269 17511 *copy = *ops;
270 17511 copy->ops = av_memdup(ops->ops, num * sizeof(ops->ops[0]));
271
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
17511 if (!copy->ops) {
272 av_free(copy);
273 return NULL;
274 }
275
276
2/2
✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
70486 for (int i = 0; i < ops->num_ops; i++) {
277 52975 const SwsOp *op = &ops->ops[i];
278
2/2
✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 51571 times.
52975 switch (op->op) {
279 1404 case SWS_OP_DITHER:
280 1404 av_refstruct_ref(copy->ops[i].dither.matrix);
281 1404 break;
282 }
283 }
284
285 17511 return copy;
286 }
287
288 void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
289 {
290 const int end = ops->num_ops - count;
291 av_assert2(index >= 0 && count >= 0 && index + count <= ops->num_ops);
292 op_uninit(&ops->ops[index]);
293 for (int i = index; i < end; i++)
294 ops->ops[i] = ops->ops[i + count];
295 ops->num_ops = end;
296 }
297
298 int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
299 {
300 void *ret = av_dynarray2_add((void **) &ops->ops, &ops->num_ops, sizeof(*op), NULL);
301 if (!ret) {
302 op_uninit(op);
303 return AVERROR(ENOMEM);
304 }
305
306 for (int i = ops->num_ops - 1; i > index; i--)
307 ops->ops[i] = ops->ops[i - 1];
308 ops->ops[index] = *op;
309 return 0;
310 }
311
312 int ff_sws_op_list_append(SwsOpList *ops, SwsOp *op)
313 {
314 return ff_sws_op_list_insert_at(ops, ops->num_ops, op);
315 }
316
317 2130 int ff_sws_op_list_max_size(const SwsOpList *ops)
318 {
319 2130 int max_size = 0;
320
2/2
✓ Branch 0 taken 6618 times.
✓ Branch 1 taken 2130 times.
8748 for (int i = 0; i < ops->num_ops; i++) {
321 6618 const int size = ff_sws_pixel_type_size(ops->ops[i].type);
322 6618 max_size = FFMAX(max_size, size);
323 }
324
325 2130 return max_size;
326 }
327
328 uint32_t ff_sws_linear_mask(const SwsLinearOp c)
329 {
330 uint32_t mask = 0;
331 for (int i = 0; i < 4; i++) {
332 for (int j = 0; j < 5; j++) {
333 if (av_cmp_q(c.m[i][j], Q(i == j)))
334 mask |= SWS_MASK(i, j);
335 }
336 }
337 return mask;
338 }
339
340 105 static const char *describe_lin_mask(uint32_t mask)
341 {
342 /* Try to be fairly descriptive without assuming too much */
343 static const struct {
344 char name[24];
345 uint32_t mask;
346 } patterns[] = {
347 { "noop", 0 },
348 { "luma", SWS_MASK_LUMA },
349 { "alpha", SWS_MASK_ALPHA },
350 { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA },
351 { "dot3", 0x7 },
352 { "dot4", 0xF },
353 { "row0", SWS_MASK_ROW(0) },
354 { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
355 { "col0", SWS_MASK_COL(0) },
356 { "col0+off3", SWS_MASK_COL(0) | SWS_MASK_OFF3 },
357 { "off3", SWS_MASK_OFF3 },
358 { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA },
359 { "diag3", SWS_MASK_DIAG3 },
360 { "diag4", SWS_MASK_DIAG4 },
361 { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
362 { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
363 { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
364 { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
365 { "matrix3", SWS_MASK_MAT3 },
366 { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 },
367 { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
368 { "matrix4", SWS_MASK_MAT4 },
369 { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 },
370 };
371
372
1/2
✓ Branch 0 taken 1275 times.
✗ Branch 1 not taken.
1275 for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
373
2/2
✓ Branch 0 taken 105 times.
✓ Branch 1 taken 1170 times.
1275 if (!(mask & ~patterns[i].mask))
374 105 return patterns[i].name;
375 }
376
377 av_unreachable("Invalid linear mask!");
378 return "ERR";
379 }
380
381 13528 static char describe_comp_flags(unsigned flags)
382 {
383
2/2
✓ Branch 0 taken 2865 times.
✓ Branch 1 taken 10663 times.
13528 if (flags & SWS_COMP_GARBAGE)
384 2865 return 'X';
385
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10663 times.
10663 else if (flags & SWS_COMP_ZERO)
386 return '0';
387
2/2
✓ Branch 0 taken 8098 times.
✓ Branch 1 taken 2565 times.
10663 else if (flags & SWS_COMP_EXACT)
388 8098 return '+';
389 else
390 2565 return '.';
391 }
392
393 22640 static const char *print_q(const AVRational q, char buf[], int buf_len)
394 {
395
2/2
✓ Branch 0 taken 4184 times.
✓ Branch 1 taken 18456 times.
22640 if (!q.den) {
396
2/4
✓ Branch 0 taken 4184 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4184 times.
4184 return q.num > 0 ? "inf" : q.num < 0 ? "-inf" : "nan";
397
2/2
✓ Branch 0 taken 15112 times.
✓ Branch 1 taken 3344 times.
18456 } else if (q.den == 1) {
398 15112 snprintf(buf, buf_len, "%d", q.num);
399 15112 return buf;
400
3/4
✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 2000 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1344 times.
3344 } else if (abs(q.num) > 1000 || abs(q.den) > 1000) {
401 2000 snprintf(buf, buf_len, "%f", av_q2d(q));
402 2000 return buf;
403 } else {
404 1344 snprintf(buf, buf_len, "%d/%d", q.num, q.den);
405 1344 return buf;
406 }
407 }
408
/*
 * Format `q` with print_q() into a fresh, zero-initialized 32-byte compound
 * literal, passing 31 as the buffer length. The returned pointer may refer
 * to that compound literal, so it is only valid inside the block in which
 * the macro is expanded.
 */
409 #define PRINTQ(q) print_q(q, (char[32]){0}, sizeof(char[32]) - 1)
410
411 1547 void ff_sws_op_list_print(void *log, int lev, const SwsOpList *ops)
412 {
413
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
1547 if (!ops->num_ops) {
414 av_log(log, lev, " (empty)\n");
415 return;
416 }
417
418
2/2
✓ Branch 0 taken 3382 times.
✓ Branch 1 taken 1547 times.
4929 for (int i = 0; i < ops->num_ops; i++) {
419 3382 const SwsOp *op = &ops->ops[i];
420 16910 av_log(log, lev, " [%3s %c%c%c%c -> %c%c%c%c] ",
421 3382 ff_sws_pixel_type_name(op->type),
422
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3382 times.
3382 op->comps.unused[0] ? 'X' : '.',
423
2/2
✓ Branch 0 taken 1382 times.
✓ Branch 1 taken 2000 times.
3382 op->comps.unused[1] ? 'X' : '.',
424
2/2
✓ Branch 0 taken 1736 times.
✓ Branch 1 taken 1646 times.
3382 op->comps.unused[2] ? 'X' : '.',
425
2/2
✓ Branch 0 taken 1781 times.
✓ Branch 1 taken 1601 times.
3382 op->comps.unused[3] ? 'X' : '.',
426 3382 describe_comp_flags(op->comps.flags[0]),
427 3382 describe_comp_flags(op->comps.flags[1]),
428 3382 describe_comp_flags(op->comps.flags[2]),
429 3382 describe_comp_flags(op->comps.flags[3]));
430
431
12/16
✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
✓ Branch 2 taken 30 times.
✓ Branch 3 taken 352 times.
✓ Branch 4 taken 352 times.
✓ Branch 5 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 339 times.
✓ Branch 8 taken 223 times.
✓ Branch 9 taken 180 times.
✓ Branch 10 taken 92 times.
✓ Branch 11 taken 92 times.
✓ Branch 12 taken 105 times.
✓ Branch 13 taken 20 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
3382 switch (op->op) {
432 case SWS_OP_INVALID:
433 av_log(log, lev, "SWS_OP_INVALID\n");
434 break;
435 1547 case SWS_OP_READ:
436 case SWS_OP_WRITE:
437 3094 av_log(log, lev, "%-20s: %d elem(s) %s >> %d\n",
438 1547 op->op == SWS_OP_READ ? "SWS_OP_READ"
439 : "SWS_OP_WRITE",
440
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
1547 op->rw.elems, op->rw.packed ? "packed" : "planar",
441
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 1502 times.
1547 op->rw.frac);
442 1547 break;
443 30 case SWS_OP_SWAP_BYTES:
444 30 av_log(log, lev, "SWS_OP_SWAP_BYTES\n");
445 30 break;
446 352 case SWS_OP_LSHIFT:
447 352 av_log(log, lev, "%-20s: << %u\n", "SWS_OP_LSHIFT", op->c.u);
448 352 break;
449 352 case SWS_OP_RSHIFT:
450 352 av_log(log, lev, "%-20s: >> %u\n", "SWS_OP_RSHIFT", op->c.u);
451 352 break;
452 50 case SWS_OP_PACK:
453 case SWS_OP_UNPACK:
454 50 av_log(log, lev, "%-20s: {%d %d %d %d}\n",
455 50 op->op == SWS_OP_PACK ? "SWS_OP_PACK"
456 : "SWS_OP_UNPACK",
457 50 op->pack.pattern[0], op->pack.pattern[1],
458
2/2
✓ Branch 0 taken 25 times.
✓ Branch 1 taken 25 times.
50 op->pack.pattern[2], op->pack.pattern[3]);
459 50 break;
460 case SWS_OP_CLEAR:
461 av_log(log, lev, "%-20s: {%s %s %s %s}\n", "SWS_OP_CLEAR",
462 op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
463 op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
464 op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
465 op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
466 break;
467 339 case SWS_OP_SWIZZLE:
468 339 av_log(log, lev, "%-20s: %d%d%d%d\n", "SWS_OP_SWIZZLE",
469 339 op->swizzle.x, op->swizzle.y, op->swizzle.z, op->swizzle.w);
470 339 break;
471 223 case SWS_OP_CONVERT:
472 446 av_log(log, lev, "%-20s: %s -> %s%s\n", "SWS_OP_CONVERT",
473 223 ff_sws_pixel_type_name(op->type),
474 223 ff_sws_pixel_type_name(op->convert.to),
475
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 208 times.
223 op->convert.expand ? " (expand)" : "");
476 223 break;
477 180 case SWS_OP_DITHER:
478 180 av_log(log, lev, "%-20s: %dx%d matrix\n", "SWS_OP_DITHER",
479 180 1 << op->dither.size_log2, 1 << op->dither.size_log2);
480 180 break;
481 92 case SWS_OP_MIN:
482 92 av_log(log, lev, "%-20s: x <= {%s %s %s %s}\n", "SWS_OP_MIN",
483
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
484
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
485
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
486
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
487 92 break;
488 92 case SWS_OP_MAX:
489 92 av_log(log, lev, "%-20s: {%s %s %s %s} <= x\n", "SWS_OP_MAX",
490
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
491
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
492
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
493
1/2
✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
92 op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
494 92 break;
495 105 case SWS_OP_LINEAR:
496 105 av_log(log, lev, "%-20s: %s [[%s %s %s %s %s] "
497 "[%s %s %s %s %s] "
498 "[%s %s %s %s %s] "
499 "[%s %s %s %s %s]]\n",
500 105 "SWS_OP_LINEAR", describe_lin_mask(op->lin.mask),
501 105 PRINTQ(op->lin.m[0][0]), PRINTQ(op->lin.m[0][1]), PRINTQ(op->lin.m[0][2]), PRINTQ(op->lin.m[0][3]), PRINTQ(op->lin.m[0][4]),
502 105 PRINTQ(op->lin.m[1][0]), PRINTQ(op->lin.m[1][1]), PRINTQ(op->lin.m[1][2]), PRINTQ(op->lin.m[1][3]), PRINTQ(op->lin.m[1][4]),
503 105 PRINTQ(op->lin.m[2][0]), PRINTQ(op->lin.m[2][1]), PRINTQ(op->lin.m[2][2]), PRINTQ(op->lin.m[2][3]), PRINTQ(op->lin.m[2][4]),
504 105 PRINTQ(op->lin.m[3][0]), PRINTQ(op->lin.m[3][1]), PRINTQ(op->lin.m[3][2]), PRINTQ(op->lin.m[3][3]), PRINTQ(op->lin.m[3][4]));
505 105 break;
506 20 case SWS_OP_SCALE:
507 20 av_log(log, lev, "%-20s: * %s\n", "SWS_OP_SCALE",
508 20 PRINTQ(op->c.q));
509 20 break;
510 case SWS_OP_TYPE_NB:
511 break;
512 }
513
514
3/4
✓ Branch 0 taken 909 times.
✓ Branch 1 taken 2473 times.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
3382 if (op->comps.min[0].den || op->comps.min[1].den ||
515
2/4
✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
909 op->comps.min[2].den || op->comps.min[3].den ||
516
2/4
✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
909 op->comps.max[0].den || op->comps.max[1].den ||
517
2/4
✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 909 times.
909 op->comps.max[2].den || op->comps.max[3].den)
518 {
519 2473 av_log(log, AV_LOG_TRACE, " min: {%s, %s, %s, %s}, max: {%s, %s, %s, %s}\n",
520 2473 PRINTQ(op->comps.min[0]), PRINTQ(op->comps.min[1]),
521 2473 PRINTQ(op->comps.min[2]), PRINTQ(op->comps.min[3]),
522 2473 PRINTQ(op->comps.max[0]), PRINTQ(op->comps.max[1]),
523 2473 PRINTQ(op->comps.max[2]), PRINTQ(op->comps.max[3]));
524 }
525
526 }
527
528 1547 av_log(log, lev, " (X = unused, + = exact, 0 = zero)\n");
529 }
530
531 17511 int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
532 const SwsOpList *ops, SwsCompiledOp *out)
533 {
534 SwsOpList *copy, rest;
535 17511 SwsCompiledOp compiled = {0};
536 17511 int ret = 0;
537
538 17511 copy = ff_sws_op_list_duplicate(ops);
539
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
17511 if (!copy)
540 return AVERROR(ENOMEM);
541
542 /* Ensure these are always set during compilation */
543 17511 ff_sws_op_list_update_comps(copy);
544
545 /* Make an on-stack copy of `ops` to ensure we can still properly clean up
546 * the copy afterwards */
547 17511 rest = *copy;
548
549 17511 ret = backend->compile(ctx, &rest, &compiled);
550
2/2
✓ Branch 0 taken 8827 times.
✓ Branch 1 taken 8684 times.
17511 if (ret < 0) {
551
1/2
✓ Branch 0 taken 8827 times.
✗ Branch 1 not taken.
8827 int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
552 17654 av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
553 8827 backend->name, av_err2str(ret));
554
2/2
✓ Branch 0 taken 1547 times.
✓ Branch 1 taken 7280 times.
8827 if (rest.num_ops != ops->num_ops) {
555 1547 av_log(ctx, msg_lev, "Uncompiled remainder:\n");
556 1547 ff_sws_op_list_print(ctx, msg_lev, &rest);
557 }
558 } else {
559 8684 *out = compiled;
560 }
561
562 17511 ff_sws_op_list_free(&copy);
563 17511 return ret;
564 }
565
566 int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
567 {
568 for (int n = 0; ff_sws_op_backends[n]; n++) {
569 const SwsOpBackend *backend = ff_sws_op_backends[n];
570 if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0)
571 continue;
572
573 av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
574 "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
575 backend->name, out->block_size, out->over_read, out->over_write,
576 out->cpu_flags);
577 return 0;
578 }
579
580 av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
581 ff_sws_op_list_print(ctx, AV_LOG_WARNING, ops);
582 return AVERROR(ENOTSUP);
583 }
584
/*
 * Private state of a graph pass that runs a compiled operation list.
 * Allocated and partly filled by ff_sws_compile_pass(); the block and tail
 * geometry is computed by op_pass_setup().
 */
585 typedef struct SwsOpPass {
/* Compiled kernel; its `free` callback, if set, is invoked by op_pass_free() */
586 SwsCompiledOp comp;
/* Template copied into the per-slice SwsOpExec by op_pass_run() */
587 SwsOpExec exec_base;
/* Pass width divided by the kernel block size, rounded up */
588 int num_blocks;
/* Byte offset of the last block within a line (input / output) */
589 int tail_off_in;
590 int tail_off_out;
/* Byte size of the pixels of the pass width that fall into the last block */
591 int tail_size_in;
592 int tail_size_out;
/* Number of planes, from rw_planes() of the read / write operation */
593 int planes_in;
594 int planes_out;
/* Bits per pixel, from rw_pixel_bits() of the read / write operation */
595 int pixel_bits_in;
596 int pixel_bits_out;
/* Set by op_pass_setup() when a block-aligned line plus the kernel's
 * over-read / over-write does not fit into the plane's linesize */
597 bool memcpy_in;
598 bool memcpy_out;
599 } SwsOpPass;
600
601 static void op_pass_free(void *ptr)
602 {
603 SwsOpPass *p = ptr;
604 if (!p)
605 return;
606
607 if (p->comp.free)
608 p->comp.free(p->comp.priv);
609
610 av_free(p);
611 }
612
613 static void op_pass_setup(const SwsImg *out, const SwsImg *in, const SwsPass *pass)
614 {
615 const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->fmt);
616 const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->fmt);
617
618 SwsOpPass *p = pass->priv;
619 SwsOpExec *exec = &p->exec_base;
620 const SwsCompiledOp *comp = &p->comp;
621 const int block_size = comp->block_size;
622 p->num_blocks = (pass->width + block_size - 1) / block_size;
623
624 /* Set up main loop parameters */
625 const int aligned_w = p->num_blocks * block_size;
626 const int safe_width = (p->num_blocks - 1) * block_size;
627 const int tail_size = pass->width - safe_width;
628 p->tail_off_in = safe_width * p->pixel_bits_in >> 3;
629 p->tail_off_out = safe_width * p->pixel_bits_out >> 3;
630 p->tail_size_in = tail_size * p->pixel_bits_in >> 3;
631 p->tail_size_out = tail_size * p->pixel_bits_out >> 3;
632 p->memcpy_in = false;
633 p->memcpy_out = false;
634
635 for (int i = 0; i < p->planes_in; i++) {
636 const int sub_x = (i == 1 || i == 2) ? indesc->log2_chroma_w : 0;
637 const int plane_w = (aligned_w + sub_x) >> sub_x;
638 const int plane_pad = (comp->over_read + sub_x) >> sub_x;
639 const int plane_size = plane_w * p->pixel_bits_in >> 3;
640 p->memcpy_in |= plane_size + plane_pad > in->linesize[i];
641 exec->in_stride[i] = in->linesize[i];
642 }
643
644 for (int i = 0; i < p->planes_out; i++) {
645 const int sub_x = (i == 1 || i == 2) ? outdesc->log2_chroma_w : 0;
646 const int plane_w = (aligned_w + sub_x) >> sub_x;
647 const int plane_pad = (comp->over_write + sub_x) >> sub_x;
648 const int plane_size = plane_w * p->pixel_bits_out >> 3;
649 p->memcpy_out |= plane_size + plane_pad > out->linesize[i];
650 exec->out_stride[i] = out->linesize[i];
651 }
652
653 /* Pre-fill pointer bump for the main section only; this value does not
654 * matter at all for the tail / last row handlers because they only ever
655 * process a single line */
656 const int blocks_main = p->num_blocks - p->memcpy_out;
657 for (int i = 0; i < 4; i++) {
658 exec->in_bump[i] = in->linesize[i] - blocks_main * exec->block_size_in;
659 exec->out_bump[i] = out->linesize[i] - blocks_main * exec->block_size_out;
660 }
661 }
662
663 /* Dispatch kernel over the last column of the image using memcpy */
/*
 * Runs the kernel on the last block (index num_blocks - 1) of `h` rows
 * starting at row `y`, one row per kernel call. With copy_in / copy_out set,
 * the corresponding side goes through an on-stack scratch buffer instead of
 * the image: the tail bytes of each row are copied into the scratch buffer
 * before the call and/or copied back to the image after it.
 *
 * `exec` is modified: its in/out pointers, and for the copied sides also its
 * strides, are overwritten here.
 */
664 static av_always_inline void
665 handle_tail(const SwsOpPass *p, SwsOpExec *exec,
666 const SwsImg *out_base, const bool copy_out,
667 const SwsImg *in_base, const bool copy_in,
668 int y, const int h)
669 {
/* Scratch space: tmp[0] for input, tmp[1] for output, four planes each */
670 DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])];
671
672 const SwsCompiledOp *comp = &p->comp;
673 const int tail_size_in = p->tail_size_in;
674 const int tail_size_out = p->tail_size_out;
/* Index of the last block */
675 const int bx = p->num_blocks - 1;
676
677 SwsImg in = ff_sws_img_shift(in_base, y);
678 SwsImg out = ff_sws_img_shift(out_base, y);
/* Point the kernel either at the scratch buffer or at the tail of the row */
679 for (int i = 0; i < p->planes_in; i++) {
680 in.data[i] += p->tail_off_in;
681 if (copy_in) {
682 exec->in[i] = (void *) tmp[0][i];
683 exec->in_stride[i] = sizeof(tmp[0][i]);
684 } else {
685 exec->in[i] = in.data[i];
686 }
687 }
688
689 for (int i = 0; i < p->planes_out; i++) {
690 out.data[i] += p->tail_off_out;
691 if (copy_out) {
692 exec->out[i] = (void *) tmp[1][i];
693 exec->out_stride[i] = sizeof(tmp[1][i]);
694 } else {
695 exec->out[i] = out.data[i];
696 }
697 }
698
699 for (int y_end = y + h; y < y_end; y++) {
/* Stage the tail of this input row in the scratch buffer */
700 if (copy_in) {
701 for (int i = 0; i < p->planes_in; i++) {
702 av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]);
703 memcpy(tmp[0][i], in.data[i], tail_size_in);
704 in.data[i] += in.linesize[i];
705 }
706 }
707
/* One block (bx up to num_blocks), one row (y up to y + 1) */
708 comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1);
709
/* Copy only the valid tail bytes of the result back into the image */
710 if (copy_out) {
711 for (int i = 0; i < p->planes_out; i++) {
712 av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]);
713 memcpy(out.data[i], tmp[1][i], tail_size_out);
714 out.data[i] += out.linesize[i];
715 }
716 }
717
/* Sides that work directly on the image advance to the next row;
 * scratch-buffer pointers stay where they are */
718 for (int i = 0; i < 4; i++) {
719 if (!copy_in)
720 exec->in[i] += in.linesize[i];
721 if (!copy_out)
722 exec->out[i] += out.linesize[i];
723 }
724 }
725 }
726
/*
 * Run the compiled kernel on rows y up to y + h of the pass.
 *
 * The slice is processed in up to four pieces: the main section, the
 * directly processed part of the last row (only when the input tail needs a
 * copy), and the last column / last row tail via handle_tail().
 */
727 static void op_pass_run(const SwsImg *out_base, const SwsImg *in_base,
728 const int y, const int h, const SwsPass *pass)
729 {
730 const SwsOpPass *p = pass->priv;
731 const SwsCompiledOp *comp = &p->comp;
732 const SwsImg in = ff_sws_img_shift(in_base, y);
733 const SwsImg out = ff_sws_img_shift(out_base, y);
734
735 /* Fill exec metadata for this slice */
736 DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
737 exec.slice_y = y;
738 exec.slice_h = h;
739 for (int i = 0; i < 4; i++) {
740 exec.in[i] = in.data[i];
741 exec.out[i] = out.data[i];
742 }
743
744 /**
745 * To ensure safety, we need to consider the following:
746 *
747 * 1. We can overread the input, unless this is the last line of an
748 * unpadded buffer. All defined operations can handle arbitrary pixel
749 * input, so overread of arbitrary data is fine.
750 *
751 * 2. We can overwrite the output, as long as we don't write more than the
752 * amount of pixels that fit into one linesize. So we always need to
753 * memcpy the last column on the output side if unpadded.
754 *
755 * 3. For the last row, we also need to memcpy the remainder of the input,
756 * to avoid reading past the end of the buffer. Note that since we know
757 * the run() function is called on stripes of the same buffer, we don't
758 * need to worry about this for the end of a slice.
759 */
760
761 const int last_slice = y + h == pass->height;
762 const bool memcpy_in = last_slice && p->memcpy_in;
763 const bool memcpy_out = p->memcpy_out;
764 const int num_blocks = p->num_blocks;
/* The bools count as 0 or 1 here: the main section leaves out the last
 * block when the output tail is copied, and the last row when the input
 * tail is copied */
765 const int blocks_main = num_blocks - memcpy_out;
766 const int h_main = h - memcpy_in;
767
768 /* Handle main section */
769 comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main);
770
771 if (memcpy_in) {
772 /* Safe part of last row */
773 for (int i = 0; i < 4; i++) {
774 exec.in[i] += h_main * in.linesize[i];
775 exec.out[i] += h_main * out.linesize[i];
776 }
777 comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h);
778 }
779
780 /* Handle last column via memcpy, takes over `exec` so call these last */
781 if (memcpy_out)
782 handle_tail(p, &exec, out_base, true, in_base, false, y, h_main);
783 if (memcpy_in)
784 handle_tail(p, &exec, out_base, memcpy_out, in_base, true, y + h_main, 1);
785 }
786
787 static int rw_planes(const SwsOp *op)
788 {
789 return op->rw.packed ? 1 : op->rw.elems;
790 }
791
792 static int rw_pixel_bits(const SwsOp *op)
793 {
794 const int elems = op->rw.packed ? op->rw.elems : 1;
795 const int size = ff_sws_pixel_type_size(op->type);
796 const int bits = 8 >> op->rw.frac;
797 av_assert1(bits >= 1);
798 return elems * size * bits;
799 }
800
801 int ff_sws_compile_pass(SwsGraph *graph, SwsOpList *ops, int flags, SwsFormat dst,
802 SwsPass *input, SwsPass **output)
803 {
804 SwsContext *ctx = graph->ctx;
805 SwsOpPass *p = NULL;
806 const SwsOp *read = &ops->ops[0];
807 const SwsOp *write = &ops->ops[ops->num_ops - 1];
808 SwsPass *pass;
809 int ret;
810
811 if (ops->num_ops < 2) {
812 av_log(ctx, AV_LOG_ERROR, "Need at least two operations.\n");
813 return AVERROR(EINVAL);
814 }
815
816 if (read->op != SWS_OP_READ || write->op != SWS_OP_WRITE) {
817 av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
818 "and write, respectively.\n");
819 return AVERROR(EINVAL);
820 }
821
822 if (flags & SWS_OP_FLAG_OPTIMIZE)
823 RET(ff_sws_op_list_optimize(ops));
824 else
825 ff_sws_op_list_update_comps(ops);
826
827 p = av_mallocz(sizeof(*p));
828 if (!p)
829 return AVERROR(ENOMEM);
830
831 ret = ff_sws_ops_compile(ctx, ops, &p->comp);
832 if (ret < 0)
833 goto fail;
834
835 p->planes_in = rw_planes(read);
836 p->planes_out = rw_planes(write);
837 p->pixel_bits_in = rw_pixel_bits(read);
838 p->pixel_bits_out = rw_pixel_bits(write);
839 p->exec_base = (SwsOpExec) {
840 .width = dst.width,
841 .height = dst.height,
842 .block_size_in = p->comp.block_size * p->pixel_bits_in >> 3,
843 .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3,
844 };
845
846 pass = ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, input,
847 1, p, op_pass_run);
848 if (!pass) {
849 ret = AVERROR(ENOMEM);
850 goto fail;
851 }
852 pass->setup = op_pass_setup;
853 pass->free = op_pass_free;
854
855 *output = pass;
856 return 0;
857
858 fail:
859 op_pass_free(p);
860 return ret;
861 }
862