FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_optimizer.c
Date: 2025-09-09 00:08:58
Exec Total Coverage
Lines: 201 510 39.4%
Functions: 3 10 30.0%
Branches: 144 439 32.8%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24
25 #include "ops.h"
26 #include "ops_internal.h"
27
28 #define RET(x) \
29 do { \
30 if ((ret = (x)) < 0) \
31 return ret; \
32 } while (0)
33
34 /* Returns true for operations that are independent per channel. These can
35 * usually be commuted freely other such operations. */
36 static bool op_type_is_independent(SwsOpType op)
37 {
38 switch (op) {
39 case SWS_OP_SWAP_BYTES:
40 case SWS_OP_LSHIFT:
41 case SWS_OP_RSHIFT:
42 case SWS_OP_CONVERT:
43 case SWS_OP_DITHER:
44 case SWS_OP_MIN:
45 case SWS_OP_MAX:
46 case SWS_OP_SCALE:
47 return true;
48 case SWS_OP_INVALID:
49 case SWS_OP_READ:
50 case SWS_OP_WRITE:
51 case SWS_OP_SWIZZLE:
52 case SWS_OP_CLEAR:
53 case SWS_OP_LINEAR:
54 case SWS_OP_PACK:
55 case SWS_OP_UNPACK:
56 return false;
57 case SWS_OP_TYPE_NB:
58 break;
59 }
60
61 av_unreachable("Invalid operation type!");
62 return false;
63 }
64
65 /* merge_comp_flags() forms a monoid with flags_identity as the null element */
66 static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
67 6357 static unsigned merge_comp_flags(unsigned a, unsigned b)
68 {
69 6357 const unsigned flags_or = SWS_COMP_GARBAGE;
70 6357 const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
71 6357 return ((a & b) & flags_and) | ((a | b) & flags_or);
72 }
73
74 /* Infer + propagate known information about components */
75 17511 void ff_sws_op_list_update_comps(SwsOpList *ops)
76 {
77 17511 SwsComps next = { .unused = {true, true, true, true} };
78 17511 SwsComps prev = { .flags = {
79 SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
80 }};
81
82 /* Forwards pass, propagates knowledge about the incoming pixel values */
83
2/2
✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
70486 for (int n = 0; n < ops->num_ops; n++) {
84 52975 SwsOp *op = &ops->ops[n];
85
86 /* Prefill min/max values automatically; may have to be fixed in
87 * special cases */
88 52975 memcpy(op->comps.min, prev.min, sizeof(prev.min));
89 52975 memcpy(op->comps.max, prev.max, sizeof(prev.max));
90
91
2/2
✓ Branch 0 taken 52663 times.
✓ Branch 1 taken 312 times.
52975 if (op->op != SWS_OP_SWAP_BYTES) {
92 52663 ff_sws_apply_op_q(op, op->comps.min);
93 52663 ff_sws_apply_op_q(op, op->comps.max);
94 }
95
96
11/12
✓ Branch 0 taken 17511 times.
✓ Branch 1 taken 17511 times.
✓ Branch 2 taken 6552 times.
✓ Branch 3 taken 1404 times.
✓ Branch 4 taken 312 times.
✓ Branch 5 taken 312 times.
✓ Branch 6 taken 1144 times.
✓ Branch 7 taken 5070 times.
✓ Branch 8 taken 2184 times.
✓ Branch 9 taken 819 times.
✓ Branch 10 taken 156 times.
✗ Branch 11 not taken.
52975 switch (op->op) {
97 17511 case SWS_OP_READ:
98
2/2
✓ Branch 0 taken 56134 times.
✓ Branch 1 taken 17511 times.
73645 for (int i = 0; i < op->rw.elems; i++) {
99
2/2
✓ Branch 0 taken 41756 times.
✓ Branch 1 taken 14378 times.
56134 if (ff_sws_pixel_type_is_int(op->type)) {
100 41756 int bits = 8 * ff_sws_pixel_type_size(op->type);
101
3/4
✓ Branch 0 taken 40703 times.
✓ Branch 1 taken 1053 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 40703 times.
41756 if (!op->rw.packed && ops->src.desc) {
102 /* Use legal value range from pixdesc if available;
103 * we don't need to do this for packed formats because
104 * non-byte-aligned packed formats will necessarily go
105 * through SWS_OP_UNPACK anyway */
106 for (int c = 0; c < 4; c++) {
107 if (ops->src.desc->comp[c].plane == i) {
108 bits = ops->src.desc->comp[c].depth;
109 break;
110 }
111 }
112 }
113
114 41756 op->comps.flags[i] = SWS_COMP_EXACT;
115 41756 op->comps.min[i] = Q(0);
116 41756 op->comps.max[i] = Q((1ULL << bits) - 1);
117 }
118 }
119
2/2
✓ Branch 0 taken 13910 times.
✓ Branch 1 taken 17511 times.
31421 for (int i = op->rw.elems; i < 4; i++)
120 13910 op->comps.flags[i] = prev.flags[i];
121 17511 break;
122 17511 case SWS_OP_WRITE:
123
2/2
✓ Branch 0 taken 49946 times.
✓ Branch 1 taken 17511 times.
67457 for (int i = 0; i < op->rw.elems; i++)
124 av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
125 /* fall through */
126 case SWS_OP_SWAP_BYTES:
127 case SWS_OP_LSHIFT:
128 case SWS_OP_RSHIFT:
129 case SWS_OP_MIN:
130 case SWS_OP_MAX:
131 /* Linearly propagate flags per component */
132
2/2
✓ Branch 0 taken 96252 times.
✓ Branch 1 taken 24063 times.
120315 for (int i = 0; i < 4; i++)
133 96252 op->comps.flags[i] = prev.flags[i];
134 24063 break;
135 1404 case SWS_OP_DITHER:
136 /* Strip zero flag because of the nonzero dithering offset */
137
2/2
✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 1404 times.
7020 for (int i = 0; i < 4; i++)
138 5616 op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
139 1404 break;
140 312 case SWS_OP_UNPACK:
141
2/2
✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
1560 for (int i = 0; i < 4; i++) {
142
2/2
✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
1248 if (op->pack.pattern[i])
143 1014 op->comps.flags[i] = prev.flags[0];
144 else
145 234 op->comps.flags[i] = SWS_COMP_GARBAGE;
146 }
147 312 break;
148 312 case SWS_OP_PACK: {
149 312 unsigned flags = flags_identity;
150
2/2
✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
1560 for (int i = 0; i < 4; i++) {
151
2/2
✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
1248 if (op->pack.pattern[i])
152 1014 flags = merge_comp_flags(flags, prev.flags[i]);
153
2/2
✓ Branch 0 taken 936 times.
✓ Branch 1 taken 312 times.
1248 if (i > 0) /* clear remaining comps for sanity */
154 936 op->comps.flags[i] = SWS_COMP_GARBAGE;
155 }
156 312 op->comps.flags[0] = flags;
157 312 break;
158 }
159 1144 case SWS_OP_CLEAR:
160
2/2
✓ Branch 0 taken 4576 times.
✓ Branch 1 taken 1144 times.
5720 for (int i = 0; i < 4; i++) {
161
2/2
✓ Branch 0 taken 2639 times.
✓ Branch 1 taken 1937 times.
4576 if (op->c.q4[i].den) {
162
2/2
✓ Branch 0 taken 132 times.
✓ Branch 1 taken 2507 times.
2639 if (op->c.q4[i].num == 0) {
163 132 op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT;
164
1/2
✓ Branch 0 taken 2507 times.
✗ Branch 1 not taken.
2507 } else if (op->c.q4[i].den == 1) {
165 2507 op->comps.flags[i] = SWS_COMP_EXACT;
166 }
167 } else {
168 1937 op->comps.flags[i] = prev.flags[i];
169 }
170 }
171 1144 break;
172 5070 case SWS_OP_SWIZZLE:
173
2/2
✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
25350 for (int i = 0; i < 4; i++)
174 20280 op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
175 5070 break;
176 2184 case SWS_OP_CONVERT:
177
2/2
✓ Branch 0 taken 8736 times.
✓ Branch 1 taken 2184 times.
10920 for (int i = 0; i < 4; i++) {
178 8736 op->comps.flags[i] = prev.flags[i];
179
2/2
✓ Branch 0 taken 6864 times.
✓ Branch 1 taken 1872 times.
8736 if (ff_sws_pixel_type_is_int(op->convert.to))
180 6864 op->comps.flags[i] |= SWS_COMP_EXACT;
181 }
182 2184 break;
183 819 case SWS_OP_LINEAR:
184
2/2
✓ Branch 0 taken 3276 times.
✓ Branch 1 taken 819 times.
4095 for (int i = 0; i < 4; i++) {
185 3276 unsigned flags = flags_identity;
186 3276 AVRational min = Q(0), max = Q(0);
187
2/2
✓ Branch 0 taken 13104 times.
✓ Branch 1 taken 3276 times.
16380 for (int j = 0; j < 4; j++) {
188 13104 const AVRational k = op->lin.m[i][j];
189 13104 AVRational mink = av_mul_q(prev.min[j], k);
190 13104 AVRational maxk = av_mul_q(prev.max[j], k);
191
2/2
✓ Branch 0 taken 5343 times.
✓ Branch 1 taken 7761 times.
13104 if (k.num) {
192 5343 flags = merge_comp_flags(flags, prev.flags[j]);
193
2/2
✓ Branch 0 taken 4017 times.
✓ Branch 1 taken 1326 times.
5343 if (k.den != 1) /* fractional coefficient */
194 4017 flags &= ~SWS_COMP_EXACT;
195
2/2
✓ Branch 0 taken 2064 times.
✓ Branch 1 taken 3279 times.
5343 if (k.num < 0)
196 2064 FFSWAP(AVRational, mink, maxk);
197 5343 min = av_add_q(min, mink);
198 5343 max = av_add_q(max, maxk);
199 }
200 }
201
2/2
✓ Branch 0 taken 1443 times.
✓ Branch 1 taken 1833 times.
3276 if (op->lin.m[i][4].num) { /* nonzero offset */
202 1443 flags &= ~SWS_COMP_ZERO;
203
1/2
✓ Branch 0 taken 1443 times.
✗ Branch 1 not taken.
1443 if (op->lin.m[i][4].den != 1) /* fractional offset */
204 1443 flags &= ~SWS_COMP_EXACT;
205 1443 min = av_add_q(min, op->lin.m[i][4]);
206 1443 max = av_add_q(max, op->lin.m[i][4]);
207 }
208 3276 op->comps.flags[i] = flags;
209 3276 op->comps.min[i] = min;
210 3276 op->comps.max[i] = max;
211 }
212 819 break;
213 156 case SWS_OP_SCALE:
214
2/2
✓ Branch 0 taken 624 times.
✓ Branch 1 taken 156 times.
780 for (int i = 0; i < 4; i++) {
215 624 op->comps.flags[i] = prev.flags[i];
216
1/2
✓ Branch 0 taken 624 times.
✗ Branch 1 not taken.
624 if (op->c.q.den != 1) /* fractional scale */
217 624 op->comps.flags[i] &= ~SWS_COMP_EXACT;
218
2/2
✓ Branch 0 taken 300 times.
✓ Branch 1 taken 324 times.
624 if (op->c.q.num < 0)
219 300 FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
220 }
221 156 break;
222
223 case SWS_OP_INVALID:
224 case SWS_OP_TYPE_NB:
225 av_unreachable("Invalid operation type!");
226 }
227
228 52975 prev = op->comps;
229 }
230
231 /* Backwards pass, solves for component dependencies */
232
2/2
✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
70486 for (int n = ops->num_ops - 1; n >= 0; n--) {
233 52975 SwsOp *op = &ops->ops[n];
234
235
7/8
✓ Branch 0 taken 35022 times.
✓ Branch 1 taken 10296 times.
✓ Branch 2 taken 312 times.
✓ Branch 3 taken 312 times.
✓ Branch 4 taken 1144 times.
✓ Branch 5 taken 5070 times.
✓ Branch 6 taken 819 times.
✗ Branch 7 not taken.
52975 switch (op->op) {
236 35022 case SWS_OP_READ:
237 case SWS_OP_WRITE:
238
2/2
✓ Branch 0 taken 106080 times.
✓ Branch 1 taken 35022 times.
141102 for (int i = 0; i < op->rw.elems; i++)
239 106080 op->comps.unused[i] = op->op == SWS_OP_READ;
240
2/2
✓ Branch 0 taken 34008 times.
✓ Branch 1 taken 35022 times.
69030 for (int i = op->rw.elems; i < 4; i++)
241 34008 op->comps.unused[i] = next.unused[i];
242 35022 break;
243 10296 case SWS_OP_SWAP_BYTES:
244 case SWS_OP_LSHIFT:
245 case SWS_OP_RSHIFT:
246 case SWS_OP_CONVERT:
247 case SWS_OP_DITHER:
248 case SWS_OP_MIN:
249 case SWS_OP_MAX:
250 case SWS_OP_SCALE:
251
2/2
✓ Branch 0 taken 41184 times.
✓ Branch 1 taken 10296 times.
51480 for (int i = 0; i < 4; i++)
252 41184 op->comps.unused[i] = next.unused[i];
253 10296 break;
254 312 case SWS_OP_UNPACK: {
255 312 bool unused = true;
256
2/2
✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
1560 for (int i = 0; i < 4; i++) {
257
2/2
✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
1248 if (op->pack.pattern[i])
258 1014 unused &= next.unused[i];
259 1248 op->comps.unused[i] = i > 0;
260 }
261 312 op->comps.unused[0] = unused;
262 312 break;
263 }
264 312 case SWS_OP_PACK:
265
2/2
✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
1560 for (int i = 0; i < 4; i++) {
266
2/2
✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
1248 if (op->pack.pattern[i])
267 1014 op->comps.unused[i] = next.unused[0];
268 else
269 234 op->comps.unused[i] = true;
270 }
271 312 break;
272 1144 case SWS_OP_CLEAR:
273
2/2
✓ Branch 0 taken 4576 times.
✓ Branch 1 taken 1144 times.
5720 for (int i = 0; i < 4; i++) {
274
2/2
✓ Branch 0 taken 2639 times.
✓ Branch 1 taken 1937 times.
4576 if (op->c.q4[i].den)
275 2639 op->comps.unused[i] = true;
276 else
277 1937 op->comps.unused[i] = next.unused[i];
278 }
279 1144 break;
280 5070 case SWS_OP_SWIZZLE: {
281 5070 bool unused[4] = { true, true, true, true };
282
2/2
✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
25350 for (int i = 0; i < 4; i++)
283 20280 unused[op->swizzle.in[i]] &= next.unused[i];
284
2/2
✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
25350 for (int i = 0; i < 4; i++)
285 20280 op->comps.unused[i] = unused[i];
286 5070 break;
287 }
288 819 case SWS_OP_LINEAR:
289
2/2
✓ Branch 0 taken 3276 times.
✓ Branch 1 taken 819 times.
4095 for (int j = 0; j < 4; j++) {
290 3276 bool unused = true;
291
2/2
✓ Branch 0 taken 13104 times.
✓ Branch 1 taken 3276 times.
16380 for (int i = 0; i < 4; i++) {
292
2/2
✓ Branch 0 taken 5343 times.
✓ Branch 1 taken 7761 times.
13104 if (op->lin.m[i][j].num)
293 5343 unused &= next.unused[i];
294 }
295 3276 op->comps.unused[j] = unused;
296 }
297 819 break;
298 }
299
300 52975 next = op->comps;
301 }
302 17511 }
303
304 /* returns log2(x) only if x is a power of two, or 0 otherwise */
305 static int exact_log2(const int x)
306 {
307 int p;
308 if (x <= 0)
309 return 0;
310 p = av_log2(x);
311 return (1 << p) == x ? p : 0;
312 }
313
314 static int exact_log2_q(const AVRational x)
315 {
316 if (x.den == 1)
317 return exact_log2(x.num);
318 else if (x.num == 1)
319 return -exact_log2(x.den);
320 else
321 return 0;
322 }
323
324 /**
325 * If a linear operation can be reduced to a scalar multiplication, returns
326 * the corresponding scaling factor, or 0 otherwise.
327 */
328 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
329 SwsConst *out_scale)
330 {
331 SwsConst scale = {0};
332
333 /* There are components not on the main diagonal */
334 if (c->mask & ~SWS_MASK_DIAG4)
335 return false;
336
337 for (int i = 0; i < 4; i++) {
338 const AVRational s = c->m[i][i];
339 if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
340 continue;
341 if (scale.q.den && av_cmp_q(s, scale.q))
342 return false;
343 scale.q = s;
344 }
345
346 if (scale.q.den)
347 *out_scale = scale;
348 return scale.q.den;
349 }
350
351 /* Extracts an integer clear operation (subset) from the given linear op. */
352 static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
353 SwsConst *out_clear)
354 {
355 SwsConst clear = {0};
356 bool ret = false;
357
358 for (int i = 0; i < 4; i++) {
359 bool const_row = c->m[i][4].den == 1; /* offset is integer */
360 for (int j = 0; j < 4; j++) {
361 const_row &= c->m[i][j].num == 0 || /* scalar is zero */
362 (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
363 }
364 if (const_row && (c->mask & SWS_MASK_ROW(i))) {
365 clear.q4[i] = c->m[i][4];
366 for (int j = 0; j < 5; j++)
367 c->m[i][j] = Q(i == j);
368 c->mask &= ~SWS_MASK_ROW(i);
369 ret = true;
370 }
371 }
372
373 if (ret)
374 *out_clear = clear;
375 return ret;
376 }
377
378 /* Unswizzle a linear operation by aligning single-input rows with
379 * their corresponding diagonal */
380 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
381 {
382 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
383 SwsLinearOp c = *op;
384
385 for (int i = 0; i < 4; i++) {
386 int idx = -1;
387 for (int j = 0; j < 4; j++) {
388 if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
389 continue;
390 if (idx >= 0)
391 return false; /* multiple inputs */
392 idx = j;
393 }
394
395 if (idx >= 0 && idx != i) {
396 /* Move coefficient to the diagonal */
397 c.m[i][i] = c.m[i][idx];
398 c.m[i][idx] = Q(0);
399 swiz.in[i] = idx;
400 }
401 }
402
403 if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
404 return false; /* no swizzle was identified */
405
406 c.mask = ff_sws_linear_mask(c);
407 *out_swiz = swiz;
408 *op = c;
409 return true;
410 }
411
412 int ff_sws_op_list_optimize(SwsOpList *ops)
413 {
414 int ret;
415
416 retry:
417 ff_sws_op_list_update_comps(ops);
418
419 for (int n = 0; n < ops->num_ops;) {
420 SwsOp dummy = {0};
421 SwsOp *op = &ops->ops[n];
422 SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
423 SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
424
425 /* common helper variable */
426 bool noop = true;
427
428 switch (op->op) {
429 case SWS_OP_READ:
430 /* Optimized further into refcopy / memcpy */
431 if (next->op == SWS_OP_WRITE &&
432 next->rw.elems == op->rw.elems &&
433 next->rw.packed == op->rw.packed &&
434 next->rw.frac == op->rw.frac)
435 {
436 ff_sws_op_list_remove_at(ops, n, 2);
437 av_assert1(ops->num_ops == 0);
438 return 0;
439 }
440
441 /* Skip reading extra unneeded components */
442 if (!op->rw.packed) {
443 int needed = op->rw.elems;
444 while (needed > 0 && next->comps.unused[needed - 1])
445 needed--;
446 if (op->rw.elems != needed) {
447 op->rw.elems = needed;
448 goto retry;
449 }
450 }
451 break;
452
453 case SWS_OP_SWAP_BYTES:
454 /* Redundant (double) swap */
455 if (next->op == SWS_OP_SWAP_BYTES) {
456 ff_sws_op_list_remove_at(ops, n, 2);
457 goto retry;
458 }
459 break;
460
461 case SWS_OP_UNPACK:
462 /* Redundant unpack+pack */
463 if (next->op == SWS_OP_PACK && next->type == op->type &&
464 next->pack.pattern[0] == op->pack.pattern[0] &&
465 next->pack.pattern[1] == op->pack.pattern[1] &&
466 next->pack.pattern[2] == op->pack.pattern[2] &&
467 next->pack.pattern[3] == op->pack.pattern[3])
468 {
469 ff_sws_op_list_remove_at(ops, n, 2);
470 goto retry;
471 }
472 break;
473
474 case SWS_OP_LSHIFT:
475 case SWS_OP_RSHIFT:
476 /* Two shifts in the same direction */
477 if (next->op == op->op) {
478 op->c.u += next->c.u;
479 ff_sws_op_list_remove_at(ops, n + 1, 1);
480 goto retry;
481 }
482
483 /* No-op shift */
484 if (!op->c.u) {
485 ff_sws_op_list_remove_at(ops, n, 1);
486 goto retry;
487 }
488 break;
489
490 case SWS_OP_CLEAR:
491 for (int i = 0; i < 4; i++) {
492 if (!op->c.q4[i].den)
493 continue;
494
495 if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
496 !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
497 op->c.q4[i].num == 0)
498 {
499 /* Redundant clear-to-zero of zero component */
500 op->c.q4[i].den = 0;
501 } else if (next->comps.unused[i]) {
502 /* Unnecessary clear of unused component */
503 op->c.q4[i] = (AVRational) {0, 0};
504 } else if (op->c.q4[i].den) {
505 noop = false;
506 }
507 }
508
509 if (noop) {
510 ff_sws_op_list_remove_at(ops, n, 1);
511 goto retry;
512 }
513
514 /* Transitive clear */
515 if (next->op == SWS_OP_CLEAR) {
516 for (int i = 0; i < 4; i++) {
517 if (next->c.q4[i].den)
518 op->c.q4[i] = next->c.q4[i];
519 }
520 ff_sws_op_list_remove_at(ops, n + 1, 1);
521 goto retry;
522 }
523
524 /* Prefer to clear as late as possible, to avoid doing
525 * redundant work */
526 if ((op_type_is_independent(next->op) && next->op != SWS_OP_SWAP_BYTES) ||
527 next->op == SWS_OP_SWIZZLE)
528 {
529 if (next->op == SWS_OP_CONVERT)
530 op->type = next->convert.to;
531 ff_sws_apply_op_q(next, op->c.q4);
532 FFSWAP(SwsOp, *op, *next);
533 goto retry;
534 }
535 break;
536
537 case SWS_OP_SWIZZLE: {
538 bool seen[4] = {0};
539 bool has_duplicates = false;
540 for (int i = 0; i < 4; i++) {
541 if (next->comps.unused[i])
542 continue;
543 if (op->swizzle.in[i] != i)
544 noop = false;
545 has_duplicates |= seen[op->swizzle.in[i]];
546 seen[op->swizzle.in[i]] = true;
547 }
548
549 /* Identity swizzle */
550 if (noop) {
551 ff_sws_op_list_remove_at(ops, n, 1);
552 goto retry;
553 }
554
555 /* Transitive swizzle */
556 if (next->op == SWS_OP_SWIZZLE) {
557 const SwsSwizzleOp orig = op->swizzle;
558 for (int i = 0; i < 4; i++)
559 op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
560 ff_sws_op_list_remove_at(ops, n + 1, 1);
561 goto retry;
562 }
563
564 /* Try to push swizzles with duplicates towards the output */
565 if (has_duplicates && op_type_is_independent(next->op)) {
566 if (next->op == SWS_OP_CONVERT)
567 op->type = next->convert.to;
568 if (next->op == SWS_OP_MIN || next->op == SWS_OP_MAX) {
569 /* Un-swizzle the next operation */
570 const SwsConst c = next->c;
571 for (int i = 0; i < 4; i++) {
572 if (!next->comps.unused[i])
573 next->c.q4[op->swizzle.in[i]] = c.q4[i];
574 }
575 }
576 FFSWAP(SwsOp, *op, *next);
577 goto retry;
578 }
579
580 /* Move swizzle out of the way between two converts so that
581 * they may be merged */
582 if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
583 op->type = next->convert.to;
584 FFSWAP(SwsOp, *op, *next);
585 goto retry;
586 }
587 break;
588 }
589
590 case SWS_OP_CONVERT:
591 /* No-op conversion */
592 if (op->type == op->convert.to) {
593 ff_sws_op_list_remove_at(ops, n, 1);
594 goto retry;
595 }
596
597 /* Transitive conversion */
598 if (next->op == SWS_OP_CONVERT &&
599 op->convert.expand == next->convert.expand)
600 {
601 av_assert1(op->convert.to == next->type);
602 op->convert.to = next->convert.to;
603 ff_sws_op_list_remove_at(ops, n + 1, 1);
604 goto retry;
605 }
606
607 /* Conversion followed by integer expansion */
608 if (next->op == SWS_OP_SCALE && !op->convert.expand &&
609 !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
610 {
611 op->convert.expand = true;
612 ff_sws_op_list_remove_at(ops, n + 1, 1);
613 goto retry;
614 }
615 break;
616
617 case SWS_OP_MIN:
618 for (int i = 0; i < 4; i++) {
619 if (next->comps.unused[i] || !op->c.q4[i].den)
620 continue;
621 if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
622 noop = false;
623 }
624
625 if (noop) {
626 ff_sws_op_list_remove_at(ops, n, 1);
627 goto retry;
628 }
629 break;
630
631 case SWS_OP_MAX:
632 for (int i = 0; i < 4; i++) {
633 if (next->comps.unused[i] || !op->c.q4[i].den)
634 continue;
635 if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
636 noop = false;
637 }
638
639 if (noop) {
640 ff_sws_op_list_remove_at(ops, n, 1);
641 goto retry;
642 }
643 break;
644
645 case SWS_OP_DITHER:
646 for (int i = 0; i < 4; i++) {
647 noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
648 next->comps.unused[i];
649 }
650
651 if (noop) {
652 ff_sws_op_list_remove_at(ops, n, 1);
653 goto retry;
654 }
655 break;
656
657 case SWS_OP_LINEAR: {
658 SwsSwizzleOp swizzle;
659 SwsConst c;
660
661 /* No-op (identity) linear operation */
662 if (!op->lin.mask) {
663 ff_sws_op_list_remove_at(ops, n, 1);
664 goto retry;
665 }
666
667 if (next->op == SWS_OP_LINEAR) {
668 /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
669 const SwsLinearOp m1 = op->lin;
670 const SwsLinearOp m2 = next->lin;
671 for (int i = 0; i < 4; i++) {
672 for (int j = 0; j < 5; j++) {
673 AVRational sum = Q(0);
674 for (int k = 0; k < 4; k++)
675 sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
676 if (j == 4) /* m1.m[4][j] == 1 */
677 sum = av_add_q(sum, m2.m[i][4]);
678 op->lin.m[i][j] = sum;
679 }
680 }
681 op->lin.mask = ff_sws_linear_mask(op->lin);
682 ff_sws_op_list_remove_at(ops, n + 1, 1);
683 goto retry;
684 }
685
686 /* Optimize away zero columns */
687 for (int j = 0; j < 4; j++) {
688 const uint32_t col = SWS_MASK_COL(j);
689 if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
690 continue;
691 for (int i = 0; i < 4; i++)
692 op->lin.m[i][j] = Q(i == j);
693 op->lin.mask &= ~col;
694 goto retry;
695 }
696
697 /* Optimize away unused rows */
698 for (int i = 0; i < 4; i++) {
699 const uint32_t row = SWS_MASK_ROW(i);
700 if (!next->comps.unused[i] || !(op->lin.mask & row))
701 continue;
702 for (int j = 0; j < 5; j++)
703 op->lin.m[i][j] = Q(i == j);
704 op->lin.mask &= ~row;
705 goto retry;
706 }
707
708 /* Convert constant rows to explicit clear instruction */
709 if (extract_constant_rows(&op->lin, prev->comps, &c)) {
710 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
711 .op = SWS_OP_CLEAR,
712 .type = op->type,
713 .comps = op->comps,
714 .c = c,
715 }));
716 goto retry;
717 }
718
719 /* Multiplication by scalar constant */
720 if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
721 op->op = SWS_OP_SCALE;
722 op->c = c;
723 goto retry;
724 }
725
726 /* Swizzle by fixed pattern */
727 if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
728 RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
729 .op = SWS_OP_SWIZZLE,
730 .type = op->type,
731 .swizzle = swizzle,
732 }));
733 goto retry;
734 }
735 break;
736 }
737
738 case SWS_OP_SCALE: {
739 const int factor2 = exact_log2_q(op->c.q);
740
741 /* No-op scaling */
742 if (op->c.q.num == 1 && op->c.q.den == 1) {
743 ff_sws_op_list_remove_at(ops, n, 1);
744 goto retry;
745 }
746
747 /* Scaling by integer before conversion to int */
748 if (op->c.q.den == 1 &&
749 next->op == SWS_OP_CONVERT &&
750 ff_sws_pixel_type_is_int(next->convert.to))
751 {
752 op->type = next->convert.to;
753 FFSWAP(SwsOp, *op, *next);
754 goto retry;
755 }
756
757 /* Scaling by exact power of two */
758 if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
759 op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
760 op->c.u = FFABS(factor2);
761 goto retry;
762 }
763 break;
764 }
765 }
766
767 /* No optimization triggered, move on to next operation */
768 n++;
769 }
770
771 return 0;
772 }
773
774 2154 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
775 int size, uint8_t clear_val,
776 int *read_bytes, int *write_bytes)
777 {
778 2154 const SwsOp read = ops->ops[0];
779 2154 const int read_size = ff_sws_pixel_type_size(read.type);
780 2154 uint32_t mask[4] = {0};
781
782
2/4
✓ Branch 0 taken 2154 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2154 times.
2154 if (!ops->num_ops || read.op != SWS_OP_READ)
783 return AVERROR(EINVAL);
784
6/6
✓ Branch 0 taken 2142 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 2070 times.
✓ Branch 3 taken 72 times.
✓ Branch 4 taken 1614 times.
✓ Branch 5 taken 456 times.
2154 if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
785 1626 return AVERROR(ENOTSUP);
786
787
2/2
✓ Branch 0 taken 672 times.
✓ Branch 1 taken 528 times.
1200 for (int i = 0; i < read.rw.elems; i++)
788 672 mask[i] = 0x01010101 * i * read_size + 0x03020100;
789
790
1/2
✓ Branch 0 taken 552 times.
✗ Branch 1 not taken.
552 for (int opidx = 1; opidx < ops->num_ops; opidx++) {
791 552 const SwsOp *op = &ops->ops[opidx];
792
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 84 times.
✓ Branch 4 taken 108 times.
✓ Branch 5 taken 348 times.
552 switch (op->op) {
793 case SWS_OP_SWIZZLE: {
794 uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
795 for (int i = 0; i < 4; i++)
796 mask[i] = orig[op->swizzle.in[i]];
797 break;
798 }
799
800 12 case SWS_OP_SWAP_BYTES:
801
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 12 times.
60 for (int i = 0; i < 4; i++) {
802
2/3
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
48 switch (ff_sws_pixel_type_size(op->type)) {
803 24 case 2: mask[i] = av_bswap16(mask[i]); break;
804 24 case 4: mask[i] = av_bswap32(mask[i]); break;
805 }
806 }
807 12 break;
808
809 case SWS_OP_CLEAR:
810 for (int i = 0; i < 4; i++) {
811 if (!op->c.q4[i].den)
812 continue;
813 if (op->c.q4[i].num != 0 || !clear_val)
814 return AVERROR(ENOTSUP);
815 mask[i] = 0x1010101ul * clear_val;
816 }
817 break;
818
819 84 case SWS_OP_CONVERT: {
820
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 12 times.
84 if (!op->convert.expand)
821 72 return AVERROR(ENOTSUP);
822
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 12 times.
60 for (int i = 0; i < 4; i++) {
823
1/3
✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
48 switch (ff_sws_pixel_type_size(op->type)) {
824 48 case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
825 case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
826 }
827 }
828 12 break;
829 }
830
831 108 case SWS_OP_WRITE: {
832
5/6
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 96 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 72 times.
✓ Branch 5 taken 24 times.
108 if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
833 84 return AVERROR(ENOTSUP);
834
835 /* Initialize to no-op */
836 24 memset(shuffle, clear_val, size);
837
838 24 const int write_size = ff_sws_pixel_type_size(op->type);
839 24 const int read_chunk = read.rw.elems * read_size;
840 24 const int write_chunk = op->rw.elems * write_size;
841 24 const int num_groups = size / FFMAX(read_chunk, write_chunk);
842
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
168 for (int n = 0; n < num_groups; n++) {
843 144 const int base_in = n * read_chunk;
844 144 const int base_out = n * write_chunk;
845
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 144 times.
288 for (int i = 0; i < op->rw.elems; i++) {
846 144 const int offset = base_out + i * write_size;
847
2/2
✓ Branch 0 taken 384 times.
✓ Branch 1 taken 144 times.
528 for (int b = 0; b < write_size; b++) {
848 384 const uint8_t idx = mask[i] >> (b * 8);
849
1/2
✓ Branch 0 taken 384 times.
✗ Branch 1 not taken.
384 if (idx != clear_val)
850 384 shuffle[offset + b] = base_in + idx;
851 }
852 }
853 }
854
855 24 *read_bytes = num_groups * read_chunk;
856 24 *write_bytes = num_groups * write_chunk;
857 24 return num_groups;
858 }
859
860 348 default:
861 348 return AVERROR(ENOTSUP);
862 }
863 }
864
865 return AVERROR(EINVAL);
866 }
867