FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_optimizer.c
Date: 2026-03-12 17:57:28
Exec Total Coverage
Lines: 402 429 93.7%
Functions: 9 9 100.0%
Branches: 331 376 88.0%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24
25 #include "ops.h"
26 #include "ops_internal.h"
27
28 #define RET(x) \
29 do { \
30 if ((ret = (x)) < 0) \
31 return ret; \
32 } while (0)
33
34 /**
35 * Try to commute a clear op with the next operation. Makes any adjustments
36 * to the operations as needed, but does not perform the actual commutation.
37 *
38 * Returns whether successful.
39 */
40 51904 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42 SwsOp tmp;
43
44 av_assert1(op->op == SWS_OP_CLEAR);
45
4/6
✓ Branch 0 taken 9120 times.
✓ Branch 1 taken 15310 times.
✓ Branch 2 taken 3529 times.
✓ Branch 3 taken 23945 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
51904 switch (next->op) {
46 9120 case SWS_OP_CONVERT:
47 9120 op->type = next->convert.to;
48 /* fall through */
49 24430 case SWS_OP_LSHIFT:
50 case SWS_OP_RSHIFT:
51 case SWS_OP_DITHER:
52 case SWS_OP_MIN:
53 case SWS_OP_MAX:
54 case SWS_OP_SCALE:
55 case SWS_OP_READ:
56 case SWS_OP_SWIZZLE:
57 24430 ff_sws_apply_op_q(next, op->c.q4);
58 24430 return true;
59 3529 case SWS_OP_SWAP_BYTES:
60
2/3
✓ Branch 0 taken 2920 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 609 times.
3529 switch (next->type) {
61 2920 case SWS_PIXEL_U16:
62 2920 ff_sws_apply_op_q(next, op->c.q4); /* always works */
63 2920 return true;
64 case SWS_PIXEL_U32:
65 for (int i = 0; i < 4; i++) {
66 uint32_t v = av_bswap32(op->c.q4[i].num);
67 if (v > INT_MAX)
68 return false; /* can't represent as AVRational anymore */
69 tmp.c.q4[i] = Q(v);
70 }
71 op->c = tmp.c;
72 return true;
73 609 default:
74 609 return false;
75 }
76 23945 case SWS_OP_INVALID:
77 case SWS_OP_WRITE:
78 case SWS_OP_LINEAR:
79 case SWS_OP_PACK:
80 case SWS_OP_UNPACK:
81 case SWS_OP_CLEAR:
82 23945 return false;
83 case SWS_OP_TYPE_NB:
84 break;
85 }
86
87 av_unreachable("Invalid operation type!");
88 return false;
89 }
90
91 /**
92 * Try to commute a swizzle op with the next operation. Makes any adjustments
93 * to the operations as needed, but does not perform the actual commutation.
94 *
95 * Returns whether successful.
96 */
97 60367 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
98 {
99 60367 bool seen[4] = {0};
100
101 av_assert1(op->op == SWS_OP_SWIZZLE);
102
5/7
✓ Branch 0 taken 19578 times.
✓ Branch 1 taken 11779 times.
✓ Branch 2 taken 3178 times.
✓ Branch 3 taken 2814 times.
✓ Branch 4 taken 23018 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
60367 switch (next->op) {
103 19578 case SWS_OP_CONVERT:
104 19578 op->type = next->convert.to;
105 /* fall through */
106 31357 case SWS_OP_SWAP_BYTES:
107 case SWS_OP_LSHIFT:
108 case SWS_OP_RSHIFT:
109 case SWS_OP_SCALE:
110 31357 return true;
111
112 /**
113 * We can commute per-channel ops only if the per-channel constants are the
114 * same for all duplicated channels; e.g.:
115 * SWIZZLE {0, 0, 0, 3}
116 * NEXT {x, x, x, w}
117 * ->
118 * NEXT {x, _, _, w}
119 * SWIZZLE {0, 0, 0, 3}
120 */
121 3178 case SWS_OP_MIN:
122 case SWS_OP_MAX: {
123 3178 const SwsConst c = next->c;
124
2/2
✓ Branch 0 taken 12712 times.
✓ Branch 1 taken 3178 times.
15890 for (int i = 0; i < 4; i++) {
125
2/2
✓ Branch 0 taken 2934 times.
✓ Branch 1 taken 9778 times.
12712 if (next->comps.unused[i])
126 2934 continue;
127 9778 const int j = op->swizzle.in[i];
128
3/4
✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 7654 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2124 times.
9778 if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129 return false;
130 9778 next->c.q4[j] = c.q4[i];
131 9778 seen[j] = true;
132 }
133 3178 return true;
134 }
135
136 2814 case SWS_OP_DITHER: {
137 2814 const SwsDitherOp d = next->dither;
138
2/2
✓ Branch 0 taken 10440 times.
✓ Branch 1 taken 2406 times.
12846 for (int i = 0; i < 4; i++) {
139
2/2
✓ Branch 0 taken 2260 times.
✓ Branch 1 taken 8180 times.
10440 if (next->comps.unused[i])
140 2260 continue;
141 8180 const int j = op->swizzle.in[i];
142
4/4
✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 6440 times.
✓ Branch 2 taken 408 times.
✓ Branch 3 taken 1332 times.
8180 if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143 408 return false;
144 7772 next->dither.y_offset[j] = d.y_offset[i];
145 7772 seen[j] = true;
146 }
147 2406 return true;
148 }
149
150 23018 case SWS_OP_INVALID:
151 case SWS_OP_READ:
152 case SWS_OP_WRITE:
153 case SWS_OP_SWIZZLE:
154 case SWS_OP_CLEAR:
155 case SWS_OP_LINEAR:
156 case SWS_OP_PACK:
157 case SWS_OP_UNPACK:
158 23018 return false;
159 case SWS_OP_TYPE_NB:
160 break;
161 }
162
163 av_unreachable("Invalid operation type!");
164 return false;
165 }
166
167 /* returns log2(x) only if x is a power of two, or 0 otherwise */
168 39107 static int exact_log2(const int x)
169 {
170 int p;
171
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 39107 times.
39107 if (x <= 0)
172 return 0;
173 39107 p = av_log2(x);
174
2/2
✓ Branch 0 taken 14607 times.
✓ Branch 1 taken 24500 times.
39107 return (1 << p) == x ? p : 0;
175 }
176
177 77966 static int exact_log2_q(const AVRational x)
178 {
179
2/2
✓ Branch 0 taken 21513 times.
✓ Branch 1 taken 56453 times.
77966 if (x.den == 1)
180 21513 return exact_log2(x.num);
181
2/2
✓ Branch 0 taken 17594 times.
✓ Branch 1 taken 38859 times.
56453 else if (x.num == 1)
182 17594 return -exact_log2(x.den);
183 else
184 38859 return 0;
185 }
186
187 /**
188 * If a linear operation can be reduced to a scalar multiplication, returns
189 * the corresponding scaling factor, or 0 otherwise.
190 */
191 110683 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192 SwsConst *out_scale)
193 {
194 110683 SwsConst scale = {0};
195
196 /* There are components not on the main diagonal */
197
2/2
✓ Branch 0 taken 84166 times.
✓ Branch 1 taken 26517 times.
110683 if (c->mask & ~SWS_MASK_DIAG4)
198 84166 return false;
199
200
2/2
✓ Branch 0 taken 81106 times.
✓ Branch 1 taken 10262 times.
91368 for (int i = 0; i < 4; i++) {
201 81106 const AVRational s = c->m[i][i];
202
3/4
✓ Branch 0 taken 81106 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10702 times.
✓ Branch 3 taken 70404 times.
81106 if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
203 10702 continue;
204
4/4
✓ Branch 0 taken 43887 times.
✓ Branch 1 taken 26517 times.
✓ Branch 3 taken 16255 times.
✓ Branch 4 taken 27632 times.
70404 if (scale.q.den && av_cmp_q(s, scale.q))
205 16255 return false;
206 54149 scale.q = s;
207 }
208
209
1/2
✓ Branch 0 taken 10262 times.
✗ Branch 1 not taken.
10262 if (scale.q.den)
210 10262 *out_scale = scale;
211 10262 return scale.q.den;
212 }
213
214 /* Extracts an integer clear operation (subset) from the given linear op. */
215 119429 static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
216 SwsConst *out_clear)
217 {
218 119429 SwsConst clear = {0};
219 119429 bool ret = false;
220
221
2/2
✓ Branch 0 taken 477716 times.
✓ Branch 1 taken 119429 times.
597145 for (int i = 0; i < 4; i++) {
222 477716 bool const_row = c->m[i][4].den == 1; /* offset is integer */
223
2/2
✓ Branch 0 taken 1910864 times.
✓ Branch 1 taken 477716 times.
2388580 for (int j = 0; j < 4; j++) {
224
2/2
✓ Branch 0 taken 788502 times.
✓ Branch 1 taken 1122362 times.
2699366 const_row &= c->m[i][j].num == 0 || /* scalar is zero */
225
2/2
✓ Branch 0 taken 17994 times.
✓ Branch 1 taken 770508 times.
788502 (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226 }
227
4/4
✓ Branch 0 taken 11430 times.
✓ Branch 1 taken 466286 times.
✓ Branch 2 taken 10740 times.
✓ Branch 3 taken 690 times.
477716 if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228 10740 clear.q4[i] = c->m[i][4];
229
2/2
✓ Branch 0 taken 53700 times.
✓ Branch 1 taken 10740 times.
64440 for (int j = 0; j < 5; j++)
230 53700 c->m[i][j] = Q(i == j);
231 10740 c->mask &= ~SWS_MASK_ROW(i);
232 10740 ret = true;
233 }
234 }
235
236
2/2
✓ Branch 0 taken 8746 times.
✓ Branch 1 taken 110683 times.
119429 if (ret)
237 8746 *out_clear = clear;
238 119429 return ret;
239 }
240
241 /* Unswizzle a linear operation by aligning single-input rows with
242 * their corresponding diagonal */
243 100421 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
244 {
245 100421 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246 100421 SwsLinearOp c = *op;
247
248 /* Find non-zero coefficients in the main 4x4 matrix */
249 100421 uint32_t nonzero = 0;
250
2/2
✓ Branch 0 taken 401684 times.
✓ Branch 1 taken 100421 times.
502105 for (int i = 0; i < 4; i++) {
251
2/2
✓ Branch 0 taken 1606736 times.
✓ Branch 1 taken 401684 times.
2008420 for (int j = 0; j < 4; j++) {
252
4/4
✓ Branch 0 taken 695866 times.
✓ Branch 1 taken 910870 times.
✓ Branch 2 taken 5874 times.
✓ Branch 3 taken 689992 times.
1606736 if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
253 916744 continue;
254 689992 nonzero |= SWS_MASK(i, j);
255 }
256 }
257
258 /* If a value is unique in its row and the target column is
259 * empty, move it there and update the input swizzle */
260
2/2
✓ Branch 0 taken 401684 times.
✓ Branch 1 taken 100421 times.
502105 for (int i = 0; i < 4; i++) {
261
2/2
✓ Branch 0 taken 395810 times.
✓ Branch 1 taken 5874 times.
401684 if (nonzero & SWS_MASK_COL(i))
262 395810 continue; /* target column is not empty */
263
2/2
✓ Branch 0 taken 7944 times.
✓ Branch 1 taken 690 times.
8634 for (int j = 0; j < 4; j++) {
264
2/2
✓ Branch 0 taken 5184 times.
✓ Branch 1 taken 2760 times.
7944 if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265 /* Move coefficient to the diagonal */
266 5184 c.m[i][i] = c.m[i][j];
267 5184 c.m[i][j] = Q(0);
268 5184 swiz.in[i] = j;
269 5184 break;
270 }
271 }
272 }
273
274
2/2
✓ Branch 0 taken 97829 times.
✓ Branch 1 taken 2592 times.
100421 if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275 97829 return false; /* no swizzle was identified */
276
277 2592 c.mask = ff_sws_linear_mask(c);
278 2592 *out_swiz = swiz;
279 2592 *op = c;
280 2592 return true;
281 }
282
283 31704 int ff_sws_op_list_optimize(SwsOpList *ops)
284 {
285 int ret;
286
287 369071 retry:
288 400775 ff_sws_op_list_update_comps(ops);
289
290 /* Apply all in-place optimizations (that do not re-order the list) */
291
2/2
✓ Branch 0 taken 2295687 times.
✓ Branch 1 taken 97819 times.
2393506 for (int n = 0; n < ops->num_ops; n++) {
292 2295687 SwsOp dummy = {0};
293 2295687 SwsOp *op = &ops->ops[n];
294
2/2
✓ Branch 0 taken 1894912 times.
✓ Branch 1 taken 400775 times.
2295687 SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295
2/2
✓ Branch 0 taken 2197868 times.
✓ Branch 1 taken 97819 times.
2295687 SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296
297 /* common helper variable */
298 2295687 bool noop = true;
299
300
4/4
✓ Branch 0 taken 76349 times.
✓ Branch 1 taken 2219338 times.
✓ Branch 2 taken 2030 times.
✓ Branch 3 taken 74319 times.
2295687 if (next->comps.unused[0] && next->comps.unused[1] &&
301
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2030 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
2030 next->comps.unused[2] && next->comps.unused[3])
302 {
303 /* Remove completely unused operations */
304 ff_sws_op_list_remove_at(ops, n, 1);
305 302956 goto retry;
306 }
307
308
13/13
✓ Branch 0 taken 400775 times.
✓ Branch 1 taken 191954 times.
✓ Branch 2 taken 74731 times.
✓ Branch 3 taken 42882 times.
✓ Branch 4 taken 163399 times.
✓ Branch 5 taken 279539 times.
✓ Branch 6 taken 479592 times.
✓ Branch 7 taken 88800 times.
✓ Branch 8 taken 56308 times.
✓ Branch 9 taken 98471 times.
✓ Branch 10 taken 224401 times.
✓ Branch 11 taken 77966 times.
✓ Branch 12 taken 116869 times.
2295687 switch (op->op) {
309 400775 case SWS_OP_READ:
310 /* "Compress" planar reads where not all components are needed */
311
2/2
✓ Branch 0 taken 221889 times.
✓ Branch 1 taken 178886 times.
400775 if (!op->rw.packed) {
312 221889 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313 221889 int nb_planes = 0;
314
2/2
✓ Branch 0 taken 546443 times.
✓ Branch 1 taken 221889 times.
768332 for (int i = 0; i < op->rw.elems; i++) {
315
2/2
✓ Branch 0 taken 5180 times.
✓ Branch 1 taken 541263 times.
546443 if (next->comps.unused[i]) {
316 5180 swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317 5180 continue;
318 }
319
320 541263 const int idx = nb_planes++;
321 av_assert1(idx <= i);
322 541263 ops->order_src.in[idx] = ops->order_src.in[i];
323 541263 swiz.in[i] = idx;
324 }
325
326
2/2
✓ Branch 0 taken 4230 times.
✓ Branch 1 taken 217659 times.
221889 if (nb_planes < op->rw.elems) {
327 4230 op->rw.elems = nb_planes;
328
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4230 times.
4230 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329 .op = SWS_OP_SWIZZLE,
330 .type = op->type,
331 .swizzle = swiz,
332 }));
333 4230 goto retry;
334 }
335 }
336 396545 break;
337
338 191954 case SWS_OP_SWAP_BYTES:
339 /* Redundant (double) swap */
340
2/2
✓ Branch 0 taken 209 times.
✓ Branch 1 taken 191745 times.
191954 if (next->op == SWS_OP_SWAP_BYTES) {
341 209 ff_sws_op_list_remove_at(ops, n, 2);
342 209 goto retry;
343 }
344 191745 break;
345
346 74731 case SWS_OP_UNPACK:
347 /* Redundant unpack+pack */
348
3/4
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 74685 times.
✓ Branch 2 taken 46 times.
✗ Branch 3 not taken.
74731 if (next->op == SWS_OP_PACK && next->type == op->type &&
349
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[0] == op->pack.pattern[0] &&
350
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[1] == op->pack.pattern[1] &&
351
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[2] == op->pack.pattern[2] &&
352
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[3] == op->pack.pattern[3])
353 {
354 46 ff_sws_op_list_remove_at(ops, n, 2);
355 46 goto retry;
356 }
357 74685 break;
358
359 42882 case SWS_OP_LSHIFT:
360 case SWS_OP_RSHIFT:
361 /* Two shifts in the same direction */
362
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 42678 times.
42882 if (next->op == op->op) {
363 204 op->c.u += next->c.u;
364 204 ff_sws_op_list_remove_at(ops, n + 1, 1);
365 204 goto retry;
366 }
367
368 /* No-op shift */
369
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42678 times.
42678 if (!op->c.u) {
370 ff_sws_op_list_remove_at(ops, n, 1);
371 goto retry;
372 }
373 42678 break;
374
375 163399 case SWS_OP_CLEAR:
376
2/2
✓ Branch 0 taken 653596 times.
✓ Branch 1 taken 163399 times.
816995 for (int i = 0; i < 4; i++) {
377
2/2
✓ Branch 0 taken 437815 times.
✓ Branch 1 taken 215781 times.
653596 if (!op->c.q4[i].den)
378 437815 continue;
379
380
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 215781 times.
215781 if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381 !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382 op->c.q4[i].num == 0)
383 {
384 /* Redundant clear-to-zero of zero component */
385 op->c.q4[i].den = 0;
386
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 184077 times.
215781 } else if (next->comps.unused[i]) {
387 /* Unnecessary clear of unused component */
388 31704 op->c.q4[i] = (AVRational) {0, 0};
389
1/2
✓ Branch 0 taken 184077 times.
✗ Branch 1 not taken.
184077 } else if (op->c.q4[i].den) {
390 184077 noop = false;
391 }
392 }
393
394
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 131695 times.
163399 if (noop) {
395 31704 ff_sws_op_list_remove_at(ops, n, 1);
396 31704 goto retry;
397 }
398
399 /* Transitive clear */
400
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 131505 times.
131695 if (next->op == SWS_OP_CLEAR) {
401
2/2
✓ Branch 0 taken 760 times.
✓ Branch 1 taken 190 times.
950 for (int i = 0; i < 4; i++) {
402
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 570 times.
760 if (next->c.q4[i].den)
403 190 op->c.q4[i] = next->c.q4[i];
404 }
405 190 ff_sws_op_list_remove_at(ops, n + 1, 1);
406 190 goto retry;
407 }
408 131505 break;
409
410 279539 case SWS_OP_SWIZZLE:
411
2/2
✓ Branch 0 taken 1118156 times.
✓ Branch 1 taken 279539 times.
1397695 for (int i = 0; i < 4; i++) {
412
2/2
✓ Branch 0 taken 274964 times.
✓ Branch 1 taken 843192 times.
1118156 if (next->comps.unused[i])
413 274964 continue;
414
2/2
✓ Branch 0 taken 577097 times.
✓ Branch 1 taken 266095 times.
843192 if (op->swizzle.in[i] != i)
415 577097 noop = false;
416 }
417
418 /* Identity swizzle */
419
2/2
✓ Branch 0 taken 45109 times.
✓ Branch 1 taken 234430 times.
279539 if (noop) {
420 45109 ff_sws_op_list_remove_at(ops, n, 1);
421 45109 goto retry;
422 }
423
424 /* Transitive swizzle */
425
2/2
✓ Branch 0 taken 3586 times.
✓ Branch 1 taken 230844 times.
234430 if (next->op == SWS_OP_SWIZZLE) {
426 3586 const SwsSwizzleOp orig = op->swizzle;
427
2/2
✓ Branch 0 taken 14344 times.
✓ Branch 1 taken 3586 times.
17930 for (int i = 0; i < 4; i++)
428 14344 op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429 3586 ff_sws_op_list_remove_at(ops, n + 1, 1);
430 3586 goto retry;
431 }
432
433 /* Swizzle planes instead of components, if possible */
434
4/4
✓ Branch 0 taken 49443 times.
✓ Branch 1 taken 181401 times.
✓ Branch 2 taken 8896 times.
✓ Branch 3 taken 40547 times.
230844 if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435
2/2
✓ Branch 0 taken 13178 times.
✓ Branch 1 taken 1090 times.
14268 for (int dst = 0; dst < prev->rw.elems; dst++) {
436 13178 const int src = op->swizzle.in[dst];
437
4/4
✓ Branch 0 taken 8180 times.
✓ Branch 1 taken 4998 times.
✓ Branch 2 taken 7806 times.
✓ Branch 3 taken 374 times.
13178 if (src > dst && src < prev->rw.elems) {
438 7806 FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439
2/2
✓ Branch 0 taken 27330 times.
✓ Branch 1 taken 7806 times.
35136 for (int i = dst; i < 4; i++) {
440
2/2
✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 19524 times.
27330 if (op->swizzle.in[i] == dst)
441 7806 op->swizzle.in[i] = src;
442
2/2
✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 11718 times.
19524 else if (op->swizzle.in[i] == src)
443 7806 op->swizzle.in[i] = dst;
444 }
445 7806 goto retry;
446 }
447 }
448 }
449
450
4/4
✓ Branch 0 taken 25297 times.
✓ Branch 1 taken 197741 times.
✓ Branch 2 taken 13413 times.
✓ Branch 3 taken 11884 times.
223038 if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451
2/2
✓ Branch 0 taken 15805 times.
✓ Branch 1 taken 1118 times.
16923 for (int dst = 0; dst < next->rw.elems; dst++) {
452 15805 const int src = op->swizzle.in[dst];
453
4/4
✓ Branch 0 taken 12751 times.
✓ Branch 1 taken 3054 times.
✓ Branch 2 taken 12295 times.
✓ Branch 3 taken 456 times.
15805 if (src > dst && src < next->rw.elems) {
454 12295 FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455 12295 FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456 12295 goto retry;
457 }
458 }
459 }
460 210743 break;
461
462 479592 case SWS_OP_CONVERT:
463 /* No-op conversion */
464
2/2
✓ Branch 0 taken 9088 times.
✓ Branch 1 taken 470504 times.
479592 if (op->type == op->convert.to) {
465 9088 ff_sws_op_list_remove_at(ops, n, 1);
466 9088 goto retry;
467 }
468
469 /* Transitive conversion */
470
2/2
✓ Branch 0 taken 12941 times.
✓ Branch 1 taken 457563 times.
470504 if (next->op == SWS_OP_CONVERT &&
471
1/2
✓ Branch 0 taken 12941 times.
✗ Branch 1 not taken.
12941 op->convert.expand == next->convert.expand)
472 {
473 av_assert1(op->convert.to == next->type);
474 12941 op->convert.to = next->convert.to;
475 12941 ff_sws_op_list_remove_at(ops, n + 1, 1);
476 12941 goto retry;
477 }
478
479 /* Conversion followed by integer expansion */
480
3/4
✓ Branch 0 taken 60026 times.
✓ Branch 1 taken 397537 times.
✓ Branch 2 taken 60026 times.
✗ Branch 3 not taken.
457563 if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481
1/2
✓ Branch 0 taken 60026 times.
✗ Branch 1 not taken.
60026 ff_sws_pixel_type_is_int(op->type) &&
482
4/4
✓ Branch 0 taken 1078 times.
✓ Branch 1 taken 58948 times.
✓ Branch 2 taken 360 times.
✓ Branch 3 taken 718 times.
61104 ff_sws_pixel_type_is_int(op->convert.to) &&
483 1078 !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
484 {
485 360 op->convert.expand = true;
486 360 ff_sws_op_list_remove_at(ops, n + 1, 1);
487 360 goto retry;
488 }
489 457203 break;
490
491 88800 case SWS_OP_MIN:
492
2/2
✓ Branch 0 taken 355200 times.
✓ Branch 1 taken 88800 times.
444000 for (int i = 0; i < 4; i++) {
493
4/4
✓ Branch 0 taken 261646 times.
✓ Branch 1 taken 93554 times.
✓ Branch 2 taken 3000 times.
✓ Branch 3 taken 258646 times.
355200 if (next->comps.unused[i] || !op->c.q4[i].den)
494 96554 continue;
495
2/2
✓ Branch 1 taken 201974 times.
✓ Branch 2 taken 56672 times.
258646 if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
496 201974 noop = false;
497 }
498
499
2/2
✓ Branch 0 taken 13260 times.
✓ Branch 1 taken 75540 times.
88800 if (noop) {
500 13260 ff_sws_op_list_remove_at(ops, n, 1);
501 13260 goto retry;
502 }
503 75540 break;
504
505 56308 case SWS_OP_MAX:
506
2/2
✓ Branch 0 taken 225232 times.
✓ Branch 1 taken 56308 times.
281540 for (int i = 0; i < 4; i++) {
507
3/4
✓ Branch 0 taken 165892 times.
✓ Branch 1 taken 59340 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 165892 times.
225232 if (next->comps.unused[i] || !op->c.q4[i].den)
508 59340 continue;
509
2/2
✓ Branch 1 taken 93120 times.
✓ Branch 2 taken 72772 times.
165892 if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
510 93120 noop = false;
511 }
512
513
2/2
✓ Branch 0 taken 21302 times.
✓ Branch 1 taken 35006 times.
56308 if (noop) {
514 21302 ff_sws_op_list_remove_at(ops, n, 1);
515 21302 goto retry;
516 }
517 35006 break;
518
519 98471 case SWS_OP_DITHER:
520
2/2
✓ Branch 0 taken 380124 times.
✓ Branch 1 taken 87107 times.
467231 for (int i = 0; i < 4; i++) {
521
4/4
✓ Branch 0 taken 284215 times.
✓ Branch 1 taken 95909 times.
✓ Branch 2 taken 32716 times.
✓ Branch 3 taken 251499 times.
380124 if (next->comps.unused[i] || op->dither.y_offset[i] < 0)
522 128625 continue;
523
2/2
✓ Branch 0 taken 11364 times.
✓ Branch 1 taken 240135 times.
251499 if (prev->comps.flags[i] & SWS_COMP_EXACT) {
524 11364 op->dither.y_offset[i] = -1; /* unnecessary dither */
525 11364 goto retry;
526 } else {
527 240135 noop = false;
528 }
529 }
530
531
2/2
✓ Branch 0 taken 1712 times.
✓ Branch 1 taken 85395 times.
87107 if (noop) {
532 1712 ff_sws_op_list_remove_at(ops, n, 1);
533 1712 goto retry;
534 }
535 85395 break;
536
537 224401 case SWS_OP_LINEAR: {
538 SwsSwizzleOp swizzle;
539 SwsConst c;
540
541 /* No-op (identity) linear operation */
542
2/2
✓ Branch 0 taken 3018 times.
✓ Branch 1 taken 221383 times.
224401 if (!op->lin.mask) {
543 3018 ff_sws_op_list_remove_at(ops, n, 1);
544 126572 goto retry;
545 }
546
547
2/2
✓ Branch 0 taken 59722 times.
✓ Branch 1 taken 161661 times.
221383 if (next->op == SWS_OP_LINEAR) {
548 /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
549 59722 const SwsLinearOp m1 = op->lin;
550 59722 const SwsLinearOp m2 = next->lin;
551
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 59722 times.
298610 for (int i = 0; i < 4; i++) {
552
2/2
✓ Branch 0 taken 1194440 times.
✓ Branch 1 taken 238888 times.
1433328 for (int j = 0; j < 5; j++) {
553 1194440 AVRational sum = Q(0);
554
2/2
✓ Branch 0 taken 4777760 times.
✓ Branch 1 taken 1194440 times.
5972200 for (int k = 0; k < 4; k++)
555 4777760 sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
556
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 955552 times.
1194440 if (j == 4) /* m1.m[4][j] == 1 */
557 238888 sum = av_add_q(sum, m2.m[i][4]);
558 1194440 op->lin.m[i][j] = sum;
559 }
560 }
561 59722 op->lin.mask = ff_sws_linear_mask(op->lin);
562 59722 ff_sws_op_list_remove_at(ops, n + 1, 1);
563 59722 goto retry;
564 }
565
566 /* Optimize away zero columns */
567
2/2
✓ Branch 0 taken 634416 times.
✓ Branch 1 taken 147525 times.
781941 for (int j = 0; j < 4; j++) {
568 634416 const uint32_t col = SWS_MASK_COL(j);
569
4/4
✓ Branch 0 taken 44074 times.
✓ Branch 1 taken 590342 times.
✓ Branch 2 taken 29938 times.
✓ Branch 3 taken 14136 times.
634416 if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
570 620280 continue;
571
2/2
✓ Branch 0 taken 56544 times.
✓ Branch 1 taken 14136 times.
70680 for (int i = 0; i < 4; i++)
572 56544 op->lin.m[i][j] = Q(i == j);
573 14136 op->lin.mask &= ~col;
574 14136 goto retry;
575 }
576
577 /* Optimize away unused rows */
578
2/2
✓ Branch 0 taken 577872 times.
✓ Branch 1 taken 119429 times.
697301 for (int i = 0; i < 4; i++) {
579 577872 const uint32_t row = SWS_MASK_ROW(i);
580
4/4
✓ Branch 0 taken 197992 times.
✓ Branch 1 taken 379880 times.
✓ Branch 2 taken 169896 times.
✓ Branch 3 taken 28096 times.
577872 if (!next->comps.unused[i] || !(op->lin.mask & row))
581 549776 continue;
582
2/2
✓ Branch 0 taken 140480 times.
✓ Branch 1 taken 28096 times.
168576 for (int j = 0; j < 5; j++)
583 140480 op->lin.m[i][j] = Q(i == j);
584 28096 op->lin.mask &= ~row;
585 28096 goto retry;
586 }
587
588 /* Convert constant rows to explicit clear instruction */
589
2/2
✓ Branch 1 taken 8746 times.
✓ Branch 2 taken 110683 times.
119429 if (extract_constant_rows(&op->lin, prev->comps, &c)) {
590
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 8746 times.
8746 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
591 .op = SWS_OP_CLEAR,
592 .type = op->type,
593 .comps = op->comps,
594 .c = c,
595 }));
596 8746 goto retry;
597 }
598
599 /* Multiplication by scalar constant */
600
2/2
✓ Branch 1 taken 10262 times.
✓ Branch 2 taken 100421 times.
110683 if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
601 10262 op->op = SWS_OP_SCALE;
602 10262 op->c = c;
603 10262 goto retry;
604 }
605
606 /* Swizzle by fixed pattern */
607
2/2
✓ Branch 1 taken 2592 times.
✓ Branch 2 taken 97829 times.
100421 if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
608
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2592 times.
2592 RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
609 .op = SWS_OP_SWIZZLE,
610 .type = op->type,
611 .swizzle = swizzle,
612 }));
613 2592 goto retry;
614 }
615 97829 break;
616 }
617
618 77966 case SWS_OP_SCALE: {
619 77966 const int factor2 = exact_log2_q(op->c.q);
620
621 /* No-op scaling */
622
3/4
✓ Branch 0 taken 17594 times.
✓ Branch 1 taken 60372 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 17594 times.
77966 if (op->c.q.num == 1 && op->c.q.den == 1) {
623 ff_sws_op_list_remove_at(ops, n, 1);
624 goto retry;
625 }
626
627 /* Scaling by exact power of two */
628
4/4
✓ Branch 0 taken 14607 times.
✓ Branch 1 taken 63359 times.
✓ Branch 2 taken 978 times.
✓ Branch 3 taken 13629 times.
77966 if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
629
1/2
✓ Branch 0 taken 978 times.
✗ Branch 1 not taken.
978 op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
630 978 op->c.u = FFABS(factor2);
631 978 goto retry;
632 }
633 76988 break;
634 }
635 }
636 }
637
638 /* Push clears to the back to void any unused components */
639
2/2
✓ Branch 0 taken 735517 times.
✓ Branch 1 taken 70469 times.
805986 for (int n = 0; n < ops->num_ops - 1; n++) {
640 735517 SwsOp *op = &ops->ops[n];
641 735517 SwsOp *next = &ops->ops[n + 1];
642
643
2/2
✓ Branch 0 taken 51904 times.
✓ Branch 1 taken 683613 times.
735517 switch (op->op) {
644 51904 case SWS_OP_CLEAR:
645
2/2
✓ Branch 1 taken 27350 times.
✓ Branch 2 taken 24554 times.
51904 if (op_commute_clear(op, next)) {
646 27350 FFSWAP(SwsOp, *op, *next);
647 27350 goto retry;
648 }
649 24554 break;
650 }
651 }
652
653 /* Apply any remaining preferential re-ordering optimizations; do these
654 * last because they are more likely to block other optimizations if done
655 * too aggressively */
656
2/2
✓ Branch 0 taken 403812 times.
✓ Branch 1 taken 31704 times.
435516 for (int n = 0; n < ops->num_ops - 1; n++) {
657 403812 SwsOp *op = &ops->ops[n];
658 403812 SwsOp *next = &ops->ops[n + 1];
659
660
3/3
✓ Branch 0 taken 60367 times.
✓ Branch 1 taken 24204 times.
✓ Branch 2 taken 319241 times.
403812 switch (op->op) {
661 60367 case SWS_OP_SWIZZLE: {
662 /* Try to push swizzles towards the output */
663
2/2
✓ Branch 1 taken 36941 times.
✓ Branch 2 taken 23426 times.
60367 if (op_commute_swizzle(op, next)) {
664 36941 FFSWAP(SwsOp, *op, *next);
665 36941 goto retry;
666 }
667 23426 break;
668 }
669
670 24204 case SWS_OP_SCALE:
671 /* Scaling by integer before conversion to int */
672
4/4
✓ Branch 0 taken 6362 times.
✓ Branch 1 taken 17842 times.
✓ Branch 2 taken 1824 times.
✓ Branch 3 taken 4538 times.
24204 if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
673
1/2
✓ Branch 0 taken 1824 times.
✗ Branch 1 not taken.
1824 ff_sws_pixel_type_is_int(next->convert.to))
674 {
675 1824 op->type = next->convert.to;
676 1824 FFSWAP(SwsOp, *op, *next);
677 1824 goto retry;
678 }
679 22380 break;
680 }
681 }
682
683 31704 return 0;
684 }
685
686 16765 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
687 int size, uint8_t clear_val,
688 int *read_bytes, int *write_bytes)
689 {
690
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16765 times.
16765 if (!ops->num_ops)
691 return AVERROR(EINVAL);
692
693 16765 const SwsOp *read = ff_sws_op_list_input(ops);
694
7/8
✓ Branch 0 taken 16765 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16513 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 10241 times.
✓ Branch 5 taken 6272 times.
✓ Branch 6 taken 7741 times.
✓ Branch 7 taken 2500 times.
16765 if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1))
695 7993 return AVERROR(ENOTSUP);
696
697 8772 const int read_size = ff_sws_pixel_type_size(read->type);
698 8772 uint32_t mask[4] = {0};
699
2/2
✓ Branch 0 taken 18817 times.
✓ Branch 1 taken 8772 times.
27589 for (int i = 0; i < read->rw.elems; i++)
700 18817 mask[i] = 0x01010101 * i * read_size + 0x03020100;
701
702
1/2
✓ Branch 0 taken 12052 times.
✗ Branch 1 not taken.
12052 for (int opidx = 1; opidx < ops->num_ops; opidx++) {
703 12052 const SwsOp *op = &ops->ops[opidx];
704
6/6
✓ Branch 0 taken 379 times.
✓ Branch 1 taken 2677 times.
✓ Branch 2 taken 210 times.
✓ Branch 3 taken 4938 times.
✓ Branch 4 taken 528 times.
✓ Branch 5 taken 3320 times.
12052 switch (op->op) {
705 379 case SWS_OP_SWIZZLE: {
706 379 uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
707
2/2
✓ Branch 0 taken 1516 times.
✓ Branch 1 taken 379 times.
1895 for (int i = 0; i < 4; i++)
708 1516 mask[i] = orig[op->swizzle.in[i]];
709 379 break;
710 }
711
712 2677 case SWS_OP_SWAP_BYTES:
713
2/2
✓ Branch 0 taken 10708 times.
✓ Branch 1 taken 2677 times.
13385 for (int i = 0; i < 4; i++) {
714
2/3
✓ Branch 0 taken 10200 times.
✓ Branch 1 taken 508 times.
✗ Branch 2 not taken.
10708 switch (ff_sws_pixel_type_size(op->type)) {
715 10200 case 2: mask[i] = av_bswap16(mask[i]); break;
716 508 case 4: mask[i] = av_bswap32(mask[i]); break;
717 }
718 }
719 2677 break;
720
721 210 case SWS_OP_CLEAR:
722
2/2
✓ Branch 0 taken 758 times.
✓ Branch 1 taken 54 times.
812 for (int i = 0; i < 4; i++) {
723
2/2
✓ Branch 0 taken 548 times.
✓ Branch 1 taken 210 times.
758 if (!op->c.q4[i].den)
724 548 continue;
725
3/4
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 156 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 54 times.
210 if (op->c.q4[i].num != 0 || !clear_val)
726 156 return AVERROR(ENOTSUP);
727 54 mask[i] = 0x1010101ul * clear_val;
728 }
729 54 break;
730
731 4938 case SWS_OP_CONVERT: {
732
2/2
✓ Branch 0 taken 4768 times.
✓ Branch 1 taken 170 times.
4938 if (!op->convert.expand)
733 4768 return AVERROR(ENOTSUP);
734
2/2
✓ Branch 0 taken 680 times.
✓ Branch 1 taken 170 times.
850 for (int i = 0; i < 4; i++) {
735
1/3
✓ Branch 0 taken 680 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
680 switch (ff_sws_pixel_type_size(op->type)) {
736 680 case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
737 case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
738 }
739 }
740 170 break;
741 }
742
743 528 case SWS_OP_WRITE: {
744
6/6
✓ Branch 0 taken 514 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 262 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 211 times.
✓ Branch 5 taken 51 times.
528 if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
745 225 return AVERROR(ENOTSUP);
746
747 /* Initialize to no-op */
748 303 memset(shuffle, clear_val, size);
749
750 303 const int write_size = ff_sws_pixel_type_size(op->type);
751 303 const int read_chunk = read->rw.elems * read_size;
752 303 const int write_chunk = op->rw.elems * write_size;
753 303 const int num_groups = size / FFMAX(read_chunk, write_chunk);
754
2/2
✓ Branch 0 taken 1110 times.
✓ Branch 1 taken 303 times.
1413 for (int n = 0; n < num_groups; n++) {
755 1110 const int base_in = n * read_chunk;
756 1110 const int base_out = n * write_chunk;
757
2/2
✓ Branch 0 taken 2842 times.
✓ Branch 1 taken 1110 times.
3952 for (int i = 0; i < op->rw.elems; i++) {
758 2842 const int offset = base_out + i * write_size;
759
2/2
✓ Branch 0 taken 4354 times.
✓ Branch 1 taken 2842 times.
7196 for (int b = 0; b < write_size; b++) {
760 4354 const uint8_t idx = mask[i] >> (b * 8);
761
2/2
✓ Branch 0 taken 4138 times.
✓ Branch 1 taken 216 times.
4354 if (idx != clear_val)
762 4138 shuffle[offset + b] = base_in + idx;
763 }
764 }
765 }
766
767 303 *read_bytes = num_groups * read_chunk;
768 303 *write_bytes = num_groups * write_chunk;
769 303 return num_groups;
770 }
771
772 3320 default:
773 3320 return AVERROR(ENOTSUP);
774 }
775 }
776
777 return AVERROR(EINVAL);
778 }
779