FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_optimizer.c
Date: 2026-01-10 09:36:38
Exec Total Coverage
Lines: 376 394 95.4%
Functions: 9 9 100.0%
Branches: 305 342 89.2%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24
25 #include "ops.h"
26 #include "ops_internal.h"
27
28 #define RET(x) \
29 do { \
30 if ((ret = (x)) < 0) \
31 return ret; \
32 } while (0)
33
34 /**
35 * Try to commute a clear op with the next operation. Makes any adjustments
36 * to the operations as needed, but does not perform the actual commutation.
37 *
38 * Returns whether successful.
39 */
40 42613 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42 av_assert1(op->op == SWS_OP_CLEAR);
43
3/5
✓ Branch 0 taken 9120 times.
✓ Branch 1 taken 17286 times.
✓ Branch 2 taken 16207 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
42613 switch (next->op) {
44 9120 case SWS_OP_CONVERT:
45 9120 op->type = next->convert.to;
46 /* fall through */
47 26406 case SWS_OP_LSHIFT:
48 case SWS_OP_RSHIFT:
49 case SWS_OP_DITHER:
50 case SWS_OP_MIN:
51 case SWS_OP_MAX:
52 case SWS_OP_SCALE:
53 case SWS_OP_READ:
54 case SWS_OP_SWIZZLE:
55 26406 ff_sws_apply_op_q(next, op->c.q4);
56 26406 return true;
57 16207 case SWS_OP_INVALID:
58 case SWS_OP_SWAP_BYTES:
59 case SWS_OP_WRITE:
60 case SWS_OP_LINEAR:
61 case SWS_OP_PACK:
62 case SWS_OP_UNPACK:
63 case SWS_OP_CLEAR:
64 16207 return false;
65 case SWS_OP_TYPE_NB:
66 break;
67 }
68
69 av_unreachable("Invalid operation type!");
70 return false;
71 }
72
73 /**
74 * Try to commute a swizzle op with the next operation. Makes any adjustments
75 * to the operations as needed, but does not perform the actual commutation.
76 *
77 * Returns whether successful.
78 */
79 8684 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
80 {
81 8684 bool seen[4] = {0};
82
83 av_assert1(op->op == SWS_OP_SWIZZLE);
84
5/7
✓ Branch 0 taken 1704 times.
✓ Branch 1 taken 2408 times.
✓ Branch 2 taken 1062 times.
✓ Branch 3 taken 1074 times.
✓ Branch 4 taken 2436 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
8684 switch (next->op) {
85 1704 case SWS_OP_CONVERT:
86 1704 op->type = next->convert.to;
87 /* fall through */
88 4112 case SWS_OP_SWAP_BYTES:
89 case SWS_OP_LSHIFT:
90 case SWS_OP_RSHIFT:
91 case SWS_OP_SCALE:
92 4112 return true;
93
94 /**
95 * We can commute per-channel ops only if the per-channel constants are the
96 * same for all duplicated channels; e.g.:
97 * SWIZZLE {0, 0, 0, 3}
98 * NEXT {x, x, x, w}
99 * ->
100 * NEXT {x, _, _, w}
101 * SWIZZLE {0, 0, 0, 3}
102 */
103 1062 case SWS_OP_MIN:
104 case SWS_OP_MAX: {
105 1062 const SwsConst c = next->c;
106
2/2
✓ Branch 0 taken 4248 times.
✓ Branch 1 taken 1062 times.
5310 for (int i = 0; i < 4; i++) {
107
2/2
✓ Branch 0 taken 962 times.
✓ Branch 1 taken 3286 times.
4248 if (next->comps.unused[i])
108 962 continue;
109 3286 const int j = op->swizzle.in[i];
110
3/4
✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 1162 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2124 times.
3286 if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
111 return false;
112 3286 next->c.q4[j] = c.q4[i];
113 3286 seen[j] = true;
114 }
115 1062 return true;
116 }
117
118 1074 case SWS_OP_DITHER: {
119 1074 const SwsDitherOp d = next->dither;
120
2/2
✓ Branch 0 taken 3480 times.
✓ Branch 1 taken 666 times.
4146 for (int i = 0; i < 4; i++) {
121
2/2
✓ Branch 0 taken 620 times.
✓ Branch 1 taken 2860 times.
3480 if (next->comps.unused[i])
122 620 continue;
123 2860 const int j = op->swizzle.in[i];
124
4/4
✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 1120 times.
✓ Branch 2 taken 408 times.
✓ Branch 3 taken 1332 times.
2860 if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
125 408 return false;
126 2452 next->dither.y_offset[j] = d.y_offset[i];
127 2452 seen[j] = true;
128 }
129 666 return true;
130 }
131
132 2436 case SWS_OP_INVALID:
133 case SWS_OP_READ:
134 case SWS_OP_WRITE:
135 case SWS_OP_SWIZZLE:
136 case SWS_OP_CLEAR:
137 case SWS_OP_LINEAR:
138 case SWS_OP_PACK:
139 case SWS_OP_UNPACK:
140 2436 return false;
141 case SWS_OP_TYPE_NB:
142 break;
143 }
144
145 av_unreachable("Invalid operation type!");
146 return false;
147 }
148
149 /* returns log2(x) only if x is a power of two, or 0 otherwise */
150 25059 static int exact_log2(const int x)
151 {
152 int p;
153
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 25059 times.
25059 if (x <= 0)
154 return 0;
155 25059 p = av_log2(x);
156
2/2
✓ Branch 0 taken 9770 times.
✓ Branch 1 taken 15289 times.
25059 return (1 << p) == x ? p : 0;
157 }
158
159 47529 static int exact_log2_q(const AVRational x)
160 {
161
2/2
✓ Branch 0 taken 14392 times.
✓ Branch 1 taken 33137 times.
47529 if (x.den == 1)
162 14392 return exact_log2(x.num);
163
2/2
✓ Branch 0 taken 10667 times.
✓ Branch 1 taken 22470 times.
33137 else if (x.num == 1)
164 10667 return -exact_log2(x.den);
165 else
166 22470 return 0;
167 }
168
169 /**
170 * If a linear operation can be reduced to a scalar multiplication, returns
171 * the corresponding scaling factor, or 0 otherwise.
172 */
173 88678 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
174 SwsConst *out_scale)
175 {
176 88678 SwsConst scale = {0};
177
178 /* There are components not on the main diagonal */
179
2/2
✓ Branch 0 taken 66227 times.
✓ Branch 1 taken 22451 times.
88678 if (c->mask & ~SWS_MASK_DIAG4)
180 66227 return false;
181
182
2/2
✓ Branch 0 taken 71168 times.
✓ Branch 1 taken 10262 times.
81430 for (int i = 0; i < 4; i++) {
183 71168 const AVRational s = c->m[i][i];
184
3/4
✓ Branch 0 taken 71168 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10702 times.
✓ Branch 3 taken 60466 times.
71168 if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
185 10702 continue;
186
4/4
✓ Branch 0 taken 38015 times.
✓ Branch 1 taken 22451 times.
✓ Branch 3 taken 12189 times.
✓ Branch 4 taken 25826 times.
60466 if (scale.q.den && av_cmp_q(s, scale.q))
187 12189 return false;
188 48277 scale.q = s;
189 }
190
191
1/2
✓ Branch 0 taken 10262 times.
✗ Branch 1 not taken.
10262 if (scale.q.den)
192 10262 *out_scale = scale;
193 10262 return scale.q.den;
194 }
195
196 /* Extracts an integer clear operation (subset) from the given linear op. */
197 97424 static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
198 SwsConst *out_clear)
199 {
200 97424 SwsConst clear = {0};
201 97424 bool ret = false;
202
203
2/2
✓ Branch 0 taken 389696 times.
✓ Branch 1 taken 97424 times.
487120 for (int i = 0; i < 4; i++) {
204 389696 bool const_row = c->m[i][4].den == 1; /* offset is integer */
205
2/2
✓ Branch 0 taken 1558784 times.
✓ Branch 1 taken 389696 times.
1948480 for (int j = 0; j < 4; j++) {
206
2/2
✓ Branch 0 taken 638984 times.
✓ Branch 1 taken 919800 times.
2197768 const_row &= c->m[i][j].num == 0 || /* scalar is zero */
207
2/2
✓ Branch 0 taken 17994 times.
✓ Branch 1 taken 620990 times.
638984 (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
208 }
209
4/4
✓ Branch 0 taken 11430 times.
✓ Branch 1 taken 378266 times.
✓ Branch 2 taken 10740 times.
✓ Branch 3 taken 690 times.
389696 if (const_row && (c->mask & SWS_MASK_ROW(i))) {
210 10740 clear.q4[i] = c->m[i][4];
211
2/2
✓ Branch 0 taken 53700 times.
✓ Branch 1 taken 10740 times.
64440 for (int j = 0; j < 5; j++)
212 53700 c->m[i][j] = Q(i == j);
213 10740 c->mask &= ~SWS_MASK_ROW(i);
214 10740 ret = true;
215 }
216 }
217
218
2/2
✓ Branch 0 taken 8746 times.
✓ Branch 1 taken 88678 times.
97424 if (ret)
219 8746 *out_clear = clear;
220 97424 return ret;
221 }
222
223 /* Unswizzle a linear operation by aligning single-input rows with
224 * their corresponding diagonal */
225 78416 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
226 {
227 78416 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
228 78416 SwsLinearOp c = *op;
229
230
2/2
✓ Branch 0 taken 165914 times.
✓ Branch 1 taken 29166 times.
195080 for (int i = 0; i < 4; i++) {
231 165914 int idx = -1;
232
2/2
✓ Branch 0 taken 579840 times.
✓ Branch 1 taken 116664 times.
696504 for (int j = 0; j < 4; j++) {
233
4/4
✓ Branch 0 taken 220348 times.
✓ Branch 1 taken 359492 times.
✓ Branch 2 taken 5874 times.
✓ Branch 3 taken 214474 times.
579840 if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
234 365366 continue;
235
2/2
✓ Branch 0 taken 49250 times.
✓ Branch 1 taken 165224 times.
214474 if (idx >= 0)
236 49250 return false; /* multiple inputs */
237 165224 idx = j;
238 }
239
240
4/4
✓ Branch 0 taken 115974 times.
✓ Branch 1 taken 690 times.
✓ Branch 2 taken 5184 times.
✓ Branch 3 taken 110790 times.
116664 if (idx >= 0 && idx != i) {
241 /* Move coefficient to the diagonal */
242 5184 c.m[i][i] = c.m[i][idx];
243 5184 c.m[i][idx] = Q(0);
244 5184 swiz.in[i] = idx;
245 }
246 }
247
248
2/2
✓ Branch 0 taken 26574 times.
✓ Branch 1 taken 2592 times.
29166 if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
249 26574 return false; /* no swizzle was identified */
250
251 2592 c.mask = ff_sws_linear_mask(c);
252 2592 *out_swiz = swiz;
253 2592 *op = c;
254 2592 return true;
255 }
256
257 31704 int ff_sws_op_list_optimize(SwsOpList *ops)
258 {
259 int ret;
260
261 295294 retry:
262 326998 ff_sws_op_list_update_comps(ops);
263
264 /* Apply all in-place optimizations (that do not re-order the list) */
265
2/2
✓ Branch 0 taken 1847600 times.
✓ Branch 1 taken 71587 times.
1919187 for (int n = 0; n < ops->num_ops; n++) {
266 1847600 SwsOp dummy = {0};
267 1847600 SwsOp *op = &ops->ops[n];
268
2/2
✓ Branch 0 taken 1520602 times.
✓ Branch 1 taken 326998 times.
1847600 SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
269
2/2
✓ Branch 0 taken 1776013 times.
✓ Branch 1 taken 71587 times.
1847600 SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
270
271 /* common helper variable */
272 1847600 bool noop = true;
273
274
13/13
✓ Branch 0 taken 326998 times.
✓ Branch 1 taken 147727 times.
✓ Branch 2 taken 63720 times.
✓ Branch 3 taken 30268 times.
✓ Branch 4 taken 142764 times.
✓ Branch 5 taken 240201 times.
✓ Branch 6 taken 380720 times.
✓ Branch 7 taken 65794 times.
✓ Branch 8 taken 45594 times.
✓ Branch 9 taken 67486 times.
✓ Branch 10 taken 202396 times.
✓ Branch 11 taken 47529 times.
✓ Branch 12 taken 86403 times.
1847600 switch (op->op) {
275 326998 case SWS_OP_READ:
276 /* Optimized further into refcopy / memcpy */
277
2/2
✓ Branch 0 taken 179 times.
✓ Branch 1 taken 326819 times.
326998 if (next->op == SWS_OP_WRITE &&
278
2/2
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 26 times.
179 next->rw.elems == op->rw.elems &&
279
1/2
✓ Branch 0 taken 153 times.
✗ Branch 1 not taken.
153 next->rw.packed == op->rw.packed &&
280
2/2
✓ Branch 0 taken 149 times.
✓ Branch 1 taken 4 times.
153 next->rw.frac == op->rw.frac)
281 {
282 149 ff_sws_op_list_remove_at(ops, n, 2);
283 av_assert1(ops->num_ops == 0);
284 149 return 0;
285 }
286
287 /* Skip reading extra unneeded components */
288
2/2
✓ Branch 0 taken 180643 times.
✓ Branch 1 taken 146206 times.
326849 if (!op->rw.packed) {
289 180643 int needed = op->rw.elems;
290
3/4
✓ Branch 0 taken 185679 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5036 times.
✓ Branch 3 taken 180643 times.
185679 while (needed > 0 && next->comps.unused[needed - 1])
291 5036 needed--;
292
2/2
✓ Branch 0 taken 4158 times.
✓ Branch 1 taken 176485 times.
180643 if (op->rw.elems != needed) {
293 4158 op->rw.elems = needed;
294 255262 goto retry;
295 }
296 }
297 322691 break;
298
299 147727 case SWS_OP_SWAP_BYTES:
300 /* Redundant (double) swap */
301
2/2
✓ Branch 0 taken 100 times.
✓ Branch 1 taken 147627 times.
147727 if (next->op == SWS_OP_SWAP_BYTES) {
302 100 ff_sws_op_list_remove_at(ops, n, 2);
303 100 goto retry;
304 }
305 147627 break;
306
307 63720 case SWS_OP_UNPACK:
308 /* Redundant unpack+pack */
309
3/4
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 63674 times.
✓ Branch 2 taken 46 times.
✗ Branch 3 not taken.
63720 if (next->op == SWS_OP_PACK && next->type == op->type &&
310
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[0] == op->pack.pattern[0] &&
311
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[1] == op->pack.pattern[1] &&
312
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[2] == op->pack.pattern[2] &&
313
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[3] == op->pack.pattern[3])
314 {
315 46 ff_sws_op_list_remove_at(ops, n, 2);
316 46 goto retry;
317 }
318 63674 break;
319
320 30268 case SWS_OP_LSHIFT:
321 case SWS_OP_RSHIFT:
322 /* Two shifts in the same direction */
323
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 30064 times.
30268 if (next->op == op->op) {
324 204 op->c.u += next->c.u;
325 204 ff_sws_op_list_remove_at(ops, n + 1, 1);
326 204 goto retry;
327 }
328
329 /* No-op shift */
330
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30064 times.
30064 if (!op->c.u) {
331 ff_sws_op_list_remove_at(ops, n, 1);
332 goto retry;
333 }
334 30064 break;
335
336 142764 case SWS_OP_CLEAR:
337
2/2
✓ Branch 0 taken 571056 times.
✓ Branch 1 taken 142764 times.
713820 for (int i = 0; i < 4; i++) {
338
2/2
✓ Branch 0 taken 380050 times.
✓ Branch 1 taken 191006 times.
571056 if (!op->c.q4[i].den)
339 380050 continue;
340
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 191006 times.
191006 if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
342 !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
343 op->c.q4[i].num == 0)
344 {
345 /* Redundant clear-to-zero of zero component */
346 op->c.q4[i].den = 0;
347
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 159302 times.
191006 } else if (next->comps.unused[i]) {
348 /* Unnecessary clear of unused component */
349 31704 op->c.q4[i] = (AVRational) {0, 0};
350
1/2
✓ Branch 0 taken 159302 times.
✗ Branch 1 not taken.
159302 } else if (op->c.q4[i].den) {
351 159302 noop = false;
352 }
353 }
354
355
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 111060 times.
142764 if (noop) {
356 31704 ff_sws_op_list_remove_at(ops, n, 1);
357 31704 goto retry;
358 }
359
360 /* Transitive clear */
361
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 110870 times.
111060 if (next->op == SWS_OP_CLEAR) {
362
2/2
✓ Branch 0 taken 760 times.
✓ Branch 1 taken 190 times.
950 for (int i = 0; i < 4; i++) {
363
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 570 times.
760 if (next->c.q4[i].den)
364 190 op->c.q4[i] = next->c.q4[i];
365 }
366 190 ff_sws_op_list_remove_at(ops, n + 1, 1);
367 190 goto retry;
368 }
369 110870 break;
370
371 240201 case SWS_OP_SWIZZLE:
372
2/2
✓ Branch 0 taken 960804 times.
✓ Branch 1 taken 240201 times.
1201005 for (int i = 0; i < 4; i++) {
373
2/2
✓ Branch 0 taken 235808 times.
✓ Branch 1 taken 724996 times.
960804 if (next->comps.unused[i])
374 235808 continue;
375
2/2
✓ Branch 0 taken 536114 times.
✓ Branch 1 taken 188882 times.
724996 if (op->swizzle.in[i] != i)
376 536114 noop = false;
377 }
378
379 /* Identity swizzle */
380
2/2
✓ Branch 0 taken 30397 times.
✓ Branch 1 taken 209804 times.
240201 if (noop) {
381 30397 ff_sws_op_list_remove_at(ops, n, 1);
382 30397 goto retry;
383 }
384
385 /* Transitive swizzle */
386
2/2
✓ Branch 0 taken 2333 times.
✓ Branch 1 taken 207471 times.
209804 if (next->op == SWS_OP_SWIZZLE) {
387 2333 const SwsSwizzleOp orig = op->swizzle;
388
2/2
✓ Branch 0 taken 9332 times.
✓ Branch 1 taken 2333 times.
11665 for (int i = 0; i < 4; i++)
389 9332 op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
390 2333 ff_sws_op_list_remove_at(ops, n + 1, 1);
391 2333 goto retry;
392 }
393 207471 break;
394
395 380720 case SWS_OP_CONVERT:
396 /* No-op conversion */
397
2/2
✓ Branch 0 taken 9088 times.
✓ Branch 1 taken 371632 times.
380720 if (op->type == op->convert.to) {
398 9088 ff_sws_op_list_remove_at(ops, n, 1);
399 9088 goto retry;
400 }
401
402 /* Transitive conversion */
403
2/2
✓ Branch 0 taken 12938 times.
✓ Branch 1 taken 358694 times.
371632 if (next->op == SWS_OP_CONVERT &&
404
1/2
✓ Branch 0 taken 12938 times.
✗ Branch 1 not taken.
12938 op->convert.expand == next->convert.expand)
405 {
406 av_assert1(op->convert.to == next->type);
407 12938 op->convert.to = next->convert.to;
408 12938 ff_sws_op_list_remove_at(ops, n + 1, 1);
409 12938 goto retry;
410 }
411
412 /* Conversion followed by integer expansion */
413
5/6
✓ Branch 0 taken 36389 times.
✓ Branch 1 taken 322305 times.
✓ Branch 2 taken 36389 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 360 times.
✓ Branch 5 taken 36029 times.
395083 if (next->op == SWS_OP_SCALE && !op->convert.expand &&
414 36389 !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
415 {
416 360 op->convert.expand = true;
417 360 ff_sws_op_list_remove_at(ops, n + 1, 1);
418 360 goto retry;
419 }
420 358334 break;
421
422 65794 case SWS_OP_MIN:
423
2/2
✓ Branch 0 taken 263176 times.
✓ Branch 1 taken 65794 times.
328970 for (int i = 0; i < 4; i++) {
424
4/4
✓ Branch 0 taken 191584 times.
✓ Branch 1 taken 71592 times.
✓ Branch 2 taken 2940 times.
✓ Branch 3 taken 188644 times.
263176 if (next->comps.unused[i] || !op->c.q4[i].den)
425 74532 continue;
426
2/2
✓ Branch 1 taken 135306 times.
✓ Branch 2 taken 53338 times.
188644 if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
427 135306 noop = false;
428 }
429
430
2/2
✓ Branch 0 taken 13180 times.
✓ Branch 1 taken 52614 times.
65794 if (noop) {
431 13180 ff_sws_op_list_remove_at(ops, n, 1);
432 13180 goto retry;
433 }
434 52614 break;
435
436 45594 case SWS_OP_MAX:
437
2/2
✓ Branch 0 taken 182376 times.
✓ Branch 1 taken 45594 times.
227970 for (int i = 0; i < 4; i++) {
438
3/4
✓ Branch 0 taken 132408 times.
✓ Branch 1 taken 49968 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 132408 times.
182376 if (next->comps.unused[i] || !op->c.q4[i].den)
439 49968 continue;
440
2/2
✓ Branch 1 taken 61768 times.
✓ Branch 2 taken 70640 times.
132408 if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
441 61768 noop = false;
442 }
443
444
2/2
✓ Branch 0 taken 21302 times.
✓ Branch 1 taken 24292 times.
45594 if (noop) {
445 21302 ff_sws_op_list_remove_at(ops, n, 1);
446 21302 goto retry;
447 }
448 24292 break;
449
450 67486 case SWS_OP_DITHER:
451
2/2
✓ Branch 0 taken 269944 times.
✓ Branch 1 taken 67486 times.
337430 for (int i = 0; i < 4; i++) {
452
2/2
✓ Branch 0 taken 231201 times.
✓ Branch 1 taken 38743 times.
501145 noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
453
2/2
✓ Branch 0 taken 60795 times.
✓ Branch 1 taken 170406 times.
231201 next->comps.unused[i];
454 }
455
456
2/2
✓ Branch 0 taken 1712 times.
✓ Branch 1 taken 65774 times.
67486 if (noop) {
457 1712 ff_sws_op_list_remove_at(ops, n, 1);
458 1712 goto retry;
459 }
460 65774 break;
461
462 202396 case SWS_OP_LINEAR: {
463 SwsSwizzleOp swizzle;
464 SwsConst c;
465
466 /* No-op (identity) linear operation */
467
2/2
✓ Branch 0 taken 3018 times.
✓ Branch 1 taken 199378 times.
202396 if (!op->lin.mask) {
468 3018 ff_sws_op_list_remove_at(ops, n, 1);
469 126572 goto retry;
470 }
471
472
2/2
✓ Branch 0 taken 59722 times.
✓ Branch 1 taken 139656 times.
199378 if (next->op == SWS_OP_LINEAR) {
473 /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
474 59722 const SwsLinearOp m1 = op->lin;
475 59722 const SwsLinearOp m2 = next->lin;
476
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 59722 times.
298610 for (int i = 0; i < 4; i++) {
477
2/2
✓ Branch 0 taken 1194440 times.
✓ Branch 1 taken 238888 times.
1433328 for (int j = 0; j < 5; j++) {
478 1194440 AVRational sum = Q(0);
479
2/2
✓ Branch 0 taken 4777760 times.
✓ Branch 1 taken 1194440 times.
5972200 for (int k = 0; k < 4; k++)
480 4777760 sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
481
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 955552 times.
1194440 if (j == 4) /* m1.m[4][j] == 1 */
482 238888 sum = av_add_q(sum, m2.m[i][4]);
483 1194440 op->lin.m[i][j] = sum;
484 }
485 }
486 59722 op->lin.mask = ff_sws_linear_mask(op->lin);
487 59722 ff_sws_op_list_remove_at(ops, n + 1, 1);
488 59722 goto retry;
489 }
490
491 /* Optimize away zero columns */
492
2/2
✓ Branch 0 taken 546396 times.
✓ Branch 1 taken 125520 times.
671916 for (int j = 0; j < 4; j++) {
493 546396 const uint32_t col = SWS_MASK_COL(j);
494
4/4
✓ Branch 0 taken 44074 times.
✓ Branch 1 taken 502322 times.
✓ Branch 2 taken 29938 times.
✓ Branch 3 taken 14136 times.
546396 if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
495 532260 continue;
496
2/2
✓ Branch 0 taken 56544 times.
✓ Branch 1 taken 14136 times.
70680 for (int i = 0; i < 4; i++)
497 56544 op->lin.m[i][j] = Q(i == j);
498 14136 op->lin.mask &= ~col;
499 14136 goto retry;
500 }
501
502 /* Optimize away unused rows */
503
2/2
✓ Branch 0 taken 489852 times.
✓ Branch 1 taken 97424 times.
587276 for (int i = 0; i < 4; i++) {
504 489852 const uint32_t row = SWS_MASK_ROW(i);
505
4/4
✓ Branch 0 taken 169910 times.
✓ Branch 1 taken 319942 times.
✓ Branch 2 taken 141814 times.
✓ Branch 3 taken 28096 times.
489852 if (!next->comps.unused[i] || !(op->lin.mask & row))
506 461756 continue;
507
2/2
✓ Branch 0 taken 140480 times.
✓ Branch 1 taken 28096 times.
168576 for (int j = 0; j < 5; j++)
508 140480 op->lin.m[i][j] = Q(i == j);
509 28096 op->lin.mask &= ~row;
510 28096 goto retry;
511 }
512
513 /* Convert constant rows to explicit clear instruction */
514
2/2
✓ Branch 1 taken 8746 times.
✓ Branch 2 taken 88678 times.
97424 if (extract_constant_rows(&op->lin, prev->comps, &c)) {
515
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 8746 times.
8746 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
516 .op = SWS_OP_CLEAR,
517 .type = op->type,
518 .comps = op->comps,
519 .c = c,
520 }));
521 8746 goto retry;
522 }
523
524 /* Multiplication by scalar constant */
525
2/2
✓ Branch 1 taken 10262 times.
✓ Branch 2 taken 78416 times.
88678 if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
526 10262 op->op = SWS_OP_SCALE;
527 10262 op->c = c;
528 10262 goto retry;
529 }
530
531 /* Swizzle by fixed pattern */
532
2/2
✓ Branch 1 taken 2592 times.
✓ Branch 2 taken 75824 times.
78416 if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
533
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2592 times.
2592 RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
534 .op = SWS_OP_SWIZZLE,
535 .type = op->type,
536 .swizzle = swizzle,
537 }));
538 2592 goto retry;
539 }
540 75824 break;
541 }
542
543 47529 case SWS_OP_SCALE: {
544 47529 const int factor2 = exact_log2_q(op->c.q);
545
546 /* No-op scaling */
547
3/4
✓ Branch 0 taken 10667 times.
✓ Branch 1 taken 36862 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10667 times.
47529 if (op->c.q.num == 1 && op->c.q.den == 1) {
548 ff_sws_op_list_remove_at(ops, n, 1);
549 goto retry;
550 }
551
552 /* Scaling by exact power of two */
553
4/4
✓ Branch 0 taken 9770 times.
✓ Branch 1 taken 37759 times.
✓ Branch 2 taken 978 times.
✓ Branch 3 taken 8792 times.
47529 if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
554
1/2
✓ Branch 0 taken 978 times.
✗ Branch 1 not taken.
978 op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
555 978 op->c.u = FFABS(factor2);
556 978 goto retry;
557 }
558 46551 break;
559 }
560 }
561 }
562
563 /* Push clears to the back to void any unused components */
564
2/2
✓ Branch 0 taken 461110 times.
✓ Branch 1 taken 45181 times.
506291 for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
565 461110 SwsOp *op = &ops->ops[n];
566 461110 SwsOp *next = &ops->ops[n + 1];
567
568
2/2
✓ Branch 0 taken 42613 times.
✓ Branch 1 taken 418497 times.
461110 switch (op->op) {
569 42613 case SWS_OP_CLEAR:
570
2/2
✓ Branch 1 taken 26406 times.
✓ Branch 2 taken 16207 times.
42613 if (op_commute_clear(op, next)) {
571 26406 FFSWAP(SwsOp, *op, *next);
572 26406 goto retry;
573 }
574 16207 break;
575 }
576 }
577
578 /* Apply any remaining preferential re-ordering optimizations; do these
579 * last because they are more likely to block other optimizations if done
580 * too aggressively */
581
2/2
✓ Branch 0 taken 263786 times.
✓ Branch 1 taken 31555 times.
295341 for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
582 263786 SwsOp *op = &ops->ops[n];
583 263786 SwsOp *prev = &ops->ops[n - 1];
584 263786 SwsOp *next = &ops->ops[n + 1];
585
586
3/3
✓ Branch 0 taken 47891 times.
✓ Branch 1 taken 15207 times.
✓ Branch 2 taken 200688 times.
263786 switch (op->op) {
587 47891 case SWS_OP_SWIZZLE: {
588 47891 bool seen[4] = {0};
589 47891 bool has_duplicates = false;
590
2/2
✓ Branch 0 taken 191564 times.
✓ Branch 1 taken 47891 times.
239455 for (int i = 0; i < 4; i++) {
591
2/2
✓ Branch 0 taken 46363 times.
✓ Branch 1 taken 145201 times.
191564 if (next->comps.unused[i])
592 46363 continue;
593 145201 has_duplicates |= seen[op->swizzle.in[i]];
594 145201 seen[op->swizzle.in[i]] = true;
595 }
596
597 /* Try to push swizzles with duplicates towards the output */
598
4/4
✓ Branch 0 taken 8684 times.
✓ Branch 1 taken 39207 times.
✓ Branch 3 taken 5840 times.
✓ Branch 4 taken 2844 times.
47891 if (has_duplicates && op_commute_swizzle(op, next)) {
599 5840 FFSWAP(SwsOp, *op, *next);
600 11826 goto retry;
601 }
602
603 /* Move swizzle out of the way between two converts so that
604 * they may be merged */
605
4/4
✓ Branch 0 taken 22255 times.
✓ Branch 1 taken 19796 times.
✓ Branch 2 taken 5986 times.
✓ Branch 3 taken 16269 times.
42051 if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
606 5986 op->type = next->convert.to;
607 5986 FFSWAP(SwsOp, *op, *next);
608 5986 goto retry;
609 }
610 36065 break;
611 }
612
613 15207 case SWS_OP_SCALE:
614 /* Scaling by integer before conversion to int */
615
4/4
✓ Branch 0 taken 4582 times.
✓ Branch 1 taken 10625 times.
✓ Branch 2 taken 1800 times.
✓ Branch 3 taken 2782 times.
15207 if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
616
1/2
✓ Branch 0 taken 1800 times.
✗ Branch 1 not taken.
1800 ff_sws_pixel_type_is_int(next->convert.to))
617 {
618 1800 op->type = next->convert.to;
619 1800 FFSWAP(SwsOp, *op, *next);
620 1800 goto retry;
621 }
622 13407 break;
623 }
624 }
625
626 31555 return 0;
627 }
628
629 16767 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
630 int size, uint8_t clear_val,
631 int *read_bytes, int *write_bytes)
632 {
633 16767 const SwsOp read = ops->ops[0];
634 16767 const int read_size = ff_sws_pixel_type_size(read.type);
635 16767 uint32_t mask[4] = {0};
636
637
2/4
✓ Branch 0 taken 16767 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16767 times.
16767 if (!ops->num_ops || read.op != SWS_OP_READ)
638 return AVERROR(EINVAL);
639
6/6
✓ Branch 0 taken 16515 times.
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 10243 times.
✓ Branch 3 taken 6272 times.
✓ Branch 4 taken 7743 times.
✓ Branch 5 taken 2500 times.
16767 if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
640 7995 return AVERROR(ENOTSUP);
641
642
2/2
✓ Branch 0 taken 18817 times.
✓ Branch 1 taken 8772 times.
27589 for (int i = 0; i < read.rw.elems; i++)
643 18817 mask[i] = 0x01010101 * i * read_size + 0x03020100;
644
645
1/2
✓ Branch 0 taken 14659 times.
✗ Branch 1 not taken.
14659 for (int opidx = 1; opidx < ops->num_ops; opidx++) {
646 14659 const SwsOp *op = &ops->ops[opidx];
647
6/6
✓ Branch 0 taken 2977 times.
✓ Branch 1 taken 2686 times.
✓ Branch 2 taken 210 times.
✓ Branch 3 taken 4938 times.
✓ Branch 4 taken 528 times.
✓ Branch 5 taken 3320 times.
14659 switch (op->op) {
648 2977 case SWS_OP_SWIZZLE: {
649 2977 uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
650
2/2
✓ Branch 0 taken 11908 times.
✓ Branch 1 taken 2977 times.
14885 for (int i = 0; i < 4; i++)
651 11908 mask[i] = orig[op->swizzle.in[i]];
652 2977 break;
653 }
654
655 2686 case SWS_OP_SWAP_BYTES:
656
2/2
✓ Branch 0 taken 10744 times.
✓ Branch 1 taken 2686 times.
13430 for (int i = 0; i < 4; i++) {
657
2/3
✓ Branch 0 taken 10236 times.
✓ Branch 1 taken 508 times.
✗ Branch 2 not taken.
10744 switch (ff_sws_pixel_type_size(op->type)) {
658 10236 case 2: mask[i] = av_bswap16(mask[i]); break;
659 508 case 4: mask[i] = av_bswap32(mask[i]); break;
660 }
661 }
662 2686 break;
663
664 210 case SWS_OP_CLEAR:
665
2/2
✓ Branch 0 taken 758 times.
✓ Branch 1 taken 54 times.
812 for (int i = 0; i < 4; i++) {
666
2/2
✓ Branch 0 taken 548 times.
✓ Branch 1 taken 210 times.
758 if (!op->c.q4[i].den)
667 548 continue;
668
3/4
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 156 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 54 times.
210 if (op->c.q4[i].num != 0 || !clear_val)
669 156 return AVERROR(ENOTSUP);
670 54 mask[i] = 0x1010101ul * clear_val;
671 }
672 54 break;
673
674 4938 case SWS_OP_CONVERT: {
675
2/2
✓ Branch 0 taken 4768 times.
✓ Branch 1 taken 170 times.
4938 if (!op->convert.expand)
676 4768 return AVERROR(ENOTSUP);
677
2/2
✓ Branch 0 taken 680 times.
✓ Branch 1 taken 170 times.
850 for (int i = 0; i < 4; i++) {
678
1/3
✓ Branch 0 taken 680 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
680 switch (ff_sws_pixel_type_size(op->type)) {
679 680 case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
680 case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
681 }
682 }
683 170 break;
684 }
685
686 528 case SWS_OP_WRITE: {
687
6/6
✓ Branch 0 taken 514 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 262 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 211 times.
✓ Branch 5 taken 51 times.
528 if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
688 225 return AVERROR(ENOTSUP);
689
690 /* Initialize to no-op */
691 303 memset(shuffle, clear_val, size);
692
693 303 const int write_size = ff_sws_pixel_type_size(op->type);
694 303 const int read_chunk = read.rw.elems * read_size;
695 303 const int write_chunk = op->rw.elems * write_size;
696 303 const int num_groups = size / FFMAX(read_chunk, write_chunk);
697
2/2
✓ Branch 0 taken 1110 times.
✓ Branch 1 taken 303 times.
1413 for (int n = 0; n < num_groups; n++) {
698 1110 const int base_in = n * read_chunk;
699 1110 const int base_out = n * write_chunk;
700
2/2
✓ Branch 0 taken 2842 times.
✓ Branch 1 taken 1110 times.
3952 for (int i = 0; i < op->rw.elems; i++) {
701 2842 const int offset = base_out + i * write_size;
702
2/2
✓ Branch 0 taken 4354 times.
✓ Branch 1 taken 2842 times.
7196 for (int b = 0; b < write_size; b++) {
703 4354 const uint8_t idx = mask[i] >> (b * 8);
704
2/2
✓ Branch 0 taken 4138 times.
✓ Branch 1 taken 216 times.
4354 if (idx != clear_val)
705 4138 shuffle[offset + b] = base_in + idx;
706 }
707 }
708 }
709
710 303 *read_bytes = num_groups * read_chunk;
711 303 *write_bytes = num_groups * write_chunk;
712 303 return num_groups;
713 }
714
715 3320 default:
716 3320 return AVERROR(ENOTSUP);
717 }
718 }
719
720 return AVERROR(EINVAL);
721 }
722