FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_optimizer.c
Date: 2026-04-18 02:30:19
Exec Total Coverage
Lines: 417 570 73.2%
Functions: 9 13 69.2%
Branches: 339 480 70.6%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24
25 #include "ops.h"
26 #include "ops_internal.h"
27
/**
 * Evaluate `x`, store the result in the enclosing function's `ret` variable,
 * and return it from the enclosing function if it is negative (an error).
 *
 * Requires an assignable `ret` to be in scope at the point of use.
 */
#define RET(x)                                                                  \
    do {                                                                        \
        ret = (x);                                                              \
        if (ret < 0)                                                            \
            return ret;                                                         \
    } while (0)
33
34 /**
35 * Try to commute a clear op with the next operation. Makes any adjustments
36 * to the operations as needed, but does not perform the actual commutation.
37 *
38 * Returns whether successful.
39 */
40 51904 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42 51904 SwsClearOp tmp = {0};
43
44 av_assert1(op->op == SWS_OP_CLEAR);
45
5/7
✓ Branch 0 taken 9120 times.
✓ Branch 1 taken 8778 times.
✓ Branch 2 taken 6532 times.
✓ Branch 3 taken 3529 times.
✓ Branch 4 taken 23945 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
51904 switch (next->op) {
46 9120 case SWS_OP_CONVERT:
47 9120 op->type = next->convert.to;
48 /* fall through */
49 17898 case SWS_OP_LSHIFT:
50 case SWS_OP_RSHIFT:
51 case SWS_OP_DITHER:
52 case SWS_OP_MIN:
53 case SWS_OP_MAX:
54 case SWS_OP_SCALE:
55 case SWS_OP_READ:
56 case SWS_OP_FILTER_H:
57 case SWS_OP_FILTER_V:
58 17898 ff_sws_apply_op_q(next, op->clear.value);
59 17898 return true;
60 6532 case SWS_OP_SWIZZLE:
61 6532 op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, next->swizzle);
62 6532 ff_sws_apply_op_q(next, op->clear.value);
63 6532 return true;
64 3529 case SWS_OP_SWAP_BYTES:
65
2/3
✓ Branch 0 taken 2920 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 609 times.
3529 switch (next->type) {
66 2920 case SWS_PIXEL_U16:
67 2920 ff_sws_apply_op_q(next, op->clear.value); /* always works */
68 2920 return true;
69 case SWS_PIXEL_U32:
70 for (int i = 0; i < 4; i++) {
71 if (!SWS_COMP_TEST(op->clear.mask, i))
72 continue;
73 uint32_t v = av_bswap32(op->clear.value[i].num);
74 if (v > INT_MAX)
75 return false; /* can't represent as AVRational anymore */
76 tmp.value[i] = Q(v);
77 }
78 op->clear = tmp;
79 return true;
80 609 default:
81 609 return false;
82 }
83 23945 case SWS_OP_INVALID:
84 case SWS_OP_WRITE:
85 case SWS_OP_LINEAR:
86 case SWS_OP_PACK:
87 case SWS_OP_UNPACK:
88 case SWS_OP_CLEAR:
89 23945 return false;
90 case SWS_OP_TYPE_NB:
91 break;
92 }
93
94 av_unreachable("Invalid operation type!");
95 return false;
96 }
97
98 /**
99 * Try to commute a swizzle op with the next operation. Makes any adjustments
100 * to the operations as needed, but does not perform the actual commutation.
101 *
102 * Returns whether successful.
103 */
104 60367 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
105 {
106 60367 bool seen[4] = {0};
107
108 av_assert1(op->op == SWS_OP_SWIZZLE);
109
5/7
✓ Branch 0 taken 19578 times.
✓ Branch 1 taken 11779 times.
✓ Branch 2 taken 3178 times.
✓ Branch 3 taken 2814 times.
✓ Branch 4 taken 23018 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
60367 switch (next->op) {
110 19578 case SWS_OP_CONVERT:
111 19578 op->type = next->convert.to;
112 /* fall through */
113 31357 case SWS_OP_SWAP_BYTES:
114 case SWS_OP_LSHIFT:
115 case SWS_OP_RSHIFT:
116 case SWS_OP_SCALE:
117 case SWS_OP_FILTER_H:
118 case SWS_OP_FILTER_V:
119 31357 return true;
120
121 /**
122 * We can commute per-channel ops only if the per-channel constants are the
123 * same for all duplicated channels; e.g.:
124 * SWIZZLE {0, 0, 0, 3}
125 * NEXT {x, x, x, w}
126 * ->
127 * NEXT {x, _, _, w}
128 * SWIZZLE {0, 0, 0, 3}
129 */
130 3178 case SWS_OP_MIN:
131 case SWS_OP_MAX: {
132 3178 const SwsClampOp c = next->clamp;
133
2/2
✓ Branch 0 taken 12712 times.
✓ Branch 1 taken 3178 times.
15890 for (int i = 0; i < 4; i++) {
134
2/2
✓ Branch 0 taken 2934 times.
✓ Branch 1 taken 9778 times.
12712 if (!SWS_OP_NEEDED(op, i))
135 2934 continue;
136 9778 const int j = op->swizzle.in[i];
137
3/4
✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 7654 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2124 times.
9778 if (seen[j] && av_cmp_q(next->clamp.limit[j], c.limit[i]))
138 return false;
139 9778 next->clamp.limit[j] = c.limit[i];
140 9778 seen[j] = true;
141 }
142 3178 return true;
143 }
144
145 2814 case SWS_OP_DITHER: {
146 2814 const SwsDitherOp d = next->dither;
147
2/2
✓ Branch 0 taken 10440 times.
✓ Branch 1 taken 2406 times.
12846 for (int i = 0; i < 4; i++) {
148
2/2
✓ Branch 0 taken 2260 times.
✓ Branch 1 taken 8180 times.
10440 if (!SWS_OP_NEEDED(op, i))
149 2260 continue;
150 8180 const int j = op->swizzle.in[i];
151
4/4
✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 6440 times.
✓ Branch 2 taken 408 times.
✓ Branch 3 taken 1332 times.
8180 if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
152 408 return false;
153 7772 next->dither.y_offset[j] = d.y_offset[i];
154 7772 seen[j] = true;
155 }
156 2406 return true;
157 }
158
159 23018 case SWS_OP_INVALID:
160 case SWS_OP_READ:
161 case SWS_OP_WRITE:
162 case SWS_OP_SWIZZLE:
163 case SWS_OP_CLEAR:
164 case SWS_OP_LINEAR:
165 case SWS_OP_PACK:
166 case SWS_OP_UNPACK:
167 23018 return false;
168 case SWS_OP_TYPE_NB:
169 break;
170 }
171
172 av_unreachable("Invalid operation type!");
173 return false;
174 }
175
176 /**
177 * Try to commute a filter op with the previous operation. Makes any
178 * adjustments to the operations as needed, but does not perform the actual
179 * commutation.
180 *
181 * Returns whether successful.
182 */
183 static bool op_commute_filter(SwsOp *op, SwsOp *prev)
184 {
185 switch (prev->op) {
186 case SWS_OP_SWIZZLE:
187 case SWS_OP_SCALE:
188 case SWS_OP_LINEAR:
189 case SWS_OP_DITHER:
190 prev->type = SWS_PIXEL_F32;
191 return true;
192 case SWS_OP_CONVERT:
193 if (prev->convert.to == SWS_PIXEL_F32) {
194 av_assert0(!prev->convert.expand);
195 FFSWAP(SwsPixelType, op->type, prev->type);
196 return true;
197 }
198 return false;
199 case SWS_OP_INVALID:
200 case SWS_OP_READ:
201 case SWS_OP_WRITE:
202 case SWS_OP_SWAP_BYTES:
203 case SWS_OP_UNPACK:
204 case SWS_OP_PACK:
205 case SWS_OP_LSHIFT:
206 case SWS_OP_RSHIFT:
207 case SWS_OP_CLEAR:
208 case SWS_OP_MIN:
209 case SWS_OP_MAX:
210 case SWS_OP_FILTER_H:
211 case SWS_OP_FILTER_V:
212 return false;
213 case SWS_OP_TYPE_NB:
214 break;
215 }
216
217 av_unreachable("Invalid operation type!");
218 return false;
219 }
220
/**
 * Returns log2(x) if x is a positive power of two, or 0 otherwise.
 * Note that x == 1 also yields 0, so a zero result cannot distinguish
 * "log2 is 0" from "not a power of two".
 */
static int exact_log2(const int x)
{
    /* x & (x - 1) strips the lowest set bit; nothing may remain */
    const bool is_pow2 = x > 0 && !(x & (x - 1));

    return is_pow2 ? av_log2(x) : 0;
}
230
231 83751 static int exact_log2_q(const AVRational x)
232 {
233
2/2
✓ Branch 0 taken 22547 times.
✓ Branch 1 taken 61204 times.
83751 if (x.den == 1)
234 22547 return exact_log2(x.num);
235
2/2
✓ Branch 0 taken 19107 times.
✓ Branch 1 taken 42097 times.
61204 else if (x.num == 1)
236 19107 return -exact_log2(x.den);
237 else
238 42097 return 0;
239 }
240
241 /**
242 * If a linear operation can be reduced to a scalar multiplication, returns
243 * the corresponding scaling factor, or 0 otherwise.
244 */
245 121637 static bool extract_scalar(const SwsLinearOp *c, SwsComps comps, SwsComps prev,
246 SwsScaleOp *out_scale)
247 {
248 121637 SwsScaleOp scale = {0};
249
250 /* There are components not on the main diagonal */
251
2/2
✓ Branch 0 taken 93244 times.
✓ Branch 1 taken 28393 times.
121637 if (c->mask & ~SWS_MASK_DIAG4)
252 93244 return false;
253
254
2/2
✓ Branch 0 taken 85250 times.
✓ Branch 1 taken 10262 times.
95512 for (int i = 0; i < 4; i++) {
255 85250 const AVRational s = c->m[i][i];
256
1/2
✓ Branch 0 taken 85250 times.
✗ Branch 1 not taken.
85250 if ((prev.flags[i] & SWS_COMP_ZERO) ||
257
2/2
✓ Branch 0 taken 10702 times.
✓ Branch 1 taken 74548 times.
85250 (comps.flags[i] & SWS_COMP_GARBAGE))
258 10702 continue;
259
4/4
✓ Branch 0 taken 46155 times.
✓ Branch 1 taken 28393 times.
✓ Branch 3 taken 18131 times.
✓ Branch 4 taken 28024 times.
74548 if (scale.factor.den && av_cmp_q(s, scale.factor))
260 18131 return false;
261 56417 scale.factor = s;
262 }
263
264
1/2
✓ Branch 0 taken 10262 times.
✗ Branch 1 not taken.
10262 if (scale.factor.den)
265 10262 *out_scale = scale;
266 10262 return scale.factor.den;
267 }
268
269 /* Extracts an integer clear operation (subset) from the given linear op. */
270 130383 static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
271 SwsClearOp *out_clear)
272 {
273 130383 SwsClearOp clear = {0};
274 130383 bool ret = false;
275
276
2/2
✓ Branch 0 taken 521532 times.
✓ Branch 1 taken 130383 times.
651915 for (int i = 0; i < 4; i++) {
277 521532 bool const_row = c->m[i][4].den == 1; /* offset is integer */
278
2/2
✓ Branch 0 taken 2086128 times.
✓ Branch 1 taken 521532 times.
2607660 for (int j = 0; j < 4; j++) {
279
2/2
✓ Branch 0 taken 856102 times.
✓ Branch 1 taken 1230026 times.
2942230 const_row &= c->m[i][j].num == 0 || /* scalar is zero */
280
2/2
✓ Branch 0 taken 17304 times.
✓ Branch 1 taken 838798 times.
856102 (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
281 }
282
3/4
✓ Branch 0 taken 10740 times.
✓ Branch 1 taken 510792 times.
✓ Branch 2 taken 10740 times.
✗ Branch 3 not taken.
521532 if (const_row && (c->mask & SWS_MASK_ROW(i))) {
283 10740 clear.mask |= SWS_COMP(i);
284 10740 clear.value[i] = c->m[i][4];
285
2/2
✓ Branch 0 taken 53700 times.
✓ Branch 1 taken 10740 times.
64440 for (int j = 0; j < 5; j++)
286 53700 c->m[i][j] = Q(i == j);
287 10740 c->mask &= ~SWS_MASK_ROW(i);
288 10740 ret = true;
289 }
290 }
291
292
2/2
✓ Branch 0 taken 8746 times.
✓ Branch 1 taken 121637 times.
130383 if (ret)
293 8746 *out_clear = clear;
294 130383 return ret;
295 }
296
297 /* Unswizzle a linear operation by aligning single-input rows with
298 * their corresponding diagonal */
299 111375 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
300 {
301 111375 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
302 111375 SwsLinearOp c = *op;
303
304 /* Find non-zero coefficients in the main 4x4 matrix */
305 111375 uint32_t nonzero = 0;
306
2/2
✓ Branch 0 taken 445500 times.
✓ Branch 1 taken 111375 times.
556875 for (int i = 0; i < 4; i++) {
307
2/2
✓ Branch 0 taken 1782000 times.
✓ Branch 1 taken 445500 times.
2227500 for (int j = 0; j < 4; j++) {
308
4/4
✓ Branch 0 taken 763466 times.
✓ Branch 1 taken 1018534 times.
✓ Branch 2 taken 5184 times.
✓ Branch 3 taken 758282 times.
1782000 if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
309 1023718 continue;
310 758282 nonzero |= SWS_MASK(i, j);
311 }
312 }
313
314 /* If a value is unique in its row and the target column is
315 * empty, move it there and update the input swizzle */
316
2/2
✓ Branch 0 taken 445500 times.
✓ Branch 1 taken 111375 times.
556875 for (int i = 0; i < 4; i++) {
317
2/2
✓ Branch 0 taken 440316 times.
✓ Branch 1 taken 5184 times.
445500 if (nonzero & SWS_MASK_COL(i))
318 440316 continue; /* target column is not empty */
319
1/2
✓ Branch 0 taken 5184 times.
✗ Branch 1 not taken.
5184 for (int j = 0; j < 4; j++) {
320
1/2
✓ Branch 0 taken 5184 times.
✗ Branch 1 not taken.
5184 if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
321 /* Move coefficient to the diagonal */
322 5184 c.m[i][i] = c.m[i][j];
323 5184 c.m[i][j] = Q(0);
324 5184 swiz.in[i] = j;
325 5184 break;
326 }
327 }
328 }
329
330
2/2
✓ Branch 0 taken 108783 times.
✓ Branch 1 taken 2592 times.
111375 if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
331 108783 return false; /* no swizzle was identified */
332
333 2592 c.mask = ff_sws_linear_mask(c);
334 2592 *out_swiz = swiz;
335 2592 *op = c;
336 2592 return true;
337 }
338
339 31704 int ff_sws_op_list_optimize(SwsOpList *ops)
340 {
341 int ret;
342
343 386416 retry:
344 418120 ff_sws_op_list_update_comps(ops);
345
346 /* Try to push filters towards the input; do this first to unblock
347 * in-place optimizations like linear op fusion */
348
2/2
✓ Branch 0 taken 4086978 times.
✓ Branch 1 taken 418120 times.
4505098 for (int n = 1; n < ops->num_ops; n++) {
349 4086978 SwsOp *op = &ops->ops[n];
350 4086978 SwsOp *prev = &ops->ops[n - 1];
351
352
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4086978 times.
4086978 switch (op->op) {
353 case SWS_OP_FILTER_H:
354 case SWS_OP_FILTER_V:
355 if (op_commute_filter(op, prev)) {
356 FFSWAP(SwsOp, *op, *prev);
357 goto retry;
358 }
359 break;
360 }
361 }
362
363 /* Apply all in-place optimizations (that do not re-order the list) */
364
2/2
✓ Branch 0 taken 2383753 times.
✓ Branch 1 taken 97819 times.
2481572 for (int n = 0; n < ops->num_ops; n++) {
365 2383753 SwsOp dummy = {0};
366 2383753 SwsOp *op = &ops->ops[n];
367
2/2
✓ Branch 0 taken 1965633 times.
✓ Branch 1 taken 418120 times.
2383753 SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
368
2/2
✓ Branch 0 taken 2285934 times.
✓ Branch 1 taken 97819 times.
2383753 SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
369
370 /* common helper variable */
371 2383753 bool noop = true;
372
373
4/4
✓ Branch 0 taken 178158 times.
✓ Branch 1 taken 2205595 times.
✓ Branch 2 taken 100059 times.
✓ Branch 3 taken 78099 times.
2383753 if (!SWS_OP_NEEDED(op, 0) && !SWS_OP_NEEDED(op, 1) &&
374
3/4
✓ Branch 0 taken 97819 times.
✓ Branch 1 taken 2240 times.
✓ Branch 2 taken 97819 times.
✗ Branch 3 not taken.
100059 !SWS_OP_NEEDED(op, 2) && !SWS_OP_NEEDED(op, 3) &&
375
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97819 times.
97819 op->op != SWS_OP_WRITE)
376 {
377 /* Remove any operation whose output is not needed */
378 ff_sws_op_list_remove_at(ops, n, 1);
379 320301 goto retry;
380 }
381
382
13/14
✓ Branch 0 taken 418120 times.
✓ Branch 1 taken 198572 times.
✓ Branch 2 taken 77772 times.
✓ Branch 3 taken 44114 times.
✓ Branch 4 taken 164131 times.
✓ Branch 5 taken 286900 times.
✓ Branch 6 taken 497029 times.
✓ Branch 7 taken 88800 times.
✓ Branch 8 taken 56524 times.
✓ Branch 9 taken 115816 times.
✓ Branch 10 taken 235355 times.
✓ Branch 11 taken 83751 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 116869 times.
2383753 switch (op->op) {
383 418120 case SWS_OP_READ:
384 /* "Compress" planar reads where not all components are needed */
385
2/2
✓ Branch 0 taken 231385 times.
✓ Branch 1 taken 186735 times.
418120 if (!op->rw.packed) {
386 231385 SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
387 231385 int nb_planes = 0;
388
2/2
✓ Branch 0 taken 567045 times.
✓ Branch 1 taken 231385 times.
798430 for (int i = 0; i < op->rw.elems; i++) {
389
2/2
✓ Branch 0 taken 5180 times.
✓ Branch 1 taken 561865 times.
567045 if (!SWS_OP_NEEDED(op, i)) {
390 5180 swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
391 5180 continue;
392 }
393
394 561865 const int idx = nb_planes++;
395 av_assert1(idx <= i);
396 561865 ops->plane_src[idx] = ops->plane_src[i];
397 561865 swiz.in[i] = idx;
398 }
399
400
2/2
✓ Branch 0 taken 4230 times.
✓ Branch 1 taken 227155 times.
231385 if (nb_planes < op->rw.elems) {
401 4230 op->rw.elems = nb_planes;
402
2/4
✓ Branch 0 taken 4230 times.
✗ Branch 1 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 4230 times.
4230 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
403 .op = SWS_OP_SWIZZLE,
404 .type = op->rw.filter ? SWS_PIXEL_F32 : op->type,
405 .swizzle = swiz,
406 }));
407 4230 goto retry;
408 }
409 }
410 413890 break;
411
412 198572 case SWS_OP_SWAP_BYTES:
413 /* Redundant (double) swap */
414
2/2
✓ Branch 0 taken 209 times.
✓ Branch 1 taken 198363 times.
198572 if (next->op == SWS_OP_SWAP_BYTES) {
415 209 ff_sws_op_list_remove_at(ops, n, 2);
416 209 goto retry;
417 }
418 198363 break;
419
420 77772 case SWS_OP_UNPACK:
421 /* Redundant unpack+pack */
422
3/4
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 77726 times.
✓ Branch 2 taken 46 times.
✗ Branch 3 not taken.
77772 if (next->op == SWS_OP_PACK && next->type == op->type &&
423
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[0] == op->pack.pattern[0] &&
424
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[1] == op->pack.pattern[1] &&
425
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[2] == op->pack.pattern[2] &&
426
1/2
✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
46 next->pack.pattern[3] == op->pack.pattern[3])
427 {
428 46 ff_sws_op_list_remove_at(ops, n, 2);
429 46 goto retry;
430 }
431 77726 break;
432
433 44114 case SWS_OP_LSHIFT:
434 case SWS_OP_RSHIFT:
435 /* Two shifts in the same direction */
436
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 43910 times.
44114 if (next->op == op->op) {
437 204 op->shift.amount += next->shift.amount;
438 204 ff_sws_op_list_remove_at(ops, n + 1, 1);
439 204 goto retry;
440 }
441
442 /* No-op shift */
443
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43910 times.
43910 if (!op->shift.amount) {
444 ff_sws_op_list_remove_at(ops, n, 1);
445 goto retry;
446 }
447 43910 break;
448
449 164131 case SWS_OP_CLEAR:
450
2/2
✓ Branch 0 taken 656524 times.
✓ Branch 1 taken 164131 times.
820655 for (int i = 0; i < 4; i++) {
451
2/2
✓ Branch 0 taken 439631 times.
✓ Branch 1 taken 216893 times.
656524 if (!SWS_COMP_TEST(op->clear.mask, i))
452 439631 continue;
453
454
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 216893 times.
216893 if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
455 !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
456 op->clear.value[i].num == 0)
457 {
458 /* Redundant clear-to-zero of zero component */
459 op->clear.mask ^= SWS_COMP(i);
460
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 185189 times.
216893 } else if (!SWS_OP_NEEDED(op, i)) {
461 /* Unnecessary clear of unused component */
462 31704 op->clear.mask ^= SWS_COMP(i);
463 } else {
464 185189 noop = false;
465 }
466 }
467
468
2/2
✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 132427 times.
164131 if (noop) {
469 31704 ff_sws_op_list_remove_at(ops, n, 1);
470 31704 goto retry;
471 }
472
473 /* Transitive clear */
474
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 132237 times.
132427 if (next->op == SWS_OP_CLEAR) {
475
2/2
✓ Branch 0 taken 760 times.
✓ Branch 1 taken 190 times.
950 for (int i = 0; i < 4; i++) {
476
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 570 times.
760 if (SWS_COMP_TEST(next->clear.mask, i))
477 190 op->clear.value[i] = next->clear.value[i];
478 }
479 190 op->clear.mask |= next->clear.mask;
480 190 ff_sws_op_list_remove_at(ops, n + 1, 1);
481 190 goto retry;
482 }
483 132237 break;
484
485 286900 case SWS_OP_SWIZZLE:
486
2/2
✓ Branch 0 taken 1147600 times.
✓ Branch 1 taken 286900 times.
1434500 for (int i = 0; i < 4; i++) {
487
2/2
✓ Branch 0 taken 283491 times.
✓ Branch 1 taken 864109 times.
1147600 if (!SWS_OP_NEEDED(op, i))
488 283491 continue;
489
2/2
✓ Branch 0 taken 594279 times.
✓ Branch 1 taken 269830 times.
864109 if (op->swizzle.in[i] != i)
490 594279 noop = false;
491 }
492
493 /* Identity swizzle */
494
2/2
✓ Branch 0 taken 45109 times.
✓ Branch 1 taken 241791 times.
286900 if (noop) {
495 45109 ff_sws_op_list_remove_at(ops, n, 1);
496 45109 goto retry;
497 }
498
499 /* Transitive swizzle */
500
2/2
✓ Branch 0 taken 3586 times.
✓ Branch 1 taken 238205 times.
241791 if (next->op == SWS_OP_SWIZZLE) {
501 3586 const SwsSwizzleOp orig = op->swizzle;
502
2/2
✓ Branch 0 taken 14344 times.
✓ Branch 1 taken 3586 times.
17930 for (int i = 0; i < 4; i++)
503 14344 op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
504 3586 ff_sws_op_list_remove_at(ops, n + 1, 1);
505 3586 goto retry;
506 }
507
508 /* Swizzle planes instead of components, if possible */
509
4/4
✓ Branch 0 taken 51682 times.
✓ Branch 1 taken 186523 times.
✓ Branch 2 taken 8992 times.
✓ Branch 3 taken 42690 times.
238205 if (prev->op == SWS_OP_READ && !prev->rw.packed) {
510
2/2
✓ Branch 0 taken 13310 times.
✓ Branch 1 taken 1186 times.
14496 for (int dst = 0; dst < prev->rw.elems; dst++) {
511 13310 const int src = op->swizzle.in[dst];
512
4/4
✓ Branch 0 taken 8216 times.
✓ Branch 1 taken 5094 times.
✓ Branch 2 taken 7806 times.
✓ Branch 3 taken 410 times.
13310 if (src > dst && src < prev->rw.elems) {
513 7806 FFSWAP(int, ops->plane_src[dst], ops->plane_src[src]);
514
2/2
✓ Branch 0 taken 27330 times.
✓ Branch 1 taken 7806 times.
35136 for (int i = dst; i < 4; i++) {
515
2/2
✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 19524 times.
27330 if (op->swizzle.in[i] == dst)
516 7806 op->swizzle.in[i] = src;
517
2/2
✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 11718 times.
19524 else if (op->swizzle.in[i] == src)
518 7806 op->swizzle.in[i] = dst;
519 }
520 7806 goto retry;
521 }
522 }
523 }
524
525
4/4
✓ Branch 0 taken 25297 times.
✓ Branch 1 taken 205102 times.
✓ Branch 2 taken 13413 times.
✓ Branch 3 taken 11884 times.
230399 if (next->op == SWS_OP_WRITE && !next->rw.packed) {
526
2/2
✓ Branch 0 taken 15805 times.
✓ Branch 1 taken 1118 times.
16923 for (int dst = 0; dst < next->rw.elems; dst++) {
527 15805 const int src = op->swizzle.in[dst];
528
4/4
✓ Branch 0 taken 12751 times.
✓ Branch 1 taken 3054 times.
✓ Branch 2 taken 12295 times.
✓ Branch 3 taken 456 times.
15805 if (src > dst && src < next->rw.elems) {
529 12295 FFSWAP(int, ops->plane_dst[dst], ops->plane_dst[src]);
530 12295 FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
531 12295 goto retry;
532 }
533 }
534 }
535 218104 break;
536
537 497029 case SWS_OP_CONVERT:
538 /* No-op conversion */
539
2/2
✓ Branch 0 taken 9088 times.
✓ Branch 1 taken 487941 times.
497029 if (op->type == op->convert.to) {
540 9088 ff_sws_op_list_remove_at(ops, n, 1);
541 9088 goto retry;
542 }
543
544 /* Transitive conversion */
545
2/2
✓ Branch 0 taken 12941 times.
✓ Branch 1 taken 475000 times.
487941 if (next->op == SWS_OP_CONVERT &&
546
1/2
✓ Branch 0 taken 12941 times.
✗ Branch 1 not taken.
12941 op->convert.expand == next->convert.expand)
547 {
548 av_assert1(op->convert.to == next->type);
549 12941 op->convert.to = next->convert.to;
550 12941 ff_sws_op_list_remove_at(ops, n + 1, 1);
551 12941 goto retry;
552 }
553
554 /* Conversion followed by integer expansion */
555
3/4
✓ Branch 0 taken 64481 times.
✓ Branch 1 taken 410519 times.
✓ Branch 2 taken 64481 times.
✗ Branch 3 not taken.
475000 if (next->op == SWS_OP_SCALE && !op->convert.expand &&
556
1/2
✓ Branch 0 taken 64481 times.
✗ Branch 1 not taken.
64481 ff_sws_pixel_type_is_int(op->type) &&
557
4/4
✓ Branch 0 taken 1078 times.
✓ Branch 1 taken 63403 times.
✓ Branch 2 taken 360 times.
✓ Branch 3 taken 718 times.
65559 ff_sws_pixel_type_is_int(op->convert.to) &&
558 1078 !av_cmp_q(next->scale.factor,
559 ff_sws_pixel_expand(op->type, op->convert.to)))
560 {
561 360 op->convert.expand = true;
562 360 ff_sws_op_list_remove_at(ops, n + 1, 1);
563 360 goto retry;
564 }
565 474640 break;
566
567 88800 case SWS_OP_MIN:
568
2/2
✓ Branch 0 taken 355200 times.
✓ Branch 1 taken 88800 times.
444000 for (int i = 0; i < 4; i++) {
569
4/4
✓ Branch 0 taken 261646 times.
✓ Branch 1 taken 93554 times.
✓ Branch 2 taken 3000 times.
✓ Branch 3 taken 258646 times.
355200 if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
570 96554 continue;
571
2/2
✓ Branch 1 taken 201974 times.
✓ Branch 2 taken 56672 times.
258646 if (av_cmp_q(op->clamp.limit[i], prev->comps.max[i]) < 0)
572 201974 noop = false;
573 }
574
575
2/2
✓ Branch 0 taken 13260 times.
✓ Branch 1 taken 75540 times.
88800 if (noop) {
576 13260 ff_sws_op_list_remove_at(ops, n, 1);
577 13260 goto retry;
578 }
579 75540 break;
580
581 56524 case SWS_OP_MAX:
582
2/2
✓ Branch 0 taken 226096 times.
✓ Branch 1 taken 56524 times.
282620 for (int i = 0; i < 4; i++) {
583
3/4
✓ Branch 0 taken 166108 times.
✓ Branch 1 taken 59988 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 166108 times.
226096 if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
584 59988 continue;
585
2/2
✓ Branch 1 taken 93488 times.
✓ Branch 2 taken 72620 times.
166108 if (av_cmp_q(prev->comps.min[i], op->clamp.limit[i]) < 0)
586 93488 noop = false;
587 }
588
589
2/2
✓ Branch 0 taken 21150 times.
✓ Branch 1 taken 35374 times.
56524 if (noop) {
590 21150 ff_sws_op_list_remove_at(ops, n, 1);
591 21150 goto retry;
592 }
593 35374 break;
594
595 115816 case SWS_OP_DITHER:
596
2/2
✓ Branch 0 taken 440575 times.
✓ Branch 1 taken 86955 times.
527530 for (int i = 0; i < 4; i++) {
597
2/2
✓ Branch 0 taken 138559 times.
✓ Branch 1 taken 302016 times.
440575 if (op->dither.y_offset[i] < 0)
598 138559 continue;
599
4/4
✓ Branch 0 taken 284519 times.
✓ Branch 1 taken 17497 times.
✓ Branch 2 taken 11364 times.
✓ Branch 3 taken 273155 times.
302016 if (!SWS_OP_NEEDED(op, i) || (prev->comps.flags[i] & SWS_COMP_EXACT)) {
600 28861 op->dither.y_offset[i] = -1; /* unnecessary dither */
601 28861 goto retry;
602 } else {
603 273155 noop = false;
604 }
605 }
606
607
2/2
✓ Branch 0 taken 1712 times.
✓ Branch 1 taken 85243 times.
86955 if (noop) {
608 1712 ff_sws_op_list_remove_at(ops, n, 1);
609 1712 goto retry;
610 }
611 85243 break;
612
613 235355 case SWS_OP_LINEAR: {
614 SwsSwizzleOp swizzle;
615 SwsClearOp clear;
616 SwsScaleOp scale;
617
618 /* No-op (identity) linear operation */
619
2/2
✓ Branch 0 taken 3018 times.
✓ Branch 1 taken 232337 times.
235355 if (!op->lin.mask) {
620 3018 ff_sws_op_list_remove_at(ops, n, 1);
621 126572 goto retry;
622 }
623
624
2/2
✓ Branch 0 taken 59722 times.
✓ Branch 1 taken 172615 times.
232337 if (next->op == SWS_OP_LINEAR) {
625 /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
626 59722 const SwsLinearOp m1 = op->lin;
627 59722 const SwsLinearOp m2 = next->lin;
628
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 59722 times.
298610 for (int i = 0; i < 4; i++) {
629
2/2
✓ Branch 0 taken 1194440 times.
✓ Branch 1 taken 238888 times.
1433328 for (int j = 0; j < 5; j++) {
630 1194440 AVRational sum = Q(0);
631
2/2
✓ Branch 0 taken 4777760 times.
✓ Branch 1 taken 1194440 times.
5972200 for (int k = 0; k < 4; k++)
632 4777760 sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
633
2/2
✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 955552 times.
1194440 if (j == 4) /* m1.m[4][j] == 1 */
634 238888 sum = av_add_q(sum, m2.m[i][4]);
635 1194440 op->lin.m[i][j] = sum;
636 }
637 }
638 59722 op->lin.mask = ff_sws_linear_mask(op->lin);
639 59722 ff_sws_op_list_remove_at(ops, n + 1, 1);
640 59722 goto retry;
641 }
642
643 /* Optimize away zero columns */
644
2/2
✓ Branch 0 taken 678232 times.
✓ Branch 1 taken 158479 times.
836711 for (int j = 0; j < 4; j++) {
645 678232 const uint32_t col = SWS_MASK_COL(j);
646
4/4
✓ Branch 0 taken 43384 times.
✓ Branch 1 taken 634848 times.
✓ Branch 2 taken 29248 times.
✓ Branch 3 taken 14136 times.
678232 if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
647 664096 continue;
648
2/2
✓ Branch 0 taken 56544 times.
✓ Branch 1 taken 14136 times.
70680 for (int i = 0; i < 4; i++)
649 56544 op->lin.m[i][j] = Q(i == j);
650 14136 op->lin.mask &= ~col;
651 14136 goto retry;
652 }
653
654 /* Optimize away unused rows */
655
2/2
✓ Branch 0 taken 621688 times.
✓ Branch 1 taken 130383 times.
752071 for (int i = 0; i < 4; i++) {
656 621688 const uint32_t row = SWS_MASK_ROW(i);
657
4/4
✓ Branch 0 taken 219254 times.
✓ Branch 1 taken 402434 times.
✓ Branch 2 taken 191158 times.
✓ Branch 3 taken 28096 times.
621688 if (SWS_OP_NEEDED(op, i) || !(op->lin.mask & row))
658 593592 continue;
659
2/2
✓ Branch 0 taken 140480 times.
✓ Branch 1 taken 28096 times.
168576 for (int j = 0; j < 5; j++)
660 140480 op->lin.m[i][j] = Q(i == j);
661 28096 op->lin.mask &= ~row;
662 28096 goto retry;
663 }
664
665 /* Convert constant rows to explicit clear instruction */
666
2/2
✓ Branch 1 taken 8746 times.
✓ Branch 2 taken 121637 times.
130383 if (extract_constant_rows(&op->lin, prev->comps, &clear)) {
667
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 8746 times.
8746 RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
668 .op = SWS_OP_CLEAR,
669 .type = op->type,
670 .comps = op->comps,
671 .clear = clear,
672 }));
673 8746 goto retry;
674 }
675
676 /* Multiplication by scalar constant */
677
2/2
✓ Branch 1 taken 10262 times.
✓ Branch 2 taken 111375 times.
121637 if (extract_scalar(&op->lin, op->comps, prev->comps, &scale)) {
678 10262 op->op = SWS_OP_SCALE;
679 10262 op->scale = scale;
680 10262 goto retry;
681 }
682
683 /* Swizzle by fixed pattern */
684
2/2
✓ Branch 1 taken 2592 times.
✓ Branch 2 taken 108783 times.
111375 if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
685
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2592 times.
2592 RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
686 .op = SWS_OP_SWIZZLE,
687 .type = op->type,
688 .swizzle = swizzle,
689 }));
690 2592 goto retry;
691 }
692 108783 break;
693 }
694
695 83751 case SWS_OP_SCALE: {
696 83751 const int factor2 = exact_log2_q(op->scale.factor);
697
698 /* No-op scaling */
699
3/4
✓ Branch 0 taken 19107 times.
✓ Branch 1 taken 64644 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 19107 times.
83751 if (op->scale.factor.num == 1 && op->scale.factor.den == 1) {
700 ff_sws_op_list_remove_at(ops, n, 1);
701 goto retry;
702 }
703
704 /* Merge consecutive scaling operations (that don't overflow) */
705
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 83751 times.
83751 if (next->op == SWS_OP_SCALE) {
706 int64_t p = op->scale.factor.num * (int64_t) next->scale.factor.num;
707 int64_t q = op->scale.factor.den * (int64_t) next->scale.factor.den;
708 if (FFABS(p) <= INT_MAX && FFABS(q) <= INT_MAX) {
709 av_reduce(&op->scale.factor.num, &op->scale.factor.den, p, q, INT_MAX);
710 ff_sws_op_list_remove_at(ops, n + 1, 1);
711 goto retry;
712 }
713 }
714
715 /* Scaling by exact power of two */
716
4/4
✓ Branch 0 taken 15230 times.
✓ Branch 1 taken 68521 times.
✓ Branch 2 taken 978 times.
✓ Branch 3 taken 14252 times.
83751 if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
717
1/2
✓ Branch 0 taken 978 times.
✗ Branch 1 not taken.
978 op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
718 978 op->shift.amount = FFABS(factor2);
719 978 goto retry;
720 }
721 82773 break;
722 }
723
724 case SWS_OP_FILTER_H:
725 case SWS_OP_FILTER_V:
726 /* Merge with prior simple planar read */
727 if (prev->op == SWS_OP_READ && !prev->rw.filter &&
728 !prev->rw.packed && !prev->rw.frac) {
729 prev->rw.filter = op->op;
730 prev->rw.kernel = av_refstruct_ref(op->filter.kernel);
731 ff_sws_op_list_remove_at(ops, n, 1);
732 goto retry;
733 }
734 break;
735 }
736 }
737
738 /* Push clears to the back to void any unused components */
739
2/2
✓ Branch 0 taken 735733 times.
✓ Branch 1 taken 70469 times.
806202 for (int n = 0; n < ops->num_ops - 1; n++) {
740 735733 SwsOp *op = &ops->ops[n];
741 735733 SwsOp *next = &ops->ops[n + 1];
742
743
2/2
✓ Branch 0 taken 51904 times.
✓ Branch 1 taken 683829 times.
735733 switch (op->op) {
744 51904 case SWS_OP_CLEAR:
745
2/2
✓ Branch 1 taken 27350 times.
✓ Branch 2 taken 24554 times.
51904 if (op_commute_clear(op, next)) {
746 27350 FFSWAP(SwsOp, *op, *next);
747 27350 goto retry;
748 }
749 24554 break;
750 }
751 }
752
753 /* Apply any remaining preferential re-ordering optimizations; do these
754 * last because they are more likely to block other optimizations if done
755 * too aggressively */
756
2/2
✓ Branch 0 taken 403964 times.
✓ Branch 1 taken 31704 times.
435668 for (int n = 0; n < ops->num_ops - 1; n++) {
757 403964 SwsOp *op = &ops->ops[n];
758 403964 SwsOp *next = &ops->ops[n + 1];
759
760
3/3
✓ Branch 0 taken 60367 times.
✓ Branch 1 taken 24204 times.
✓ Branch 2 taken 319393 times.
403964 switch (op->op) {
761 60367 case SWS_OP_SWIZZLE: {
762 /* Try to push swizzles towards the output */
763
2/2
✓ Branch 1 taken 36941 times.
✓ Branch 2 taken 23426 times.
60367 if (op_commute_swizzle(op, next)) {
764 36941 FFSWAP(SwsOp, *op, *next);
765 36941 goto retry;
766 }
767 23426 break;
768 }
769
770 24204 case SWS_OP_SCALE:
771 /* Scaling by integer before conversion to int */
772
4/4
✓ Branch 0 taken 6362 times.
✓ Branch 1 taken 17842 times.
✓ Branch 2 taken 1824 times.
✓ Branch 3 taken 4538 times.
24204 if (op->scale.factor.den == 1 && next->op == SWS_OP_CONVERT &&
773
1/2
✓ Branch 0 taken 1824 times.
✗ Branch 1 not taken.
1824 ff_sws_pixel_type_is_int(next->convert.to))
774 {
775 1824 op->type = next->convert.to;
776 1824 FFSWAP(SwsOp, *op, *next);
777 1824 goto retry;
778 }
779 22380 break;
780 }
781 }
782
783 31704 return 0;
784 }
785
/**
 * Try to express an entire operation list as a single byte shuffle.
 *
 * The list must start with a supported read op and reach a supported write
 * op through only swizzle, byte swap, clear and expanding convert ops.
 *
 * @param ops         Operation list to analyze; not modified.
 * @param shuffle     Output table of `size` bytes. Only written once a
 *                    supported write op is reached. Each entry is either a
 *                    source byte index or `clear_val`.
 * @param size        Size of the shuffle table in bytes.
 * @param clear_val   Byte value stored for output bytes that have no source
 *                    byte. If zero, clear ops are rejected.
 *                    NOTE(review): presumably the index value the target
 *                    shuffle instruction treats as "write zero" - confirm
 *                    against the callers.
 * @param read_bytes  Set on success to the number of input bytes consumed
 *                    by one application of the shuffle.
 * @param write_bytes Set on success to the number of output bytes produced
 *                    by one application of the shuffle.
 *
 * @return The number of pixel groups covered by the table on success,
 *         AVERROR(ENOTSUP) if the list cannot be represented as a shuffle,
 *         or AVERROR(EINVAL) if the list is empty or has no write op.
 */
786 20440 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
787 int size, uint8_t clear_val,
788 int *read_bytes, int *write_bytes)
789 {
790
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20440 times.
20440 if (!ops->num_ops)
791 return AVERROR(EINVAL);
792
793 20440 const SwsOp *read = ff_sws_op_list_input(ops);
/* Only unfiltered, non-fractional reads are handled, and a non-packed read
 * must not have more than one element */
794
5/6
✓ Branch 0 taken 20440 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 20188 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 17500 times.
✓ Branch 5 taken 2688 times.
20440 if (!read || read->rw.frac || read->rw.filter ||
795
4/4
✓ Branch 0 taken 11228 times.
✓ Branch 1 taken 6272 times.
✓ Branch 2 taken 9162 times.
✓ Branch 3 taken 2066 times.
17500 (!read->rw.packed && read->rw.elems > 1))
796 12102 return AVERROR(ENOTSUP);
797
798 8338 const int read_size = ff_sws_pixel_type_size(read->type);
/* mask[i] tracks where each byte of component i comes from: byte b of
 * mask[i] starts out as i * read_size + b, the byte offset inside one input
 * group. Components beyond read->rw.elems stay zero. */
799 8338 uint32_t mask[4] = {0};
800
2/2
✓ Branch 0 taken 18383 times.
✓ Branch 1 taken 8338 times.
26721 for (int i = 0; i < read->rw.elems; i++)
801 18383 mask[i] = 0x01010101 * i * read_size + 0x03020100;
802
/* Replay every op after the read on the masks; opidx starts at 1, so the
 * first op in the list is never replayed */
803
1/2
✓ Branch 0 taken 11590 times.
✗ Branch 1 not taken.
11590 for (int opidx = 1; opidx < ops->num_ops; opidx++) {
804 11590 const SwsOp *op = &ops->ops[opidx];
805
6/6
✓ Branch 0 taken 379 times.
✓ Branch 1 taken 2663 times.
✓ Branch 2 taken 210 times.
✓ Branch 3 taken 4840 times.
✓ Branch 4 taken 528 times.
✓ Branch 5 taken 2970 times.
11590 switch (op->op) {
/* A swizzle permutes whole components, so permute their masks likewise */
806 379 case SWS_OP_SWIZZLE: {
807 379 uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
808
2/2
✓ Branch 0 taken 1516 times.
✓ Branch 1 taken 379 times.
1895 for (int i = 0; i < 4; i++)
809 1516 mask[i] = orig[op->swizzle.in[i]];
810 379 break;
811 }
812
/* Byte-swapping the mask reverses the source byte order of a component;
 * pixel sizes other than 2 and 4 leave the mask unchanged */
813 2663 case SWS_OP_SWAP_BYTES:
814
2/2
✓ Branch 0 taken 10652 times.
✓ Branch 1 taken 2663 times.
13315 for (int i = 0; i < 4; i++) {
815
2/3
✓ Branch 0 taken 10172 times.
✓ Branch 1 taken 480 times.
✗ Branch 2 not taken.
10652 switch (ff_sws_pixel_type_size(op->type)) {
816 10172 case 2: mask[i] = av_bswap16(mask[i]); break;
817 480 case 4: mask[i] = av_bswap32(mask[i]); break;
818 }
819 }
820 2663 break;
821
/* Only clears whose value has a zero numerator are supported, and only when
 * the caller passed a non-zero clear_val; every byte of a cleared component
 * is then marked with clear_val */
822 210 case SWS_OP_CLEAR:
823
2/2
✓ Branch 0 taken 758 times.
✓ Branch 1 taken 54 times.
812 for (int i = 0; i < 4; i++) {
824
2/2
✓ Branch 0 taken 548 times.
✓ Branch 1 taken 210 times.
758 if (!SWS_COMP_TEST(op->clear.mask, i))
825 548 continue;
826
3/4
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 156 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 54 times.
210 if (op->clear.value[i].num != 0 || !clear_val)
827 156 return AVERROR(ENOTSUP);
828 54 mask[i] = 0x1010101ul * clear_val;
829 }
830 54 break;
831
/* Only conversions flagged as `expand` are supported: the low byte (size 1)
 * or low 16 bits (size 2) of each mask is replicated across all four mask
 * bytes, so every output byte reads from the same source byte(s).
 * NOTE(review): op->type is presumably the source type of the conversion -
 * confirm. */
832 4840 case SWS_OP_CONVERT: {
833
2/2
✓ Branch 0 taken 4684 times.
✓ Branch 1 taken 156 times.
4840 if (!op->convert.expand)
834 4684 return AVERROR(ENOTSUP);
835
2/2
✓ Branch 0 taken 624 times.
✓ Branch 1 taken 156 times.
780 for (int i = 0; i < 4; i++) {
836
1/3
✓ Branch 0 taken 624 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
624 switch (ff_sws_pixel_type_size(op->type)) {
837 624 case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
838 case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
839 }
840 }
841 156 break;
842 }
843
/* The write op ends the walk: it is subject to the same restrictions as the
 * read op, and the accumulated masks are emitted as the shuffle table */
844 528 case SWS_OP_WRITE: {
845
3/4
✓ Branch 0 taken 514 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 514 times.
✗ Branch 3 not taken.
528 if (op->rw.frac || op->rw.filter ||
846
4/4
✓ Branch 0 taken 262 times.
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 211 times.
✓ Branch 3 taken 51 times.
514 (!op->rw.packed && op->rw.elems > 1))
847 225 return AVERROR(ENOTSUP);
848
849 /* Initialize to no-op */
850 303 memset(shuffle, clear_val, size);
851
852 303 const int write_size = ff_sws_pixel_type_size(op->type);
853 303 const int read_chunk = read->rw.elems * read_size;
854 303 const int write_chunk = op->rw.elems * write_size;
/* One group consumes read_chunk input bytes and produces write_chunk output
 * bytes; the larger of the two limits how many groups fit into `size` */
855 303 const int num_groups = size / FFMAX(read_chunk, write_chunk);
856
2/2
✓ Branch 0 taken 1166 times.
✓ Branch 1 taken 303 times.
1469 for (int n = 0; n < num_groups; n++) {
857 1166 const int base_in = n * read_chunk;
858 1166 const int base_out = n * write_chunk;
859
2/2
✓ Branch 0 taken 2898 times.
✓ Branch 1 taken 1166 times.
4064 for (int i = 0; i < op->rw.elems; i++) {
860 2898 const int offset = base_out + i * write_size;
861
2/2
✓ Branch 0 taken 4354 times.
✓ Branch 1 taken 2898 times.
7252 for (int b = 0; b < write_size; b++) {
/* Byte b of mask[i] is the source byte index relative to the group start;
 * mask bytes equal to clear_val keep the memset() fill from above */
862 4354 const uint8_t idx = mask[i] >> (b * 8);
863
2/2
✓ Branch 0 taken 4138 times.
✓ Branch 1 taken 216 times.
4354 if (idx != clear_val)
864 4138 shuffle[offset + b] = base_in + idx;
865 }
866 }
867 }
868
869 303 *read_bytes = num_groups * read_chunk;
870 303 *write_bytes = num_groups * write_chunk;
871 303 return num_groups;
872 }
873
874 2970 default:
875 2970 return AVERROR(ENOTSUP);
876 }
877 }
878
/* Reached the end of the list without encountering a write op */
879 return AVERROR(EINVAL);
880 }
881
882 /**
883 * Determine a suitable intermediate buffer format for a given combination
884 * of pixel types and number of planes. The exact interpretation of these
885 * formats does not matter at all, since they will only ever be used as
886 * temporary intermediate buffers. We still need to pick *some* format as
887 * a consequence of ff_sws_graph_add_pass() taking an AVPixelFormat for the
888 * output buffer.
 *
 * The format is selected from the byte size of `type` (1, 2 or 4) and from
 * `nb_planes` (1 to 4). A request for two planes returns the same format as
 * a request for three planes. Any other combination reaches
 * av_unreachable().
889 */
890 static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes)
891 {
892 switch (ff_sws_pixel_type_size(type)) {
893 case 1:
894 switch (nb_planes) {
895 case 1: return AV_PIX_FMT_GRAY8;
896 case 2: return AV_PIX_FMT_YUV444P; // FIXME: no 2-plane planar fmt
897 case 3: return AV_PIX_FMT_YUV444P;
898 case 4: return AV_PIX_FMT_YUVA444P;
899 }
900 break;
901 case 2:
902 switch (nb_planes) {
903 case 1: return AV_PIX_FMT_GRAY16;
904 case 2: return AV_PIX_FMT_YUV444P16; // FIXME: no 2-plane planar fmt
905 case 3: return AV_PIX_FMT_YUV444P16;
906 case 4: return AV_PIX_FMT_YUVA444P16;
907 }
908 break;
909 case 4:
910 switch (nb_planes) {
911 case 1: return AV_PIX_FMT_GRAYF32;
912 case 2: return AV_PIX_FMT_GBRPF32; // FIXME: no 2-plane planar fmt
913 case 3: return AV_PIX_FMT_GBRPF32;
914 case 4: return AV_PIX_FMT_GBRAPF32;
915 }
916 break;
917 }
918
/* Every supported size/plane combination returned above */
919 av_unreachable("Invalid pixel type or number of planes?");
920 return AV_PIX_FMT_NONE;
921 }
922
/**
 * Fill in the width and height of `fmt` from the given operation list.
 *
 * Both dimensions start out as the list's source dimensions. If the list's
 * input read op carries a vertical filter, the height is replaced by that
 * filter kernel's dst_size; if it carries a horizontal filter, the width is
 * replaced instead. No other field of `fmt` is touched.
 */
923 static void get_input_size(const SwsOpList *ops, SwsFormat *fmt)
924 {
925 fmt->width = ops->src.width;
926 fmt->height = ops->src.height;
927
928 const SwsOp *read = ff_sws_op_list_input(ops);
/* A read op holds a single filter direction, so at most one dimension is
 * overridden */
929 if (read && read->rw.filter == SWS_OP_FILTER_V) {
930 fmt->height = read->rw.kernel->dst_size;
931 } else if (read && read->rw.filter == SWS_OP_FILTER_H) {
932 fmt->width = read->rw.kernel->dst_size;
933 }
934 }
935
/**
 * Split an operation list in two at its first standalone filter op
 * (SWS_OP_FILTER_H or SWS_OP_FILTER_V).
 *
 * If the list contains no such op, it is left untouched, `*out_rest` is set
 * to NULL and 0 is returned.
 *
 * Otherwise `ops1` is truncated to the ops preceding the filter and is given
 * a trailing write op (preceded by a swizzle if the needed components are
 * not already contiguous). A new list holding the filter and all following
 * ops, prefixed by a matching read op (and swizzle, if required), is
 * returned in `*out_rest`. Both lists are re-optimized before returning.
 *
 * @param ops1     List to split; modified in place.
 * @param out_rest Receives the newly allocated second list, or NULL if no
 *                 split was needed.
 *
 * @return 0 on success, a negative error code on failure. On failure the new
 *         list is freed and `*out_rest` is not written, but `ops1` may
 *         already have been modified.
 */
936 int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest)
937 {
938 const SwsOp *op;
939 int ret, idx;
940
/* Locate the first filter op; `op` keeps pointing at it after the loop */
941 for (idx = 0; idx < ops1->num_ops; idx++) {
942 op = &ops1->ops[idx];
943 if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V)
944 break;
945 }
946
947 if (idx == ops1->num_ops) {
948 *out_rest = NULL;
949 return 0;
950 }
951
/* The filter must not be the very first op, as the op preceding it is
 * inspected below */
952 av_assert0(idx > 0);
953 const SwsOp *prev = &ops1->ops[idx - 1];
954
955 SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1);
956 if (!ops2)
957 return AVERROR(ENOMEM);
958
959 /**
960 * Not all components may be needed; but we need the ones that *are*
961 * used to be contiguous for the write/read operations. So, first
962 * compress them into a linearly ascending list of components
963 */
964 int nb_planes = 0;
965 SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3);
966 SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3);
/* swiz_wr maps compacted slot o to original component i (used before the
 * write); swiz_rd is the reverse mapping (used after the read) */
967 for (int i = 0; i < 4; i++) {
968 if (SWS_OP_NEEDED(prev, i)) {
969 const int o = nb_planes++;
970 swiz_wr.in[o] = i;
971 swiz_rd.in[i] = o;
972 }
973 }
974
975 /* Determine metadata for the intermediate format */
976 const SwsPixelType type = op->type;
977 ops2->src.format = get_planar_fmt(type, nb_planes);
978 ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format);
979 get_input_size(ops1, &ops2->src);
980 ops1->dst = ops2->src;
981
/* `prev` still points into ops1 here, so the component flags must be copied
 * before the list is truncated below */
982 for (int i = 0; i < nb_planes; i++) {
983 ops1->plane_dst[i] = ops2->plane_src[i] = i;
984 ops2->comps_src.flags[i] = prev->comps.flags[swiz_wr.in[i]];
985 }
986
/* ops1 keeps the ops before the filter, ops2 the filter and everything
 * after it */
987 ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx);
988 ff_sws_op_list_remove_at(ops2, 0, idx);
989 op = NULL; /* the above command may invalidate op */
990
/* A swizzle is only inserted when the compaction is not the identity */
991 if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
992 ret = ff_sws_op_list_append(ops1, &(SwsOp) {
993 .op = SWS_OP_SWIZZLE,
994 .type = type,
995 .swizzle = swiz_wr,
996 });
997 if (ret < 0)
998 goto fail;
999 }
1000
1001 ret = ff_sws_op_list_append(ops1, &(SwsOp) {
1002 .op = SWS_OP_WRITE,
1003 .type = type,
1004 .rw.elems = nb_planes,
1005 });
1006 if (ret < 0)
1007 goto fail;
1008
1009 ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) {
1010 .op = SWS_OP_READ,
1011 .type = type,
1012 .rw.elems = nb_planes,
1013 });
1014 if (ret < 0)
1015 goto fail;
1016
1017 if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
1018 ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) {
1019 .op = SWS_OP_SWIZZLE,
1020 .type = type,
1021 .swizzle = swiz_rd,
1022 });
1023 if (ret < 0)
1024 goto fail;
1025 }
1026
1027 ret = ff_sws_op_list_optimize(ops1);
1028 if (ret < 0)
1029 goto fail;
1030
1031 ret = ff_sws_op_list_optimize(ops2);
1032 if (ret < 0)
1033 goto fail;
1034
1035 *out_rest = ops2;
1036 return 0;
1037
/* Only ops2 is owned here; ops1 belongs to the caller and is left as-is */
1038 fail:
1039 ff_sws_op_list_free(&ops2);
1040 return ret;
1041 }
1042