Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/avassert.h" | ||
22 | #include "libavutil/bswap.h" | ||
23 | #include "libavutil/rational.h" | ||
24 | |||
25 | #include "ops.h" | ||
26 | #include "ops_internal.h" | ||
27 | |||
28 | #define RET(x) \ | ||
29 | do { \ | ||
30 | if ((ret = (x)) < 0) \ | ||
31 | return ret; \ | ||
32 | } while (0) | ||
33 | |||
34 | /* Returns true for operations that are independent per channel. These can | ||
35 | * usually be commuted freely other such operations. */ | ||
36 | ✗ | static bool op_type_is_independent(SwsOpType op) | |
37 | { | ||
38 | ✗ | switch (op) { | |
39 | ✗ | case SWS_OP_SWAP_BYTES: | |
40 | case SWS_OP_LSHIFT: | ||
41 | case SWS_OP_RSHIFT: | ||
42 | case SWS_OP_CONVERT: | ||
43 | case SWS_OP_DITHER: | ||
44 | case SWS_OP_MIN: | ||
45 | case SWS_OP_MAX: | ||
46 | case SWS_OP_SCALE: | ||
47 | ✗ | return true; | |
48 | ✗ | case SWS_OP_INVALID: | |
49 | case SWS_OP_READ: | ||
50 | case SWS_OP_WRITE: | ||
51 | case SWS_OP_SWIZZLE: | ||
52 | case SWS_OP_CLEAR: | ||
53 | case SWS_OP_LINEAR: | ||
54 | case SWS_OP_PACK: | ||
55 | case SWS_OP_UNPACK: | ||
56 | ✗ | return false; | |
57 | ✗ | case SWS_OP_TYPE_NB: | |
58 | ✗ | break; | |
59 | } | ||
60 | |||
61 | ✗ | av_unreachable("Invalid operation type!"); | |
62 | return false; | ||
63 | } | ||
64 | |||
65 | /* merge_comp_flags() forms a monoid with flags_identity as the null element */ | ||
66 | static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT; | ||
67 | 6357 | static unsigned merge_comp_flags(unsigned a, unsigned b) | |
68 | { | ||
69 | 6357 | const unsigned flags_or = SWS_COMP_GARBAGE; | |
70 | 6357 | const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT; | |
71 | 6357 | return ((a & b) & flags_and) | ((a | b) & flags_or); | |
72 | } | ||
73 | |||
74 | /* Infer + propagate known information about components */ | ||
75 | 17511 | void ff_sws_op_list_update_comps(SwsOpList *ops) | |
76 | { | ||
77 | 17511 | SwsComps next = { .unused = {true, true, true, true} }; | |
78 | 17511 | SwsComps prev = { .flags = { | |
79 | SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, | ||
80 | }}; | ||
81 | |||
82 | /* Forwards pass, propagates knowledge about the incoming pixel values */ | ||
83 |
2/2✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
|
70486 | for (int n = 0; n < ops->num_ops; n++) { |
84 | 52975 | SwsOp *op = &ops->ops[n]; | |
85 | |||
86 | /* Prefill min/max values automatically; may have to be fixed in | ||
87 | * special cases */ | ||
88 | 52975 | memcpy(op->comps.min, prev.min, sizeof(prev.min)); | |
89 | 52975 | memcpy(op->comps.max, prev.max, sizeof(prev.max)); | |
90 | |||
91 |
2/2✓ Branch 0 taken 52663 times.
✓ Branch 1 taken 312 times.
|
52975 | if (op->op != SWS_OP_SWAP_BYTES) { |
92 | 52663 | ff_sws_apply_op_q(op, op->comps.min); | |
93 | 52663 | ff_sws_apply_op_q(op, op->comps.max); | |
94 | } | ||
95 | |||
96 |
11/12✓ Branch 0 taken 17511 times.
✓ Branch 1 taken 17511 times.
✓ Branch 2 taken 6552 times.
✓ Branch 3 taken 1404 times.
✓ Branch 4 taken 312 times.
✓ Branch 5 taken 312 times.
✓ Branch 6 taken 1144 times.
✓ Branch 7 taken 5070 times.
✓ Branch 8 taken 2184 times.
✓ Branch 9 taken 819 times.
✓ Branch 10 taken 156 times.
✗ Branch 11 not taken.
|
52975 | switch (op->op) { |
97 | 17511 | case SWS_OP_READ: | |
98 |
2/2✓ Branch 0 taken 56134 times.
✓ Branch 1 taken 17511 times.
|
73645 | for (int i = 0; i < op->rw.elems; i++) { |
99 |
2/2✓ Branch 0 taken 41756 times.
✓ Branch 1 taken 14378 times.
|
56134 | if (ff_sws_pixel_type_is_int(op->type)) { |
100 | 41756 | int bits = 8 * ff_sws_pixel_type_size(op->type); | |
101 |
3/4✓ Branch 0 taken 40703 times.
✓ Branch 1 taken 1053 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 40703 times.
|
41756 | if (!op->rw.packed && ops->src.desc) { |
102 | /* Use legal value range from pixdesc if available; | ||
103 | * we don't need to do this for packed formats because | ||
104 | * non-byte-aligned packed formats will necessarily go | ||
105 | * through SWS_OP_UNPACK anyway */ | ||
106 | ✗ | for (int c = 0; c < 4; c++) { | |
107 | ✗ | if (ops->src.desc->comp[c].plane == i) { | |
108 | ✗ | bits = ops->src.desc->comp[c].depth; | |
109 | ✗ | break; | |
110 | } | ||
111 | } | ||
112 | } | ||
113 | |||
114 | 41756 | op->comps.flags[i] = SWS_COMP_EXACT; | |
115 | 41756 | op->comps.min[i] = Q(0); | |
116 | 41756 | op->comps.max[i] = Q((1ULL << bits) - 1); | |
117 | } | ||
118 | } | ||
119 |
2/2✓ Branch 0 taken 13910 times.
✓ Branch 1 taken 17511 times.
|
31421 | for (int i = op->rw.elems; i < 4; i++) |
120 | 13910 | op->comps.flags[i] = prev.flags[i]; | |
121 | 17511 | break; | |
122 | 17511 | case SWS_OP_WRITE: | |
123 |
2/2✓ Branch 0 taken 49946 times.
✓ Branch 1 taken 17511 times.
|
67457 | for (int i = 0; i < op->rw.elems; i++) |
124 | av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE)); | ||
125 | /* fall through */ | ||
126 | case SWS_OP_SWAP_BYTES: | ||
127 | case SWS_OP_LSHIFT: | ||
128 | case SWS_OP_RSHIFT: | ||
129 | case SWS_OP_MIN: | ||
130 | case SWS_OP_MAX: | ||
131 | /* Linearly propagate flags per component */ | ||
132 |
2/2✓ Branch 0 taken 96252 times.
✓ Branch 1 taken 24063 times.
|
120315 | for (int i = 0; i < 4; i++) |
133 | 96252 | op->comps.flags[i] = prev.flags[i]; | |
134 | 24063 | break; | |
135 | 1404 | case SWS_OP_DITHER: | |
136 | /* Strip zero flag because of the nonzero dithering offset */ | ||
137 |
2/2✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 1404 times.
|
7020 | for (int i = 0; i < 4; i++) |
138 | 5616 | op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO; | |
139 | 1404 | break; | |
140 | 312 | case SWS_OP_UNPACK: | |
141 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
|
1560 | for (int i = 0; i < 4; i++) { |
142 |
2/2✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
|
1248 | if (op->pack.pattern[i]) |
143 | 1014 | op->comps.flags[i] = prev.flags[0]; | |
144 | else | ||
145 | 234 | op->comps.flags[i] = SWS_COMP_GARBAGE; | |
146 | } | ||
147 | 312 | break; | |
148 | 312 | case SWS_OP_PACK: { | |
149 | 312 | unsigned flags = flags_identity; | |
150 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
|
1560 | for (int i = 0; i < 4; i++) { |
151 |
2/2✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
|
1248 | if (op->pack.pattern[i]) |
152 | 1014 | flags = merge_comp_flags(flags, prev.flags[i]); | |
153 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 312 times.
|
1248 | if (i > 0) /* clear remaining comps for sanity */ |
154 | 936 | op->comps.flags[i] = SWS_COMP_GARBAGE; | |
155 | } | ||
156 | 312 | op->comps.flags[0] = flags; | |
157 | 312 | break; | |
158 | } | ||
159 | 1144 | case SWS_OP_CLEAR: | |
160 |
2/2✓ Branch 0 taken 4576 times.
✓ Branch 1 taken 1144 times.
|
5720 | for (int i = 0; i < 4; i++) { |
161 |
2/2✓ Branch 0 taken 2639 times.
✓ Branch 1 taken 1937 times.
|
4576 | if (op->c.q4[i].den) { |
162 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 2507 times.
|
2639 | if (op->c.q4[i].num == 0) { |
163 | 132 | op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT; | |
164 |
1/2✓ Branch 0 taken 2507 times.
✗ Branch 1 not taken.
|
2507 | } else if (op->c.q4[i].den == 1) { |
165 | 2507 | op->comps.flags[i] = SWS_COMP_EXACT; | |
166 | } | ||
167 | } else { | ||
168 | 1937 | op->comps.flags[i] = prev.flags[i]; | |
169 | } | ||
170 | } | ||
171 | 1144 | break; | |
172 | 5070 | case SWS_OP_SWIZZLE: | |
173 |
2/2✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
|
25350 | for (int i = 0; i < 4; i++) |
174 | 20280 | op->comps.flags[i] = prev.flags[op->swizzle.in[i]]; | |
175 | 5070 | break; | |
176 | 2184 | case SWS_OP_CONVERT: | |
177 |
2/2✓ Branch 0 taken 8736 times.
✓ Branch 1 taken 2184 times.
|
10920 | for (int i = 0; i < 4; i++) { |
178 | 8736 | op->comps.flags[i] = prev.flags[i]; | |
179 |
2/2✓ Branch 0 taken 6864 times.
✓ Branch 1 taken 1872 times.
|
8736 | if (ff_sws_pixel_type_is_int(op->convert.to)) |
180 | 6864 | op->comps.flags[i] |= SWS_COMP_EXACT; | |
181 | } | ||
182 | 2184 | break; | |
183 | 819 | case SWS_OP_LINEAR: | |
184 |
2/2✓ Branch 0 taken 3276 times.
✓ Branch 1 taken 819 times.
|
4095 | for (int i = 0; i < 4; i++) { |
185 | 3276 | unsigned flags = flags_identity; | |
186 | 3276 | AVRational min = Q(0), max = Q(0); | |
187 |
2/2✓ Branch 0 taken 13104 times.
✓ Branch 1 taken 3276 times.
|
16380 | for (int j = 0; j < 4; j++) { |
188 | 13104 | const AVRational k = op->lin.m[i][j]; | |
189 | 13104 | AVRational mink = av_mul_q(prev.min[j], k); | |
190 | 13104 | AVRational maxk = av_mul_q(prev.max[j], k); | |
191 |
2/2✓ Branch 0 taken 5343 times.
✓ Branch 1 taken 7761 times.
|
13104 | if (k.num) { |
192 | 5343 | flags = merge_comp_flags(flags, prev.flags[j]); | |
193 |
2/2✓ Branch 0 taken 4017 times.
✓ Branch 1 taken 1326 times.
|
5343 | if (k.den != 1) /* fractional coefficient */ |
194 | 4017 | flags &= ~SWS_COMP_EXACT; | |
195 |
2/2✓ Branch 0 taken 2064 times.
✓ Branch 1 taken 3279 times.
|
5343 | if (k.num < 0) |
196 | 2064 | FFSWAP(AVRational, mink, maxk); | |
197 | 5343 | min = av_add_q(min, mink); | |
198 | 5343 | max = av_add_q(max, maxk); | |
199 | } | ||
200 | } | ||
201 |
2/2✓ Branch 0 taken 1443 times.
✓ Branch 1 taken 1833 times.
|
3276 | if (op->lin.m[i][4].num) { /* nonzero offset */ |
202 | 1443 | flags &= ~SWS_COMP_ZERO; | |
203 |
1/2✓ Branch 0 taken 1443 times.
✗ Branch 1 not taken.
|
1443 | if (op->lin.m[i][4].den != 1) /* fractional offset */ |
204 | 1443 | flags &= ~SWS_COMP_EXACT; | |
205 | 1443 | min = av_add_q(min, op->lin.m[i][4]); | |
206 | 1443 | max = av_add_q(max, op->lin.m[i][4]); | |
207 | } | ||
208 | 3276 | op->comps.flags[i] = flags; | |
209 | 3276 | op->comps.min[i] = min; | |
210 | 3276 | op->comps.max[i] = max; | |
211 | } | ||
212 | 819 | break; | |
213 | 156 | case SWS_OP_SCALE: | |
214 |
2/2✓ Branch 0 taken 624 times.
✓ Branch 1 taken 156 times.
|
780 | for (int i = 0; i < 4; i++) { |
215 | 624 | op->comps.flags[i] = prev.flags[i]; | |
216 |
1/2✓ Branch 0 taken 624 times.
✗ Branch 1 not taken.
|
624 | if (op->c.q.den != 1) /* fractional scale */ |
217 | 624 | op->comps.flags[i] &= ~SWS_COMP_EXACT; | |
218 |
2/2✓ Branch 0 taken 300 times.
✓ Branch 1 taken 324 times.
|
624 | if (op->c.q.num < 0) |
219 | 300 | FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]); | |
220 | } | ||
221 | 156 | break; | |
222 | |||
223 | case SWS_OP_INVALID: | ||
224 | case SWS_OP_TYPE_NB: | ||
225 | av_unreachable("Invalid operation type!"); | ||
226 | } | ||
227 | |||
228 | 52975 | prev = op->comps; | |
229 | } | ||
230 | |||
231 | /* Backwards pass, solves for component dependencies */ | ||
232 |
2/2✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
|
70486 | for (int n = ops->num_ops - 1; n >= 0; n--) { |
233 | 52975 | SwsOp *op = &ops->ops[n]; | |
234 | |||
235 |
7/8✓ Branch 0 taken 35022 times.
✓ Branch 1 taken 10296 times.
✓ Branch 2 taken 312 times.
✓ Branch 3 taken 312 times.
✓ Branch 4 taken 1144 times.
✓ Branch 5 taken 5070 times.
✓ Branch 6 taken 819 times.
✗ Branch 7 not taken.
|
52975 | switch (op->op) { |
236 | 35022 | case SWS_OP_READ: | |
237 | case SWS_OP_WRITE: | ||
238 |
2/2✓ Branch 0 taken 106080 times.
✓ Branch 1 taken 35022 times.
|
141102 | for (int i = 0; i < op->rw.elems; i++) |
239 | 106080 | op->comps.unused[i] = op->op == SWS_OP_READ; | |
240 |
2/2✓ Branch 0 taken 34008 times.
✓ Branch 1 taken 35022 times.
|
69030 | for (int i = op->rw.elems; i < 4; i++) |
241 | 34008 | op->comps.unused[i] = next.unused[i]; | |
242 | 35022 | break; | |
243 | 10296 | case SWS_OP_SWAP_BYTES: | |
244 | case SWS_OP_LSHIFT: | ||
245 | case SWS_OP_RSHIFT: | ||
246 | case SWS_OP_CONVERT: | ||
247 | case SWS_OP_DITHER: | ||
248 | case SWS_OP_MIN: | ||
249 | case SWS_OP_MAX: | ||
250 | case SWS_OP_SCALE: | ||
251 |
2/2✓ Branch 0 taken 41184 times.
✓ Branch 1 taken 10296 times.
|
51480 | for (int i = 0; i < 4; i++) |
252 | 41184 | op->comps.unused[i] = next.unused[i]; | |
253 | 10296 | break; | |
254 | 312 | case SWS_OP_UNPACK: { | |
255 | 312 | bool unused = true; | |
256 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
|
1560 | for (int i = 0; i < 4; i++) { |
257 |
2/2✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
|
1248 | if (op->pack.pattern[i]) |
258 | 1014 | unused &= next.unused[i]; | |
259 | 1248 | op->comps.unused[i] = i > 0; | |
260 | } | ||
261 | 312 | op->comps.unused[0] = unused; | |
262 | 312 | break; | |
263 | } | ||
264 | 312 | case SWS_OP_PACK: | |
265 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
|
1560 | for (int i = 0; i < 4; i++) { |
266 |
2/2✓ Branch 0 taken 1014 times.
✓ Branch 1 taken 234 times.
|
1248 | if (op->pack.pattern[i]) |
267 | 1014 | op->comps.unused[i] = next.unused[0]; | |
268 | else | ||
269 | 234 | op->comps.unused[i] = true; | |
270 | } | ||
271 | 312 | break; | |
272 | 1144 | case SWS_OP_CLEAR: | |
273 |
2/2✓ Branch 0 taken 4576 times.
✓ Branch 1 taken 1144 times.
|
5720 | for (int i = 0; i < 4; i++) { |
274 |
2/2✓ Branch 0 taken 2639 times.
✓ Branch 1 taken 1937 times.
|
4576 | if (op->c.q4[i].den) |
275 | 2639 | op->comps.unused[i] = true; | |
276 | else | ||
277 | 1937 | op->comps.unused[i] = next.unused[i]; | |
278 | } | ||
279 | 1144 | break; | |
280 | 5070 | case SWS_OP_SWIZZLE: { | |
281 | 5070 | bool unused[4] = { true, true, true, true }; | |
282 |
2/2✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
|
25350 | for (int i = 0; i < 4; i++) |
283 | 20280 | unused[op->swizzle.in[i]] &= next.unused[i]; | |
284 |
2/2✓ Branch 0 taken 20280 times.
✓ Branch 1 taken 5070 times.
|
25350 | for (int i = 0; i < 4; i++) |
285 | 20280 | op->comps.unused[i] = unused[i]; | |
286 | 5070 | break; | |
287 | } | ||
288 | 819 | case SWS_OP_LINEAR: | |
289 |
2/2✓ Branch 0 taken 3276 times.
✓ Branch 1 taken 819 times.
|
4095 | for (int j = 0; j < 4; j++) { |
290 | 3276 | bool unused = true; | |
291 |
2/2✓ Branch 0 taken 13104 times.
✓ Branch 1 taken 3276 times.
|
16380 | for (int i = 0; i < 4; i++) { |
292 |
2/2✓ Branch 0 taken 5343 times.
✓ Branch 1 taken 7761 times.
|
13104 | if (op->lin.m[i][j].num) |
293 | 5343 | unused &= next.unused[i]; | |
294 | } | ||
295 | 3276 | op->comps.unused[j] = unused; | |
296 | } | ||
297 | 819 | break; | |
298 | } | ||
299 | |||
300 | 52975 | next = op->comps; | |
301 | } | ||
302 | 17511 | } | |
303 | |||
304 | /* returns log2(x) only if x is a power of two, or 0 otherwise */ | ||
305 | ✗ | static int exact_log2(const int x) | |
306 | { | ||
307 | int p; | ||
308 | ✗ | if (x <= 0) | |
309 | ✗ | return 0; | |
310 | ✗ | p = av_log2(x); | |
311 | ✗ | return (1 << p) == x ? p : 0; | |
312 | } | ||
313 | |||
314 | ✗ | static int exact_log2_q(const AVRational x) | |
315 | { | ||
316 | ✗ | if (x.den == 1) | |
317 | ✗ | return exact_log2(x.num); | |
318 | ✗ | else if (x.num == 1) | |
319 | ✗ | return -exact_log2(x.den); | |
320 | else | ||
321 | ✗ | return 0; | |
322 | } | ||
323 | |||
324 | /** | ||
325 | * If a linear operation can be reduced to a scalar multiplication, returns | ||
326 | * the corresponding scaling factor, or 0 otherwise. | ||
327 | */ | ||
328 | ✗ | static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, | |
329 | SwsConst *out_scale) | ||
330 | { | ||
331 | ✗ | SwsConst scale = {0}; | |
332 | |||
333 | /* There are components not on the main diagonal */ | ||
334 | ✗ | if (c->mask & ~SWS_MASK_DIAG4) | |
335 | ✗ | return false; | |
336 | |||
337 | ✗ | for (int i = 0; i < 4; i++) { | |
338 | ✗ | const AVRational s = c->m[i][i]; | |
339 | ✗ | if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i]) | |
340 | ✗ | continue; | |
341 | ✗ | if (scale.q.den && av_cmp_q(s, scale.q)) | |
342 | ✗ | return false; | |
343 | ✗ | scale.q = s; | |
344 | } | ||
345 | |||
346 | ✗ | if (scale.q.den) | |
347 | ✗ | *out_scale = scale; | |
348 | ✗ | return scale.q.den; | |
349 | } | ||
350 | |||
351 | /* Extracts an integer clear operation (subset) from the given linear op. */ | ||
352 | ✗ | static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, | |
353 | SwsConst *out_clear) | ||
354 | { | ||
355 | ✗ | SwsConst clear = {0}; | |
356 | ✗ | bool ret = false; | |
357 | |||
358 | ✗ | for (int i = 0; i < 4; i++) { | |
359 | ✗ | bool const_row = c->m[i][4].den == 1; /* offset is integer */ | |
360 | ✗ | for (int j = 0; j < 4; j++) { | |
361 | ✗ | const_row &= c->m[i][j].num == 0 || /* scalar is zero */ | |
362 | ✗ | (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */ | |
363 | } | ||
364 | ✗ | if (const_row && (c->mask & SWS_MASK_ROW(i))) { | |
365 | ✗ | clear.q4[i] = c->m[i][4]; | |
366 | ✗ | for (int j = 0; j < 5; j++) | |
367 | ✗ | c->m[i][j] = Q(i == j); | |
368 | ✗ | c->mask &= ~SWS_MASK_ROW(i); | |
369 | ✗ | ret = true; | |
370 | } | ||
371 | } | ||
372 | |||
373 | ✗ | if (ret) | |
374 | ✗ | *out_clear = clear; | |
375 | ✗ | return ret; | |
376 | } | ||
377 | |||
378 | /* Unswizzle a linear operation by aligning single-input rows with | ||
379 | * their corresponding diagonal */ | ||
380 | ✗ | static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz) | |
381 | { | ||
382 | ✗ | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); | |
383 | ✗ | SwsLinearOp c = *op; | |
384 | |||
385 | ✗ | for (int i = 0; i < 4; i++) { | |
386 | ✗ | int idx = -1; | |
387 | ✗ | for (int j = 0; j < 4; j++) { | |
388 | ✗ | if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO)) | |
389 | ✗ | continue; | |
390 | ✗ | if (idx >= 0) | |
391 | ✗ | return false; /* multiple inputs */ | |
392 | ✗ | idx = j; | |
393 | } | ||
394 | |||
395 | ✗ | if (idx >= 0 && idx != i) { | |
396 | /* Move coefficient to the diagonal */ | ||
397 | ✗ | c.m[i][i] = c.m[i][idx]; | |
398 | ✗ | c.m[i][idx] = Q(0); | |
399 | ✗ | swiz.in[i] = idx; | |
400 | } | ||
401 | } | ||
402 | |||
403 | ✗ | if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask) | |
404 | ✗ | return false; /* no swizzle was identified */ | |
405 | |||
406 | ✗ | c.mask = ff_sws_linear_mask(c); | |
407 | ✗ | *out_swiz = swiz; | |
408 | ✗ | *op = c; | |
409 | ✗ | return true; | |
410 | } | ||
411 | |||
412 | ✗ | int ff_sws_op_list_optimize(SwsOpList *ops) | |
413 | { | ||
414 | int ret; | ||
415 | |||
416 | ✗ | retry: | |
417 | ✗ | ff_sws_op_list_update_comps(ops); | |
418 | |||
419 | ✗ | for (int n = 0; n < ops->num_ops;) { | |
420 | ✗ | SwsOp dummy = {0}; | |
421 | ✗ | SwsOp *op = &ops->ops[n]; | |
422 | ✗ | SwsOp *prev = n ? &ops->ops[n - 1] : &dummy; | |
423 | ✗ | SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy; | |
424 | |||
425 | /* common helper variable */ | ||
426 | ✗ | bool noop = true; | |
427 | |||
428 | ✗ | switch (op->op) { | |
429 | ✗ | case SWS_OP_READ: | |
430 | /* Optimized further into refcopy / memcpy */ | ||
431 | ✗ | if (next->op == SWS_OP_WRITE && | |
432 | ✗ | next->rw.elems == op->rw.elems && | |
433 | ✗ | next->rw.packed == op->rw.packed && | |
434 | ✗ | next->rw.frac == op->rw.frac) | |
435 | { | ||
436 | ✗ | ff_sws_op_list_remove_at(ops, n, 2); | |
437 | av_assert1(ops->num_ops == 0); | ||
438 | ✗ | return 0; | |
439 | } | ||
440 | |||
441 | /* Skip reading extra unneeded components */ | ||
442 | ✗ | if (!op->rw.packed) { | |
443 | ✗ | int needed = op->rw.elems; | |
444 | ✗ | while (needed > 0 && next->comps.unused[needed - 1]) | |
445 | ✗ | needed--; | |
446 | ✗ | if (op->rw.elems != needed) { | |
447 | ✗ | op->rw.elems = needed; | |
448 | ✗ | goto retry; | |
449 | } | ||
450 | } | ||
451 | ✗ | break; | |
452 | |||
453 | ✗ | case SWS_OP_SWAP_BYTES: | |
454 | /* Redundant (double) swap */ | ||
455 | ✗ | if (next->op == SWS_OP_SWAP_BYTES) { | |
456 | ✗ | ff_sws_op_list_remove_at(ops, n, 2); | |
457 | ✗ | goto retry; | |
458 | } | ||
459 | ✗ | break; | |
460 | |||
461 | ✗ | case SWS_OP_UNPACK: | |
462 | /* Redundant unpack+pack */ | ||
463 | ✗ | if (next->op == SWS_OP_PACK && next->type == op->type && | |
464 | ✗ | next->pack.pattern[0] == op->pack.pattern[0] && | |
465 | ✗ | next->pack.pattern[1] == op->pack.pattern[1] && | |
466 | ✗ | next->pack.pattern[2] == op->pack.pattern[2] && | |
467 | ✗ | next->pack.pattern[3] == op->pack.pattern[3]) | |
468 | { | ||
469 | ✗ | ff_sws_op_list_remove_at(ops, n, 2); | |
470 | ✗ | goto retry; | |
471 | } | ||
472 | ✗ | break; | |
473 | |||
474 | ✗ | case SWS_OP_LSHIFT: | |
475 | case SWS_OP_RSHIFT: | ||
476 | /* Two shifts in the same direction */ | ||
477 | ✗ | if (next->op == op->op) { | |
478 | ✗ | op->c.u += next->c.u; | |
479 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
480 | ✗ | goto retry; | |
481 | } | ||
482 | |||
483 | /* No-op shift */ | ||
484 | ✗ | if (!op->c.u) { | |
485 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
486 | ✗ | goto retry; | |
487 | } | ||
488 | ✗ | break; | |
489 | |||
490 | ✗ | case SWS_OP_CLEAR: | |
491 | ✗ | for (int i = 0; i < 4; i++) { | |
492 | ✗ | if (!op->c.q4[i].den) | |
493 | ✗ | continue; | |
494 | |||
495 | ✗ | if ((prev->comps.flags[i] & SWS_COMP_ZERO) && | |
496 | ✗ | !(prev->comps.flags[i] & SWS_COMP_GARBAGE) && | |
497 | ✗ | op->c.q4[i].num == 0) | |
498 | { | ||
499 | /* Redundant clear-to-zero of zero component */ | ||
500 | ✗ | op->c.q4[i].den = 0; | |
501 | ✗ | } else if (next->comps.unused[i]) { | |
502 | /* Unnecessary clear of unused component */ | ||
503 | ✗ | op->c.q4[i] = (AVRational) {0, 0}; | |
504 | ✗ | } else if (op->c.q4[i].den) { | |
505 | ✗ | noop = false; | |
506 | } | ||
507 | } | ||
508 | |||
509 | ✗ | if (noop) { | |
510 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
511 | ✗ | goto retry; | |
512 | } | ||
513 | |||
514 | /* Transitive clear */ | ||
515 | ✗ | if (next->op == SWS_OP_CLEAR) { | |
516 | ✗ | for (int i = 0; i < 4; i++) { | |
517 | ✗ | if (next->c.q4[i].den) | |
518 | ✗ | op->c.q4[i] = next->c.q4[i]; | |
519 | } | ||
520 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
521 | ✗ | goto retry; | |
522 | } | ||
523 | |||
524 | /* Prefer to clear as late as possible, to avoid doing | ||
525 | * redundant work */ | ||
526 | ✗ | if ((op_type_is_independent(next->op) && next->op != SWS_OP_SWAP_BYTES) || | |
527 | ✗ | next->op == SWS_OP_SWIZZLE) | |
528 | { | ||
529 | ✗ | if (next->op == SWS_OP_CONVERT) | |
530 | ✗ | op->type = next->convert.to; | |
531 | ✗ | ff_sws_apply_op_q(next, op->c.q4); | |
532 | ✗ | FFSWAP(SwsOp, *op, *next); | |
533 | ✗ | goto retry; | |
534 | } | ||
535 | ✗ | break; | |
536 | |||
537 | ✗ | case SWS_OP_SWIZZLE: { | |
538 | ✗ | bool seen[4] = {0}; | |
539 | ✗ | bool has_duplicates = false; | |
540 | ✗ | for (int i = 0; i < 4; i++) { | |
541 | ✗ | if (next->comps.unused[i]) | |
542 | ✗ | continue; | |
543 | ✗ | if (op->swizzle.in[i] != i) | |
544 | ✗ | noop = false; | |
545 | ✗ | has_duplicates |= seen[op->swizzle.in[i]]; | |
546 | ✗ | seen[op->swizzle.in[i]] = true; | |
547 | } | ||
548 | |||
549 | /* Identity swizzle */ | ||
550 | ✗ | if (noop) { | |
551 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
552 | ✗ | goto retry; | |
553 | } | ||
554 | |||
555 | /* Transitive swizzle */ | ||
556 | ✗ | if (next->op == SWS_OP_SWIZZLE) { | |
557 | ✗ | const SwsSwizzleOp orig = op->swizzle; | |
558 | ✗ | for (int i = 0; i < 4; i++) | |
559 | ✗ | op->swizzle.in[i] = orig.in[next->swizzle.in[i]]; | |
560 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
561 | ✗ | goto retry; | |
562 | } | ||
563 | |||
564 | /* Try to push swizzles with duplicates towards the output */ | ||
565 | ✗ | if (has_duplicates && op_type_is_independent(next->op)) { | |
566 | ✗ | if (next->op == SWS_OP_CONVERT) | |
567 | ✗ | op->type = next->convert.to; | |
568 | ✗ | if (next->op == SWS_OP_MIN || next->op == SWS_OP_MAX) { | |
569 | /* Un-swizzle the next operation */ | ||
570 | ✗ | const SwsConst c = next->c; | |
571 | ✗ | for (int i = 0; i < 4; i++) { | |
572 | ✗ | if (!next->comps.unused[i]) | |
573 | ✗ | next->c.q4[op->swizzle.in[i]] = c.q4[i]; | |
574 | } | ||
575 | } | ||
576 | ✗ | FFSWAP(SwsOp, *op, *next); | |
577 | ✗ | goto retry; | |
578 | } | ||
579 | |||
580 | /* Move swizzle out of the way between two converts so that | ||
581 | * they may be merged */ | ||
582 | ✗ | if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) { | |
583 | ✗ | op->type = next->convert.to; | |
584 | ✗ | FFSWAP(SwsOp, *op, *next); | |
585 | ✗ | goto retry; | |
586 | } | ||
587 | ✗ | break; | |
588 | } | ||
589 | |||
590 | ✗ | case SWS_OP_CONVERT: | |
591 | /* No-op conversion */ | ||
592 | ✗ | if (op->type == op->convert.to) { | |
593 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
594 | ✗ | goto retry; | |
595 | } | ||
596 | |||
597 | /* Transitive conversion */ | ||
598 | ✗ | if (next->op == SWS_OP_CONVERT && | |
599 | ✗ | op->convert.expand == next->convert.expand) | |
600 | { | ||
601 | av_assert1(op->convert.to == next->type); | ||
602 | ✗ | op->convert.to = next->convert.to; | |
603 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
604 | ✗ | goto retry; | |
605 | } | ||
606 | |||
607 | /* Conversion followed by integer expansion */ | ||
608 | ✗ | if (next->op == SWS_OP_SCALE && !op->convert.expand && | |
609 | ✗ | !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to))) | |
610 | { | ||
611 | ✗ | op->convert.expand = true; | |
612 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
613 | ✗ | goto retry; | |
614 | } | ||
615 | ✗ | break; | |
616 | |||
617 | ✗ | case SWS_OP_MIN: | |
618 | ✗ | for (int i = 0; i < 4; i++) { | |
619 | ✗ | if (next->comps.unused[i] || !op->c.q4[i].den) | |
620 | ✗ | continue; | |
621 | ✗ | if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0) | |
622 | ✗ | noop = false; | |
623 | } | ||
624 | |||
625 | ✗ | if (noop) { | |
626 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
627 | ✗ | goto retry; | |
628 | } | ||
629 | ✗ | break; | |
630 | |||
631 | ✗ | case SWS_OP_MAX: | |
632 | ✗ | for (int i = 0; i < 4; i++) { | |
633 | ✗ | if (next->comps.unused[i] || !op->c.q4[i].den) | |
634 | ✗ | continue; | |
635 | ✗ | if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0) | |
636 | ✗ | noop = false; | |
637 | } | ||
638 | |||
639 | ✗ | if (noop) { | |
640 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
641 | ✗ | goto retry; | |
642 | } | ||
643 | ✗ | break; | |
644 | |||
645 | ✗ | case SWS_OP_DITHER: | |
646 | ✗ | for (int i = 0; i < 4; i++) { | |
647 | ✗ | noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) || | |
648 | ✗ | next->comps.unused[i]; | |
649 | } | ||
650 | |||
651 | ✗ | if (noop) { | |
652 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
653 | ✗ | goto retry; | |
654 | } | ||
655 | ✗ | break; | |
656 | |||
657 | ✗ | case SWS_OP_LINEAR: { | |
658 | SwsSwizzleOp swizzle; | ||
659 | SwsConst c; | ||
660 | |||
661 | /* No-op (identity) linear operation */ | ||
662 | ✗ | if (!op->lin.mask) { | |
663 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
664 | ✗ | goto retry; | |
665 | } | ||
666 | |||
667 | ✗ | if (next->op == SWS_OP_LINEAR) { | |
668 | /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */ | ||
669 | ✗ | const SwsLinearOp m1 = op->lin; | |
670 | ✗ | const SwsLinearOp m2 = next->lin; | |
671 | ✗ | for (int i = 0; i < 4; i++) { | |
672 | ✗ | for (int j = 0; j < 5; j++) { | |
673 | ✗ | AVRational sum = Q(0); | |
674 | ✗ | for (int k = 0; k < 4; k++) | |
675 | ✗ | sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j])); | |
676 | ✗ | if (j == 4) /* m1.m[4][j] == 1 */ | |
677 | ✗ | sum = av_add_q(sum, m2.m[i][4]); | |
678 | ✗ | op->lin.m[i][j] = sum; | |
679 | } | ||
680 | } | ||
681 | ✗ | op->lin.mask = ff_sws_linear_mask(op->lin); | |
682 | ✗ | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
683 | ✗ | goto retry; | |
684 | } | ||
685 | |||
686 | /* Optimize away zero columns */ | ||
687 | ✗ | for (int j = 0; j < 4; j++) { | |
688 | ✗ | const uint32_t col = SWS_MASK_COL(j); | |
689 | ✗ | if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col)) | |
690 | ✗ | continue; | |
691 | ✗ | for (int i = 0; i < 4; i++) | |
692 | ✗ | op->lin.m[i][j] = Q(i == j); | |
693 | ✗ | op->lin.mask &= ~col; | |
694 | ✗ | goto retry; | |
695 | } | ||
696 | |||
697 | /* Optimize away unused rows */ | ||
698 | ✗ | for (int i = 0; i < 4; i++) { | |
699 | ✗ | const uint32_t row = SWS_MASK_ROW(i); | |
700 | ✗ | if (!next->comps.unused[i] || !(op->lin.mask & row)) | |
701 | ✗ | continue; | |
702 | ✗ | for (int j = 0; j < 5; j++) | |
703 | ✗ | op->lin.m[i][j] = Q(i == j); | |
704 | ✗ | op->lin.mask &= ~row; | |
705 | ✗ | goto retry; | |
706 | } | ||
707 | |||
708 | /* Convert constant rows to explicit clear instruction */ | ||
709 | ✗ | if (extract_constant_rows(&op->lin, prev->comps, &c)) { | |
710 | ✗ | RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { | |
711 | .op = SWS_OP_CLEAR, | ||
712 | .type = op->type, | ||
713 | .comps = op->comps, | ||
714 | .c = c, | ||
715 | })); | ||
716 | ✗ | goto retry; | |
717 | } | ||
718 | |||
719 | /* Multiplication by scalar constant */ | ||
720 | ✗ | if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) { | |
721 | ✗ | op->op = SWS_OP_SCALE; | |
722 | ✗ | op->c = c; | |
723 | ✗ | goto retry; | |
724 | } | ||
725 | |||
726 | /* Swizzle by fixed pattern */ | ||
727 | ✗ | if (extract_swizzle(&op->lin, prev->comps, &swizzle)) { | |
728 | ✗ | RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) { | |
729 | .op = SWS_OP_SWIZZLE, | ||
730 | .type = op->type, | ||
731 | .swizzle = swizzle, | ||
732 | })); | ||
733 | ✗ | goto retry; | |
734 | } | ||
735 | ✗ | break; | |
736 | } | ||
737 | |||
738 | ✗ | case SWS_OP_SCALE: { | |
739 | ✗ | const int factor2 = exact_log2_q(op->c.q); | |
740 | |||
741 | /* No-op scaling */ | ||
742 | ✗ | if (op->c.q.num == 1 && op->c.q.den == 1) { | |
743 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
744 | ✗ | goto retry; | |
745 | } | ||
746 | |||
747 | /* Scaling by integer before conversion to int */ | ||
748 | ✗ | if (op->c.q.den == 1 && | |
749 | ✗ | next->op == SWS_OP_CONVERT && | |
750 | ✗ | ff_sws_pixel_type_is_int(next->convert.to)) | |
751 | { | ||
752 | ✗ | op->type = next->convert.to; | |
753 | ✗ | FFSWAP(SwsOp, *op, *next); | |
754 | ✗ | goto retry; | |
755 | } | ||
756 | |||
757 | /* Scaling by exact power of two */ | ||
758 | ✗ | if (factor2 && ff_sws_pixel_type_is_int(op->type)) { | |
759 | ✗ | op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT; | |
760 | ✗ | op->c.u = FFABS(factor2); | |
761 | ✗ | goto retry; | |
762 | } | ||
763 | ✗ | break; | |
764 | } | ||
765 | } | ||
766 | |||
767 | /* No optimization triggered, move on to next operation */ | ||
768 | ✗ | n++; | |
769 | } | ||
770 | |||
771 | ✗ | return 0; | |
772 | } | ||
773 | |||
774 | 2154 | int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], | |
775 | int size, uint8_t clear_val, | ||
776 | int *read_bytes, int *write_bytes) | ||
777 | { | ||
778 | 2154 | const SwsOp read = ops->ops[0]; | |
779 | 2154 | const int read_size = ff_sws_pixel_type_size(read.type); | |
780 | 2154 | uint32_t mask[4] = {0}; | |
781 | |||
782 |
2/4✓ Branch 0 taken 2154 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2154 times.
|
2154 | if (!ops->num_ops || read.op != SWS_OP_READ) |
783 | ✗ | return AVERROR(EINVAL); | |
784 |
6/6✓ Branch 0 taken 2142 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 2070 times.
✓ Branch 3 taken 72 times.
✓ Branch 4 taken 1614 times.
✓ Branch 5 taken 456 times.
|
2154 | if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1)) |
785 | 1626 | return AVERROR(ENOTSUP); | |
786 | |||
787 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 528 times.
|
1200 | for (int i = 0; i < read.rw.elems; i++) |
788 | 672 | mask[i] = 0x01010101 * i * read_size + 0x03020100; | |
789 | |||
790 |
1/2✓ Branch 0 taken 552 times.
✗ Branch 1 not taken.
|
552 | for (int opidx = 1; opidx < ops->num_ops; opidx++) { |
791 | 552 | const SwsOp *op = &ops->ops[opidx]; | |
792 |
4/6✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 84 times.
✓ Branch 4 taken 108 times.
✓ Branch 5 taken 348 times.
|
552 | switch (op->op) { |
793 | ✗ | case SWS_OP_SWIZZLE: { | |
794 | ✗ | uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; | |
795 | ✗ | for (int i = 0; i < 4; i++) | |
796 | ✗ | mask[i] = orig[op->swizzle.in[i]]; | |
797 | ✗ | break; | |
798 | } | ||
799 | |||
800 | 12 | case SWS_OP_SWAP_BYTES: | |
801 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 12 times.
|
60 | for (int i = 0; i < 4; i++) { |
802 |
2/3✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
48 | switch (ff_sws_pixel_type_size(op->type)) { |
803 | 24 | case 2: mask[i] = av_bswap16(mask[i]); break; | |
804 | 24 | case 4: mask[i] = av_bswap32(mask[i]); break; | |
805 | } | ||
806 | } | ||
807 | 12 | break; | |
808 | |||
809 | ✗ | case SWS_OP_CLEAR: | |
810 | ✗ | for (int i = 0; i < 4; i++) { | |
811 | ✗ | if (!op->c.q4[i].den) | |
812 | ✗ | continue; | |
813 | ✗ | if (op->c.q4[i].num != 0 || !clear_val) | |
814 | ✗ | return AVERROR(ENOTSUP); | |
815 | ✗ | mask[i] = 0x1010101ul * clear_val; | |
816 | } | ||
817 | ✗ | break; | |
818 | |||
819 | 84 | case SWS_OP_CONVERT: { | |
820 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 12 times.
|
84 | if (!op->convert.expand) |
821 | 72 | return AVERROR(ENOTSUP); | |
822 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 12 times.
|
60 | for (int i = 0; i < 4; i++) { |
823 |
1/3✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
|
48 | switch (ff_sws_pixel_type_size(op->type)) { |
824 | 48 | case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; | |
825 | ✗ | case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; | |
826 | } | ||
827 | } | ||
828 | 12 | break; | |
829 | } | ||
830 | |||
831 | 108 | case SWS_OP_WRITE: { | |
832 |
5/6✓ Branch 0 taken 96 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 96 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 72 times.
✓ Branch 5 taken 24 times.
|
108 | if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1)) |
833 | 84 | return AVERROR(ENOTSUP); | |
834 | |||
835 | /* Initialize to no-op */ | ||
836 | 24 | memset(shuffle, clear_val, size); | |
837 | |||
838 | 24 | const int write_size = ff_sws_pixel_type_size(op->type); | |
839 | 24 | const int read_chunk = read.rw.elems * read_size; | |
840 | 24 | const int write_chunk = op->rw.elems * write_size; | |
841 | 24 | const int num_groups = size / FFMAX(read_chunk, write_chunk); | |
842 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
|
168 | for (int n = 0; n < num_groups; n++) { |
843 | 144 | const int base_in = n * read_chunk; | |
844 | 144 | const int base_out = n * write_chunk; | |
845 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 144 times.
|
288 | for (int i = 0; i < op->rw.elems; i++) { |
846 | 144 | const int offset = base_out + i * write_size; | |
847 |
2/2✓ Branch 0 taken 384 times.
✓ Branch 1 taken 144 times.
|
528 | for (int b = 0; b < write_size; b++) { |
848 | 384 | const uint8_t idx = mask[i] >> (b * 8); | |
849 |
1/2✓ Branch 0 taken 384 times.
✗ Branch 1 not taken.
|
384 | if (idx != clear_val) |
850 | 384 | shuffle[offset + b] = base_in + idx; | |
851 | } | ||
852 | } | ||
853 | } | ||
854 | |||
855 | 24 | *read_bytes = num_groups * read_chunk; | |
856 | 24 | *write_bytes = num_groups * write_chunk; | |
857 | 24 | return num_groups; | |
858 | } | ||
859 | |||
860 | 348 | default: | |
861 | 348 | return AVERROR(ENOTSUP); | |
862 | } | ||
863 | } | ||
864 | |||
865 | ✗ | return AVERROR(EINVAL); | |
866 | } | ||
867 |