Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/avassert.h" | ||
22 | #include "libavutil/mem.h" | ||
23 | #include "libavutil/rational.h" | ||
24 | |||
25 | #include "ops_chain.h" | ||
26 | |||
/* Build the rational N/1 from an integer expression. The argument is
 * parenthesized so that any expression can be passed unchanged. */
#define Q(N) ((AVRational) { (N), 1 })
28 | |||
29 | 8552 | SwsOpChain *ff_sws_op_chain_alloc(void) | |
30 | { | ||
31 | 8552 | return av_mallocz(sizeof(SwsOpChain)); | |
32 | } | ||
33 | |||
34 | 8552 | void ff_sws_op_chain_free(SwsOpChain *chain) | |
35 | { | ||
36 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8552 times.
|
8552 | if (!chain) |
37 | ✗ | return; | |
38 | |||
39 |
2/2✓ Branch 0 taken 31241 times.
✓ Branch 1 taken 8552 times.
|
39793 | for (int i = 0; i < chain->num_impl + 1; i++) { |
40 |
2/2✓ Branch 0 taken 786 times.
✓ Branch 1 taken 30455 times.
|
31241 | if (chain->free[i]) |
41 | 786 | chain->free[i](chain->impl[i].priv.ptr); | |
42 | } | ||
43 | |||
44 | 8552 | av_free(chain); | |
45 | } | ||
46 | |||
47 | 22689 | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, | |
48 | void (*free)(void *), const SwsOpPriv *priv) | ||
49 | { | ||
50 | 22689 | const int idx = chain->num_impl; | |
51 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22689 times.
|
22689 | if (idx == SWS_MAX_OPS) |
52 | ✗ | return AVERROR(EINVAL); | |
53 | |||
54 | av_assert1(func); | ||
55 | 22689 | chain->impl[idx].cont = func; | |
56 | 22689 | chain->impl[idx + 1].priv = *priv; | |
57 | 22689 | chain->free[idx + 1] = free; | |
58 | 22689 | chain->num_impl++; | |
59 | 22689 | return 0; | |
60 | } | ||
61 | |||
62 | /** | ||
63 | * Match an operation against a reference operation. Returns a score for how | ||
64 | * well the reference matches the operation, or 0 if there is no match. | ||
65 | * | ||
66 | * If `ref->comps` has any flags set, they must be set in `op` as well. | ||
67 | * Likewise, if `ref->comps` has any components marked as unused, they must be | ||
68 | * marked as as unused in `ops` as well. | ||
69 | * | ||
70 | * For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of | ||
71 | * `op->linear.mask`, but may not contain any columns explicitly ignored by | ||
72 | * `op->comps.unused`. | ||
73 | * | ||
74 | * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the | ||
75 | * exact type is not checked, just the size. | ||
76 | * | ||
77 | * Components set in `next.unused` are ignored when matching. If `flexible` | ||
78 | * is true, the op body is ignored - only the operation, pixel type, and | ||
79 | * component masks are checked. | ||
80 | */ | ||
81 | 6229446 | static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next) | |
82 | { | ||
83 | 6229446 | int score = 10; | |
84 |
2/2✓ Branch 0 taken 5682745 times.
✓ Branch 1 taken 546701 times.
|
6229446 | if (op->op != entry->op) |
85 | 5682745 | return 0; | |
86 | |||
87 |
2/2✓ Branch 0 taken 454518 times.
✓ Branch 1 taken 92183 times.
|
546701 | switch (op->op) { |
88 | 454518 | case SWS_OP_READ: | |
89 | case SWS_OP_WRITE: | ||
90 | case SWS_OP_SWAP_BYTES: | ||
91 | case SWS_OP_SWIZZLE: | ||
92 | /* Only the size matters for these operations */ | ||
93 |
2/2✓ Branch 0 taken 290030 times.
✓ Branch 1 taken 164488 times.
|
454518 | if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type)) |
94 | 290030 | return 0; | |
95 | 164488 | break; | |
96 | 92183 | default: | |
97 |
2/2✓ Branch 0 taken 55588 times.
✓ Branch 1 taken 36595 times.
|
92183 | if (op->type != entry->type) |
98 | 55588 | return 0; | |
99 | 36595 | break; | |
100 | } | ||
101 | |||
102 |
2/2✓ Branch 0 taken 785560 times.
✓ Branch 1 taken 187590 times.
|
973150 | for (int i = 0; i < 4; i++) { |
103 |
2/2✓ Branch 0 taken 33373 times.
✓ Branch 1 taken 752187 times.
|
785560 | if (entry->unused[i]) { |
104 |
2/2✓ Branch 0 taken 19880 times.
✓ Branch 1 taken 13493 times.
|
33373 | if (op->comps.unused[i]) |
105 | 19880 | score += 1; /* Operating on fewer components is better .. */ | |
106 | else | ||
107 | 13493 | return 0; /* .. but not too few! */ | |
108 | } | ||
109 | } | ||
110 | |||
111 |
2/2✓ Branch 0 taken 2169 times.
✓ Branch 1 taken 185421 times.
|
187590 | if (op->op == SWS_OP_CLEAR) { |
112 | /* Clear pattern must match exactly, regardless of `entry->flexible` */ | ||
113 |
2/2✓ Branch 0 taken 6136 times.
✓ Branch 1 taken 565 times.
|
6701 | for (int i = 0; i < 4; i++) { |
114 |
3/4✓ Branch 0 taken 6136 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1604 times.
✓ Branch 3 taken 4532 times.
|
6136 | if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den) |
115 | 1604 | return 0; | |
116 | } | ||
117 | } | ||
118 | |||
119 | /* Flexible variants always match, but lower the score to prioritize more | ||
120 | * specific implementations if they exist */ | ||
121 |
2/2✓ Branch 0 taken 5524 times.
✓ Branch 1 taken 180462 times.
|
185986 | if (entry->flexible) |
122 | 5524 | return score - 5; | |
123 | |||
124 |
8/14✗ Branch 0 not taken.
✓ Branch 1 taken 116902 times.
✓ Branch 2 taken 244 times.
✓ Branch 3 taken 706 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 47146 times.
✓ Branch 7 taken 6398 times.
✓ Branch 8 taken 4941 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 4116 times.
✓ Branch 11 taken 9 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
180462 | switch (op->op) { |
125 | ✗ | case SWS_OP_INVALID: | |
126 | ✗ | return 0; | |
127 | 116902 | case SWS_OP_READ: | |
128 | case SWS_OP_WRITE: | ||
129 |
2/2✓ Branch 0 taken 29717 times.
✓ Branch 1 taken 87185 times.
|
116902 | if (op->rw.elems != entry->rw.elems || |
130 |
2/2✓ Branch 0 taken 27943 times.
✓ Branch 1 taken 1774 times.
|
29717 | op->rw.frac != entry->rw.frac || |
131 |
4/4✓ Branch 0 taken 25136 times.
✓ Branch 1 taken 2807 times.
✓ Branch 2 taken 12559 times.
✓ Branch 3 taken 12577 times.
|
27943 | (op->rw.elems > 1 && op->rw.packed != entry->rw.packed)) |
132 | 101518 | return 0; | |
133 | 15384 | return score; | |
134 | 244 | case SWS_OP_SWAP_BYTES: | |
135 | 244 | return score; | |
136 | 706 | case SWS_OP_PACK: | |
137 | case SWS_OP_UNPACK: | ||
138 |
4/4✓ Branch 0 taken 1524 times.
✓ Branch 1 taken 56 times.
✓ Branch 2 taken 1326 times.
✓ Branch 3 taken 198 times.
|
1580 | for (int i = 0; i < 4 && op->pack.pattern[i]; i++) { |
139 |
2/2✓ Branch 0 taken 452 times.
✓ Branch 1 taken 874 times.
|
1326 | if (op->pack.pattern[i] != entry->pack.pattern[i]) |
140 | 452 | return 0; | |
141 | } | ||
142 | 254 | return score; | |
143 | ✗ | case SWS_OP_CLEAR: | |
144 | ✗ | for (int i = 0; i < 4; i++) { | |
145 | ✗ | if (!op->c.q4[i].den) | |
146 | ✗ | continue; | |
147 | ✗ | if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i]) | |
148 | ✗ | return 0; | |
149 | } | ||
150 | ✗ | return score; | |
151 | ✗ | case SWS_OP_LSHIFT: | |
152 | case SWS_OP_RSHIFT: | ||
153 | av_assert1(entry->flexible); | ||
154 | ✗ | return score; | |
155 | 47146 | case SWS_OP_SWIZZLE: | |
156 |
2/2✓ Branch 0 taken 67200 times.
✓ Branch 1 taken 3046 times.
|
70246 | for (int i = 0; i < 4; i++) { |
157 |
4/4✓ Branch 0 taken 45906 times.
✓ Branch 1 taken 21294 times.
✓ Branch 2 taken 44100 times.
✓ Branch 3 taken 1806 times.
|
67200 | if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i]) |
158 | 44100 | return 0; | |
159 | } | ||
160 | 3046 | return score; | |
161 | 6398 | case SWS_OP_CONVERT: | |
162 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 4164 times.
|
6398 | if (op->convert.to != entry->convert.to || |
163 |
2/2✓ Branch 0 taken 496 times.
✓ Branch 1 taken 1738 times.
|
2234 | op->convert.expand != entry->convert.expand) |
164 | 4660 | return 0; | |
165 | 1738 | return score; | |
166 | 4941 | case SWS_OP_DITHER: | |
167 |
2/2✓ Branch 0 taken 549 times.
✓ Branch 1 taken 4392 times.
|
4941 | return op->dither.size_log2 == entry->dither_size ? score : 0; |
168 | ✗ | case SWS_OP_MIN: | |
169 | case SWS_OP_MAX: | ||
170 | av_assert1(entry->flexible); | ||
171 | ✗ | return score; | |
172 | 4116 | case SWS_OP_LINEAR: | |
173 | /* All required elements must be present */ | ||
174 |
2/2✓ Branch 0 taken 2870 times.
✓ Branch 1 taken 1246 times.
|
4116 | if (op->lin.mask & ~entry->linear_mask) |
175 | 2870 | return 0; | |
176 | /* To avoid operating on possibly undefined memory, filter out | ||
177 | * implementations that operate on more input components */ | ||
178 |
2/2✓ Branch 0 taken 4984 times.
✓ Branch 1 taken 1246 times.
|
6230 | for (int i = 0; i < 4; i++) { |
179 |
3/4✓ Branch 0 taken 4410 times.
✓ Branch 1 taken 574 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4410 times.
|
4984 | if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i]) |
180 | ✗ | return 0; | |
181 | } | ||
182 | /* Prioritize smaller implementations */ | ||
183 | 1246 | score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask); | |
184 | 1246 | return score; | |
185 | 9 | case SWS_OP_SCALE: | |
186 | 9 | return score; | |
187 | ✗ | case SWS_OP_TYPE_NB: | |
188 | ✗ | break; | |
189 | } | ||
190 | |||
191 | ✗ | av_unreachable("Invalid operation type!"); | |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | 23853 | int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, | |
196 | SwsOpList *ops, const int block_size, | ||
197 | SwsOpChain *chain) | ||
198 | { | ||
199 | static const SwsOp dummy = { .comps.unused = { true, true, true, true }}; | ||
200 |
2/2✓ Branch 0 taken 16897 times.
✓ Branch 1 taken 6956 times.
|
23853 | const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy; |
201 | 23853 | const unsigned cpu_flags = av_get_cpu_flags(); | |
202 | 23853 | const SwsOpEntry *best = NULL; | |
203 | 23853 | const SwsOp *op = &ops->ops[0]; | |
204 | 23853 | int ret, best_score = 0, best_cpu_flags; | |
205 | 23853 | SwsOpPriv priv = {0}; | |
206 | |||
207 |
2/2✓ Branch 0 taken 109251 times.
✓ Branch 1 taken 23853 times.
|
133104 | for (int n = 0; n < num_tables; n++) { |
208 | 109251 | const SwsOpTable *table = tables[n]; | |
209 |
3/4✓ Branch 0 taken 109251 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 86492 times.
✓ Branch 3 taken 22759 times.
|
109251 | if (table->block_size && table->block_size != block_size || |
210 |
2/2✓ Branch 0 taken 5230 times.
✓ Branch 1 taken 81262 times.
|
86492 | table->cpu_flags & ~cpu_flags) |
211 | 27989 | continue; | |
212 | |||
213 |
2/2✓ Branch 0 taken 6229446 times.
✓ Branch 1 taken 81262 times.
|
6310708 | for (int i = 0; table->entries[i]; i++) { |
214 | 6229446 | const SwsOpEntry *entry = table->entries[i]; | |
215 | 6229446 | int score = op_match(op, entry, next->comps); | |
216 |
2/2✓ Branch 0 taken 22206 times.
✓ Branch 1 taken 6207240 times.
|
6229446 | if (score > best_score) { |
217 | 22206 | best_score = score; | |
218 | 22206 | best_cpu_flags = table->cpu_flags; | |
219 | 22206 | best = entry; | |
220 | } | ||
221 | } | ||
222 | } | ||
223 | |||
224 |
2/2✓ Branch 0 taken 1647 times.
✓ Branch 1 taken 22206 times.
|
23853 | if (!best) |
225 | 1647 | return AVERROR(ENOTSUP); | |
226 | |||
227 |
2/2✓ Branch 0 taken 3577 times.
✓ Branch 1 taken 18629 times.
|
22206 | if (best->setup) { |
228 | 3577 | ret = best->setup(op, &priv); | |
229 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3577 times.
|
3577 | if (ret < 0) |
230 | ✗ | return ret; | |
231 | } | ||
232 | |||
233 | 22206 | chain->cpu_flags |= best_cpu_flags; | |
234 | 22206 | ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv); | |
235 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22206 times.
|
22206 | if (ret < 0) { |
236 | ✗ | if (best->free) | |
237 | ✗ | best->free(&priv); | |
238 | ✗ | return ret; | |
239 | } | ||
240 | |||
241 | 22206 | ops->ops++; | |
242 | 22206 | ops->num_ops--; | |
243 |
2/2✓ Branch 0 taken 15301 times.
✓ Branch 1 taken 6905 times.
|
22206 | return ops->num_ops ? AVERROR(EAGAIN) : 0; |
244 | } | ||
245 | |||
/*
 * Convert the rational `q` to a pixel value of type `type`; a zero
 * denominator yields 0.
 *
 * The division is carried out in the arithmetic type that `type` promotes to
 * (forced by multiplying with `(type) 1`), and only the quotient is converted
 * to `type`. Casting the numerator alone would truncate it to the width of a
 * narrow integer type before dividing, e.g. 510/2 as uint8_t would give 127
 * instead of 255. Results for in-range integer values, for uint32_t and for
 * float are the same as with a plain numerator cast.
 *
 * Note that `q` is evaluated more than once.
 */
#define q2pixel(type, q) ((q).den ? (type) ((type) 1 * (q).num / (q).den) : 0)
247 | |||
248 | 1664 | int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out) | |
249 | { | ||
250 | 1664 | out->u8[0] = op->c.u; | |
251 | 1664 | return 0; | |
252 | } | ||
253 | |||
254 | ✗ | int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out) | |
255 | { | ||
256 | ✗ | switch (op->type) { | |
257 | ✗ | case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0; | |
258 | ✗ | case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0; | |
259 | ✗ | case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0; | |
260 | ✗ | case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0; | |
261 | ✗ | default: return AVERROR(EINVAL); | |
262 | } | ||
263 | } | ||
264 | |||
265 | 56 | int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out) | |
266 | { | ||
267 |
1/5✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 56 times.
✗ Branch 4 not taken.
|
56 | switch (op->type) { |
268 | ✗ | case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0; | |
269 | ✗ | case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0; | |
270 | ✗ | case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0; | |
271 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0; |
272 | ✗ | default: return AVERROR(EINVAL); | |
273 | } | ||
274 | |||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | 989 | int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out) | |
279 | { | ||
280 |
2/2✓ Branch 0 taken 3956 times.
✓ Branch 1 taken 989 times.
|
4945 | for (int i = 0; i < 4; i++) { |
281 |
4/5✓ Branch 0 taken 1352 times.
✓ Branch 1 taken 1688 times.
✓ Branch 2 taken 416 times.
✓ Branch 3 taken 500 times.
✗ Branch 4 not taken.
|
3956 | switch (op->type) { |
282 |
2/2✓ Branch 0 taken 949 times.
✓ Branch 1 taken 403 times.
|
1352 | case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); break; |
283 |
2/2✓ Branch 0 taken 1171 times.
✓ Branch 1 taken 517 times.
|
1688 | case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break; |
284 |
1/2✓ Branch 0 taken 416 times.
✗ Branch 1 not taken.
|
416 | case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break; |
285 |
2/2✓ Branch 0 taken 461 times.
✓ Branch 1 taken 39 times.
|
500 | case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break; |
286 | ✗ | default: return AVERROR(EINVAL); | |
287 | } | ||
288 | } | ||
289 | |||
290 | 989 | return 0; | |
291 | } | ||
292 |