Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/avassert.h" | ||
22 | #include "libavutil/bswap.h" | ||
23 | #include "libavutil/mem.h" | ||
24 | #include "libavutil/rational.h" | ||
25 | #include "libavutil/refstruct.h" | ||
26 | |||
27 | #include "ops.h" | ||
28 | #include "ops_internal.h" | ||
29 | |||
30 | extern const SwsOpBackend backend_c; | ||
31 | extern const SwsOpBackend backend_murder; | ||
32 | extern const SwsOpBackend backend_x86; | ||
33 | |||
34 | const SwsOpBackend * const ff_sws_op_backends[] = { | ||
35 | &backend_murder, | ||
36 | #if ARCH_X86_64 && HAVE_X86ASM | ||
37 | &backend_x86, | ||
38 | #endif | ||
39 | &backend_c, | ||
40 | NULL | ||
41 | }; | ||
42 | |||
/**
 * Evaluate `x` and store the result in the enclosing function's `ret`
 * variable; if the result is negative, return it from that function
 * immediately.
 */
#define RET(x)                                                                  \
    do {                                                                        \
        ret = (x);                                                              \
        if (ret < 0)                                                            \
            return ret;                                                         \
    } while (0)
48 | |||
49 | 4374 | const char *ff_sws_pixel_type_name(SwsPixelType type) | |
50 | { | ||
51 |
4/7✓ Branch 0 taken 436 times.
✓ Branch 1 taken 1358 times.
✓ Branch 2 taken 1419 times.
✓ Branch 3 taken 1161 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
4374 | switch (type) { |
52 | 436 | case SWS_PIXEL_U8: return "u8"; | |
53 | 1358 | case SWS_PIXEL_U16: return "u16"; | |
54 | 1419 | case SWS_PIXEL_U32: return "u32"; | |
55 | 1161 | case SWS_PIXEL_F32: return "f32"; | |
56 | ✗ | case SWS_PIXEL_NONE: return "none"; | |
57 | ✗ | case SWS_PIXEL_TYPE_NB: break; | |
58 | } | ||
59 | |||
60 | ✗ | av_unreachable("Invalid pixel type!"); | |
61 | return "ERR"; | ||
62 | } | ||
63 | |||
64 | 989489 | int ff_sws_pixel_type_size(SwsPixelType type) | |
65 | { | ||
66 |
4/7✓ Branch 0 taken 305978 times.
✓ Branch 1 taken 294776 times.
✓ Branch 2 taken 277230 times.
✓ Branch 3 taken 111505 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
989489 | switch (type) { |
67 | 305978 | case SWS_PIXEL_U8: return sizeof(uint8_t); | |
68 | 294776 | case SWS_PIXEL_U16: return sizeof(uint16_t); | |
69 | 277230 | case SWS_PIXEL_U32: return sizeof(uint32_t); | |
70 | 111505 | case SWS_PIXEL_F32: return sizeof(float); | |
71 | ✗ | case SWS_PIXEL_NONE: break; | |
72 | ✗ | case SWS_PIXEL_TYPE_NB: break; | |
73 | } | ||
74 | |||
75 | ✗ | av_unreachable("Invalid pixel type!"); | |
76 | return 0; | ||
77 | } | ||
78 | |||
79 | 1209234 | bool ff_sws_pixel_type_is_int(SwsPixelType type) | |
80 | { | ||
81 |
2/4✓ Branch 0 taken 53729 times.
✓ Branch 1 taken 1155505 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1209234 | switch (type) { |
82 | 53729 | case SWS_PIXEL_U8: | |
83 | case SWS_PIXEL_U16: | ||
84 | case SWS_PIXEL_U32: | ||
85 | 53729 | return true; | |
86 | 1155505 | case SWS_PIXEL_F32: | |
87 | 1155505 | return false; | |
88 | ✗ | case SWS_PIXEL_NONE: | |
89 | ✗ | case SWS_PIXEL_TYPE_NB: break; | |
90 | } | ||
91 | |||
92 | ✗ | av_unreachable("Invalid pixel type!"); | |
93 | return false; | ||
94 | } | ||
95 | |||
96 | ✗ | SwsPixelType ff_sws_pixel_type_to_uint(SwsPixelType type) | |
97 | { | ||
98 | ✗ | if (!type) | |
99 | ✗ | return type; | |
100 | |||
101 | ✗ | switch (ff_sws_pixel_type_size(type)) { | |
102 | ✗ | case 8: return SWS_PIXEL_U8; | |
103 | ✗ | case 16: return SWS_PIXEL_U16; | |
104 | ✗ | case 32: return SWS_PIXEL_U32; | |
105 | } | ||
106 | |||
107 | ✗ | av_unreachable("Invalid pixel type!"); | |
108 | return SWS_PIXEL_NONE; | ||
109 | } | ||
110 | |||
111 | /* biased towards `a` */ | ||
112 | 4992 | static AVRational av_min_q(AVRational a, AVRational b) | |
113 | { | ||
114 |
2/2✓ Branch 1 taken 933 times.
✓ Branch 2 taken 4059 times.
|
4992 | return av_cmp_q(a, b) == 1 ? b : a; |
115 | } | ||
116 | |||
117 | 4992 | static AVRational av_max_q(AVRational a, AVRational b) | |
118 | { | ||
119 |
2/2✓ Branch 1 taken 1398 times.
✓ Branch 2 taken 3594 times.
|
4992 | return av_cmp_q(a, b) == -1 ? b : a; |
120 | } | ||
121 | |||
122 | 105326 | void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4]) | |
123 | { | ||
124 | uint64_t mask[4]; | ||
125 | int shift[4]; | ||
126 | |||
127 |
13/15✓ Branch 0 taken 70044 times.
✓ Branch 1 taken 624 times.
✓ Branch 2 taken 624 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2288 times.
✓ Branch 5 taken 4992 times.
✓ Branch 6 taken 4992 times.
✓ Branch 7 taken 10140 times.
✓ Branch 8 taken 4368 times.
✓ Branch 9 taken 2808 times.
✓ Branch 10 taken 1248 times.
✓ Branch 11 taken 1248 times.
✓ Branch 12 taken 1638 times.
✓ Branch 13 taken 312 times.
✗ Branch 14 not taken.
|
105326 | switch (op->op) { |
128 | 70044 | case SWS_OP_READ: | |
129 | case SWS_OP_WRITE: | ||
130 | 70044 | return; | |
131 | 624 | case SWS_OP_UNPACK: { | |
132 | 624 | unsigned val = x[0].num; | |
133 | 624 | ff_sws_pack_op_decode(op, mask, shift); | |
134 |
2/2✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 624 times.
|
3120 | for (int i = 0; i < 4; i++) |
135 | 2496 | x[i] = Q((val >> shift[i]) & mask[i]); | |
136 | 624 | return; | |
137 | } | ||
138 | 624 | case SWS_OP_PACK: { | |
139 | 624 | unsigned val = 0; | |
140 | 624 | ff_sws_pack_op_decode(op, mask, shift); | |
141 |
2/2✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 624 times.
|
3120 | for (int i = 0; i < 4; i++) |
142 | 2496 | val |= (x[i].num & mask[i]) << shift[i]; | |
143 | 624 | x[0] = Q(val); | |
144 | 624 | return; | |
145 | } | ||
146 | ✗ | case SWS_OP_SWAP_BYTES: | |
147 | ✗ | switch (ff_sws_pixel_type_size(op->type)) { | |
148 | ✗ | case 2: | |
149 | ✗ | for (int i = 0; i < 4; i++) | |
150 | ✗ | x[i].num = av_bswap16(x[i].num); | |
151 | ✗ | break; | |
152 | ✗ | case 4: | |
153 | ✗ | for (int i = 0; i < 4; i++) | |
154 | ✗ | x[i].num = av_bswap32(x[i].num); | |
155 | ✗ | break; | |
156 | } | ||
157 | ✗ | return; | |
158 | 2288 | case SWS_OP_CLEAR: | |
159 |
2/2✓ Branch 0 taken 9152 times.
✓ Branch 1 taken 2288 times.
|
11440 | for (int i = 0; i < 4; i++) { |
160 |
2/2✓ Branch 0 taken 5278 times.
✓ Branch 1 taken 3874 times.
|
9152 | if (op->c.q4[i].den) |
161 | 5278 | x[i] = op->c.q4[i]; | |
162 | } | ||
163 | 2288 | return; | |
164 | 4992 | case SWS_OP_LSHIFT: { | |
165 | 4992 | AVRational mult = Q(1 << op->c.u); | |
166 |
2/2✓ Branch 0 taken 19968 times.
✓ Branch 1 taken 4992 times.
|
24960 | for (int i = 0; i < 4; i++) |
167 |
2/2✓ Branch 0 taken 14976 times.
✓ Branch 1 taken 4992 times.
|
19968 | x[i] = x[i].den ? av_mul_q(x[i], mult) : x[i]; |
168 | 4992 | return; | |
169 | } | ||
170 | 4992 | case SWS_OP_RSHIFT: { | |
171 | 4992 | AVRational mult = Q(1 << op->c.u); | |
172 |
2/2✓ Branch 0 taken 19968 times.
✓ Branch 1 taken 4992 times.
|
24960 | for (int i = 0; i < 4; i++) |
173 |
2/2✓ Branch 0 taken 14976 times.
✓ Branch 1 taken 4992 times.
|
19968 | x[i] = x[i].den ? av_div_q(x[i], mult) : x[i]; |
174 | 4992 | return; | |
175 | } | ||
176 | 10140 | case SWS_OP_SWIZZLE: { | |
177 | 10140 | const AVRational orig[4] = { x[0], x[1], x[2], x[3] }; | |
178 |
2/2✓ Branch 0 taken 40560 times.
✓ Branch 1 taken 10140 times.
|
50700 | for (int i = 0; i < 4; i++) |
179 | 40560 | x[i] = orig[op->swizzle.in[i]]; | |
180 | 10140 | return; | |
181 | } | ||
182 | 4368 | case SWS_OP_CONVERT: | |
183 |
2/2✓ Branch 0 taken 3432 times.
✓ Branch 1 taken 936 times.
|
4368 | if (ff_sws_pixel_type_is_int(op->convert.to)) { |
184 | 3432 | const AVRational scale = ff_sws_pixel_expand(op->type, op->convert.to); | |
185 |
2/2✓ Branch 0 taken 13728 times.
✓ Branch 1 taken 3432 times.
|
17160 | for (int i = 0; i < 4; i++) { |
186 |
2/2✓ Branch 0 taken 7488 times.
✓ Branch 1 taken 6240 times.
|
13728 | x[i] = x[i].den ? Q(x[i].num / x[i].den) : x[i]; |
187 |
2/2✓ Branch 0 taken 2496 times.
✓ Branch 1 taken 11232 times.
|
13728 | if (op->convert.expand) |
188 | 2496 | x[i] = av_mul_q(x[i], scale); | |
189 | } | ||
190 | } | ||
191 | 4368 | return; | |
192 | 2808 | case SWS_OP_DITHER: | |
193 |
2/2✓ Branch 0 taken 11232 times.
✓ Branch 1 taken 2808 times.
|
14040 | for (int i = 0; i < 4; i++) |
194 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11232 times.
|
11232 | x[i] = x[i].den ? av_add_q(x[i], av_make_q(1, 2)) : x[i]; |
195 | 2808 | return; | |
196 | 1248 | case SWS_OP_MIN: | |
197 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 1248 times.
|
6240 | for (int i = 0; i < 4; i++) |
198 | 4992 | x[i] = av_min_q(x[i], op->c.q4[i]); | |
199 | 1248 | return; | |
200 | 1248 | case SWS_OP_MAX: | |
201 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 1248 times.
|
6240 | for (int i = 0; i < 4; i++) |
202 | 4992 | x[i] = av_max_q(x[i], op->c.q4[i]); | |
203 | 1248 | return; | |
204 | 1638 | case SWS_OP_LINEAR: { | |
205 | 1638 | const AVRational orig[4] = { x[0], x[1], x[2], x[3] }; | |
206 |
2/2✓ Branch 0 taken 6552 times.
✓ Branch 1 taken 1638 times.
|
8190 | for (int i = 0; i < 4; i++) { |
207 | 6552 | AVRational sum = op->lin.m[i][4]; | |
208 |
2/2✓ Branch 0 taken 26208 times.
✓ Branch 1 taken 6552 times.
|
32760 | for (int j = 0; j < 4; j++) |
209 | 26208 | sum = av_add_q(sum, av_mul_q(orig[j], op->lin.m[i][j])); | |
210 | 6552 | x[i] = sum; | |
211 | } | ||
212 | 1638 | return; | |
213 | } | ||
214 | 312 | case SWS_OP_SCALE: | |
215 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 312 times.
|
1560 | for (int i = 0; i < 4; i++) |
216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1248 times.
|
1248 | x[i] = x[i].den ? av_mul_q(x[i], op->c.q) : x[i]; |
217 | 312 | return; | |
218 | } | ||
219 | |||
220 | ✗ | av_unreachable("Invalid operation type!"); | |
221 | } | ||
222 | |||
223 | 52975 | static void op_uninit(SwsOp *op) | |
224 | { | ||
225 |
2/2✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 51571 times.
|
52975 | switch (op->op) { |
226 | 1404 | case SWS_OP_DITHER: | |
227 | 1404 | av_refstruct_unref(&op->dither.matrix); | |
228 | 1404 | break; | |
229 | } | ||
230 | |||
231 | 52975 | *op = (SwsOp) {0}; | |
232 | 52975 | } | |
233 | |||
234 | ✗ | SwsOpList *ff_sws_op_list_alloc(void) | |
235 | { | ||
236 | ✗ | SwsOpList *ops = av_mallocz(sizeof(SwsOpList)); | |
237 | ✗ | if (!ops) | |
238 | ✗ | return NULL; | |
239 | |||
240 | ✗ | ff_fmt_clear(&ops->src); | |
241 | ✗ | ff_fmt_clear(&ops->dst); | |
242 | ✗ | return ops; | |
243 | } | ||
244 | |||
245 | 23788 | void ff_sws_op_list_free(SwsOpList **p_ops) | |
246 | { | ||
247 | 23788 | SwsOpList *ops = *p_ops; | |
248 |
2/2✓ Branch 0 taken 6277 times.
✓ Branch 1 taken 17511 times.
|
23788 | if (!ops) |
249 | 6277 | return; | |
250 | |||
251 |
2/2✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
|
70486 | for (int i = 0; i < ops->num_ops; i++) |
252 | 52975 | op_uninit(&ops->ops[i]); | |
253 | |||
254 | 17511 | av_freep(&ops->ops); | |
255 | 17511 | av_free(ops); | |
256 | 17511 | *p_ops = NULL; | |
257 | } | ||
258 | |||
259 | 17511 | SwsOpList *ff_sws_op_list_duplicate(const SwsOpList *ops) | |
260 | { | ||
261 | 17511 | SwsOpList *copy = av_malloc(sizeof(*copy)); | |
262 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
|
17511 | if (!copy) |
263 | ✗ | return NULL; | |
264 | |||
265 | 17511 | int num = ops->num_ops; | |
266 |
1/2✓ Branch 0 taken 17511 times.
✗ Branch 1 not taken.
|
17511 | if (num) |
267 | 17511 | num = 1 << av_ceil_log2(num); | |
268 | |||
269 | 17511 | *copy = *ops; | |
270 | 17511 | copy->ops = av_memdup(ops->ops, num * sizeof(ops->ops[0])); | |
271 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
|
17511 | if (!copy->ops) { |
272 | ✗ | av_free(copy); | |
273 | ✗ | return NULL; | |
274 | } | ||
275 | |||
276 |
2/2✓ Branch 0 taken 52975 times.
✓ Branch 1 taken 17511 times.
|
70486 | for (int i = 0; i < ops->num_ops; i++) { |
277 | 52975 | const SwsOp *op = &ops->ops[i]; | |
278 |
2/2✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 51571 times.
|
52975 | switch (op->op) { |
279 | 1404 | case SWS_OP_DITHER: | |
280 | 1404 | av_refstruct_ref(copy->ops[i].dither.matrix); | |
281 | 1404 | break; | |
282 | } | ||
283 | } | ||
284 | |||
285 | 17511 | return copy; | |
286 | } | ||
287 | |||
288 | ✗ | void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count) | |
289 | { | ||
290 | ✗ | const int end = ops->num_ops - count; | |
291 | av_assert2(index >= 0 && count >= 0 && index + count <= ops->num_ops); | ||
292 | ✗ | op_uninit(&ops->ops[index]); | |
293 | ✗ | for (int i = index; i < end; i++) | |
294 | ✗ | ops->ops[i] = ops->ops[i + count]; | |
295 | ✗ | ops->num_ops = end; | |
296 | ✗ | } | |
297 | |||
298 | ✗ | int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op) | |
299 | { | ||
300 | ✗ | void *ret = av_dynarray2_add((void **) &ops->ops, &ops->num_ops, sizeof(*op), NULL); | |
301 | ✗ | if (!ret) { | |
302 | ✗ | op_uninit(op); | |
303 | ✗ | return AVERROR(ENOMEM); | |
304 | } | ||
305 | |||
306 | ✗ | for (int i = ops->num_ops - 1; i > index; i--) | |
307 | ✗ | ops->ops[i] = ops->ops[i - 1]; | |
308 | ✗ | ops->ops[index] = *op; | |
309 | ✗ | return 0; | |
310 | } | ||
311 | |||
312 | ✗ | int ff_sws_op_list_append(SwsOpList *ops, SwsOp *op) | |
313 | { | ||
314 | ✗ | return ff_sws_op_list_insert_at(ops, ops->num_ops, op); | |
315 | } | ||
316 | |||
317 | 2130 | int ff_sws_op_list_max_size(const SwsOpList *ops) | |
318 | { | ||
319 | 2130 | int max_size = 0; | |
320 |
2/2✓ Branch 0 taken 6618 times.
✓ Branch 1 taken 2130 times.
|
8748 | for (int i = 0; i < ops->num_ops; i++) { |
321 | 6618 | const int size = ff_sws_pixel_type_size(ops->ops[i].type); | |
322 | 6618 | max_size = FFMAX(max_size, size); | |
323 | } | ||
324 | |||
325 | 2130 | return max_size; | |
326 | } | ||
327 | |||
328 | ✗ | uint32_t ff_sws_linear_mask(const SwsLinearOp c) | |
329 | { | ||
330 | ✗ | uint32_t mask = 0; | |
331 | ✗ | for (int i = 0; i < 4; i++) { | |
332 | ✗ | for (int j = 0; j < 5; j++) { | |
333 | ✗ | if (av_cmp_q(c.m[i][j], Q(i == j))) | |
334 | ✗ | mask |= SWS_MASK(i, j); | |
335 | } | ||
336 | } | ||
337 | ✗ | return mask; | |
338 | } | ||
339 | |||
340 | 105 | static const char *describe_lin_mask(uint32_t mask) | |
341 | { | ||
342 | /* Try to be fairly descriptive without assuming too much */ | ||
343 | static const struct { | ||
344 | char name[24]; | ||
345 | uint32_t mask; | ||
346 | } patterns[] = { | ||
347 | { "noop", 0 }, | ||
348 | { "luma", SWS_MASK_LUMA }, | ||
349 | { "alpha", SWS_MASK_ALPHA }, | ||
350 | { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA }, | ||
351 | { "dot3", 0x7 }, | ||
352 | { "dot4", 0xF }, | ||
353 | { "row0", SWS_MASK_ROW(0) }, | ||
354 | { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA }, | ||
355 | { "col0", SWS_MASK_COL(0) }, | ||
356 | { "col0+off3", SWS_MASK_COL(0) | SWS_MASK_OFF3 }, | ||
357 | { "off3", SWS_MASK_OFF3 }, | ||
358 | { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
359 | { "diag3", SWS_MASK_DIAG3 }, | ||
360 | { "diag4", SWS_MASK_DIAG4 }, | ||
361 | { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA }, | ||
362 | { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 }, | ||
363 | { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
364 | { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 }, | ||
365 | { "matrix3", SWS_MASK_MAT3 }, | ||
366 | { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 }, | ||
367 | { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, | ||
368 | { "matrix4", SWS_MASK_MAT4 }, | ||
369 | { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 }, | ||
370 | }; | ||
371 | |||
372 |
1/2✓ Branch 0 taken 1275 times.
✗ Branch 1 not taken.
|
1275 | for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { |
373 |
2/2✓ Branch 0 taken 105 times.
✓ Branch 1 taken 1170 times.
|
1275 | if (!(mask & ~patterns[i].mask)) |
374 | 105 | return patterns[i].name; | |
375 | } | ||
376 | |||
377 | ✗ | av_unreachable("Invalid linear mask!"); | |
378 | return "ERR"; | ||
379 | } | ||
380 | |||
381 | 13528 | static char describe_comp_flags(unsigned flags) | |
382 | { | ||
383 |
2/2✓ Branch 0 taken 2865 times.
✓ Branch 1 taken 10663 times.
|
13528 | if (flags & SWS_COMP_GARBAGE) |
384 | 2865 | return 'X'; | |
385 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10663 times.
|
10663 | else if (flags & SWS_COMP_ZERO) |
386 | ✗ | return '0'; | |
387 |
2/2✓ Branch 0 taken 8098 times.
✓ Branch 1 taken 2565 times.
|
10663 | else if (flags & SWS_COMP_EXACT) |
388 | 8098 | return '+'; | |
389 | else | ||
390 | 2565 | return '.'; | |
391 | } | ||
392 | |||
393 | 22640 | static const char *print_q(const AVRational q, char buf[], int buf_len) | |
394 | { | ||
395 |
2/2✓ Branch 0 taken 4184 times.
✓ Branch 1 taken 18456 times.
|
22640 | if (!q.den) { |
396 |
2/4✓ Branch 0 taken 4184 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4184 times.
|
4184 | return q.num > 0 ? "inf" : q.num < 0 ? "-inf" : "nan"; |
397 |
2/2✓ Branch 0 taken 15112 times.
✓ Branch 1 taken 3344 times.
|
18456 | } else if (q.den == 1) { |
398 | 15112 | snprintf(buf, buf_len, "%d", q.num); | |
399 | 15112 | return buf; | |
400 |
3/4✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 2000 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1344 times.
|
3344 | } else if (abs(q.num) > 1000 || abs(q.den) > 1000) { |
401 | 2000 | snprintf(buf, buf_len, "%f", av_q2d(q)); | |
402 | 2000 | return buf; | |
403 | } else { | ||
404 | 1344 | snprintf(buf, buf_len, "%d/%d", q.num, q.den); | |
405 | 1344 | return buf; | |
406 | } | ||
407 | } | ||
408 | |||
409 | #define PRINTQ(q) print_q(q, (char[32]){0}, sizeof(char[32]) - 1) | ||
410 | |||
411 | 1547 | void ff_sws_op_list_print(void *log, int lev, const SwsOpList *ops) | |
412 | { | ||
413 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
|
1547 | if (!ops->num_ops) { |
414 | ✗ | av_log(log, lev, " (empty)\n"); | |
415 | ✗ | return; | |
416 | } | ||
417 | |||
418 |
2/2✓ Branch 0 taken 3382 times.
✓ Branch 1 taken 1547 times.
|
4929 | for (int i = 0; i < ops->num_ops; i++) { |
419 | 3382 | const SwsOp *op = &ops->ops[i]; | |
420 | 16910 | av_log(log, lev, " [%3s %c%c%c%c -> %c%c%c%c] ", | |
421 | 3382 | ff_sws_pixel_type_name(op->type), | |
422 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3382 times.
|
3382 | op->comps.unused[0] ? 'X' : '.', |
423 |
2/2✓ Branch 0 taken 1382 times.
✓ Branch 1 taken 2000 times.
|
3382 | op->comps.unused[1] ? 'X' : '.', |
424 |
2/2✓ Branch 0 taken 1736 times.
✓ Branch 1 taken 1646 times.
|
3382 | op->comps.unused[2] ? 'X' : '.', |
425 |
2/2✓ Branch 0 taken 1781 times.
✓ Branch 1 taken 1601 times.
|
3382 | op->comps.unused[3] ? 'X' : '.', |
426 | 3382 | describe_comp_flags(op->comps.flags[0]), | |
427 | 3382 | describe_comp_flags(op->comps.flags[1]), | |
428 | 3382 | describe_comp_flags(op->comps.flags[2]), | |
429 | 3382 | describe_comp_flags(op->comps.flags[3])); | |
430 | |||
431 |
12/16✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
✓ Branch 2 taken 30 times.
✓ Branch 3 taken 352 times.
✓ Branch 4 taken 352 times.
✓ Branch 5 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 339 times.
✓ Branch 8 taken 223 times.
✓ Branch 9 taken 180 times.
✓ Branch 10 taken 92 times.
✓ Branch 11 taken 92 times.
✓ Branch 12 taken 105 times.
✓ Branch 13 taken 20 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
|
3382 | switch (op->op) { |
432 | ✗ | case SWS_OP_INVALID: | |
433 | ✗ | av_log(log, lev, "SWS_OP_INVALID\n"); | |
434 | ✗ | break; | |
435 | 1547 | case SWS_OP_READ: | |
436 | case SWS_OP_WRITE: | ||
437 | 3094 | av_log(log, lev, "%-20s: %d elem(s) %s >> %d\n", | |
438 | 1547 | op->op == SWS_OP_READ ? "SWS_OP_READ" | |
439 | : "SWS_OP_WRITE", | ||
440 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1547 times.
|
1547 | op->rw.elems, op->rw.packed ? "packed" : "planar", |
441 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 1502 times.
|
1547 | op->rw.frac); |
442 | 1547 | break; | |
443 | 30 | case SWS_OP_SWAP_BYTES: | |
444 | 30 | av_log(log, lev, "SWS_OP_SWAP_BYTES\n"); | |
445 | 30 | break; | |
446 | 352 | case SWS_OP_LSHIFT: | |
447 | 352 | av_log(log, lev, "%-20s: << %u\n", "SWS_OP_LSHIFT", op->c.u); | |
448 | 352 | break; | |
449 | 352 | case SWS_OP_RSHIFT: | |
450 | 352 | av_log(log, lev, "%-20s: >> %u\n", "SWS_OP_RSHIFT", op->c.u); | |
451 | 352 | break; | |
452 | 50 | case SWS_OP_PACK: | |
453 | case SWS_OP_UNPACK: | ||
454 | 50 | av_log(log, lev, "%-20s: {%d %d %d %d}\n", | |
455 | 50 | op->op == SWS_OP_PACK ? "SWS_OP_PACK" | |
456 | : "SWS_OP_UNPACK", | ||
457 | 50 | op->pack.pattern[0], op->pack.pattern[1], | |
458 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 25 times.
|
50 | op->pack.pattern[2], op->pack.pattern[3]); |
459 | 50 | break; | |
460 | ✗ | case SWS_OP_CLEAR: | |
461 | ✗ | av_log(log, lev, "%-20s: {%s %s %s %s}\n", "SWS_OP_CLEAR", | |
462 | ✗ | op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_", | |
463 | ✗ | op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_", | |
464 | ✗ | op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_", | |
465 | ✗ | op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_"); | |
466 | ✗ | break; | |
467 | 339 | case SWS_OP_SWIZZLE: | |
468 | 339 | av_log(log, lev, "%-20s: %d%d%d%d\n", "SWS_OP_SWIZZLE", | |
469 | 339 | op->swizzle.x, op->swizzle.y, op->swizzle.z, op->swizzle.w); | |
470 | 339 | break; | |
471 | 223 | case SWS_OP_CONVERT: | |
472 | 446 | av_log(log, lev, "%-20s: %s -> %s%s\n", "SWS_OP_CONVERT", | |
473 | 223 | ff_sws_pixel_type_name(op->type), | |
474 | 223 | ff_sws_pixel_type_name(op->convert.to), | |
475 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 208 times.
|
223 | op->convert.expand ? " (expand)" : ""); |
476 | 223 | break; | |
477 | 180 | case SWS_OP_DITHER: | |
478 | 180 | av_log(log, lev, "%-20s: %dx%d matrix\n", "SWS_OP_DITHER", | |
479 | 180 | 1 << op->dither.size_log2, 1 << op->dither.size_log2); | |
480 | 180 | break; | |
481 | 92 | case SWS_OP_MIN: | |
482 | 92 | av_log(log, lev, "%-20s: x <= {%s %s %s %s}\n", "SWS_OP_MIN", | |
483 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_", |
484 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_", |
485 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_", |
486 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_"); |
487 | 92 | break; | |
488 | 92 | case SWS_OP_MAX: | |
489 | 92 | av_log(log, lev, "%-20s: {%s %s %s %s} <= x\n", "SWS_OP_MAX", | |
490 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_", |
491 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_", |
492 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_", |
493 |
1/2✓ Branch 0 taken 92 times.
✗ Branch 1 not taken.
|
92 | op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_"); |
494 | 92 | break; | |
495 | 105 | case SWS_OP_LINEAR: | |
496 | 105 | av_log(log, lev, "%-20s: %s [[%s %s %s %s %s] " | |
497 | "[%s %s %s %s %s] " | ||
498 | "[%s %s %s %s %s] " | ||
499 | "[%s %s %s %s %s]]\n", | ||
500 | 105 | "SWS_OP_LINEAR", describe_lin_mask(op->lin.mask), | |
501 | 105 | PRINTQ(op->lin.m[0][0]), PRINTQ(op->lin.m[0][1]), PRINTQ(op->lin.m[0][2]), PRINTQ(op->lin.m[0][3]), PRINTQ(op->lin.m[0][4]), | |
502 | 105 | PRINTQ(op->lin.m[1][0]), PRINTQ(op->lin.m[1][1]), PRINTQ(op->lin.m[1][2]), PRINTQ(op->lin.m[1][3]), PRINTQ(op->lin.m[1][4]), | |
503 | 105 | PRINTQ(op->lin.m[2][0]), PRINTQ(op->lin.m[2][1]), PRINTQ(op->lin.m[2][2]), PRINTQ(op->lin.m[2][3]), PRINTQ(op->lin.m[2][4]), | |
504 | 105 | PRINTQ(op->lin.m[3][0]), PRINTQ(op->lin.m[3][1]), PRINTQ(op->lin.m[3][2]), PRINTQ(op->lin.m[3][3]), PRINTQ(op->lin.m[3][4])); | |
505 | 105 | break; | |
506 | 20 | case SWS_OP_SCALE: | |
507 | 20 | av_log(log, lev, "%-20s: * %s\n", "SWS_OP_SCALE", | |
508 | 20 | PRINTQ(op->c.q)); | |
509 | 20 | break; | |
510 | ✗ | case SWS_OP_TYPE_NB: | |
511 | ✗ | break; | |
512 | } | ||
513 | |||
514 |
3/4✓ Branch 0 taken 909 times.
✓ Branch 1 taken 2473 times.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
|
3382 | if (op->comps.min[0].den || op->comps.min[1].den || |
515 |
2/4✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
|
909 | op->comps.min[2].den || op->comps.min[3].den || |
516 |
2/4✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 909 times.
✗ Branch 3 not taken.
|
909 | op->comps.max[0].den || op->comps.max[1].den || |
517 |
2/4✓ Branch 0 taken 909 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 909 times.
|
909 | op->comps.max[2].den || op->comps.max[3].den) |
518 | { | ||
519 | 2473 | av_log(log, AV_LOG_TRACE, " min: {%s, %s, %s, %s}, max: {%s, %s, %s, %s}\n", | |
520 | 2473 | PRINTQ(op->comps.min[0]), PRINTQ(op->comps.min[1]), | |
521 | 2473 | PRINTQ(op->comps.min[2]), PRINTQ(op->comps.min[3]), | |
522 | 2473 | PRINTQ(op->comps.max[0]), PRINTQ(op->comps.max[1]), | |
523 | 2473 | PRINTQ(op->comps.max[2]), PRINTQ(op->comps.max[3])); | |
524 | } | ||
525 | |||
526 | } | ||
527 | |||
528 | 1547 | av_log(log, lev, " (X = unused, + = exact, 0 = zero)\n"); | |
529 | } | ||
530 | |||
531 | 17511 | int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, | |
532 | const SwsOpList *ops, SwsCompiledOp *out) | ||
533 | { | ||
534 | SwsOpList *copy, rest; | ||
535 | 17511 | SwsCompiledOp compiled = {0}; | |
536 | 17511 | int ret = 0; | |
537 | |||
538 | 17511 | copy = ff_sws_op_list_duplicate(ops); | |
539 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17511 times.
|
17511 | if (!copy) |
540 | ✗ | return AVERROR(ENOMEM); | |
541 | |||
542 | /* Ensure these are always set during compilation */ | ||
543 | 17511 | ff_sws_op_list_update_comps(copy); | |
544 | |||
545 | /* Make an on-stack copy of `ops` to ensure we can still properly clean up | ||
546 | * the copy afterwards */ | ||
547 | 17511 | rest = *copy; | |
548 | |||
549 | 17511 | ret = backend->compile(ctx, &rest, &compiled); | |
550 |
2/2✓ Branch 0 taken 8827 times.
✓ Branch 1 taken 8684 times.
|
17511 | if (ret < 0) { |
551 |
1/2✓ Branch 0 taken 8827 times.
✗ Branch 1 not taken.
|
8827 | int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR; |
552 | 17654 | av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n", | |
553 | 8827 | backend->name, av_err2str(ret)); | |
554 |
2/2✓ Branch 0 taken 1547 times.
✓ Branch 1 taken 7280 times.
|
8827 | if (rest.num_ops != ops->num_ops) { |
555 | 1547 | av_log(ctx, msg_lev, "Uncompiled remainder:\n"); | |
556 | 1547 | ff_sws_op_list_print(ctx, msg_lev, &rest); | |
557 | } | ||
558 | } else { | ||
559 | 8684 | *out = compiled; | |
560 | } | ||
561 | |||
562 | 17511 | ff_sws_op_list_free(©); | |
563 | 17511 | return ret; | |
564 | } | ||
565 | |||
566 | ✗ | int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) | |
567 | { | ||
568 | ✗ | for (int n = 0; ff_sws_op_backends[n]; n++) { | |
569 | ✗ | const SwsOpBackend *backend = ff_sws_op_backends[n]; | |
570 | ✗ | if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0) | |
571 | ✗ | continue; | |
572 | |||
573 | ✗ | av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': " | |
574 | "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n", | ||
575 | ✗ | backend->name, out->block_size, out->over_read, out->over_write, | |
576 | out->cpu_flags); | ||
577 | ✗ | return 0; | |
578 | } | ||
579 | |||
580 | ✗ | av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n"); | |
581 | ✗ | ff_sws_op_list_print(ctx, AV_LOG_WARNING, ops); | |
582 | ✗ | return AVERROR(ENOTSUP); | |
583 | } | ||
584 | |||
585 | typedef struct SwsOpPass { | ||
586 | SwsCompiledOp comp; | ||
587 | SwsOpExec exec_base; | ||
588 | int num_blocks; | ||
589 | int tail_off_in; | ||
590 | int tail_off_out; | ||
591 | int tail_size_in; | ||
592 | int tail_size_out; | ||
593 | int planes_in; | ||
594 | int planes_out; | ||
595 | int pixel_bits_in; | ||
596 | int pixel_bits_out; | ||
597 | bool memcpy_in; | ||
598 | bool memcpy_out; | ||
599 | } SwsOpPass; | ||
600 | |||
601 | ✗ | static void op_pass_free(void *ptr) | |
602 | { | ||
603 | ✗ | SwsOpPass *p = ptr; | |
604 | ✗ | if (!p) | |
605 | ✗ | return; | |
606 | |||
607 | ✗ | if (p->comp.free) | |
608 | ✗ | p->comp.free(p->comp.priv); | |
609 | |||
610 | ✗ | av_free(p); | |
611 | } | ||
612 | |||
613 | ✗ | static void op_pass_setup(const SwsImg *out, const SwsImg *in, const SwsPass *pass) | |
614 | { | ||
615 | ✗ | const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->fmt); | |
616 | ✗ | const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->fmt); | |
617 | |||
618 | ✗ | SwsOpPass *p = pass->priv; | |
619 | ✗ | SwsOpExec *exec = &p->exec_base; | |
620 | ✗ | const SwsCompiledOp *comp = &p->comp; | |
621 | ✗ | const int block_size = comp->block_size; | |
622 | ✗ | p->num_blocks = (pass->width + block_size - 1) / block_size; | |
623 | |||
624 | /* Set up main loop parameters */ | ||
625 | ✗ | const int aligned_w = p->num_blocks * block_size; | |
626 | ✗ | const int safe_width = (p->num_blocks - 1) * block_size; | |
627 | ✗ | const int tail_size = pass->width - safe_width; | |
628 | ✗ | p->tail_off_in = safe_width * p->pixel_bits_in >> 3; | |
629 | ✗ | p->tail_off_out = safe_width * p->pixel_bits_out >> 3; | |
630 | ✗ | p->tail_size_in = tail_size * p->pixel_bits_in >> 3; | |
631 | ✗ | p->tail_size_out = tail_size * p->pixel_bits_out >> 3; | |
632 | ✗ | p->memcpy_in = false; | |
633 | ✗ | p->memcpy_out = false; | |
634 | |||
635 | ✗ | for (int i = 0; i < p->planes_in; i++) { | |
636 | ✗ | const int sub_x = (i == 1 || i == 2) ? indesc->log2_chroma_w : 0; | |
637 | ✗ | const int plane_w = (aligned_w + sub_x) >> sub_x; | |
638 | ✗ | const int plane_pad = (comp->over_read + sub_x) >> sub_x; | |
639 | ✗ | const int plane_size = plane_w * p->pixel_bits_in >> 3; | |
640 | ✗ | p->memcpy_in |= plane_size + plane_pad > in->linesize[i]; | |
641 | ✗ | exec->in_stride[i] = in->linesize[i]; | |
642 | } | ||
643 | |||
644 | ✗ | for (int i = 0; i < p->planes_out; i++) { | |
645 | ✗ | const int sub_x = (i == 1 || i == 2) ? outdesc->log2_chroma_w : 0; | |
646 | ✗ | const int plane_w = (aligned_w + sub_x) >> sub_x; | |
647 | ✗ | const int plane_pad = (comp->over_write + sub_x) >> sub_x; | |
648 | ✗ | const int plane_size = plane_w * p->pixel_bits_out >> 3; | |
649 | ✗ | p->memcpy_out |= plane_size + plane_pad > out->linesize[i]; | |
650 | ✗ | exec->out_stride[i] = out->linesize[i]; | |
651 | } | ||
652 | |||
653 | /* Pre-fill pointer bump for the main section only; this value does not | ||
654 | * matter at all for the tail / last row handlers because they only ever | ||
655 | * process a single line */ | ||
656 | ✗ | const int blocks_main = p->num_blocks - p->memcpy_out; | |
657 | ✗ | for (int i = 0; i < 4; i++) { | |
658 | ✗ | exec->in_bump[i] = in->linesize[i] - blocks_main * exec->block_size_in; | |
659 | ✗ | exec->out_bump[i] = out->linesize[i] - blocks_main * exec->block_size_out; | |
660 | } | ||
661 | ✗ | } | |
662 | |||
663 | /* Dispatch kernel over the last column of the image using memcpy */ | ||
664 | static av_always_inline void | ||
665 | ✗ | handle_tail(const SwsOpPass *p, SwsOpExec *exec, | |
666 | const SwsImg *out_base, const bool copy_out, | ||
667 | const SwsImg *in_base, const bool copy_in, | ||
668 | int y, const int h) | ||
669 | { | ||
670 | DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])]; | ||
671 | |||
672 | ✗ | const SwsCompiledOp *comp = &p->comp; | |
673 | ✗ | const int tail_size_in = p->tail_size_in; | |
674 | ✗ | const int tail_size_out = p->tail_size_out; | |
675 | ✗ | const int bx = p->num_blocks - 1; | |
676 | |||
677 | ✗ | SwsImg in = ff_sws_img_shift(in_base, y); | |
678 | ✗ | SwsImg out = ff_sws_img_shift(out_base, y); | |
679 | ✗ | for (int i = 0; i < p->planes_in; i++) { | |
680 | ✗ | in.data[i] += p->tail_off_in; | |
681 | ✗ | if (copy_in) { | |
682 | ✗ | exec->in[i] = (void *) tmp[0][i]; | |
683 | ✗ | exec->in_stride[i] = sizeof(tmp[0][i]); | |
684 | } else { | ||
685 | ✗ | exec->in[i] = in.data[i]; | |
686 | } | ||
687 | } | ||
688 | |||
689 | ✗ | for (int i = 0; i < p->planes_out; i++) { | |
690 | ✗ | out.data[i] += p->tail_off_out; | |
691 | ✗ | if (copy_out) { | |
692 | ✗ | exec->out[i] = (void *) tmp[1][i]; | |
693 | ✗ | exec->out_stride[i] = sizeof(tmp[1][i]); | |
694 | } else { | ||
695 | ✗ | exec->out[i] = out.data[i]; | |
696 | } | ||
697 | } | ||
698 | |||
699 | ✗ | for (int y_end = y + h; y < y_end; y++) { | |
700 | ✗ | if (copy_in) { | |
701 | ✗ | for (int i = 0; i < p->planes_in; i++) { | |
702 | av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]); | ||
703 | ✗ | memcpy(tmp[0][i], in.data[i], tail_size_in); | |
704 | ✗ | in.data[i] += in.linesize[i]; | |
705 | } | ||
706 | } | ||
707 | |||
708 | ✗ | comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1); | |
709 | |||
710 | ✗ | if (copy_out) { | |
711 | ✗ | for (int i = 0; i < p->planes_out; i++) { | |
712 | av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]); | ||
713 | ✗ | memcpy(out.data[i], tmp[1][i], tail_size_out); | |
714 | ✗ | out.data[i] += out.linesize[i]; | |
715 | } | ||
716 | } | ||
717 | |||
718 | ✗ | for (int i = 0; i < 4; i++) { | |
719 | ✗ | if (!copy_in) | |
720 | ✗ | exec->in[i] += in.linesize[i]; | |
721 | ✗ | if (!copy_out) | |
722 | ✗ | exec->out[i] += out.linesize[i]; | |
723 | } | ||
724 | } | ||
725 | ✗ | } | |
726 | |||
727 | ✗ | static void op_pass_run(const SwsImg *out_base, const SwsImg *in_base, | |
728 | const int y, const int h, const SwsPass *pass) | ||
729 | { | ||
730 | ✗ | const SwsOpPass *p = pass->priv; | |
731 | ✗ | const SwsCompiledOp *comp = &p->comp; | |
732 | ✗ | const SwsImg in = ff_sws_img_shift(in_base, y); | |
733 | ✗ | const SwsImg out = ff_sws_img_shift(out_base, y); | |
734 | |||
735 | /* Fill exec metadata for this slice */ | ||
736 | ✗ | DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base; | |
737 | ✗ | exec.slice_y = y; | |
738 | ✗ | exec.slice_h = h; | |
739 | ✗ | for (int i = 0; i < 4; i++) { | |
740 | ✗ | exec.in[i] = in.data[i]; | |
741 | ✗ | exec.out[i] = out.data[i]; | |
742 | } | ||
743 | |||
744 | /** | ||
745 | * To ensure safety, we need to consider the following: | ||
746 | * | ||
747 | * 1. We can overread the input, unless this is the last line of an | ||
748 | * unpadded buffer. All defined operations can handle arbitrary pixel | ||
749 | * input, so overread of arbitrary data is fine. | ||
750 | * | ||
751 | * 2. We can overwrite the output, as long as we don't write more than the | ||
752 | * amount of pixels that fit into one linesize. So we always need to | ||
753 | * memcpy the last column on the output side if unpadded. | ||
754 | * | ||
755 | * 3. For the last row, we also need to memcpy the remainder of the input, | ||
756 | * to avoid reading past the end of the buffer. Note that since we know | ||
757 | * the run() function is called on stripes of the same buffer, we don't | ||
758 | * need to worry about this for the end of a slice. | ||
759 | */ | ||
760 | |||
761 | ✗ | const int last_slice = y + h == pass->height; | |
762 | ✗ | const bool memcpy_in = last_slice && p->memcpy_in; | |
763 | ✗ | const bool memcpy_out = p->memcpy_out; | |
764 | ✗ | const int num_blocks = p->num_blocks; | |
765 | ✗ | const int blocks_main = num_blocks - memcpy_out; | |
766 | ✗ | const int h_main = h - memcpy_in; | |
767 | |||
768 | /* Handle main section */ | ||
769 | ✗ | comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main); | |
770 | |||
771 | ✗ | if (memcpy_in) { | |
772 | /* Safe part of last row */ | ||
773 | ✗ | for (int i = 0; i < 4; i++) { | |
774 | ✗ | exec.in[i] += h_main * in.linesize[i]; | |
775 | ✗ | exec.out[i] += h_main * out.linesize[i]; | |
776 | } | ||
777 | ✗ | comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h); | |
778 | } | ||
779 | |||
780 | /* Handle last column via memcpy, takes over `exec` so call these last */ | ||
781 | ✗ | if (memcpy_out) | |
782 | ✗ | handle_tail(p, &exec, out_base, true, in_base, false, y, h_main); | |
783 | ✗ | if (memcpy_in) | |
784 | ✗ | handle_tail(p, &exec, out_base, memcpy_out, in_base, true, y + h_main, 1); | |
785 | ✗ | } | |
786 | |||
787 | ✗ | static int rw_planes(const SwsOp *op) | |
788 | { | ||
789 | ✗ | return op->rw.packed ? 1 : op->rw.elems; | |
790 | } | ||
791 | |||
792 | ✗ | static int rw_pixel_bits(const SwsOp *op) | |
793 | { | ||
794 | ✗ | const int elems = op->rw.packed ? op->rw.elems : 1; | |
795 | ✗ | const int size = ff_sws_pixel_type_size(op->type); | |
796 | ✗ | const int bits = 8 >> op->rw.frac; | |
797 | av_assert1(bits >= 1); | ||
798 | ✗ | return elems * size * bits; | |
799 | } | ||
800 | |||
801 | ✗ | int ff_sws_compile_pass(SwsGraph *graph, SwsOpList *ops, int flags, SwsFormat dst, | |
802 | SwsPass *input, SwsPass **output) | ||
803 | { | ||
804 | ✗ | SwsContext *ctx = graph->ctx; | |
805 | ✗ | SwsOpPass *p = NULL; | |
806 | ✗ | const SwsOp *read = &ops->ops[0]; | |
807 | ✗ | const SwsOp *write = &ops->ops[ops->num_ops - 1]; | |
808 | SwsPass *pass; | ||
809 | int ret; | ||
810 | |||
811 | ✗ | if (ops->num_ops < 2) { | |
812 | ✗ | av_log(ctx, AV_LOG_ERROR, "Need at least two operations.\n"); | |
813 | ✗ | return AVERROR(EINVAL); | |
814 | } | ||
815 | |||
816 | ✗ | if (read->op != SWS_OP_READ || write->op != SWS_OP_WRITE) { | |
817 | ✗ | av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read " | |
818 | "and write, respectively.\n"); | ||
819 | ✗ | return AVERROR(EINVAL); | |
820 | } | ||
821 | |||
822 | ✗ | if (flags & SWS_OP_FLAG_OPTIMIZE) | |
823 | ✗ | RET(ff_sws_op_list_optimize(ops)); | |
824 | else | ||
825 | ✗ | ff_sws_op_list_update_comps(ops); | |
826 | |||
827 | ✗ | p = av_mallocz(sizeof(*p)); | |
828 | ✗ | if (!p) | |
829 | ✗ | return AVERROR(ENOMEM); | |
830 | |||
831 | ✗ | ret = ff_sws_ops_compile(ctx, ops, &p->comp); | |
832 | ✗ | if (ret < 0) | |
833 | ✗ | goto fail; | |
834 | |||
835 | ✗ | p->planes_in = rw_planes(read); | |
836 | ✗ | p->planes_out = rw_planes(write); | |
837 | ✗ | p->pixel_bits_in = rw_pixel_bits(read); | |
838 | ✗ | p->pixel_bits_out = rw_pixel_bits(write); | |
839 | ✗ | p->exec_base = (SwsOpExec) { | |
840 | ✗ | .width = dst.width, | |
841 | ✗ | .height = dst.height, | |
842 | ✗ | .block_size_in = p->comp.block_size * p->pixel_bits_in >> 3, | |
843 | ✗ | .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3, | |
844 | }; | ||
845 | |||
846 | ✗ | pass = ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, input, | |
847 | 1, p, op_pass_run); | ||
848 | ✗ | if (!pass) { | |
849 | ✗ | ret = AVERROR(ENOMEM); | |
850 | ✗ | goto fail; | |
851 | } | ||
852 | ✗ | pass->setup = op_pass_setup; | |
853 | ✗ | pass->free = op_pass_free; | |
854 | |||
855 | ✗ | *output = pass; | |
856 | ✗ | return 0; | |
857 | |||
858 | ✗ | fail: | |
859 | ✗ | op_pass_free(p); | |
860 | ✗ | return ret; | |
861 | } | ||
862 |