FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_dispatch.c
Date: 2026-03-14 06:54:08
Exec Total Coverage
Lines: 196 216 90.7%
Functions: 12 12 100.0%
Branches: 95 114 83.3%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/mem.h"
23 #include "libavutil/mem_internal.h"
24
25 #include "ops.h"
26 #include "ops_internal.h"
27 #include "ops_dispatch.h"
28
29 typedef struct SwsOpPass {
30 SwsCompiledOp comp;
31 SwsOpExec exec_base;
32 int num_blocks;
33 int tail_off_in;
34 int tail_off_out;
35 int tail_size_in;
36 int tail_size_out;
37 int planes_in;
38 int planes_out;
39 int pixel_bits_in;
40 int pixel_bits_out;
41 int idx_in[4];
42 int idx_out[4];
43 bool memcpy_in;
44 bool memcpy_out;
45 } SwsOpPass;
46
47 48415 int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
48 const SwsOpList *ops, SwsCompiledOp *out)
49 {
50 SwsOpList *copy;
51 48415 SwsCompiledOp compiled = {0};
52 48415 int ret = 0;
53
54 48415 copy = ff_sws_op_list_duplicate(ops);
55
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48415 times.
48415 if (!copy)
56 return AVERROR(ENOMEM);
57
58 /* Ensure these are always set during compilation */
59 48415 ff_sws_op_list_update_comps(copy);
60
61 48415 ret = backend->compile(ctx, copy, &compiled);
62
2/2
✓ Branch 0 taken 24773 times.
✓ Branch 1 taken 23642 times.
48415 if (ret < 0) {
63
1/2
✓ Branch 0 taken 24773 times.
✗ Branch 1 not taken.
24773 int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
64 49546 av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
65 24773 backend->name, av_err2str(ret));
66 } else {
67 23642 *out = compiled;
68 }
69
70 48415 ff_sws_op_list_free(&copy);
71 48415 return ret;
72 }
73
74 14280 int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
75 {
76
1/2
✓ Branch 0 taken 29557 times.
✗ Branch 1 not taken.
29557 for (int n = 0; ff_sws_op_backends[n]; n++) {
77 29557 const SwsOpBackend *backend = ff_sws_op_backends[n];
78
1/2
✓ Branch 0 taken 29557 times.
✗ Branch 1 not taken.
29557 if (ops->src.hw_format != backend->hw_format ||
79
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 29557 times.
29557 ops->dst.hw_format != backend->hw_format)
80 continue;
81
2/2
✓ Branch 1 taken 15277 times.
✓ Branch 2 taken 14280 times.
29557 if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0)
82 15277 continue;
83
84 14280 av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
85 "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
86 14280 backend->name, out->block_size, out->over_read, out->over_write,
87 out->cpu_flags);
88
89 14280 ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
90 14280 return 0;
91 }
92
93 return AVERROR(ENOTSUP);
94 }
95
96 23642 void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
97 {
98
1/2
✓ Branch 0 taken 23642 times.
✗ Branch 1 not taken.
23642 if (comp->free)
99 23642 comp->free(comp->priv);
100
101 23642 *comp = (SwsCompiledOp) {0};
102 23642 }
103
104 14280 static void op_pass_free(void *ptr)
105 {
106 14280 SwsOpPass *p = ptr;
107
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (!p)
108 return;
109
110 14280 ff_sws_compiled_op_unref(&p->comp);
111 14280 av_free(p);
112 }
113
114 15855 static inline void get_row_data(const SwsOpPass *p, const int y,
115 const uint8_t *in[4], uint8_t *out[4])
116 {
117 15855 const SwsOpExec *base = &p->exec_base;
118
2/2
✓ Branch 0 taken 28840 times.
✓ Branch 1 taken 15855 times.
44695 for (int i = 0; i < p->planes_in; i++)
119 28840 in[i] = base->in[i] + (y >> base->in_sub_y[i]) * base->in_stride[i];
120
2/2
✓ Branch 0 taken 32021 times.
✓ Branch 1 taken 15855 times.
47876 for (int i = 0; i < p->planes_out; i++)
121 32021 out[i] = base->out[i] + (y >> base->out_sub_y[i]) * base->out_stride[i];
122 15855 }
123
124 14280 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
125 const SwsPass *pass)
126 {
127 14280 const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
128 14280 const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
129
130 14280 SwsOpPass *p = pass->priv;
131 14280 SwsOpExec *exec = &p->exec_base;
132 14280 const SwsCompiledOp *comp = &p->comp;
133 14280 const int block_size = comp->block_size;
134 14280 p->num_blocks = (pass->width + block_size - 1) / block_size;
135
136 /* Set up main loop parameters */
137 14280 const int aligned_w = p->num_blocks * block_size;
138 14280 const int safe_width = (p->num_blocks - 1) * block_size;
139 14280 const int tail_size = pass->width - safe_width;
140 14280 p->tail_off_in = safe_width * p->pixel_bits_in >> 3;
141 14280 p->tail_off_out = safe_width * p->pixel_bits_out >> 3;
142 14280 p->tail_size_in = tail_size * p->pixel_bits_in >> 3;
143 14280 p->tail_size_out = tail_size * p->pixel_bits_out >> 3;
144 14280 p->memcpy_in = false;
145 14280 p->memcpy_out = false;
146
147
2/2
✓ Branch 0 taken 26774 times.
✓ Branch 1 taken 14280 times.
41054 for (int i = 0; i < p->planes_in; i++) {
148 26774 const int idx = p->idx_in[i];
149
4/4
✓ Branch 0 taken 20917 times.
✓ Branch 1 taken 5857 times.
✓ Branch 2 taken 5857 times.
✓ Branch 3 taken 15060 times.
26774 const int chroma = idx == 1 || idx == 2;
150
2/2
✓ Branch 0 taken 11714 times.
✓ Branch 1 taken 15060 times.
26774 const int sub_x = chroma ? indesc->log2_chroma_w : 0;
151
2/2
✓ Branch 0 taken 11714 times.
✓ Branch 1 taken 15060 times.
26774 const int sub_y = chroma ? indesc->log2_chroma_h : 0;
152 26774 const int plane_w = (aligned_w + sub_x) >> sub_x;
153 26774 const int plane_pad = (comp->over_read + sub_x) >> sub_x;
154 26774 const int plane_size = plane_w * p->pixel_bits_in >> 3;
155
1/2
✓ Branch 0 taken 26774 times.
✗ Branch 1 not taken.
26774 if (comp->slice_align)
156 26774 p->memcpy_in |= plane_size + plane_pad > in->linesize[idx];
157 26774 exec->in[i] = in->data[idx];
158 26774 exec->in_stride[i] = in->linesize[idx];
159 26774 exec->in_sub_y[i] = sub_y;
160 26774 exec->in_sub_x[i] = sub_x;
161 }
162
163
2/2
✓ Branch 0 taken 29274 times.
✓ Branch 1 taken 14280 times.
43554 for (int i = 0; i < p->planes_out; i++) {
164 29274 const int idx = p->idx_out[i];
165
4/4
✓ Branch 0 taken 22967 times.
✓ Branch 1 taken 6307 times.
✓ Branch 2 taken 6307 times.
✓ Branch 3 taken 16660 times.
29274 const int chroma = idx == 1 || idx == 2;
166
2/2
✓ Branch 0 taken 12614 times.
✓ Branch 1 taken 16660 times.
29274 const int sub_x = chroma ? outdesc->log2_chroma_w : 0;
167
2/2
✓ Branch 0 taken 12614 times.
✓ Branch 1 taken 16660 times.
29274 const int sub_y = chroma ? outdesc->log2_chroma_h : 0;
168 29274 const int plane_w = (aligned_w + sub_x) >> sub_x;
169 29274 const int plane_pad = (comp->over_write + sub_x) >> sub_x;
170 29274 const int plane_size = plane_w * p->pixel_bits_out >> 3;
171
1/2
✓ Branch 0 taken 29274 times.
✗ Branch 1 not taken.
29274 if (comp->slice_align)
172 29274 p->memcpy_out |= plane_size + plane_pad > out->linesize[idx];
173 29274 exec->out[i] = out->data[idx];
174 29274 exec->out_stride[i] = out->linesize[idx];
175 29274 exec->out_sub_y[i] = sub_y;
176 29274 exec->out_sub_x[i] = sub_x;
177 }
178
179 /* Pre-fill pointer bump for the main section only; this value does not
180 * matter at all for the tail / last row handlers because they only ever
181 * process a single line */
182 14280 const int blocks_main = p->num_blocks - p->memcpy_out;
183
2/2
✓ Branch 0 taken 57120 times.
✓ Branch 1 taken 14280 times.
71400 for (int i = 0; i < 4; i++) {
184 57120 exec->in_bump[i] = exec->in_stride[i] - blocks_main * exec->block_size_in;
185 57120 exec->out_bump[i] = exec->out_stride[i] - blocks_main * exec->block_size_out;
186 }
187
188 14280 return 0;
189 }
190
191 /* Dispatch kernel over the last column of the image using memcpy */
192 static av_always_inline void
193 1050 handle_tail(const SwsOpPass *p, SwsOpExec *exec,
194 const bool copy_out, const bool copy_in,
195 int y, const int h)
196 {
197 DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])];
198
199 1050 const SwsOpExec *base = &p->exec_base;
200 1050 const SwsCompiledOp *comp = &p->comp;
201 1050 const int tail_size_in = p->tail_size_in;
202 1050 const int tail_size_out = p->tail_size_out;
203 1050 const int bx = p->num_blocks - 1;
204
205 const uint8_t *in_data[4];
206 uint8_t *out_data[4];
207 1050 get_row_data(p, y, in_data, out_data);
208
209
2/2
✓ Branch 0 taken 1541 times.
✓ Branch 1 taken 1050 times.
2591 for (int i = 0; i < p->planes_in; i++) {
210 1541 in_data[i] += p->tail_off_in;
211
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 1016 times.
1541 if (copy_in) {
212 525 exec->in[i] = (void *) tmp[0][i];
213 525 exec->in_stride[i] = sizeof(tmp[0][i]);
214 } else {
215 1016 exec->in[i] = in_data[i];
216 }
217 }
218
219
2/2
✓ Branch 0 taken 1636 times.
✓ Branch 1 taken 1050 times.
2686 for (int i = 0; i < p->planes_out; i++) {
220 1636 out_data[i] += p->tail_off_out;
221
2/2
✓ Branch 0 taken 567 times.
✓ Branch 1 taken 1069 times.
1636 if (copy_out) {
222 567 exec->out[i] = (void *) tmp[1][i];
223 567 exec->out_stride[i] = sizeof(tmp[1][i]);
224 } else {
225 1069 exec->out[i] = out_data[i];
226 }
227 }
228
229
2/2
✓ Branch 0 taken 50883 times.
✓ Branch 1 taken 1050 times.
51933 for (int y_end = y + h; y < y_end; y++) {
230
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 50358 times.
50883 if (copy_in) {
231
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 525 times.
1050 for (int i = 0; i < p->planes_in; i++) {
232 av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]);
233 525 memcpy(tmp[0][i], in_data[i], tail_size_in);
234 525 in_data[i] += base->in_stride[i]; /* exec->in_stride was clobbered */
235 }
236 }
237
238 50883 comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1);
239
240
2/2
✓ Branch 0 taken 50400 times.
✓ Branch 1 taken 483 times.
50883 if (copy_out) {
241
2/2
✓ Branch 0 taken 50400 times.
✓ Branch 1 taken 50400 times.
100800 for (int i = 0; i < p->planes_out; i++) {
242 av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]);
243 50400 memcpy(out_data[i], tmp[1][i], tail_size_out);
244 50400 out_data[i] += base->out_stride[i];
245 }
246 }
247
248
2/2
✓ Branch 0 taken 203532 times.
✓ Branch 1 taken 50883 times.
254415 for (int i = 0; i < 4; i++) {
249
4/4
✓ Branch 0 taken 201432 times.
✓ Branch 1 taken 2100 times.
✓ Branch 2 taken 97494 times.
✓ Branch 3 taken 103938 times.
203532 if (!copy_in && exec->in[i])
250 97494 exec->in[i] += exec->in_stride[i];
251
4/4
✓ Branch 0 taken 1932 times.
✓ Branch 1 taken 201600 times.
✓ Branch 2 taken 1069 times.
✓ Branch 3 taken 863 times.
203532 if (!copy_out && exec->out[i])
252 1069 exec->out[i] += exec->out_stride[i];
253 }
254 }
255 1050 }
256
257 14280 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
258 const int h, const SwsPass *pass)
259 {
260 14280 const SwsOpPass *p = pass->priv;
261 14280 const SwsCompiledOp *comp = &p->comp;
262
263 /* Fill exec metadata for this slice */
264 14280 DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
265 14280 exec.slice_y = y;
266 14280 exec.slice_h = h;
267
268 /**
269 * To ensure safety, we need to consider the following:
270 *
271 * 1. We can overread the input, unless this is the last line of an
272 * unpadded buffer. All defined operations can handle arbitrary pixel
273 * input, so overread of arbitrary data is fine.
274 *
275 * 2. We can overwrite the output, as long as we don't write more than the
276 * amount of pixels that fit into one linesize. So we always need to
277 * memcpy the last column on the output side if unpadded.
278 *
279 * 3. For the last row, we also need to memcpy the remainder of the input,
280 * to avoid reading past the end of the buffer. Note that since we know
281 * the run() function is called on stripes of the same buffer, we don't
282 * need to worry about this for the end of a slice.
283 */
284
285 14280 const int last_slice = y + h == pass->height;
286
3/4
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 525 times.
✓ Branch 3 taken 13755 times.
14280 const bool memcpy_in = last_slice && p->memcpy_in;
287 14280 const bool memcpy_out = p->memcpy_out;
288 14280 const int num_blocks = p->num_blocks;
289 14280 const int blocks_main = num_blocks - memcpy_out;
290 14280 const int h_main = h - memcpy_in;
291
292 /* Handle main section */
293 14280 get_row_data(p, y, exec.in, exec.out);
294 14280 comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main);
295
296
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 13755 times.
14280 if (memcpy_in) {
297 /* Safe part of last row */
298 525 get_row_data(p, y + h_main, exec.in, exec.out);
299 525 comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h);
300 }
301
302 /* Handle last column via memcpy, takes over `exec` so call these last */
303
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 13755 times.
14280 if (memcpy_out)
304 525 handle_tail(p, &exec, true, false, y, h_main);
305
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 13755 times.
14280 if (memcpy_in)
306 525 handle_tail(p, &exec, memcpy_out, true, y + h_main, 1);
307 14280 }
308
309 28560 static int rw_planes(const SwsOp *op)
310 {
311
2/2
✓ Branch 0 taken 12376 times.
✓ Branch 1 taken 16184 times.
28560 return op->rw.packed ? 1 : op->rw.elems;
312 }
313
314 28560 static int rw_pixel_bits(const SwsOp *op)
315 {
316
2/2
✓ Branch 0 taken 12376 times.
✓ Branch 1 taken 16184 times.
28560 const int elems = op->rw.packed ? op->rw.elems : 1;
317 28560 const int size = ff_sws_pixel_type_size(op->type);
318 28560 const int bits = 8 >> op->rw.frac;
319 av_assert1(bits >= 1);
320 28560 return elems * size * bits;
321 }
322
323 14280 static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
324 SwsPass **output)
325 {
326 14280 SwsContext *ctx = graph->ctx;
327 14280 SwsOpPass *p = av_mallocz(sizeof(*p));
328
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (!p)
329 return AVERROR(ENOMEM);
330
331 14280 int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
332
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
333 goto fail;
334
335 14280 const SwsFormat *dst = &ops->dst;
336
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (p->comp.opaque) {
337 SwsCompiledOp c = p->comp;
338 av_free(p);
339 return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
340 input, c.slice_align, c.func_opaque,
341 NULL, c.priv, c.free, output);
342 }
343
344 14280 const SwsOp *read = ff_sws_op_list_input(ops);
345 14280 const SwsOp *write = ff_sws_op_list_output(ops);
346 14280 p->planes_in = rw_planes(read);
347 14280 p->planes_out = rw_planes(write);
348 14280 p->pixel_bits_in = rw_pixel_bits(read);
349 14280 p->pixel_bits_out = rw_pixel_bits(write);
350 14280 p->exec_base = (SwsOpExec) {
351 14280 .width = dst->width,
352 14280 .height = dst->height,
353 14280 .block_size_in = p->comp.block_size * p->pixel_bits_in >> 3,
354 14280 .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3,
355 };
356
357
2/2
✓ Branch 0 taken 57120 times.
✓ Branch 1 taken 14280 times.
71400 for (int i = 0; i < 4; i++) {
358
2/2
✓ Branch 0 taken 26774 times.
✓ Branch 1 taken 30346 times.
57120 p->idx_in[i] = i < p->planes_in ? ops->order_src.in[i] : -1;
359
2/2
✓ Branch 0 taken 29274 times.
✓ Branch 1 taken 27846 times.
57120 p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1;
360 }
361
362 14280 return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
363 input, p->comp.slice_align, op_pass_run,
364 op_pass_setup, p, op_pass_free, output);
365
366 fail:
367 op_pass_free(p);
368 return ret;
369 }
370
371 14280 int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags,
372 SwsPass *input, SwsPass **output)
373 {
374 14280 SwsContext *ctx = graph->ctx;
375 14280 SwsOpList *ops = *pops;
376 14280 int ret = 0;
377
378 /* Check if the whole operation graph is an end-to-end no-op */
379
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14280 times.
14280 if (ff_sws_op_list_is_noop(ops)) {
380 *output = input;
381 goto out;
382 }
383
384 14280 const SwsOp *read = ff_sws_op_list_input(ops);
385 14280 const SwsOp *write = ff_sws_op_list_output(ops);
386
2/4
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 14280 times.
14280 if (!read || !write) {
387 av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
388 "and write, respectively.\n");
389 ret = AVERROR(EINVAL);
390 goto out;
391 }
392
393
1/2
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
14280 if (flags & SWS_OP_FLAG_OPTIMIZE) {
394 14280 ret = ff_sws_op_list_optimize(ops);
395
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
396 goto out;
397 }
398
399 14280 ret = compile(graph, ops, input, output);
400
401 14280 out:
402
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret == AVERROR(ENOTSUP)) {
403 av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
404 ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
405 }
406 14280 ff_sws_op_list_free(&ops);
407 14280 *pops = NULL;
408 14280 return ret;
409 }
410