FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/ops_dispatch.c
Date: 2026-04-18 02:30:19
Exec Total Coverage
Lines: 258 365 70.7%
Functions: 15 16 93.8%
Branches: 117 207 56.5%

Line Branch Exec Source
1 /**
2 * Copyright (C) 2025 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/avassert.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/refstruct.h"
27
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "ops_dispatch.h"
31
/* Private state of one compiled operation pass; allocated in compile() and
 * released by op_pass_free(). */
32 typedef struct SwsOpPass {
33 SwsCompiledOp comp; /* compiled kernel; unreferenced in op_pass_free() */
34 SwsOpExec exec_base; /* per-frame execution parameters, filled in by op_pass_setup() */
35 SwsOpExec exec_tail; /* execution parameters for the separately processed tail section */
36 size_t num_blocks; /* blocks per line: width divided by block size, rounded up */
37 int tail_off_in; /* byte offset of the tail section within an input line */
38 int tail_off_out; /* byte offset of the tail section within an output line */
39 int tail_size_in; /* bytes copied per input line into the tail buffer */
40 int tail_size_out; /* bytes copied per output line out of the tail buffer */
41 int planes_in; /* number of planes read (see rw_planes()) */
42 int planes_out; /* number of planes written (see rw_planes()) */
43 int pixel_bits_in; /* bits per pixel on the read side (see rw_pixel_bits()) */
44 int pixel_bits_out; /* bits per pixel on the write side (see rw_pixel_bits()) */
45 int idx_in[4]; /* frame plane index for each read plane, -1 if unused */
46 int idx_out[4]; /* frame plane index for each written plane, -1 if unused */
47 int *offsets_y; /* refcounted source line per output line; set only for vertical filters */
48 int filter_size; /* copied from the filter weights; set only for horizontal filters */
49 bool memcpy_first; /* input tail must be copied for the slice starting at y == 0 */
50 bool memcpy_last; /* input tail must be copied for the slice ending at the pass height */
51 bool memcpy_out; /* output tail must be written through the tail buffer */
52 size_t tail_blocks; /* number of blocks handled by the tail section */
53 uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
54 unsigned int tail_buf_size; /* allocated size of tail_buf, maintained by av_fast_mallocz() */
55 } SwsOpPass;
56
57 66685 int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
58 const SwsOpList *ops, SwsCompiledOp *out)
59 {
60 SwsOpList *copy;
61 66685 SwsCompiledOp compiled = {0};
62 66685 int ret = 0;
63
64 66685 copy = ff_sws_op_list_duplicate(ops);
65
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 66685 times.
66685 if (!copy)
66 return AVERROR(ENOMEM);
67
68 /* Ensure these are always set during compilation */
69 66685 ff_sws_op_list_update_comps(copy);
70
71 66685 ret = backend->compile(ctx, copy, &compiled);
72
2/2
✓ Branch 0 taken 36496 times.
✓ Branch 1 taken 30189 times.
66685 if (ret < 0) {
73
1/2
✓ Branch 0 taken 36496 times.
✗ Branch 1 not taken.
36496 int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
74 72992 av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
75 36496 backend->name, av_err2str(ret));
76 } else {
77 30189 *out = compiled;
78 }
79
80 66685 ff_sws_op_list_free(&copy);
81 66685 return ret;
82 }
83
84 14280 int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
85 {
86
1/2
✓ Branch 0 taken 29557 times.
✗ Branch 1 not taken.
29557 for (int n = 0; ff_sws_op_backends[n]; n++) {
87 29557 const SwsOpBackend *backend = ff_sws_op_backends[n];
88
1/2
✓ Branch 0 taken 29557 times.
✗ Branch 1 not taken.
29557 if (ops->src.hw_format != backend->hw_format ||
89
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 29557 times.
29557 ops->dst.hw_format != backend->hw_format)
90 continue;
91
2/2
✓ Branch 1 taken 15277 times.
✓ Branch 2 taken 14280 times.
29557 if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0)
92 15277 continue;
93
94 14280 av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
95 "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
96 14280 backend->name, out->block_size, out->over_read, out->over_write,
97 out->cpu_flags);
98
99 14280 ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
100 14280 return 0;
101 }
102
103 return AVERROR(ENOTSUP);
104 }
105
106 30189 void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
107 {
108
1/2
✓ Branch 0 taken 30189 times.
✗ Branch 1 not taken.
30189 if (comp->free)
109 30189 comp->free(comp->priv);
110
111 30189 *comp = (SwsCompiledOp) {0};
112 30189 }
113
114 14280 static void op_pass_free(void *ptr)
115 {
116 14280 SwsOpPass *p = ptr;
117
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (!p)
118 return;
119
120 14280 ff_sws_compiled_op_unref(&p->comp);
121 14280 av_refstruct_unref(&p->offsets_y);
122 14280 av_free(p->exec_base.in_bump_y);
123 14280 av_free(p->exec_base.in_offset_x);
124 14280 av_free(p->tail_buf);
125 14280 av_free(p);
126 }
127
128 14280 static inline void get_row_data(const SwsOpPass *p, const int y_dst,
129 const uint8_t *in[4], uint8_t *out[4])
130 {
131 14280 const SwsOpExec *base = &p->exec_base;
132
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
133
2/2
✓ Branch 0 taken 26774 times.
✓ Branch 1 taken 14280 times.
41054 for (int i = 0; i < p->planes_in; i++)
134 26774 in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
135
2/2
✓ Branch 0 taken 29274 times.
✓ Branch 1 taken 14280 times.
43554 for (int i = 0; i < p->planes_out; i++)
136 29274 out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
137 14280 }
138
139 4577 static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
140 enum AVRounding rounding)
141 {
142 4577 const uint64_t bits = (uint64_t) pixels * pixel_bits;
143
2/3
✓ Branch 0 taken 1804 times.
✓ Branch 1 taken 2773 times.
✗ Branch 2 not taken.
4577 switch (rounding) {
144 1804 case AV_ROUND_ZERO:
145 case AV_ROUND_DOWN:
146 1804 return bits >> 3;
147 2773 case AV_ROUND_INF:
148 case AV_ROUND_UP:
149 2773 return (bits + 7) >> 3;
150 default:
151 av_unreachable("Invalid rounding mode");
152 return (size_t) -1;
153 }
154 }
155
/**
 * Number of bytes of a line that may be processed without touching the last
 * `plane_pad` bytes: |linesize| - plane_pad, clamped at zero.
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);
    const int64_t line_bytes = FFABS((int64_t) linesize);
    const int64_t remaining  = line_bytes - plane_pad;
    return remaining > 0 ? remaining : 0;
}
162
/**
 * Find the largest block count (at most `num_blocks`) such that the byte
 * offset of the last pixel of the last block does not exceed `safe_offset`.
 *
 * `offset_bytes` holds one entry per pixel; returns 0 if not even a single
 * block qualifies.
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    for (size_t blocks = num_blocks; blocks > 0; blocks--) {
        const size_t last_pixel = blocks * block_size - 1;
        if (offset_bytes[last_pixel] <= safe_offset)
            return blocks;
    }

    return 0;
}
172
173 14280 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
174 const SwsPass *pass)
175 {
176 14280 const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
177 14280 const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
178
179 14280 SwsOpPass *p = pass->priv;
180 14280 SwsOpExec *exec = &p->exec_base;
181 14280 const SwsCompiledOp *comp = &p->comp;
182
183 /* Set up main loop parameters */
184 14280 const unsigned block_size = comp->block_size;
185 14280 const size_t num_blocks = (pass->width + block_size - 1) / block_size;
186 14280 const size_t aligned_w = num_blocks * block_size;
187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (aligned_w < pass->width) /* overflow */
188 return AVERROR(EINVAL);
189 14280 p->num_blocks = num_blocks;
190 14280 p->memcpy_first = false;
191 14280 p->memcpy_last = false;
192 14280 p->memcpy_out = false;
193
194 14280 size_t safe_blocks = num_blocks;
195
2/2
✓ Branch 0 taken 26774 times.
✓ Branch 1 taken 14280 times.
41054 for (int i = 0; i < p->planes_in; i++) {
196 26774 int idx = p->idx_in[i];
197
4/4
✓ Branch 0 taken 20917 times.
✓ Branch 1 taken 5857 times.
✓ Branch 2 taken 5857 times.
✓ Branch 3 taken 15060 times.
26774 int chroma = idx == 1 || idx == 2;
198
2/2
✓ Branch 0 taken 11714 times.
✓ Branch 1 taken 15060 times.
26774 int sub_x = chroma ? indesc->log2_chroma_w : 0;
199
2/2
✓ Branch 0 taken 11714 times.
✓ Branch 1 taken 15060 times.
26774 int sub_y = chroma ? indesc->log2_chroma_h : 0;
200 26774 size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
201 size_t safe_blocks_in;
202
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26774 times.
26774 if (exec->in_offset_x) {
203 size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
204 AV_ROUND_UP);
205 safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
206 safe_bytes - filter_size,
207 exec->in_offset_x);
208 } else {
209 26774 safe_blocks_in = safe_bytes / exec->block_size_in;
210 }
211
212
2/2
✓ Branch 0 taken 969 times.
✓ Branch 1 taken 25805 times.
26774 if (safe_blocks_in < num_blocks) {
213 969 p->memcpy_first |= in->linesize[idx] < 0;
214 969 p->memcpy_last |= in->linesize[idx] > 0;
215 969 safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
216 }
217
218 26774 size_t loop_size = num_blocks * exec->block_size_in;
219 26774 exec->in[i] = in->data[idx];
220 26774 exec->in_stride[i] = in->linesize[idx];
221 26774 exec->in_bump[i] = in->linesize[idx] - loop_size;
222 26774 exec->in_sub_y[i] = sub_y;
223 26774 exec->in_sub_x[i] = sub_x;
224 }
225
226
2/2
✓ Branch 0 taken 29274 times.
✓ Branch 1 taken 14280 times.
43554 for (int i = 0; i < p->planes_out; i++) {
227 29274 int idx = p->idx_out[i];
228
4/4
✓ Branch 0 taken 22967 times.
✓ Branch 1 taken 6307 times.
✓ Branch 2 taken 6307 times.
✓ Branch 3 taken 16660 times.
29274 int chroma = idx == 1 || idx == 2;
229
2/2
✓ Branch 0 taken 12614 times.
✓ Branch 1 taken 16660 times.
29274 int sub_x = chroma ? outdesc->log2_chroma_w : 0;
230
2/2
✓ Branch 0 taken 12614 times.
✓ Branch 1 taken 16660 times.
29274 int sub_y = chroma ? outdesc->log2_chroma_h : 0;
231 29274 size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
232 29274 size_t safe_blocks_out = safe_bytes / exec->block_size_out;
233
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 29274 times.
29274 if (safe_blocks_out < num_blocks) {
234 p->memcpy_out = true;
235 safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
236 }
237
238 29274 size_t loop_size = num_blocks * exec->block_size_out;
239 29274 exec->out[i] = out->data[idx];
240 29274 exec->out_stride[i] = out->linesize[idx];
241 29274 exec->out_bump[i] = out->linesize[idx] - loop_size;
242 29274 exec->out_sub_y[i] = sub_y;
243 29274 exec->out_sub_x[i] = sub_x;
244 }
245
246
3/4
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 902 times.
✓ Branch 3 taken 13378 times.
14280 const bool memcpy_in = p->memcpy_first || p->memcpy_last;
247
3/4
✓ Branch 0 taken 13378 times.
✓ Branch 1 taken 902 times.
✓ Branch 2 taken 13378 times.
✗ Branch 3 not taken.
14280 if (!memcpy_in && !p->memcpy_out) {
248
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 13378 times.
13378 av_assert0(safe_blocks == num_blocks);
249 13378 return 0;
250 }
251
252 /* Set-up tail section parameters and buffers */
253 902 SwsOpExec *tail = &p->exec_tail;
254 902 const int align = av_cpu_max_align();
255 902 size_t alloc_size = 0;
256 902 *tail = *exec;
257
258 902 const size_t safe_width = safe_blocks * block_size;
259 902 const size_t tail_size = pass->width - safe_width;
260 902 p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
261 902 p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
262 902 p->tail_blocks = num_blocks - safe_blocks;
263
264
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 902 times.
902 if (exec->in_offset_x) {
265 p->tail_off_in = exec->in_offset_x[safe_width];
266 p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
267 p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP);
268 } else {
269 902 p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
270 902 p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
271 }
272
273 902 const size_t alloc_width = aligned_w - safe_width;
274
3/4
✓ Branch 0 taken 1871 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 969 times.
✓ Branch 3 taken 902 times.
1871 for (int i = 0; memcpy_in && i < p->planes_in; i++) {
275 size_t needed_size;
276
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 969 times.
969 if (exec->in_offset_x) {
277 /* The input offset map is already padded to multiples of the block
278 * size, and clamps the input offsets to the image boundaries; so
279 * we just need to compensate for the comp->over_read */
280 needed_size = p->tail_size_in;
281 } else {
282 969 needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
283 }
284 969 size_t loop_size = p->tail_blocks * exec->block_size_in;
285 969 tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
286 969 tail->in_bump[i] = tail->in_stride[i] - loop_size;
287 969 alloc_size += tail->in_stride[i] * in->height;
288 }
289
290
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 902 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
902 for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
291 size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
292 size_t loop_size = p->tail_blocks * exec->block_size_out;
293 tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
294 tail->out_bump[i] = tail->out_stride[i] - loop_size;
295 alloc_size += tail->out_stride[i] * out->height;
296 }
297
298
2/4
✓ Branch 0 taken 902 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 902 times.
902 if (memcpy_in && exec->in_offset_x) {
299 /* `in_offset_x` is indexed relative to the line start, not the start
300 * of the section being processed; so we need to over-allocate this
301 * array to the full width of the image, even though we will only
302 * partially fill in the offsets relevant to the tail region */
303 alloc_size += aligned_w * sizeof(*exec->in_offset_x);
304 }
305
306 902 av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
307
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 902 times.
902 if (!p->tail_buf)
308 return AVERROR(ENOMEM);
309
310 902 uint8_t *tail_buf = p->tail_buf;
311
3/4
✓ Branch 0 taken 1871 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 969 times.
✓ Branch 3 taken 902 times.
1871 for (int i = 0; memcpy_in && i < p->planes_in; i++) {
312 969 tail->in[i] = tail_buf;
313 969 tail_buf += tail->in_stride[i] * in->height;
314 }
315
316
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 902 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
902 for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
317 tail->out[i] = tail_buf;
318 tail_buf += tail->out_stride[i] * out->height;
319 }
320
321
2/4
✓ Branch 0 taken 902 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 902 times.
902 if (memcpy_in && exec->in_offset_x) {
322 tail->in_offset_x = (int32_t *) tail_buf;
323 for (int i = safe_width; i < aligned_w; i++)
324 tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
325 }
326
327 902 return 0;
328 }
329
/**
 * Copy `h` rows of `bytes` bytes each from `src` to `dst`, advancing each
 * pointer by its own stride after every row.
 */
static void copy_lines(uint8_t *dst, const size_t dst_stride,
                       const uint8_t *src, const size_t src_stride,
                       const int h, const size_t bytes)
{
    for (int rows_left = h; rows_left > 0; rows_left--) {
        memcpy(dst, src, bytes);
        src += src_stride;
        dst += dst_stride;
    }
}
340
341 14280 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
342 const int h, const SwsPass *pass)
343 {
344 14280 const SwsOpPass *p = pass->priv;
345 14280 const SwsCompiledOp *comp = &p->comp;
346
347 /* Fill exec metadata for this slice */
348 14280 DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
349 14280 exec.slice_y = y;
350 14280 exec.slice_h = h;
351
352 /**
353 * To ensure safety, we need to consider the following:
354 *
355 * 1. We can overread the input, unless this is the last line of an
356 * unpadded buffer. All defined operations can handle arbitrary pixel
357 * input, so overread of arbitrary data is fine. For flipped images,
358 * this condition is actually *inverted* to where the first line is
359 * the one at the end of the buffer.
360 *
361 * 2. We can overwrite the output, as long as we don't write more than the
362 * amount of pixels that fit into one linesize. So we always need to
363 * memcpy the last column on the output side if unpadded.
364 */
365
366
3/4
✓ Branch 0 taken 902 times.
✓ Branch 1 taken 13378 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 902 times.
27658 const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
367
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 13378 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
13378 p->memcpy_first && y == 0;
368 14280 const bool memcpy_out = p->memcpy_out;
369 14280 const size_t num_blocks = p->num_blocks;
370 14280 const size_t tail_blocks = p->tail_blocks;
371
372 14280 get_row_data(p, y, exec.in, exec.out);
373
3/4
✓ Branch 0 taken 13378 times.
✓ Branch 1 taken 902 times.
✓ Branch 2 taken 13378 times.
✗ Branch 3 not taken.
14280 if (!memcpy_in && !memcpy_out) {
374 /* Fast path (fully aligned/padded inputs and outputs) */
375 13378 comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
376 13378 return;
377 }
378
379 /* Non-aligned case (slow path); process main blocks as normal, and
380 * a separate tail (via memcpy into an appropriately padded buffer) */
381
1/2
✓ Branch 0 taken 902 times.
✗ Branch 1 not taken.
902 if (num_blocks > tail_blocks) {
382
2/2
✓ Branch 0 taken 3608 times.
✓ Branch 1 taken 902 times.
4510 for (int i = 0; i < 4; i++) {
383 /* We process fewer blocks, so the in_bump needs to be increased
384 * to reflect that the plane pointers are left on the last block,
385 * not the end of the processed line, after each loop iteration */
386 3608 exec.in_bump[i] += exec.block_size_in * tail_blocks;
387 3608 exec.out_bump[i] += exec.block_size_out * tail_blocks;
388 }
389
390 902 comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
391 }
392
393 902 DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
394 902 tail.slice_y = y;
395 902 tail.slice_h = h;
396
397
2/2
✓ Branch 0 taken 969 times.
✓ Branch 1 taken 902 times.
1871 for (int i = 0; i < p->planes_in; i++) {
398 /* Input offsets are relative to the base pointer */
399
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 969 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
969 if (!exec.in_offset_x || memcpy_in)
400 969 exec.in[i] += p->tail_off_in;
401 969 tail.in[i] += y * tail.in_stride[i];
402 }
403
2/2
✓ Branch 0 taken 1856 times.
✓ Branch 1 taken 902 times.
2758 for (int i = 0; i < p->planes_out; i++) {
404 1856 exec.out[i] += p->tail_off_out;
405 1856 tail.out[i] += y * tail.out_stride[i];
406 }
407
408
2/2
✓ Branch 0 taken 969 times.
✓ Branch 1 taken 902 times.
1871 for (int i = 0; i < p->planes_in; i++) {
409
1/2
✓ Branch 0 taken 969 times.
✗ Branch 1 not taken.
969 if (memcpy_in) {
410 969 copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
411 969 exec.in[i], exec.in_stride[i], h, p->tail_size_in);
412 } else {
413 /* Reuse input pointers directly */
414 const size_t loop_size = tail_blocks * exec.block_size_in;
415 tail.in[i] = exec.in[i];
416 tail.in_stride[i] = exec.in_stride[i];
417 tail.in_bump[i] = exec.in_stride[i] - loop_size;
418 }
419 }
420
421
3/4
✓ Branch 0 taken 2758 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1856 times.
✓ Branch 3 taken 902 times.
2758 for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
422 /* Reuse output pointers directly */
423 1856 const size_t loop_size = tail_blocks * exec.block_size_out;
424 1856 tail.out[i] = exec.out[i];
425 1856 tail.out_stride[i] = exec.out_stride[i];
426 1856 tail.out_bump[i] = exec.out_stride[i] - loop_size;
427 }
428
429 /* Dispatch kernel over tail */
430 av_assert1(tail_blocks > 0);
431 902 comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
432
433
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 902 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
902 for (int i = 0; memcpy_out && i < p->planes_out; i++) {
434 copy_lines(exec.out[i], exec.out_stride[i],
435 tail.out[i], tail.out_stride[i], h, p->tail_size_out);
436 }
437 }
438
439 28560 static int rw_planes(const SwsOp *op)
440 {
441
2/2
✓ Branch 0 taken 12376 times.
✓ Branch 1 taken 16184 times.
28560 return op->rw.packed ? 1 : op->rw.elems;
442 }
443
444 28560 static int rw_pixel_bits(const SwsOp *op)
445 {
446
2/2
✓ Branch 0 taken 12376 times.
✓ Branch 1 taken 16184 times.
28560 const int elems = op->rw.packed ? op->rw.elems : 1;
447 28560 const int size = ff_sws_pixel_type_size(op->type);
448 28560 const int bits = 8 >> op->rw.frac;
449 av_assert1(bits >= 1);
450 28560 return elems * size * bits;
451 }
452
453 28560 static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
454 {
455
2/2
✓ Branch 0 taken 14280 times.
✓ Branch 1 taken 14280 times.
28560 if (!pass)
456 14280 return;
457
458 /* Add at least as many pixels as needed to cover the padding requirement */
459 14280 const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
460
461 14280 SwsPassBuffer *buf = pass->output;
462 14280 buf->width_align = FFMAX(buf->width_align, block_size);
463 14280 buf->width_pad = FFMAX(buf->width_pad, pad);
464 }
465
466 14280 static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
467 SwsPass **output)
468 {
469 14280 SwsContext *ctx = graph->ctx;
470 14280 SwsOpPass *p = av_mallocz(sizeof(*p));
471
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (!p)
472 return AVERROR(ENOMEM);
473
474 14280 int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
475
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
476 goto fail;
477
478 14280 const SwsCompiledOp *comp = &p->comp;
479 14280 const SwsFormat *dst = &ops->dst;
480
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (p->comp.opaque) {
481 SwsCompiledOp c = *comp;
482 av_free(p);
483 return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
484 input, c.slice_align, c.func_opaque,
485 NULL, c.priv, c.free, output);
486 }
487
488 14280 const SwsOp *read = ff_sws_op_list_input(ops);
489 14280 const SwsOp *write = ff_sws_op_list_output(ops);
490 14280 p->planes_in = rw_planes(read);
491 14280 p->planes_out = rw_planes(write);
492 14280 p->pixel_bits_in = rw_pixel_bits(read);
493 14280 p->pixel_bits_out = rw_pixel_bits(write);
494 14280 p->exec_base = (SwsOpExec) {
495 14280 .width = dst->width,
496 14280 .height = dst->height,
497 };
498
499 14280 const int64_t block_bits_in = (int64_t) comp->block_size * p->pixel_bits_in;
500 14280 const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
501
2/4
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 14280 times.
14280 if (block_bits_in & 0x7 || block_bits_out & 0x7) {
502 av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
503 ret = AVERROR(EINVAL);
504 goto fail;
505 }
506
507 14280 p->exec_base.block_size_in = block_bits_in >> 3;
508 14280 p->exec_base.block_size_out = block_bits_out >> 3;
509
510
2/2
✓ Branch 0 taken 57120 times.
✓ Branch 1 taken 14280 times.
71400 for (int i = 0; i < 4; i++) {
511
2/2
✓ Branch 0 taken 26774 times.
✓ Branch 1 taken 30346 times.
57120 p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1;
512
2/2
✓ Branch 0 taken 29274 times.
✓ Branch 1 taken 27846 times.
57120 p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
513 }
514
515 14280 const SwsFilterWeights *filter = read->rw.kernel;
516
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (read->rw.filter == SWS_OP_FILTER_V) {
517 p->offsets_y = av_refstruct_ref(filter->offsets);
518
519 /* Compute relative pointer bumps for each output line */
520 int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
521 if (!bump) {
522 ret = AVERROR(ENOMEM);
523 goto fail;
524 }
525
526 int line = filter->offsets[0];
527 for (int y = 0; y < filter->dst_size - 1; y++) {
528 int next = filter->offsets[y + 1];
529 bump[y] = next - line - 1;
530 line = next;
531 }
532 bump[filter->dst_size - 1] = 0;
533 p->exec_base.in_bump_y = bump;
534
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 } else if (read->rw.filter == SWS_OP_FILTER_H) {
535 /* Compute pixel offset map for each output line */
536 const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
537 int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
538 if (!offset) {
539 ret = AVERROR(ENOMEM);
540 goto fail;
541 }
542
543 for (int x = 0; x < filter->dst_size; x++) {
544 /* Sanity check; if the tap would land on a half-pixel, we cannot
545 * reasonably expect the implementation to know about this. Just
546 * error out in such (theoretical) cases. */
547 int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
548 if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
549 ret = AVERROR(EINVAL);
550 goto fail;
551 }
552 offset[x] = bits >> 3;
553 }
554 for (int x = filter->dst_size; x < pixels; x++)
555 offset[x] = offset[filter->dst_size - 1];
556 p->exec_base.in_offset_x = offset;
557 p->exec_base.block_size_in = 0; /* ptr does not advance */
558 p->filter_size = filter->filter_size;
559 }
560
561 14280 ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
562 14280 input, comp->slice_align, op_pass_run,
563 op_pass_setup, p, op_pass_free, output);
564
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
565 return ret;
566
567 14280 align_pass(input, comp->block_size, comp->over_read, p->pixel_bits_in);
568 14280 align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
569 14280 return 0;
570
571 fail:
572 op_pass_free(p);
573 return ret;
574 }
575
576 14280 int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags,
577 SwsPass *input, SwsPass **output)
578 {
579 14280 const int passes_orig = graph->num_passes;
580 14280 SwsContext *ctx = graph->ctx;
581 14280 SwsOpList *ops = *pops;
582 14280 int ret = 0;
583
584 /* Check if the whole operation graph is an end-to-end no-op */
585
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14280 times.
14280 if (ff_sws_op_list_is_noop(ops)) {
586 *output = input;
587 goto out;
588 }
589
590 14280 const SwsOp *read = ff_sws_op_list_input(ops);
591 14280 const SwsOp *write = ff_sws_op_list_output(ops);
592
2/4
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 14280 times.
14280 if (!read || !write) {
593 av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
594 "and write, respectively.\n");
595 ret = AVERROR(EINVAL);
596 goto out;
597 }
598
599
1/2
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
14280 if (flags & SWS_OP_FLAG_OPTIMIZE) {
600 14280 ret = ff_sws_op_list_optimize(ops);
601
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
602 goto out;
603 14280 av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
604 14280 ff_sws_op_list_print(ctx, AV_LOG_DEBUG, AV_LOG_TRACE, ops);
605 }
606
607 14280 ret = compile(graph, ops, input, output);
608
1/2
✓ Branch 0 taken 14280 times.
✗ Branch 1 not taken.
14280 if (ret != AVERROR(ENOTSUP))
609 14280 goto out;
610
611 av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
612 SwsPass *prev = input;
613 while (ops) {
614 SwsOpList *rest;
615 ret = ff_sws_op_list_subpass(ops, &rest);
616 if (ret < 0)
617 goto out;
618
619 if (prev == input && !rest) {
620 /* No point in compiling an unsplit pass again */
621 ret = AVERROR(ENOTSUP);
622 goto out;
623 }
624
625 ret = compile(graph, ops, prev, &prev);
626 if (ret < 0) {
627 ff_sws_op_list_free(&rest);
628 goto out;
629 }
630
631 ff_sws_op_list_free(&ops);
632 ops = rest;
633 }
634
635 /* Return last subpass successfully compiled */
636 av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
637 graph->num_passes - passes_orig);
638 *output = prev;
639
640 14280 out:
641
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret == AVERROR(ENOTSUP)) {
642 av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
643 ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
644 }
645
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14280 times.
14280 if (ret < 0)
646 ff_sws_graph_rollback(graph, passes_orig);
647 14280 ff_sws_op_list_free(&ops);
648 14280 *pops = NULL;
649 14280 return ret;
650 }
651