FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavutil/tx.c
Date: 2024-04-19 17:50:32
Exec Total Coverage
Lines: 431 496 86.9%
Functions: 24 25 96.0%
Branches: 465 612 76.0%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "avassert.h"
20 #include "intmath.h"
21 #include "cpu.h"
22 #include "mem.h"
23 #include "qsort.h"
24 #include "bprint.h"
25
26 #include "tx_priv.h"
27
28 #define TYPE_IS(type, x) \
29 (((x) == AV_TX_FLOAT_ ## type) || \
30 ((x) == AV_TX_DOUBLE_ ## type) || \
31 ((x) == AV_TX_INT32_ ## type))
32
33 /* Calculates the modular multiplicative inverse */
34 2480 static av_always_inline int mulinv(int n, int m)
35 {
36 2480 n = n % m;
37
1/2
✓ Branch 0 taken 22303 times.
✗ Branch 1 not taken.
22303 for (int x = 1; x < m; x++)
38
2/2
✓ Branch 0 taken 2480 times.
✓ Branch 1 taken 19823 times.
22303 if (((n * x) % m) == 1)
39 2480 return x;
40 av_assert0(0); /* Never reached */
41 return 0;
42 }
43
44 75 int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts,
45 int d1, int d2)
46 {
47 75 const int sl = d1*d2;
48
49 75 s->map = av_malloc(s->len*sizeof(*s->map));
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (!s->map)
51 return AVERROR(ENOMEM);
52
53
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 75 times.
150 for (int k = 0; k < s->len; k += sl) {
54
3/6
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 75 times.
75 if (s->inv || (opts && opts->map_dir == FF_TX_MAP_SCATTER)) {
55 for (int m = 0; m < d2; m++)
56 for (int n = 0; n < d1; n++)
57 s->map[k + ((m*d1 + n*d2) % (sl))] = m*d1 + n;
58 } else {
59
2/2
✓ Branch 0 taken 375 times.
✓ Branch 1 taken 75 times.
450 for (int m = 0; m < d2; m++)
60
2/2
✓ Branch 0 taken 1125 times.
✓ Branch 1 taken 375 times.
1500 for (int n = 0; n < d1; n++)
61 1125 s->map[k + m*d1 + n] = (m*d1 + n*d2) % (sl);
62 }
63
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (s->inv)
65 for (int w = 1; w <= ((sl) >> 1); w++)
66 FFSWAP(int, s->map[k + w], s->map[k + sl - w]);
67 }
68
69
1/2
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
75 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
70
71 75 return 0;
72 }
73
74 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
75 1240 int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts,
76 int inv, int n, int m)
77 {
78 int *in_map, *out_map;
79 1240 const int len = n*m; /* Will not be equal to s->len for MDCTs */
80 int m_inv, n_inv;
81
82 /* Make sure the numbers are coprime */
83
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1240 times.
1240 if (av_gcd(n, m) != 1)
84 return AVERROR(EINVAL);
85
86 1240 m_inv = mulinv(m, n);
87 1240 n_inv = mulinv(n, m);
88
89
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1240 times.
1240 if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
90 return AVERROR(ENOMEM);
91
92 1240 in_map = s->map;
93 1240 out_map = s->map + len;
94
95 /* Ruritanian map for input, CRT map for output, can be swapped */
96
3/4
✓ Branch 0 taken 210 times.
✓ Branch 1 taken 1030 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 210 times.
1240 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
97 for (int j = 0; j < m; j++) {
98 for (int i = 0; i < n; i++) {
99 in_map[(i*m + j*n) % len] = j*n + i;
100 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
101 }
102 }
103 } else {
104
2/2
✓ Branch 0 taken 25288 times.
✓ Branch 1 taken 1240 times.
26528 for (int j = 0; j < m; j++) {
105
2/2
✓ Branch 0 taken 378024 times.
✓ Branch 1 taken 25288 times.
403312 for (int i = 0; i < n; i++) {
106 378024 in_map[j*n + i] = (i*m + j*n) % len;
107 378024 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
108 }
109 }
110 }
111
112
2/2
✓ Branch 0 taken 1146 times.
✓ Branch 1 taken 94 times.
1240 if (inv) {
113
2/2
✓ Branch 0 taken 19976 times.
✓ Branch 1 taken 1146 times.
21122 for (int i = 0; i < m; i++) {
114 19976 int *in = &in_map[i*n + 1]; /* Skip the DC */
115
2/2
✓ Branch 0 taken 139512 times.
✓ Branch 1 taken 19976 times.
159488 for (int j = 0; j < ((n - 1) >> 1); j++)
116 139512 FFSWAP(int, in[j], in[n - j - 2]);
117 }
118 }
119
120
2/2
✓ Branch 0 taken 210 times.
✓ Branch 1 taken 1030 times.
1240 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
121
122 1240 return 0;
123 }
124
125 10179042 static inline int split_radix_permutation(int i, int len, int inv)
126 {
127 10179042 len >>= 1;
128
2/2
✓ Branch 0 taken 1286558 times.
✓ Branch 1 taken 8892484 times.
10179042 if (len <= 1)
129 1286558 return i & 1;
130
2/2
✓ Branch 0 taken 4446242 times.
✓ Branch 1 taken 4446242 times.
8892484 if (!(i & len))
131 4446242 return split_radix_permutation(i, len, inv) * 2;
132 4446242 len >>= 1;
133 4446242 return split_radix_permutation(i, len, inv) * 4 + 1 - 2*(!(i & len) ^ inv);
134 }
135
136 3574 int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
137 {
138 3574 int len = s->len;
139
140
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3574 times.
3574 if (!(s->map = av_malloc(len*sizeof(*s->map))))
141 return AVERROR(ENOMEM);
142
143
3/4
✓ Branch 0 taken 3574 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1278 times.
✓ Branch 3 taken 2296 times.
3574 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
144
2/2
✓ Branch 0 taken 157792 times.
✓ Branch 1 taken 1278 times.
159070 for (int i = 0; i < s->len; i++)
145 157792 s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
146 } else {
147
2/2
✓ Branch 0 taken 963274 times.
✓ Branch 1 taken 2296 times.
965570 for (int i = 0; i < s->len; i++)
148 963274 s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
149 }
150
151
1/2
✓ Branch 0 taken 3574 times.
✗ Branch 1 not taken.
3574 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
152
153 3574 return 0;
154 }
155
156 int ff_tx_gen_inplace_map(AVTXContext *s, int len)
157 {
158 int *src_map, out_map_idx = 0;
159
160 if (!s->sub || !s->sub->map)
161 return AVERROR(EINVAL);
162
163 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
164 return AVERROR(ENOMEM);
165
166 src_map = s->sub->map;
167
168 /* The first coefficient is always already in-place */
169 for (int src = 1; src < s->len; src++) {
170 int dst = src_map[src];
171 int found = 0;
172
173 if (dst <= src)
174 continue;
175
176 /* This just checks if a closed loop has been encountered before,
177 * and if so, skips it, since to fully permute a loop we must only
178 * enter it once. */
179 do {
180 for (int j = 0; j < out_map_idx; j++) {
181 if (dst == s->map[j]) {
182 found = 1;
183 break;
184 }
185 }
186 dst = src_map[dst];
187 } while (dst != src && !found);
188
189 if (!found)
190 s->map[out_map_idx++] = src;
191 }
192
193 s->map[out_map_idx++] = 0;
194
195 return 0;
196 }
197
198 41020 static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
199 int is_dual, int dual_high, int len,
200 int basis, int dual_stride, int inv_lookup)
201 {
202 41020 len >>= 1;
203
204
2/2
✓ Branch 0 taken 27564 times.
✓ Branch 1 taken 13456 times.
41020 if (len <= basis) {
205 int k1, k2, stride, even_idx, odd_idx;
206
207
3/4
✓ Branch 0 taken 20594 times.
✓ Branch 1 taken 6970 times.
✓ Branch 2 taken 20594 times.
✗ Branch 3 not taken.
27564 is_dual = is_dual && dual_stride;
208 27564 dual_high = is_dual & dual_high;
209
2/2
✓ Branch 0 taken 20594 times.
✓ Branch 1 taken 6970 times.
27564 stride = is_dual ? FFMIN(dual_stride, len) : 0;
210
211 27564 even_idx = offset + dual_high*(stride - 2*len);
212
4/4
✓ Branch 0 taken 20594 times.
✓ Branch 1 taken 6970 times.
✓ Branch 2 taken 10297 times.
✓ Branch 3 taken 10297 times.
27564 odd_idx = even_idx + len + (is_dual && !dual_high)*len + dual_high*len;
213
214
2/2
✓ Branch 0 taken 82746 times.
✓ Branch 1 taken 27564 times.
110310 for (int i = 0; i < len; i++) {
215 82746 k1 = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
216 82746 k2 = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);
217
2/2
✓ Branch 0 taken 69002 times.
✓ Branch 1 taken 13744 times.
82746 if (inv_lookup) {
218 69002 revtab[even_idx++] = k1;
219 69002 revtab[odd_idx++] = k2;
220 } else {
221 13744 revtab[k1] = even_idx++;
222 13744 revtab[k2] = odd_idx++;
223 }
224
4/4
✓ Branch 0 taken 55028 times.
✓ Branch 1 taken 27718 times.
✓ Branch 2 taken 27514 times.
✓ Branch 3 taken 27514 times.
82746 if (stride && !((i + 1) % stride)) {
225 27514 even_idx += stride;
226 27514 odd_idx += stride;
227 }
228 }
229
230 27564 return;
231 }
232
233 13456 parity_revtab_generator(revtab, n, inv, offset,
234 0, 0, len >> 0, basis, dual_stride, inv_lookup);
235 13456 parity_revtab_generator(revtab, n, inv, offset + (len >> 0),
236 1, 0, len >> 1, basis, dual_stride, inv_lookup);
237 13456 parity_revtab_generator(revtab, n, inv, offset + (len >> 0) + (len >> 1),
238 1, 1, len >> 1, basis, dual_stride, inv_lookup);
239 }
240
241 652 int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv,
242 FFTXCodeletOptions *opts,
243 int basis, int dual_stride)
244 {
245 652 basis >>= 1;
246
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 652 times.
652 if (len < basis)
247 return AVERROR(EINVAL);
248
249
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 652 times.
652 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
250 return AVERROR(ENOMEM);
251
252
3/4
✓ Branch 0 taken 519 times.
✓ Branch 1 taken 133 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 519 times.
652 av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
253
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 652 times.
652 av_assert0(dual_stride <= basis);
254
255
2/2
✓ Branch 0 taken 629 times.
✓ Branch 1 taken 23 times.
1281 parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
256 basis, dual_stride,
257
2/2
✓ Branch 0 taken 340 times.
✓ Branch 1 taken 289 times.
629 opts ? opts->map_dir == FF_TX_MAP_GATHER : FF_TX_MAP_GATHER);
258
259
2/2
✓ Branch 0 taken 629 times.
✓ Branch 1 taken 23 times.
652 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
260
261 652 return 0;
262 }
263
264 26159 static void reset_ctx(AVTXContext *s, int free_sub)
265 {
266
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26159 times.
26159 if (!s)
267 return;
268
269
2/2
✓ Branch 0 taken 4588 times.
✓ Branch 1 taken 21571 times.
26159 if (s->sub)
270
2/2
✓ Branch 0 taken 18352 times.
✓ Branch 1 taken 4588 times.
22940 for (int i = 0; i < TX_MAX_SUB; i++)
271 18352 reset_ctx(&s->sub[i], free_sub + 1);
272
273
3/4
✓ Branch 0 taken 12486 times.
✓ Branch 1 taken 13673 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12486 times.
26159 if (s->cd_self && s->cd_self->uninit)
274 s->cd_self->uninit(s);
275
276
2/2
✓ Branch 0 taken 22624 times.
✓ Branch 1 taken 3535 times.
26159 if (free_sub)
277 22624 av_freep(&s->sub);
278
279 26159 av_freep(&s->map);
280 26159 av_freep(&s->exp);
281 26159 av_freep(&s->tmp);
282
283 /* Nothing else needs to be reset, it gets overwritten if another
284 * ff_tx_init_subtx() call is made. */
285 26159 s->nb_sub = 0;
286 26159 s->opaque = NULL;
287 26159 memset(s->fn, 0, sizeof(*s->fn));
288 }
289
290 91 void ff_tx_clear_ctx(AVTXContext *s)
291 {
292 91 reset_ctx(s, 0);
293 91 }
294
295 4855 av_cold void av_tx_uninit(AVTXContext **ctx)
296 {
297
2/2
✓ Branch 0 taken 583 times.
✓ Branch 1 taken 4272 times.
4855 if (!(*ctx))
298 583 return;
299
300 4272 reset_ctx(*ctx, 1);
301 4272 av_freep(ctx);
302 }
303
304 13 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
305 uint64_t flags, FFTXCodeletOptions *opts,
306 int len, int inv, const void *scale)
307 {
308 /* Can only handle one sample+type to one sample+type transforms */
309
6/12
✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 13 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 13 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 13 times.
13 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
310 return AVERROR(EINVAL);
311 13 return 0;
312 }
313
314 /* Null transform when the length is 1 */
315 2 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
316 {
317 2 memcpy(_out, _in, stride);
318 2 }
319
320 static const FFTXCodelet ff_tx_null_def = {
321 .name = NULL_IF_CONFIG_SMALL("null"),
322 .function = ff_tx_null,
323 .type = TX_TYPE_ANY,
324 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
325 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
326 .factors[0] = TX_FACTOR_ANY,
327 .min_len = 1,
328 .max_len = 1,
329 .init = ff_tx_null_init,
330 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
331 .prio = FF_TX_PRIO_MAX,
332 };
333
334 static const FFTXCodelet * const ff_tx_null_list[] = {
335 &ff_tx_null_def,
336 NULL,
337 };
338
339 /* Array of all compiled codelet lists. Order is irrelevant. */
340 static const FFTXCodelet * const * const codelet_list[] = {
341 ff_tx_codelet_list_float_c,
342 ff_tx_codelet_list_double_c,
343 ff_tx_codelet_list_int32_c,
344 ff_tx_null_list,
345 #if HAVE_X86ASM
346 ff_tx_codelet_list_float_x86,
347 #endif
348 #if ARCH_AARCH64
349 ff_tx_codelet_list_float_aarch64,
350 #endif
351 };
352 static const int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
353
354 static const int cpu_slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
355 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
356 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
357
358 static const int cpu_slow_penalties[][2] = {
359 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
360 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
361 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
362 { AV_CPU_FLAG_ATOM, 1 + 128 },
363 { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
364 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
365 };
366
367 23418 static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
368 {
369 23418 int prio = cd->prio;
370 23418 int max_factor = 0;
371
372 /* If the CPU has a SLOW flag, and the instruction is also flagged
373 * as being slow for such, reduce its priority */
374
2/2
✓ Branch 0 taken 140508 times.
✓ Branch 1 taken 23418 times.
163926 for (int i = 0; i < FF_ARRAY_ELEMS(cpu_slow_penalties); i++) {
375
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 140508 times.
140508 if ((cpu_flags & cd->cpu_flags) & cpu_slow_penalties[i][0])
376 prio -= cpu_slow_penalties[i][1];
377 }
378
379 /* Prioritize aligned-only codelets */
380
4/4
✓ Branch 0 taken 2284 times.
✓ Branch 1 taken 21134 times.
✓ Branch 2 taken 2271 times.
✓ Branch 3 taken 13 times.
23418 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
381 2271 prio += 64;
382
383 /* Codelets for specific lengths are generally faster */
384
4/4
✓ Branch 0 taken 5451 times.
✓ Branch 1 taken 17967 times.
✓ Branch 2 taken 4958 times.
✓ Branch 3 taken 493 times.
23418 if ((len == cd->min_len) && (len == cd->max_len))
385 4958 prio += 64;
386
387 /* Forward-only or inverse-only transforms are generally better */
388
2/2
✓ Branch 0 taken 12034 times.
✓ Branch 1 taken 11384 times.
23418 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
389 12034 prio += 64;
390
391 /* Larger factors are generally better */
392
2/2
✓ Branch 0 taken 93672 times.
✓ Branch 1 taken 23418 times.
117090 for (int i = 0; i < TX_MAX_SUB; i++)
393 93672 max_factor = FFMAX(cd->factors[i], max_factor);
394
2/2
✓ Branch 0 taken 21647 times.
✓ Branch 1 taken 1771 times.
23418 if (max_factor)
395 21647 prio += 16*max_factor;
396
397 23418 return prio;
398 }
399
400 typedef struct FFTXLenDecomp {
401 int len;
402 int len2;
403 int prio;
404 const FFTXCodelet *cd;
405 } FFTXLenDecomp;
406
407 440 static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
408 {
409 440 return FFDIFFSIGN(b->prio, a->prio);
410 }
411
412 3511 int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
413 int len, int inv)
414 {
415 3511 int nb_decomp = 0;
416 FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
417 3511 int codelet_list_idx = codelet_list_num;
418
419 3511 const int cpu_flags = av_get_cpu_flags();
420
421 /* Loop through all codelets in all codelet lists to find matches
422 * to the requirements */
423
2/2
✓ Branch 0 taken 17555 times.
✓ Branch 1 taken 3511 times.
21066 while (codelet_list_idx--) {
424 17555 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
425 17555 const FFTXCodelet *cd = NULL;
426
427
2/2
✓ Branch 0 taken 800508 times.
✓ Branch 1 taken 17555 times.
818063 while ((cd = *list++)) {
428 800508 int fl = len;
429 800508 int skip = 0, prio;
430 800508 int factors_product = 1, factors_mod = 0;
431
432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 800508 times.
800508 if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
433 goto sort;
434
435 /* Check if the type matches */
436
4/4
✓ Branch 0 taken 796997 times.
✓ Branch 1 taken 3511 times.
✓ Branch 2 taken 545050 times.
✓ Branch 3 taken 251947 times.
800508 if (cd->type != TX_TYPE_ANY && type != cd->type)
437 545050 continue;
438
439 /* Check direction for non-orthogonal codelets */
440
4/4
✓ Branch 0 taken 17095 times.
✓ Branch 1 taken 238363 times.
✓ Branch 2 taken 3498 times.
✓ Branch 3 taken 13597 times.
255458 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
441
4/4
✓ Branch 0 taken 6102 times.
✓ Branch 1 taken 235759 times.
✓ Branch 2 taken 5130 times.
✓ Branch 3 taken 972 times.
241861 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
442
3/4
✓ Branch 0 taken 3498 times.
✓ Branch 1 taken 237391 times.
✓ Branch 2 taken 3498 times.
✗ Branch 3 not taken.
240889 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
443
3/4
✓ Branch 0 taken 3498 times.
✓ Branch 1 taken 237391 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3498 times.
240889 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
444 14569 continue;
445
446 /* Check if the CPU supports the required ISA */
447
2/2
✓ Branch 0 taken 118503 times.
✓ Branch 1 taken 122386 times.
240889 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
448
2/2
✓ Branch 0 taken 117290 times.
✓ Branch 1 taken 1213 times.
118503 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
449 117290 continue;
450
451
1/2
✓ Branch 0 taken 268264 times.
✗ Branch 1 not taken.
268264 for (int i = 0; i < TX_MAX_FACTORS; i++) {
452
4/4
✓ Branch 0 taken 151671 times.
✓ Branch 1 taken 116593 times.
✓ Branch 2 taken 144665 times.
✓ Branch 3 taken 7006 times.
268264 if (!cd->factors[i] || (fl == 1))
453 break;
454
455
2/2
✓ Branch 0 taken 21082 times.
✓ Branch 1 taken 123583 times.
144665 if (cd->factors[i] == TX_FACTOR_ANY) {
456 21082 factors_mod++;
457 21082 factors_product *= fl;
458
2/2
✓ Branch 0 taken 82385 times.
✓ Branch 1 taken 41198 times.
123583 } else if (!(fl % cd->factors[i])) {
459 82385 factors_mod++;
460
2/2
✓ Branch 0 taken 81598 times.
✓ Branch 1 taken 787 times.
82385 if (cd->factors[i] == 2) {
461 81598 int b = ff_ctz(fl);
462 81598 fl >>= b;
463 81598 factors_product <<= b;
464 } else {
465 do {
466 819 fl /= cd->factors[i];
467 819 factors_product *= cd->factors[i];
468
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 787 times.
819 } while (!(fl % cd->factors[i]));
469 }
470 }
471 }
472
473 /* Disqualify if factor requirements are not satisfied or if trivial */
474
4/4
✓ Branch 0 taken 96295 times.
✓ Branch 1 taken 27304 times.
✓ Branch 2 taken 93984 times.
✓ Branch 3 taken 2311 times.
123599 if ((factors_mod < cd->nb_factors) || (len == factors_product))
475 121288 continue;
476
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2311 times.
2311 if (av_gcd(factors_product, fl) != 1)
478 continue;
479
480 /* Check if length is supported and factorization was successful */
481
2/2
✓ Branch 0 taken 1109 times.
✓ Branch 1 taken 1202 times.
2311 if ((factors_product < cd->min_len) ||
482
3/4
✓ Branch 0 taken 1109 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 566 times.
✓ Branch 3 taken 543 times.
1109 (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
483 1768 continue;
484
485 543 prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
486
487 /* Check for duplicates */
488
2/2
✓ Branch 0 taken 910 times.
✓ Branch 1 taken 324 times.
1234 for (int i = 0; i < nb_decomp; i++) {
489
2/2
✓ Branch 0 taken 219 times.
✓ Branch 1 taken 691 times.
910 if (factors_product == ld[i].len) {
490 /* Update priority if new one is higher */
491
2/2
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 28 times.
219 if (prio > ld[i].prio)
492 191 ld[i].prio = prio;
493 219 skip = 1;
494 219 break;
495 }
496 }
497
498 /* Add decomposition if unique */
499
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 219 times.
543 if (!skip) {
500 324 ld[nb_decomp].cd = cd;
501 324 ld[nb_decomp].len = factors_product;
502 324 ld[nb_decomp].len2 = fl;
503 324 ld[nb_decomp].prio = prio;
504 324 nb_decomp++;
505 }
506 }
507 }
508
509
2/2
✓ Branch 0 taken 3420 times.
✓ Branch 1 taken 91 times.
3511 if (!nb_decomp)
510 3420 return AVERROR(EINVAL);
511
512 91 sort:
513
27/44
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 83 times.
✓ Branch 3 taken 25 times.
✓ Branch 4 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 25 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 50 times.
✓ Branch 12 taken 75 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 75 times.
✓ Branch 16 taken 75 times.
✓ Branch 17 taken 18 times.
✓ Branch 19 taken 18 times.
✓ Branch 20 taken 57 times.
✓ Branch 21 taken 57 times.
✓ Branch 22 taken 75 times.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 75 times.
✓ Branch 28 taken 75 times.
✓ Branch 29 taken 75 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 75 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 18 times.
✓ Branch 44 taken 57 times.
✓ Branch 46 taken 8 times.
✓ Branch 47 taken 75 times.
✓ Branch 48 taken 158 times.
✓ Branch 49 taken 83 times.
✓ Branch 50 taken 166 times.
✓ Branch 51 taken 91 times.
557 AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
514
515
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 91 times.
415 for (int i = 0; i < nb_decomp; i++) {
516
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 324 times.
324 if (ld[i].cd->nb_factors > 1)
517 dst[i] = ld[i].len2;
518 else
519 324 dst[i] = ld[i].len;
520 }
521
522 91 return nb_decomp;
523 }
524
525 24 int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
526 {
527 24 s->map = av_malloc(s->len*sizeof(*s->map));
528
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (!s->map)
529 return AVERROR(ENOMEM);
530
531 24 s->map[0] = 0; /* DC is always at the start */
532
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (s->inv) /* Reversing the ACs flips the transform direction */
533 for (int i = 1; i < s->len; i++)
534 s->map[i] = s->len - i;
535 else
536
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
168 for (int i = 1; i < s->len; i++)
537 144 s->map[i] = i;
538
539 24 s->map_dir = FF_TX_MAP_GATHER;
540
541 24 return 0;
542 }
543
544 #if !CONFIG_SMALL
545 40822 static void print_flags(AVBPrint *bp, uint64_t f)
546 {
547 40822 int prev = 0;
548 40822 const char *sep = ", ";
549 40822 av_bprintf(bp, "flags: [");
550
3/4
✓ Branch 0 taken 12664 times.
✓ Branch 1 taken 28158 times.
✓ Branch 2 taken 12664 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_ALIGNED) && ++prev)
551 12664 av_bprintf(bp, "aligned");
552
3/4
✓ Branch 0 taken 28184 times.
✓ Branch 1 taken 12638 times.
✓ Branch 2 taken 28184 times.
✗ Branch 3 not taken.
40822 if ((f & AV_TX_UNALIGNED) && ++prev)
553
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 28158 times.
28184 av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
554
3/4
✓ Branch 0 taken 19314 times.
✓ Branch 1 taken 21508 times.
✓ Branch 2 taken 19314 times.
✗ Branch 3 not taken.
40822 if ((f & AV_TX_INPLACE) && ++prev)
555
1/2
✓ Branch 0 taken 19314 times.
✗ Branch 1 not taken.
19314 av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
556
3/4
✓ Branch 0 taken 36343 times.
✓ Branch 1 taken 4479 times.
✓ Branch 2 taken 36343 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
557
1/2
✓ Branch 0 taken 36343 times.
✗ Branch 1 not taken.
36343 av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
558
3/4
✓ Branch 0 taken 1105 times.
✓ Branch 1 taken 39717 times.
✓ Branch 2 taken 1105 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_FORWARD_ONLY) && ++prev)
559
1/2
✓ Branch 0 taken 1105 times.
✗ Branch 1 not taken.
1105 av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
560
3/4
✓ Branch 0 taken 14740 times.
✓ Branch 1 taken 26082 times.
✓ Branch 2 taken 14740 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_INVERSE_ONLY) && ++prev)
561
1/2
✓ Branch 0 taken 14740 times.
✗ Branch 1 not taken.
14740 av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
562
3/4
✓ Branch 0 taken 17873 times.
✓ Branch 1 taken 22949 times.
✓ Branch 2 taken 17873 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_PRESHUFFLE) && ++prev)
563
1/2
✓ Branch 0 taken 17873 times.
✗ Branch 1 not taken.
17873 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
564
3/4
✓ Branch 0 taken 207 times.
✓ Branch 1 taken 40615 times.
✓ Branch 2 taken 207 times.
✗ Branch 3 not taken.
40822 if ((f & AV_TX_FULL_IMDCT) && ++prev)
565
1/2
✓ Branch 0 taken 207 times.
✗ Branch 1 not taken.
207 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
566
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 40798 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
40822 if ((f & AV_TX_REAL_TO_REAL) && ++prev)
567
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
568
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 40798 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
40822 if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
569
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
570
3/4
✓ Branch 0 taken 2906 times.
✓ Branch 1 taken 37916 times.
✓ Branch 2 taken 2906 times.
✗ Branch 3 not taken.
40822 if ((f & FF_TX_ASM_CALL) && ++prev)
571
1/2
✓ Branch 0 taken 2906 times.
✗ Branch 1 not taken.
2906 av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
572 40822 av_bprintf(bp, "]");
573 40822 }
574
575 40822 static void print_type(AVBPrint *bp, enum AVTXType type)
576 {
577
2/2
✓ Branch 0 taken 40796 times.
✓ Branch 1 taken 26 times.
81618 av_bprintf(bp, "%s",
578 type == TX_TYPE_ANY ? "any" :
579
2/2
✓ Branch 0 taken 22825 times.
✓ Branch 1 taken 17971 times.
63621 type == AV_TX_FLOAT_FFT ? "fft_float" :
580
2/2
✓ Branch 0 taken 4201 times.
✓ Branch 1 taken 18624 times.
27026 type == AV_TX_FLOAT_MDCT ? "mdct_float" :
581
2/2
✓ Branch 0 taken 4030 times.
✓ Branch 1 taken 171 times.
8231 type == AV_TX_FLOAT_RDFT ? "rdft_float" :
582
2/2
✓ Branch 0 taken 4006 times.
✓ Branch 1 taken 24 times.
8036 type == AV_TX_FLOAT_DCT_I ? "dctI_float" :
583
2/2
✓ Branch 0 taken 3982 times.
✓ Branch 1 taken 24 times.
7988 type == AV_TX_FLOAT_DST_I ? "dstI_float" :
584
2/2
✓ Branch 0 taken 2188 times.
✓ Branch 1 taken 1794 times.
6170 type == AV_TX_DOUBLE_FFT ? "fft_double" :
585
1/2
✓ Branch 0 taken 2188 times.
✗ Branch 1 not taken.
4376 type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
586
1/2
✓ Branch 0 taken 2188 times.
✗ Branch 1 not taken.
4376 type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
587
1/2
✓ Branch 0 taken 2188 times.
✗ Branch 1 not taken.
4376 type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
588
1/2
✓ Branch 0 taken 2188 times.
✗ Branch 1 not taken.
4376 type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
589
2/2
✓ Branch 0 taken 1215 times.
✓ Branch 1 taken 973 times.
3403 type == AV_TX_INT32_FFT ? "fft_int32" :
590
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1203 times.
1227 type == AV_TX_INT32_MDCT ? "mdct_int32" :
591
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_RDFT ? "rdft_int32" :
592
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_DCT_I ? "dctI_int32" :
593
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 type == AV_TX_INT32_DST_I ? "dstI_int32" :
594 "unknown");
595 40822 }
596
597 31826 static void print_cd_info(const FFTXCodelet *cd, int prio, int len, int print_prio,
598 int log_level)
599 {
600 AVBPrint bp;
601 31826 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
602
603 31826 av_bprintf(&bp, "%s - type: ", cd->name);
604
605 31826 print_type(&bp, cd->type);
606
607 31826 av_bprintf(&bp, ", len: ");
608
2/2
✓ Branch 0 taken 22875 times.
✓ Branch 1 taken 8951 times.
31826 if (!len) {
609
2/2
✓ Branch 0 taken 18442 times.
✓ Branch 1 taken 4433 times.
22875 if (cd->min_len != cd->max_len)
610 18442 av_bprintf(&bp, "[%i, ", cd->min_len);
611
612
2/2
✓ Branch 0 taken 16939 times.
✓ Branch 1 taken 5936 times.
22875 if (cd->max_len == TX_LEN_UNLIMITED)
613 16939 av_bprintf(&bp, "∞");
614 else
615 5936 av_bprintf(&bp, "%i", cd->max_len);
616 } else {
617 8951 av_bprintf(&bp, "%i", len);
618 }
619
620
2/2
✓ Branch 0 taken 20209 times.
✓ Branch 1 taken 11617 times.
31826 if (cd->factors[1]) {
621
3/4
✓ Branch 0 taken 15844 times.
✓ Branch 1 taken 4365 times.
✓ Branch 2 taken 15844 times.
✗ Branch 3 not taken.
20209 av_bprintf(&bp, "%s, factors", !len && cd->min_len != cd->max_len ? "]" : "");
622
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 20201 times.
20209 if (!cd->nb_factors)
623 8 av_bprintf(&bp, ": [");
624 else
625 20201 av_bprintf(&bp, "[%i]: [", cd->nb_factors);
626
627
1/2
✓ Branch 0 taken 71985 times.
✗ Branch 1 not taken.
71985 for (int i = 0; i < TX_MAX_FACTORS; i++) {
628
4/4
✓ Branch 0 taken 51776 times.
✓ Branch 1 taken 20209 times.
✓ Branch 2 taken 31567 times.
✓ Branch 3 taken 20209 times.
71985 if (i && cd->factors[i])
629 31567 av_bprintf(&bp, ", ");
630
2/2
✓ Branch 0 taken 19783 times.
✓ Branch 1 taken 52202 times.
71985 if (cd->factors[i] == TX_FACTOR_ANY)
631 19783 av_bprintf(&bp, "any");
632
2/2
✓ Branch 0 taken 31993 times.
✓ Branch 1 taken 20209 times.
52202 else if (cd->factors[i])
633 31993 av_bprintf(&bp, "%i", cd->factors[i]);
634 else
635 20209 break;
636 }
637
638 20209 av_bprintf(&bp, "], ");
639 } else {
640 11617 av_bprintf(&bp, "%s, factor: %i, ",
641
4/4
✓ Branch 0 taken 7031 times.
✓ Branch 1 taken 4586 times.
✓ Branch 2 taken 2598 times.
✓ Branch 3 taken 4433 times.
11617 !len && cd->min_len != cd->max_len ? "]" : "", cd->factors[0]);
642 }
643 31826 print_flags(&bp, cd->flags);
644
645
2/2
✓ Branch 0 taken 22875 times.
✓ Branch 1 taken 8951 times.
31826 if (print_prio)
646 22875 av_bprintf(&bp, ", prio: %i", prio);
647
648 31826 av_log(NULL, log_level, "%s\n", bp.str);
649 31826 }
650
651 8951 static void print_tx_structure(AVTXContext *s, int depth)
652 {
653 8951 const FFTXCodelet *cd = s->cd_self;
654
655
2/2
✓ Branch 0 taken 14038 times.
✓ Branch 1 taken 8951 times.
22989 for (int i = 0; i <= depth; i++)
656 14038 av_log(NULL, AV_LOG_DEBUG, " ");
657
658 8951 print_cd_info(cd, cd->prio, s->len, 0, AV_LOG_DEBUG);
659
660
2/2
✓ Branch 0 taken 4679 times.
✓ Branch 1 taken 8951 times.
13630 for (int i = 0; i < s->nb_sub; i++)
661 4679 print_tx_structure(&s->sub[i], depth + 1);
662 8951 }
663 #endif /* CONFIG_SMALL */
664
665 typedef struct TXCodeletMatch {
666 const FFTXCodelet *cd;
667 int prio;
668 } TXCodeletMatch;
669
670 22341 static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
671 {
672 22341 return FFDIFFSIGN(b->prio, a->prio);
673 }
674
675 /* We want all factors to completely cover the length */
676 39566 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
677 {
678 39566 int matches = 0, any_flag = 0;
679
680
1/2
✓ Branch 0 taken 122250 times.
✗ Branch 1 not taken.
122250 for (int i = 0; i < TX_MAX_FACTORS; i++) {
681 122250 int factor = cd->factors[i];
682
683
2/2
✓ Branch 0 taken 33266 times.
✓ Branch 1 taken 88984 times.
122250 if (factor == TX_FACTOR_ANY) {
684 33266 any_flag = 1;
685 33266 matches++;
686 33266 continue;
687
4/4
✓ Branch 0 taken 73569 times.
✓ Branch 1 taken 15415 times.
✓ Branch 2 taken 49418 times.
✓ Branch 3 taken 24151 times.
88984 } else if (len <= 1 || !factor) {
688 break;
689
2/2
✓ Branch 0 taken 18215 times.
✓ Branch 1 taken 31203 times.
49418 } else if (factor == 2) { /* Fast path */
690 18215 int bits_2 = ff_ctz(len);
691
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 18167 times.
18215 if (!bits_2)
692 48 continue; /* Factor not supported */
693
694 18167 len >>= bits_2;
695 18167 matches++;
696 } else {
697 31203 int res = len % factor;
698
2/2
✓ Branch 0 taken 27070 times.
✓ Branch 1 taken 4133 times.
31203 if (res)
699 27070 continue; /* Factor not supported */
700
701
2/2
✓ Branch 0 taken 4297 times.
✓ Branch 1 taken 4133 times.
8430 while (!res) {
702 4297 len /= factor;
703 4297 res = len % factor;
704 }
705 4133 matches++;
706 }
707 }
708
709
6/6
✓ Branch 0 taken 23331 times.
✓ Branch 1 taken 16235 times.
✓ Branch 2 taken 6012 times.
✓ Branch 3 taken 17319 times.
✓ Branch 4 taken 5556 times.
✓ Branch 5 taken 456 times.
39566 return (cd->nb_factors <= matches) && (any_flag || len == 1);
710 }
711
712 8996 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
713 uint64_t flags, FFTXCodeletOptions *opts,
714 int len, int inv, const void *scale)
715 {
716 8996 int ret = 0;
717 8996 AVTXContext *sub = NULL;
718 8996 TXCodeletMatch *cd_tmp, *cd_matches = NULL;
719 8996 unsigned int cd_matches_size = 0;
720 8996 int codelet_list_idx = codelet_list_num;
721 8996 int nb_cd_matches = 0;
722 #if !CONFIG_SMALL
723 AVBPrint bp;
724 #endif
725
726 /* We still accept functions marked with SLOW, even if the CPU is
727 * marked with the same flag, but we give them lower priority. */
728 8996 const int cpu_flags = av_get_cpu_flags();
729
730 /* Flags the transform wants */
731 8996 uint64_t req_flags = flags;
732
733 /* Flags the codelet may require to be present */
734 8996 uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
735 AV_TX_REAL_TO_REAL |
736 AV_TX_REAL_TO_IMAGINARY |
737 FF_TX_PRESHUFFLE |
738 FF_TX_ASM_CALL;
739
740 /* Unaligned codelets are compatible with the aligned flag */
741
1/2
✓ Branch 0 taken 8996 times.
✗ Branch 1 not taken.
8996 if (req_flags & FF_TX_ALIGNED)
742 8996 req_flags |= AV_TX_UNALIGNED;
743
744 /* If either flag is set, both are okay, so don't check for an exact match */
745
3/4
✓ Branch 0 taken 4479 times.
✓ Branch 1 taken 4517 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4479 times.
8996 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
746 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
747
2/4
✓ Branch 0 taken 8996 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8996 times.
✗ Branch 3 not taken.
8996 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
748 8996 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
749
750 /* Loop through all codelets in all codelet lists to find matches
751 * to the requirements */
752
2/2
✓ Branch 0 taken 44980 times.
✓ Branch 1 taken 8996 times.
53976 while (codelet_list_idx--) {
753 44980 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
754 44980 const FFTXCodelet *cd = NULL;
755
756
2/2
✓ Branch 0 taken 2051088 times.
✓ Branch 1 taken 44980 times.
2096068 while ((cd = *list++)) {
757 /* Check if the type matches */
758
4/4
✓ Branch 0 taken 2042092 times.
✓ Branch 1 taken 8996 times.
✓ Branch 2 taken 1621196 times.
✓ Branch 3 taken 420896 times.
2051088 if (cd->type != TX_TYPE_ANY && type != cd->type)
759 1621196 continue;
760
761 /* Check direction for non-orthogonal codelets */
762
4/4
✓ Branch 0 taken 52278 times.
✓ Branch 1 taken 377614 times.
✓ Branch 2 taken 7548 times.
✓ Branch 3 taken 44730 times.
429892 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
763
4/4
✓ Branch 0 taken 44390 times.
✓ Branch 1 taken 340772 times.
✓ Branch 2 taken 39879 times.
✓ Branch 3 taken 4511 times.
385162 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
764
3/4
✓ Branch 0 taken 7548 times.
✓ Branch 1 taken 373103 times.
✓ Branch 2 taken 7548 times.
✗ Branch 3 not taken.
380651 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
765
3/4
✓ Branch 0 taken 7548 times.
✓ Branch 1 taken 373103 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7548 times.
380651 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
766 49241 continue;
767
768 /* Check if the requested flags match from both sides */
769
2/2
✓ Branch 0 taken 258227 times.
✓ Branch 1 taken 122424 times.
380651 if (((req_flags & cd->flags) != (req_flags)) ||
770
2/2
✓ Branch 0 taken 60396 times.
✓ Branch 1 taken 197831 times.
258227 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
771 182820 continue;
772
773 /* Check if length is supported */
774
6/6
✓ Branch 0 taken 122521 times.
✓ Branch 1 taken 75310 times.
✓ Branch 2 taken 84951 times.
✓ Branch 3 taken 37570 times.
✓ Branch 4 taken 70522 times.
✓ Branch 5 taken 14429 times.
197831 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
775 145832 continue;
776
777 /* Check if the CPU supports the required ISA */
778
2/2
✓ Branch 0 taken 15387 times.
✓ Branch 1 taken 36612 times.
51999 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
779
2/2
✓ Branch 0 taken 12433 times.
✓ Branch 1 taken 2954 times.
15387 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
780 12433 continue;
781
782 /* Check for factors */
783
2/2
✓ Branch 1 taken 16691 times.
✓ Branch 2 taken 22875 times.
39566 if (!check_cd_factors(cd, len))
784 16691 continue;
785
786 /* Realloc array and append */
787 22875 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
788 22875 sizeof(*cd_tmp) * (nb_cd_matches + 1));
789
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 22875 times.
22875 if (!cd_tmp) {
790 av_free(cd_matches);
791 return AVERROR(ENOMEM);
792 }
793
794 22875 cd_matches = cd_tmp;
795 22875 cd_matches[nb_cd_matches].cd = cd;
796 22875 cd_matches[nb_cd_matches].prio = get_codelet_prio(cd, cpu_flags, len);
797 22875 nb_cd_matches++;
798 }
799 }
800
801 #if !CONFIG_SMALL
802 /* Print debugging info */
803 8996 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
804
2/2
✓ Branch 0 taken 7536 times.
✓ Branch 1 taken 1460 times.
8996 av_bprintf(&bp, "For transform of length %i, %s, ", len,
805 inv ? "inverse" : "forward");
806 8996 print_type(&bp, type);
807 8996 av_bprintf(&bp, ", ");
808 8996 print_flags(&bp, flags);
809
2/2
✓ Branch 0 taken 8951 times.
✓ Branch 1 taken 45 times.
8996 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
810 nb_cd_matches ? ":" : ".");
811 #endif
812
813 /* No matches found */
814
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 8951 times.
8996 if (!nb_cd_matches)
815 45 return AVERROR(ENOSYS);
816
817 /* Sort the list */
818
44/44
✓ Branch 0 taken 3604 times.
✓ Branch 1 taken 6714 times.
✓ Branch 3 taken 1201 times.
✓ Branch 4 taken 2403 times.
✓ Branch 6 taken 43 times.
✓ Branch 7 taken 1158 times.
✓ Branch 9 taken 1011 times.
✓ Branch 10 taken 1392 times.
✓ Branch 12 taken 1448 times.
✓ Branch 13 taken 2156 times.
✓ Branch 14 taken 2020 times.
✓ Branch 15 taken 1584 times.
✓ Branch 16 taken 3085 times.
✓ Branch 17 taken 1171 times.
✓ Branch 19 taken 2224 times.
✓ Branch 20 taken 861 times.
✓ Branch 21 taken 907 times.
✓ Branch 22 taken 1504 times.
✓ Branch 24 taken 379 times.
✓ Branch 25 taken 528 times.
✓ Branch 26 taken 1504 times.
✓ Branch 27 taken 528 times.
✓ Branch 28 taken 2032 times.
✓ Branch 29 taken 1584 times.
✓ Branch 30 taken 587 times.
✓ Branch 31 taken 997 times.
✓ Branch 32 taken 347 times.
✓ Branch 33 taken 240 times.
✓ Branch 34 taken 290 times.
✓ Branch 35 taken 57 times.
✓ Branch 36 taken 823 times.
✓ Branch 37 taken 2 times.
✓ Branch 39 taken 295 times.
✓ Branch 40 taken 528 times.
✓ Branch 41 taken 2 times.
✓ Branch 42 taken 528 times.
✓ Branch 43 taken 1187 times.
✓ Branch 44 taken 395 times.
✓ Branch 46 taken 3841 times.
✓ Branch 47 taken 2873 times.
✓ Branch 48 taken 10318 times.
✓ Branch 49 taken 1797 times.
✓ Branch 50 taken 10533 times.
✓ Branch 51 taken 8951 times.
27580 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
819
820 #if !CONFIG_SMALL
821 8951 av_log(NULL, AV_LOG_TRACE, "%s\n", bp.str);
822
823
2/2
✓ Branch 0 taken 22875 times.
✓ Branch 1 taken 8951 times.
31826 for (int i = 0; i < nb_cd_matches; i++) {
824 22875 av_log(NULL, AV_LOG_TRACE, " %i: ", i + 1);
825 22875 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 0, 1, AV_LOG_TRACE);
826 }
827 #endif
828
829
2/2
✓ Branch 0 taken 8860 times.
✓ Branch 1 taken 91 times.
8951 if (!s->sub) {
830 8860 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
831
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8860 times.
8860 if (!sub) {
832 ret = AVERROR(ENOMEM);
833 goto end;
834 }
835 }
836
837 /* Attempt to initialize each */
838
1/2
✓ Branch 0 taken 12395 times.
✗ Branch 1 not taken.
12395 for (int i = 0; i < nb_cd_matches; i++) {
839 12395 const FFTXCodelet *cd = cd_matches[i].cd;
840 12395 AVTXContext *sctx = &s->sub[s->nb_sub];
841
842 12395 sctx->len = len;
843 12395 sctx->inv = inv;
844 12395 sctx->type = type;
845 12395 sctx->flags = cd->flags | flags;
846 12395 sctx->cd_self = cd;
847
848 12395 s->fn[s->nb_sub] = cd->function;
849 12395 s->cd[s->nb_sub] = cd;
850
851 12395 ret = 0;
852
2/2
✓ Branch 0 taken 12378 times.
✓ Branch 1 taken 17 times.
12395 if (cd->init)
853 12378 ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
854
855
2/2
✓ Branch 0 taken 8951 times.
✓ Branch 1 taken 3444 times.
12395 if (ret >= 0) {
856
3/4
✓ Branch 0 taken 4533 times.
✓ Branch 1 taken 4418 times.
✓ Branch 2 taken 4533 times.
✗ Branch 3 not taken.
8951 if (opts && opts->map_dir != FF_TX_MAP_NONE &&
857
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 4512 times.
4533 sctx->map_dir == FF_TX_MAP_NONE) {
858 /* If a specific map direction was requested, and it doesn't
859 * exist, create one.*/
860 21 sctx->map = av_malloc(len*sizeof(*sctx->map));
861
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (!sctx->map) {
862 ret = AVERROR(ENOMEM);
863 goto end;
864 }
865
866
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 21 times.
138 for (int i = 0; i < len; i++)
867 117 sctx->map[i] = i;
868
4/4
✓ Branch 0 taken 4512 times.
✓ Branch 1 taken 4418 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 4496 times.
8930 } else if (opts && (opts->map_dir != sctx->map_dir)) {
869 16 int *tmp = av_malloc(len*sizeof(*sctx->map));
870
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (!tmp) {
871 ret = AVERROR(ENOMEM);
872 goto end;
873 }
874
875 16 memcpy(tmp, sctx->map, len*sizeof(*sctx->map));
876
877
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 16 times.
128 for (int i = 0; i < len; i++)
878 112 sctx->map[tmp[i]] = i;
879
880 16 av_free(tmp);
881 }
882
883 8951 s->nb_sub++;
884 8951 goto end;
885 }
886
887 3444 s->fn[s->nb_sub] = NULL;
888 3444 s->cd[s->nb_sub] = NULL;
889
890 3444 reset_ctx(sctx, 0);
891
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3444 times.
3444 if (ret == AVERROR(ENOMEM))
892 break;
893 }
894
895 if (!s->nb_sub)
896 av_freep(&s->sub);
897
898 end:
899 8951 av_free(cd_matches);
900 8951 return ret;
901 }
902
903 4272 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
904 int inv, int len, const void *scale, uint64_t flags)
905 {
906 int ret;
907 4272 AVTXContext tmp = { 0 };
908 4272 const double default_scale_d = 1.0;
909 4272 const float default_scale_f = 1.0f;
910
911
4/8
✓ Branch 0 taken 4272 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4272 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 4272 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 4272 times.
4272 if (!len || type >= AV_TX_NB || !ctx || !tx)
912 return AVERROR(EINVAL);
913
914
1/2
✓ Branch 0 taken 4272 times.
✗ Branch 1 not taken.
4272 if (!(flags & AV_TX_UNALIGNED))
915 4272 flags |= FF_TX_ALIGNED;
916
1/2
✓ Branch 0 taken 4272 times.
✗ Branch 1 not taken.
4272 if (!(flags & AV_TX_INPLACE))
917 4272 flags |= FF_TX_OUT_OF_PLACE;
918
919
5/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4260 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 12 times.
✗ Branch 7 not taken.
4272 if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == AV_TX_DOUBLE_DCT) ||
920
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 (type == AV_TX_DOUBLE_DCT_I) || (type == AV_TX_DOUBLE_DST_I) ||
921 (type == AV_TX_DOUBLE_RDFT)))
922 scale = &default_scale_d;
923
3/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4260 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
4272 else if (!scale && !TYPE_IS(FFT, type))
924 scale = &default_scale_f;
925
926 4272 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
927
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4272 times.
4272 if (ret < 0)
928 return ret;
929
930 4272 *ctx = &tmp.sub[0];
931 4272 *tx = tmp.fn[0];
932
933 #if !CONFIG_SMALL
934 4272 av_log(NULL, AV_LOG_DEBUG, "Transform tree:\n");
935 4272 print_tx_structure(*ctx, 0);
936 #endif
937
938 4272 return ret;
939 }
940