FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavutil/tx.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 431 496 86.9%
Functions: 24 25 96.0%
Branches: 465 612 76.0%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "avassert.h"
20 #include "intmath.h"
21 #include "cpu.h"
22 #include "mem.h"
23 #include "qsort.h"
24 #include "bprint.h"
25
26 #include "tx_priv.h"
27
28 #define TYPE_IS(type, x) \
29 (((x) == AV_TX_FLOAT_ ## type) || \
30 ((x) == AV_TX_DOUBLE_ ## type) || \
31 ((x) == AV_TX_INT32_ ## type))
32
33 /* Calculates the modular multiplicative inverse */
34 4146 static av_always_inline int mulinv(int n, int m)
35 {
36 4146 n = n % m;
37
1/2
✓ Branch 0 taken 42855 times.
✗ Branch 1 not taken.
42855 for (int x = 1; x < m; x++)
38
2/2
✓ Branch 0 taken 4146 times.
✓ Branch 1 taken 38709 times.
42855 if (((n * x) % m) == 1)
39 4146 return x;
40 av_assert0(0); /* Never reached */
41 return 0;
42 }
43
44 75 int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts,
45 int d1, int d2)
46 {
47 75 const int sl = d1*d2;
48
49 75 s->map = av_malloc(s->len*sizeof(*s->map));
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (!s->map)
51 return AVERROR(ENOMEM);
52
53
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 75 times.
150 for (int k = 0; k < s->len; k += sl) {
54
3/6
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 75 times.
75 if (s->inv || (opts && opts->map_dir == FF_TX_MAP_SCATTER)) {
55 for (int m = 0; m < d2; m++)
56 for (int n = 0; n < d1; n++)
57 s->map[k + ((m*d1 + n*d2) % (sl))] = m*d1 + n;
58 } else {
59
2/2
✓ Branch 0 taken 375 times.
✓ Branch 1 taken 75 times.
450 for (int m = 0; m < d2; m++)
60
2/2
✓ Branch 0 taken 1125 times.
✓ Branch 1 taken 375 times.
1500 for (int n = 0; n < d1; n++)
61 1125 s->map[k + m*d1 + n] = (m*d1 + n*d2) % (sl);
62 }
63
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (s->inv)
65 for (int w = 1; w <= ((sl) >> 1); w++)
66 FFSWAP(int, s->map[k + w], s->map[k + sl - w]);
67 }
68
69
1/2
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
75 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
70
71 75 return 0;
72 }
73
74 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
75 2073 int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts,
76 int inv, int n, int m)
77 {
78 int *in_map, *out_map;
79 2073 const int len = n*m; /* Will not be equal to s->len for MDCTs */
80 int m_inv, n_inv;
81
82 /* Make sure the numbers are coprime */
83
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2073 times.
2073 if (av_gcd(n, m) != 1)
84 return AVERROR(EINVAL);
85
86 2073 m_inv = mulinv(m, n);
87 2073 n_inv = mulinv(n, m);
88
89
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2073 times.
2073 if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
90 return AVERROR(ENOMEM);
91
92 2073 in_map = s->map;
93 2073 out_map = s->map + len;
94
95 /* Ruritanian map for input, CRT map for output, can be swapped */
96
3/4
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 1749 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 324 times.
2073 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
97 for (int j = 0; j < m; j++) {
98 for (int i = 0; i < n; i++) {
99 in_map[(i*m + j*n) % len] = j*n + i;
100 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
101 }
102 }
103 } else {
104
2/2
✓ Branch 0 taken 72772 times.
✓ Branch 1 taken 2073 times.
74845 for (int j = 0; j < m; j++) {
105
2/2
✓ Branch 0 taken 561516 times.
✓ Branch 1 taken 72772 times.
634288 for (int i = 0; i < n; i++) {
106 561516 in_map[j*n + i] = (i*m + j*n) % len;
107 561516 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
108 }
109 }
110 }
111
112
2/2
✓ Branch 0 taken 1979 times.
✓ Branch 1 taken 94 times.
2073 if (inv) {
113
2/2
✓ Branch 0 taken 67460 times.
✓ Branch 1 taken 1979 times.
69439 for (int i = 0; i < m; i++) {
114 67460 int *in = &in_map[i*n + 1]; /* Skip the DC */
115
2/2
✓ Branch 0 taken 207516 times.
✓ Branch 1 taken 67460 times.
274976 for (int j = 0; j < ((n - 1) >> 1); j++)
116 207516 FFSWAP(int, in[j], in[n - j - 2]);
117 }
118 }
119
120
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 1749 times.
2073 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
121
122 2073 return 0;
123 }
124
125 10590362 static inline int split_radix_permutation(int i, int len, int inv)
126 {
127 10590362 len >>= 1;
128
2/2
✓ Branch 0 taken 1362714 times.
✓ Branch 1 taken 9227648 times.
10590362 if (len <= 1)
129 1362714 return i & 1;
130
2/2
✓ Branch 0 taken 4613824 times.
✓ Branch 1 taken 4613824 times.
9227648 if (!(i & len))
131 4613824 return split_radix_permutation(i, len, inv) * 2;
132 4613824 len >>= 1;
133 4613824 return split_radix_permutation(i, len, inv) * 4 + 1 - 2*(!(i & len) ^ inv);
134 }
135
136 4287 int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
137 {
138 4287 int len = s->len;
139
140
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4287 times.
4287 if (!(s->map = av_malloc(len*sizeof(*s->map))))
141 return AVERROR(ENOMEM);
142
143
3/4
✓ Branch 0 taken 4287 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1904 times.
✓ Branch 3 taken 2383 times.
4287 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
144
2/2
✓ Branch 0 taken 200788 times.
✓ Branch 1 taken 1904 times.
202692 for (int i = 0; i < s->len; i++)
145 200788 s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
146 } else {
147
2/2
✓ Branch 0 taken 977738 times.
✓ Branch 1 taken 2383 times.
980121 for (int i = 0; i < s->len; i++)
148 977738 s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
149 }
150
151
1/2
✓ Branch 0 taken 4287 times.
✗ Branch 1 not taken.
4287 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
152
153 4287 return 0;
154 }
155
156 int ff_tx_gen_inplace_map(AVTXContext *s, int len)
157 {
158 int *src_map, out_map_idx = 0;
159
160 if (!s->sub || !s->sub->map)
161 return AVERROR(EINVAL);
162
163 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
164 return AVERROR(ENOMEM);
165
166 src_map = s->sub->map;
167
168 /* The first coefficient is always already in-place */
169 for (int src = 1; src < s->len; src++) {
170 int dst = src_map[src];
171 int found = 0;
172
173 if (dst <= src)
174 continue;
175
176 /* This just checks if a closed loop has been encountered before,
177 * and if so, skips it, since to fully permute a loop we must only
178 * enter it once. */
179 do {
180 for (int j = 0; j < out_map_idx; j++) {
181 if (dst == s->map[j]) {
182 found = 1;
183 break;
184 }
185 }
186 dst = src_map[dst];
187 } while (dst != src && !found);
188
189 if (!found)
190 s->map[out_map_idx++] = src;
191 }
192
193 s->map[out_map_idx++] = 0;
194
195 return 0;
196 }
197
198 45539 static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
199 int is_dual, int dual_high, int len,
200 int basis, int dual_stride, int inv_lookup)
201 {
202 45539 len >>= 1;
203
204
2/2
✓ Branch 0 taken 30670 times.
✓ Branch 1 taken 14869 times.
45539 if (len <= basis) {
205 int k1, k2, stride, even_idx, odd_idx;
206
207
3/4
✓ Branch 0 taken 22892 times.
✓ Branch 1 taken 7778 times.
✓ Branch 2 taken 22892 times.
✗ Branch 3 not taken.
30670 is_dual = is_dual && dual_stride;
208 30670 dual_high = is_dual & dual_high;
209
2/2
✓ Branch 0 taken 22892 times.
✓ Branch 1 taken 7778 times.
30670 stride = is_dual ? FFMIN(dual_stride, len) : 0;
210
211 30670 even_idx = offset + dual_high*(stride - 2*len);
212
4/4
✓ Branch 0 taken 22892 times.
✓ Branch 1 taken 7778 times.
✓ Branch 2 taken 11446 times.
✓ Branch 3 taken 11446 times.
30670 odd_idx = even_idx + len + (is_dual && !dual_high)*len + dual_high*len;
213
214
2/2
✓ Branch 0 taken 92094 times.
✓ Branch 1 taken 30670 times.
122764 for (int i = 0; i < len; i++) {
215 92094 k1 = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
216 92094 k2 = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);
217
2/2
✓ Branch 0 taken 72266 times.
✓ Branch 1 taken 19828 times.
92094 if (inv_lookup) {
218 72266 revtab[even_idx++] = k1;
219 72266 revtab[odd_idx++] = k2;
220 } else {
221 19828 revtab[k1] = even_idx++;
222 19828 revtab[k2] = odd_idx++;
223 }
224
4/4
✓ Branch 0 taken 61204 times.
✓ Branch 1 taken 30890 times.
✓ Branch 2 taken 30602 times.
✓ Branch 3 taken 30602 times.
92094 if (stride && !((i + 1) % stride)) {
225 30602 even_idx += stride;
226 30602 odd_idx += stride;
227 }
228 }
229
230 30670 return;
231 }
232
233 14869 parity_revtab_generator(revtab, n, inv, offset,
234 0, 0, len >> 0, basis, dual_stride, inv_lookup);
235 14869 parity_revtab_generator(revtab, n, inv, offset + (len >> 0),
236 1, 0, len >> 1, basis, dual_stride, inv_lookup);
237 14869 parity_revtab_generator(revtab, n, inv, offset + (len >> 0) + (len >> 1),
238 1, 1, len >> 1, basis, dual_stride, inv_lookup);
239 }
240
241 932 int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv,
242 FFTXCodeletOptions *opts,
243 int basis, int dual_stride)
244 {
245 932 basis >>= 1;
246
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 932 times.
932 if (len < basis)
247 return AVERROR(EINVAL);
248
249
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 932 times.
932 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
250 return AVERROR(ENOMEM);
251
252
3/4
✓ Branch 0 taken 745 times.
✓ Branch 1 taken 187 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 745 times.
932 av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
253
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 932 times.
932 av_assert0(dual_stride <= basis);
254
255
2/2
✓ Branch 0 taken 909 times.
✓ Branch 1 taken 23 times.
1841 parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
256 basis, dual_stride,
257
2/2
✓ Branch 0 taken 398 times.
✓ Branch 1 taken 511 times.
909 opts ? opts->map_dir == FF_TX_MAP_GATHER : FF_TX_MAP_GATHER);
258
259
2/2
✓ Branch 0 taken 909 times.
✓ Branch 1 taken 23 times.
932 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
260
261 932 return 0;
262 }
263
264 32376 static void reset_ctx(AVTXContext *s, int free_sub)
265 {
266
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32376 times.
32376 if (!s)
267 return;
268
269
2/2
✓ Branch 0 taken 5697 times.
✓ Branch 1 taken 26679 times.
32376 if (s->sub)
270
2/2
✓ Branch 0 taken 22788 times.
✓ Branch 1 taken 5697 times.
28485 for (int i = 0; i < TX_MAX_SUB; i++)
271 22788 reset_ctx(&s->sub[i], free_sub + 1);
272
273
3/4
✓ Branch 0 taken 15376 times.
✓ Branch 1 taken 17000 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 15376 times.
32376 if (s->cd_self && s->cd_self->uninit)
274 s->cd_self->uninit(s);
275
276
2/2
✓ Branch 0 taken 28053 times.
✓ Branch 1 taken 4323 times.
32376 if (free_sub)
277 28053 av_freep(&s->sub);
278
279 32376 av_freep(&s->map);
280 32376 av_freep(&s->exp);
281 32376 av_freep(&s->tmp);
282
283 /* Nothing else needs to be reset, it gets overwritten if another
284 * ff_tx_init_subtx() call is made. */
285 32376 s->nb_sub = 0;
286 32376 s->opaque = NULL;
287 32376 memset(s->fn, 0, sizeof(*s->fn));
288 }
289
290 91 void ff_tx_clear_ctx(AVTXContext *s)
291 {
292 91 reset_ctx(s, 0);
293 91 }
294
295 5852 av_cold void av_tx_uninit(AVTXContext **ctx)
296 {
297
2/2
✓ Branch 0 taken 587 times.
✓ Branch 1 taken 5265 times.
5852 if (!(*ctx))
298 587 return;
299
300 5265 reset_ctx(*ctx, 1);
301 5265 av_freep(ctx);
302 }
303
304 13 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
305 uint64_t flags, FFTXCodeletOptions *opts,
306 int len, int inv, const void *scale)
307 {
308 /* Can only handle one sample+type to one sample+type transforms */
309
6/12
✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 13 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 13 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 13 times.
13 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
310 return AVERROR(EINVAL);
311 13 return 0;
312 }
313
314 /* Null transform when the length is 1 */
315 2 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
316 {
317 2 memcpy(_out, _in, stride);
318 2 }
319
320 static const FFTXCodelet ff_tx_null_def = {
321 .name = NULL_IF_CONFIG_SMALL("null"),
322 .function = ff_tx_null,
323 .type = TX_TYPE_ANY,
324 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
325 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
326 .factors[0] = TX_FACTOR_ANY,
327 .min_len = 1,
328 .max_len = 1,
329 .init = ff_tx_null_init,
330 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
331 .prio = FF_TX_PRIO_MAX,
332 };
333
334 static const FFTXCodelet * const ff_tx_null_list[] = {
335 &ff_tx_null_def,
336 NULL,
337 };
338
339 /* Array of all compiled codelet lists. Order is irrelevant. */
340 static const FFTXCodelet * const * const codelet_list[] = {
341 ff_tx_codelet_list_float_c,
342 ff_tx_codelet_list_double_c,
343 ff_tx_codelet_list_int32_c,
344 ff_tx_null_list,
345 #if HAVE_X86ASM
346 ff_tx_codelet_list_float_x86,
347 #endif
348 #if ARCH_AARCH64
349 ff_tx_codelet_list_float_aarch64,
350 #endif
351 };
352 static const int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
353
354 static const int cpu_slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
355 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
356 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
357
358 static const int cpu_slow_penalties[][2] = {
359 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
360 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
361 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
362 { AV_CPU_FLAG_ATOM, 1 + 128 },
363 { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
364 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
365 };
366
367 29297 static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
368 {
369 29297 int prio = cd->prio;
370 29297 int max_factor = 0;
371
372 /* If the CPU has a SLOW flag, and the instruction is also flagged
373 * as being slow for such, reduce its priority */
374
2/2
✓ Branch 0 taken 175782 times.
✓ Branch 1 taken 29297 times.
205079 for (int i = 0; i < FF_ARRAY_ELEMS(cpu_slow_penalties); i++) {
375
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 175782 times.
175782 if ((cpu_flags & cd->cpu_flags) & cpu_slow_penalties[i][0])
376 prio -= cpu_slow_penalties[i][1];
377 }
378
379 /* Prioritize aligned-only codelets */
380
4/4
✓ Branch 0 taken 3281 times.
✓ Branch 1 taken 26016 times.
✓ Branch 2 taken 3268 times.
✓ Branch 3 taken 13 times.
29297 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
381 3268 prio += 64;
382
383 /* Codelets for specific lengths are generally faster */
384
4/4
✓ Branch 0 taken 6698 times.
✓ Branch 1 taken 22599 times.
✓ Branch 2 taken 6151 times.
✓ Branch 3 taken 547 times.
29297 if ((len == cd->min_len) && (len == cd->max_len))
385 6151 prio += 64;
386
387 /* Forward-only or inverse-only transforms are generally better */
388
2/2
✓ Branch 0 taken 15567 times.
✓ Branch 1 taken 13730 times.
29297 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
389 15567 prio += 64;
390
391 /* Larger factors are generally better */
392
2/2
✓ Branch 0 taken 117188 times.
✓ Branch 1 taken 29297 times.
146485 for (int i = 0; i < TX_MAX_SUB; i++)
393 117188 max_factor = FFMAX(cd->factors[i], max_factor);
394
2/2
✓ Branch 0 taken 27518 times.
✓ Branch 1 taken 1779 times.
29297 if (max_factor)
395 27518 prio += 16*max_factor;
396
397 29297 return prio;
398 }
399
400 typedef struct FFTXLenDecomp {
401 int len;
402 int len2;
403 int prio;
404 const FFTXCodelet *cd;
405 } FFTXLenDecomp;
406
407 440 static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
408 {
409 440 return FFDIFFSIGN(b->prio, a->prio);
410 }
411
412 4197 int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
413 int len, int inv)
414 {
415 4197 int nb_decomp = 0;
416 FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
417 4197 int codelet_list_idx = codelet_list_num;
418
419 4197 const int cpu_flags = av_get_cpu_flags();
420
421 /* Loop through all codelets in all codelet lists to find matches
422 * to the requirements */
423
2/2
✓ Branch 0 taken 20985 times.
✓ Branch 1 taken 4197 times.
25182 while (codelet_list_idx--) {
424 20985 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
425 20985 const FFTXCodelet *cd = NULL;
426
427
2/2
✓ Branch 0 taken 956916 times.
✓ Branch 1 taken 20985 times.
977901 while ((cd = *list++)) {
428 956916 int fl = len;
429 956916 int skip = 0, prio;
430 956916 int factors_product = 1, factors_mod = 0;
431
432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 956916 times.
956916 if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
433 goto sort;
434
435 /* Check if the type matches */
436
4/4
✓ Branch 0 taken 952719 times.
✓ Branch 1 taken 4197 times.
✓ Branch 2 taken 649790 times.
✓ Branch 3 taken 302929 times.
956916 if (cd->type != TX_TYPE_ANY && type != cd->type)
437 649790 continue;
438
439 /* Check direction for non-orthogonal codelets */
440
4/4
✓ Branch 0 taken 20479 times.
✓ Branch 1 taken 286647 times.
✓ Branch 2 taken 3553 times.
✓ Branch 3 taken 16926 times.
307126 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
441
4/4
✓ Branch 0 taken 7382 times.
✓ Branch 1 taken 282818 times.
✓ Branch 2 taken 6388 times.
✓ Branch 3 taken 994 times.
290200 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
442
3/4
✓ Branch 0 taken 3553 times.
✓ Branch 1 taken 285653 times.
✓ Branch 2 taken 3553 times.
✗ Branch 3 not taken.
289206 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
443
3/4
✓ Branch 0 taken 3553 times.
✓ Branch 1 taken 285653 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3553 times.
289206 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
444 17920 continue;
445
446 /* Check if the CPU supports the required ISA */
447
2/2
✓ Branch 0 taken 143452 times.
✓ Branch 1 taken 145754 times.
289206 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
448
2/2
✓ Branch 0 taken 142239 times.
✓ Branch 1 taken 1213 times.
143452 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
449 142239 continue;
450
451
1/2
✓ Branch 0 taken 319116 times.
✗ Branch 1 not taken.
319116 for (int i = 0; i < TX_MAX_FACTORS; i++) {
452
4/4
✓ Branch 0 taken 180527 times.
✓ Branch 1 taken 138589 times.
✓ Branch 2 taken 172149 times.
✓ Branch 3 taken 8378 times.
319116 if (!cd->factors[i] || (fl == 1))
453 break;
454
455
2/2
✓ Branch 0 taken 25198 times.
✓ Branch 1 taken 146951 times.
172149 if (cd->factors[i] == TX_FACTOR_ANY) {
456 25198 factors_mod++;
457 25198 factors_product *= fl;
458
2/2
✓ Branch 0 taken 98163 times.
✓ Branch 1 taken 48788 times.
146951 } else if (!(fl % cd->factors[i])) {
459 98163 factors_mod++;
460
2/2
✓ Branch 0 taken 97376 times.
✓ Branch 1 taken 787 times.
98163 if (cd->factors[i] == 2) {
461 97376 int b = ff_ctz(fl);
462 97376 fl >>= b;
463 97376 factors_product <<= b;
464 } else {
465 do {
466 819 fl /= cd->factors[i];
467 819 factors_product *= cd->factors[i];
468
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 787 times.
819 } while (!(fl % cd->factors[i]));
469 }
470 }
471 }
472
473 /* Disqualify if factor requirements are not satisfied or if trivial */
474
4/4
✓ Branch 0 taken 114817 times.
✓ Branch 1 taken 32150 times.
✓ Branch 2 taken 112506 times.
✓ Branch 3 taken 2311 times.
146967 if ((factors_mod < cd->nb_factors) || (len == factors_product))
475 144656 continue;
476
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2311 times.
2311 if (av_gcd(factors_product, fl) != 1)
478 continue;
479
480 /* Check if length is supported and factorization was successful */
481
2/2
✓ Branch 0 taken 1109 times.
✓ Branch 1 taken 1202 times.
2311 if ((factors_product < cd->min_len) ||
482
3/4
✓ Branch 0 taken 1109 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 566 times.
✓ Branch 3 taken 543 times.
1109 (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
483 1768 continue;
484
485 543 prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
486
487 /* Check for duplicates */
488
2/2
✓ Branch 0 taken 910 times.
✓ Branch 1 taken 324 times.
1234 for (int i = 0; i < nb_decomp; i++) {
489
2/2
✓ Branch 0 taken 219 times.
✓ Branch 1 taken 691 times.
910 if (factors_product == ld[i].len) {
490 /* Update priority if new one is higher */
491
2/2
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 28 times.
219 if (prio > ld[i].prio)
492 191 ld[i].prio = prio;
493 219 skip = 1;
494 219 break;
495 }
496 }
497
498 /* Add decomposition if unique */
499
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 219 times.
543 if (!skip) {
500 324 ld[nb_decomp].cd = cd;
501 324 ld[nb_decomp].len = factors_product;
502 324 ld[nb_decomp].len2 = fl;
503 324 ld[nb_decomp].prio = prio;
504 324 nb_decomp++;
505 }
506 }
507 }
508
509
2/2
✓ Branch 0 taken 4106 times.
✓ Branch 1 taken 91 times.
4197 if (!nb_decomp)
510 4106 return AVERROR(EINVAL);
511
512 91 sort:
513
27/44
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 83 times.
✓ Branch 3 taken 25 times.
✓ Branch 4 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 25 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 50 times.
✓ Branch 12 taken 75 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 75 times.
✓ Branch 16 taken 75 times.
✓ Branch 17 taken 18 times.
✓ Branch 19 taken 18 times.
✓ Branch 20 taken 57 times.
✓ Branch 21 taken 57 times.
✓ Branch 22 taken 75 times.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 75 times.
✓ Branch 28 taken 75 times.
✓ Branch 29 taken 75 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 75 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 18 times.
✓ Branch 44 taken 57 times.
✓ Branch 46 taken 8 times.
✓ Branch 47 taken 75 times.
✓ Branch 48 taken 158 times.
✓ Branch 49 taken 83 times.
✓ Branch 50 taken 166 times.
✓ Branch 51 taken 91 times.
557 AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
514
515
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 91 times.
415 for (int i = 0; i < nb_decomp; i++) {
516
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 324 times.
324 if (ld[i].cd->nb_factors > 1)
517 dst[i] = ld[i].len2;
518 else
519 324 dst[i] = ld[i].len;
520 }
521
522 91 return nb_decomp;
523 }
524
525 24 int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
526 {
527 24 s->map = av_malloc(s->len*sizeof(*s->map));
528
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (!s->map)
529 return AVERROR(ENOMEM);
530
531 24 s->map[0] = 0; /* DC is always at the start */
532
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (s->inv) /* Reversing the ACs flips the transform direction */
533 for (int i = 1; i < s->len; i++)
534 s->map[i] = s->len - i;
535 else
536
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
168 for (int i = 1; i < s->len; i++)
537 144 s->map[i] = i;
538
539 24 s->map_dir = FF_TX_MAP_GATHER;
540
541 24 return 0;
542 }
543
544 #if !CONFIG_SMALL
545 51007 static void print_flags(AVBPrint *bp, uint64_t f)
546 {
547 51007 int prev = 0;
548 51007 const char *sep = ", ";
549 51007 av_bprintf(bp, "flags: [");
550
3/4
✓ Branch 0 taken 16431 times.
✓ Branch 1 taken 34576 times.
✓ Branch 2 taken 16431 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_ALIGNED) && ++prev)
551 16431 av_bprintf(bp, "aligned");
552
3/4
✓ Branch 0 taken 34602 times.
✓ Branch 1 taken 16405 times.
✓ Branch 2 taken 34602 times.
✗ Branch 3 not taken.
51007 if ((f & AV_TX_UNALIGNED) && ++prev)
553
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 34576 times.
34602 av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
554
3/4
✓ Branch 0 taken 23976 times.
✓ Branch 1 taken 27031 times.
✓ Branch 2 taken 23976 times.
✗ Branch 3 not taken.
51007 if ((f & AV_TX_INPLACE) && ++prev)
555
1/2
✓ Branch 0 taken 23976 times.
✗ Branch 1 not taken.
23976 av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
556
3/4
✓ Branch 0 taken 45319 times.
✓ Branch 1 taken 5688 times.
✓ Branch 2 taken 45319 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
557
1/2
✓ Branch 0 taken 45319 times.
✗ Branch 1 not taken.
45319 av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
558
3/4
✓ Branch 0 taken 1152 times.
✓ Branch 1 taken 49855 times.
✓ Branch 2 taken 1152 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_FORWARD_ONLY) && ++prev)
559
1/2
✓ Branch 0 taken 1152 times.
✗ Branch 1 not taken.
1152 av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
560
3/4
✓ Branch 0 taken 19219 times.
✓ Branch 1 taken 31788 times.
✓ Branch 2 taken 19219 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_INVERSE_ONLY) && ++prev)
561
1/2
✓ Branch 0 taken 19219 times.
✗ Branch 1 not taken.
19219 av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
562
3/4
✓ Branch 0 taken 22525 times.
✓ Branch 1 taken 28482 times.
✓ Branch 2 taken 22525 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_PRESHUFFLE) && ++prev)
563
1/2
✓ Branch 0 taken 22525 times.
✗ Branch 1 not taken.
22525 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
564
3/4
✓ Branch 0 taken 207 times.
✓ Branch 1 taken 50800 times.
✓ Branch 2 taken 207 times.
✗ Branch 3 not taken.
51007 if ((f & AV_TX_FULL_IMDCT) && ++prev)
565
1/2
✓ Branch 0 taken 207 times.
✗ Branch 1 not taken.
207 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
566
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 50983 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
51007 if ((f & AV_TX_REAL_TO_REAL) && ++prev)
567
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
568
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 50983 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
51007 if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
569
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
570
3/4
✓ Branch 0 taken 4030 times.
✓ Branch 1 taken 46977 times.
✓ Branch 2 taken 4030 times.
✗ Branch 3 not taken.
51007 if ((f & FF_TX_ASM_CALL) && ++prev)
571
1/2
✓ Branch 0 taken 4030 times.
✗ Branch 1 not taken.
4030 av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
572 51007 av_bprintf(bp, "]");
573 51007 }
574
575 51007 static void print_type(AVBPrint *bp, enum AVTXType type)
576 {
577
2/2
✓ Branch 0 taken 50981 times.
✓ Branch 1 taken 26 times.
101988 av_bprintf(bp, "%s",
578 type == TX_TYPE_ANY ? "any" :
579
2/2
✓ Branch 0 taken 28528 times.
✓ Branch 1 taken 22453 times.
79509 type == AV_TX_FLOAT_FFT ? "fft_float" :
580
2/2
✓ Branch 0 taken 4621 times.
✓ Branch 1 taken 23907 times.
33149 type == AV_TX_FLOAT_MDCT ? "mdct_float" :
581
2/2
✓ Branch 0 taken 4444 times.
✓ Branch 1 taken 177 times.
9065 type == AV_TX_FLOAT_RDFT ? "rdft_float" :
582
2/2
✓ Branch 0 taken 4420 times.
✓ Branch 1 taken 24 times.
8864 type == AV_TX_FLOAT_DCT_I ? "dctI_float" :
583
2/2
✓ Branch 0 taken 4396 times.
✓ Branch 1 taken 24 times.
8816 type == AV_TX_FLOAT_DST_I ? "dstI_float" :
584
2/2
✓ Branch 0 taken 2602 times.
✓ Branch 1 taken 1794 times.
6998 type == AV_TX_DOUBLE_FFT ? "fft_double" :
585
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
586
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
587
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
588
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
589
2/2
✓ Branch 0 taken 1445 times.
✓ Branch 1 taken 1157 times.
4047 type == AV_TX_INT32_FFT ? "fft_int32" :
590
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1433 times.
1457 type == AV_TX_INT32_MDCT ? "mdct_int32" :
591
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_RDFT ? "rdft_int32" :
592
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_DCT_I ? "dctI_int32" :
593
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 type == AV_TX_INT32_DST_I ? "dstI_int32" :
594 "unknown");
595 51007 }
596
597 39807 static void print_cd_info(const FFTXCodelet *cd, int prio, int len, int print_prio,
598 int log_level)
599 {
600 AVBPrint bp;
601 39807 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
602
603 39807 av_bprintf(&bp, "%s - type: ", cd->name);
604
605 39807 print_type(&bp, cd->type);
606
607 39807 av_bprintf(&bp, ", len: ");
608
2/2
✓ Branch 0 taken 28754 times.
✓ Branch 1 taken 11053 times.
39807 if (!len) {
609
2/2
✓ Branch 0 taken 23128 times.
✓ Branch 1 taken 5626 times.
28754 if (cd->min_len != cd->max_len)
610 23128 av_bprintf(&bp, "[%i, ", cd->min_len);
611
612
2/2
✓ Branch 0 taken 21386 times.
✓ Branch 1 taken 7368 times.
28754 if (cd->max_len == TX_LEN_UNLIMITED)
613 21386 av_bprintf(&bp, "∞");
614 else
615 7368 av_bprintf(&bp, "%i", cd->max_len);
616 } else {
617 11053 av_bprintf(&bp, "%i", len);
618 }
619
620
2/2
✓ Branch 0 taken 25757 times.
✓ Branch 1 taken 14050 times.
39807 if (cd->factors[1]) {
621
3/4
✓ Branch 0 taken 20285 times.
✓ Branch 1 taken 5472 times.
✓ Branch 2 taken 20285 times.
✗ Branch 3 not taken.
25757 av_bprintf(&bp, "%s, factors", !len && cd->min_len != cd->max_len ? "]" : "");
622
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 25749 times.
25757 if (!cd->nb_factors)
623 8 av_bprintf(&bp, ": [");
624 else
625 25749 av_bprintf(&bp, "[%i]: [", cd->nb_factors);
626
627
1/2
✓ Branch 0 taken 91011 times.
✗ Branch 1 not taken.
91011 for (int i = 0; i < TX_MAX_FACTORS; i++) {
628
4/4
✓ Branch 0 taken 65254 times.
✓ Branch 1 taken 25757 times.
✓ Branch 2 taken 39497 times.
✓ Branch 3 taken 25757 times.
91011 if (i && cd->factors[i])
629 39497 av_bprintf(&bp, ", ");
630
2/2
✓ Branch 0 taken 25103 times.
✓ Branch 1 taken 65908 times.
91011 if (cd->factors[i] == TX_FACTOR_ANY)
631 25103 av_bprintf(&bp, "any");
632
2/2
✓ Branch 0 taken 40151 times.
✓ Branch 1 taken 25757 times.
65908 else if (cd->factors[i])
633 40151 av_bprintf(&bp, "%i", cd->factors[i]);
634 else
635 25757 break;
636 }
637
638 25757 av_bprintf(&bp, "], ");
639 } else {
640 14050 av_bprintf(&bp, "%s, factor: %i, ",
641
4/4
✓ Branch 0 taken 8469 times.
✓ Branch 1 taken 5581 times.
✓ Branch 2 taken 2843 times.
✓ Branch 3 taken 5626 times.
14050 !len && cd->min_len != cd->max_len ? "]" : "", cd->factors[0]);
642 }
643 39807 print_flags(&bp, cd->flags);
644
645
2/2
✓ Branch 0 taken 28754 times.
✓ Branch 1 taken 11053 times.
39807 if (print_prio)
646 28754 av_bprintf(&bp, ", prio: %i", prio);
647
648 39807 av_log(NULL, log_level, "%s\n", bp.str);
649 39807 }
650
651 11053 static void print_tx_structure(AVTXContext *s, int depth)
652 {
653 11053 const FFTXCodelet *cd = s->cd_self;
654
655
2/2
✓ Branch 0 taken 17365 times.
✓ Branch 1 taken 11053 times.
28418 for (int i = 0; i <= depth; i++)
656 17365 av_log(NULL, AV_LOG_DEBUG, " ");
657
658 11053 print_cd_info(cd, cd->prio, s->len, 0, AV_LOG_DEBUG);
659
660
2/2
✓ Branch 0 taken 5788 times.
✓ Branch 1 taken 11053 times.
16841 for (int i = 0; i < s->nb_sub; i++)
661 5788 print_tx_structure(&s->sub[i], depth + 1);
662 11053 }
663 #endif /* CONFIG_SMALL */
664
665 typedef struct TXCodeletMatch {
666 const FFTXCodelet *cd;
667 int prio;
668 } TXCodeletMatch;
669
670 28731 static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
671 {
672 28731 return FFDIFFSIGN(b->prio, a->prio);
673 }
674
675 /* We want all factors to completely cover the length */
676 49660 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
677 {
678 49660 int matches = 0, any_flag = 0;
679
680
1/2
✓ Branch 0 taken 153071 times.
✗ Branch 1 not taken.
153071 for (int i = 0; i < TX_MAX_FACTORS; i++) {
681 153071 int factor = cd->factors[i];
682
683
2/2
✓ Branch 0 taken 41281 times.
✓ Branch 1 taken 111790 times.
153071 if (factor == TX_FACTOR_ANY) {
684 41281 any_flag = 1;
685 41281 matches++;
686 41281 continue;
687
4/4
✓ Branch 0 taken 93582 times.
✓ Branch 1 taken 18208 times.
✓ Branch 2 taken 62130 times.
✓ Branch 3 taken 31452 times.
111790 } else if (len <= 1 || !factor) {
688 break;
689
2/2
✓ Branch 0 taken 23344 times.
✓ Branch 1 taken 38786 times.
62130 } else if (factor == 2) { /* Fast path */
690 23344 int bits_2 = ff_ctz(len);
691
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 23296 times.
23344 if (!bits_2)
692 48 continue; /* Factor not supported */
693
694 23296 len >>= bits_2;
695 23296 matches++;
696 } else {
697 38786 int res = len % factor;
698
2/2
✓ Branch 0 taken 33262 times.
✓ Branch 1 taken 5524 times.
38786 if (res)
699 33262 continue; /* Factor not supported */
700
701
2/2
✓ Branch 0 taken 5698 times.
✓ Branch 1 taken 5524 times.
11222 while (!res) {
702 5698 len /= factor;
703 5698 res = len % factor;
704 }
705 5524 matches++;
706 }
707 }
708
709
6/6
✓ Branch 0 taken 29615 times.
✓ Branch 1 taken 20045 times.
✓ Branch 2 taken 7961 times.
✓ Branch 3 taken 21654 times.
✓ Branch 4 taken 7100 times.
✓ Branch 5 taken 861 times.
49660 return (cd->nb_factors <= matches) && (any_flag || len == 1);
710 }
711
712 11200 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
713 uint64_t flags, FFTXCodeletOptions *opts,
714 int len, int inv, const void *scale)
715 {
716 11200 int ret = 0;
717 11200 AVTXContext *sub = NULL;
718 11200 TXCodeletMatch *cd_tmp, *cd_matches = NULL;
719 11200 unsigned int cd_matches_size = 0;
720 11200 int codelet_list_idx = codelet_list_num;
721 11200 int nb_cd_matches = 0;
722 #if !CONFIG_SMALL
723 AVBPrint bp;
724 #endif
725
726 /* We still accept functions marked with SLOW, even if the CPU is
727 * marked with the same flag, but we give them lower priority. */
728 11200 const int cpu_flags = av_get_cpu_flags();
729
730 /* Flags the transform wants */
731 11200 uint64_t req_flags = flags;
732
733 /* Flags the codelet may require to be present */
734 11200 uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
735 AV_TX_REAL_TO_REAL |
736 AV_TX_REAL_TO_IMAGINARY |
737 FF_TX_PRESHUFFLE |
738 FF_TX_ASM_CALL;
739
740 /* Unaligned codelets are compatible with the aligned flag */
741
1/2
✓ Branch 0 taken 11200 times.
✗ Branch 1 not taken.
11200 if (req_flags & FF_TX_ALIGNED)
742 11200 req_flags |= AV_TX_UNALIGNED;
743
744 /* If either flag is set, both are okay, so don't check for an exact match */
745
3/4
✓ Branch 0 taken 5688 times.
✓ Branch 1 taken 5512 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5688 times.
11200 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
746 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
747
2/4
✓ Branch 0 taken 11200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 11200 times.
✗ Branch 3 not taken.
11200 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
748 11200 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
749
750 /* Loop through all codelets in all codelet lists to find matches
751 * to the requirements */
752
2/2
✓ Branch 0 taken 56000 times.
✓ Branch 1 taken 11200 times.
67200 while (codelet_list_idx--) {
753 56000 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
754 56000 const FFTXCodelet *cd = NULL;
755
756
2/2
✓ Branch 0 taken 2553600 times.
✓ Branch 1 taken 56000 times.
2609600 while ((cd = *list++)) {
757 /* Check if the type matches */
758
4/4
✓ Branch 0 taken 2542400 times.
✓ Branch 1 taken 11200 times.
✓ Branch 2 taken 2014275 times.
✓ Branch 3 taken 528125 times.
2553600 if (cd->type != TX_TYPE_ANY && type != cd->type)
759 2014275 continue;
760
761 /* Check direction for non-orthogonal codelets */
762
4/4
✓ Branch 0 taken 65234 times.
✓ Branch 1 taken 474091 times.
✓ Branch 2 taken 7743 times.
✓ Branch 3 taken 57491 times.
539325 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
763
4/4
✓ Branch 0 taken 55595 times.
✓ Branch 1 taken 426239 times.
✓ Branch 2 taken 50914 times.
✓ Branch 3 taken 4681 times.
481834 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
764
3/4
✓ Branch 0 taken 7743 times.
✓ Branch 1 taken 469410 times.
✓ Branch 2 taken 7743 times.
✗ Branch 3 not taken.
477153 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
765
3/4
✓ Branch 0 taken 7743 times.
✓ Branch 1 taken 469410 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7743 times.
477153 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
766 62172 continue;
767
768 /* Check if the requested flags match from both sides */
769
2/2
✓ Branch 0 taken 314416 times.
✓ Branch 1 taken 162737 times.
477153 if (((req_flags & cd->flags) != (req_flags)) ||
770
2/2
✓ Branch 0 taken 70781 times.
✓ Branch 1 taken 243635 times.
314416 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
771 233518 continue;
772
773 /* Check if length is supported */
774
6/6
✓ Branch 0 taken 151437 times.
✓ Branch 1 taken 92198 times.
✓ Branch 2 taken 104666 times.
✓ Branch 3 taken 46771 times.
✓ Branch 4 taken 86803 times.
✓ Branch 5 taken 17863 times.
243635 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
775 179001 continue;
776
777 /* Check if the CPU supports the required ISA */
778
2/2
✓ Branch 0 taken 19460 times.
✓ Branch 1 taken 45174 times.
64634 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
779
2/2
✓ Branch 0 taken 14974 times.
✓ Branch 1 taken 4486 times.
19460 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
780 14974 continue;
781
782 /* Check for factors */
783
2/2
✓ Branch 1 taken 20906 times.
✓ Branch 2 taken 28754 times.
49660 if (!check_cd_factors(cd, len))
784 20906 continue;
785
786 /* Realloc array and append */
787 28754 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
788 28754 sizeof(*cd_tmp) * (nb_cd_matches + 1));
789
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 28754 times.
28754 if (!cd_tmp) {
790 av_free(cd_matches);
791 return AVERROR(ENOMEM);
792 }
793
794 28754 cd_matches = cd_tmp;
795 28754 cd_matches[nb_cd_matches].cd = cd;
796 28754 cd_matches[nb_cd_matches].prio = get_codelet_prio(cd, cpu_flags, len);
797 28754 nb_cd_matches++;
798 }
799 }
800
801 #if !CONFIG_SMALL
802 /* Print debugging info */
803 11200 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
804
2/2
✓ Branch 0 taken 9707 times.
✓ Branch 1 taken 1493 times.
11200 av_bprintf(&bp, "For transform of length %i, %s, ", len,
805 inv ? "inverse" : "forward");
806 11200 print_type(&bp, type);
807 11200 av_bprintf(&bp, ", ");
808 11200 print_flags(&bp, flags);
809
2/2
✓ Branch 0 taken 11053 times.
✓ Branch 1 taken 147 times.
11200 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
810 nb_cd_matches ? ":" : ".");
811 #endif
812
813 /* No matches found */
814
2/2
✓ Branch 0 taken 147 times.
✓ Branch 1 taken 11053 times.
11200 if (!nb_cd_matches)
815 147 return AVERROR(ENOSYS);
816
817 /* Sort the list */
818
44/44
✓ Branch 0 taken 4848 times.
✓ Branch 1 taken 7901 times.
✓ Branch 3 taken 1330 times.
✓ Branch 4 taken 3518 times.
✓ Branch 6 taken 43 times.
✓ Branch 7 taken 1287 times.
✓ Branch 9 taken 1736 times.
✓ Branch 10 taken 1782 times.
✓ Branch 12 taken 1669 times.
✓ Branch 13 taken 3179 times.
✓ Branch 14 taken 2831 times.
✓ Branch 15 taken 2017 times.
✓ Branch 16 taken 3796 times.
✓ Branch 17 taken 1494 times.
✓ Branch 19 taken 2711 times.
✓ Branch 20 taken 1085 times.
✓ Branch 21 taken 1131 times.
✓ Branch 22 taken 1935 times.
✓ Branch 24 taken 487 times.
✓ Branch 25 taken 644 times.
✓ Branch 26 taken 1935 times.
✓ Branch 27 taken 644 times.
✓ Branch 28 taken 2579 times.
✓ Branch 29 taken 2017 times.
✓ Branch 30 taken 805 times.
✓ Branch 31 taken 1212 times.
✓ Branch 32 taken 349 times.
✓ Branch 33 taken 456 times.
✓ Branch 34 taken 292 times.
✓ Branch 35 taken 57 times.
✓ Branch 36 taken 1359 times.
✓ Branch 37 taken 104 times.
✓ Branch 39 taken 715 times.
✓ Branch 40 taken 644 times.
✓ Branch 41 taken 104 times.
✓ Branch 42 taken 644 times.
✓ Branch 43 taken 1408 times.
✓ Branch 44 taken 505 times.
✓ Branch 46 taken 4755 times.
✓ Branch 47 taken 3146 times.
✓ Branch 48 taken 12749 times.
✓ Branch 49 taken 2130 times.
✓ Branch 50 taken 12966 times.
✓ Branch 51 taken 11053 times.
34441 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
819
820 #if !CONFIG_SMALL
821 11053 av_log(NULL, AV_LOG_TRACE, "%s\n", bp.str);
822
823
2/2
✓ Branch 0 taken 28754 times.
✓ Branch 1 taken 11053 times.
39807 for (int i = 0; i < nb_cd_matches; i++) {
824 28754 av_log(NULL, AV_LOG_TRACE, " %i: ", i + 1);
825 28754 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 0, 1, AV_LOG_TRACE);
826 }
827 #endif
828
829
2/2
✓ Branch 0 taken 10962 times.
✓ Branch 1 taken 91 times.
11053 if (!s->sub) {
830 10962 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
831
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10962 times.
10962 if (!sub) {
832 ret = AVERROR(ENOMEM);
833 goto end;
834 }
835 }
836
837 /* Attempt to initialize each */
838
1/2
✓ Branch 0 taken 15285 times.
✗ Branch 1 not taken.
15285 for (int i = 0; i < nb_cd_matches; i++) {
839 15285 const FFTXCodelet *cd = cd_matches[i].cd;
840 15285 AVTXContext *sctx = &s->sub[s->nb_sub];
841
842 15285 sctx->len = len;
843 15285 sctx->inv = inv;
844 15285 sctx->type = type;
845 15285 sctx->flags = cd->flags | flags;
846 15285 sctx->cd_self = cd;
847
848 15285 s->fn[s->nb_sub] = cd->function;
849 15285 s->cd[s->nb_sub] = cd;
850
851 15285 ret = 0;
852
2/2
✓ Branch 0 taken 15268 times.
✓ Branch 1 taken 17 times.
15285 if (cd->init)
853 15268 ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
854
855
2/2
✓ Branch 0 taken 11053 times.
✓ Branch 1 taken 4232 times.
15285 if (ret >= 0) {
856
3/4
✓ Branch 0 taken 5640 times.
✓ Branch 1 taken 5413 times.
✓ Branch 2 taken 5640 times.
✗ Branch 3 not taken.
11053 if (opts && opts->map_dir != FF_TX_MAP_NONE &&
857
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 5619 times.
5640 sctx->map_dir == FF_TX_MAP_NONE) {
858 /* If a specific map direction was requested, and it doesn't
859 * exist, create one.*/
860 21 sctx->map = av_malloc(len*sizeof(*sctx->map));
861
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (!sctx->map) {
862 ret = AVERROR(ENOMEM);
863 goto end;
864 }
865
866
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 21 times.
138 for (int i = 0; i < len; i++)
867 117 sctx->map[i] = i;
868
4/4
✓ Branch 0 taken 5619 times.
✓ Branch 1 taken 5413 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 5603 times.
11032 } else if (opts && (opts->map_dir != sctx->map_dir)) {
869 16 int *tmp = av_malloc(len*sizeof(*sctx->map));
870
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (!tmp) {
871 ret = AVERROR(ENOMEM);
872 goto end;
873 }
874
875 16 memcpy(tmp, sctx->map, len*sizeof(*sctx->map));
876
877
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 16 times.
128 for (int i = 0; i < len; i++)
878 112 sctx->map[tmp[i]] = i;
879
880 16 av_free(tmp);
881 }
882
883 11053 s->nb_sub++;
884 11053 goto end;
885 }
886
887 4232 s->fn[s->nb_sub] = NULL;
888 4232 s->cd[s->nb_sub] = NULL;
889
890 4232 reset_ctx(sctx, 0);
891
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4232 times.
4232 if (ret == AVERROR(ENOMEM))
892 break;
893 }
894
895 if (!s->nb_sub)
896 av_freep(&s->sub);
897
898 end:
899 11053 av_free(cd_matches);
900 11053 return ret;
901 }
902
903 5265 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
904 int inv, int len, const void *scale, uint64_t flags)
905 {
906 int ret;
907 5265 AVTXContext tmp = { 0 };
908 5265 const double default_scale_d = 1.0;
909 5265 const float default_scale_f = 1.0f;
910
911
4/8
✓ Branch 0 taken 5265 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5265 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 5265 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 5265 times.
5265 if (!len || type >= AV_TX_NB || !ctx || !tx)
912 return AVERROR(EINVAL);
913
914
1/2
✓ Branch 0 taken 5265 times.
✗ Branch 1 not taken.
5265 if (!(flags & AV_TX_UNALIGNED))
915 5265 flags |= FF_TX_ALIGNED;
916
1/2
✓ Branch 0 taken 5265 times.
✗ Branch 1 not taken.
5265 if (!(flags & AV_TX_INPLACE))
917 5265 flags |= FF_TX_OUT_OF_PLACE;
918
919
5/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5253 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 12 times.
✗ Branch 7 not taken.
5265 if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == AV_TX_DOUBLE_DCT) ||
920
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 (type == AV_TX_DOUBLE_DCT_I) || (type == AV_TX_DOUBLE_DST_I) ||
921 (type == AV_TX_DOUBLE_RDFT)))
922 scale = &default_scale_d;
923
3/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5253 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
5265 else if (!scale && !TYPE_IS(FFT, type))
924 scale = &default_scale_f;
925
926 5265 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
927
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5265 times.
5265 if (ret < 0)
928 return ret;
929
930 5265 *ctx = &tmp.sub[0];
931 5265 *tx = tmp.fn[0];
932
933 #if !CONFIG_SMALL
934 5265 av_log(NULL, AV_LOG_DEBUG, "Transform tree:\n");
935 5265 print_tx_structure(*ctx, 0);
936 #endif
937
938 5265 return ret;
939 }
940