FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavutil/tx.c
Date: 2024-07-26 21:54:09
Exec Total Coverage
Lines: 431 496 86.9%
Functions: 24 25 96.0%
Branches: 465 612 76.0%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "avassert.h"
20 #include "intmath.h"
21 #include "cpu.h"
22 #include "mem.h"
23 #include "qsort.h"
24 #include "bprint.h"
25
26 #include "tx_priv.h"
27
/* Evaluates to non-zero when x equals the float, double or int32 variant of
 * the given transform kind (e.g. TYPE_IS(MDCT, t)). */
#define TYPE_IS(type, x)                 \
    (((x) == AV_TX_FLOAT_  ## type) ||   \
     ((x) == AV_TX_DOUBLE_ ## type) ||   \
     ((x) == AV_TX_INT32_  ## type))
32
33 /* Calculates the modular multiplicative inverse */
34 3908 static av_always_inline int mulinv(int n, int m)
35 {
36 3908 n = n % m;
37
1/2
✓ Branch 0 taken 40842 times.
✗ Branch 1 not taken.
40842 for (int x = 1; x < m; x++)
38
2/2
✓ Branch 0 taken 3908 times.
✓ Branch 1 taken 36934 times.
40842 if (((n * x) % m) == 1)
39 3908 return x;
40 av_assert0(0); /* Never reached */
41 return 0;
42 }
43
44 75 int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts,
45 int d1, int d2)
46 {
47 75 const int sl = d1*d2;
48
49 75 s->map = av_malloc(s->len*sizeof(*s->map));
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (!s->map)
51 return AVERROR(ENOMEM);
52
53
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 75 times.
150 for (int k = 0; k < s->len; k += sl) {
54
3/6
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 75 times.
75 if (s->inv || (opts && opts->map_dir == FF_TX_MAP_SCATTER)) {
55 for (int m = 0; m < d2; m++)
56 for (int n = 0; n < d1; n++)
57 s->map[k + ((m*d1 + n*d2) % (sl))] = m*d1 + n;
58 } else {
59
2/2
✓ Branch 0 taken 375 times.
✓ Branch 1 taken 75 times.
450 for (int m = 0; m < d2; m++)
60
2/2
✓ Branch 0 taken 1125 times.
✓ Branch 1 taken 375 times.
1500 for (int n = 0; n < d1; n++)
61 1125 s->map[k + m*d1 + n] = (m*d1 + n*d2) % (sl);
62 }
63
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (s->inv)
65 for (int w = 1; w <= ((sl) >> 1); w++)
66 FFSWAP(int, s->map[k + w], s->map[k + sl - w]);
67 }
68
69
1/2
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
75 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
70
71 75 return 0;
72 }
73
74 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
75 1954 int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts,
76 int inv, int n, int m)
77 {
78 int *in_map, *out_map;
79 1954 const int len = n*m; /* Will not be equal to s->len for MDCTs */
80 int m_inv, n_inv;
81
82 /* Make sure the numbers are coprime */
83
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1954 times.
1954 if (av_gcd(n, m) != 1)
84 return AVERROR(EINVAL);
85
86 1954 m_inv = mulinv(m, n);
87 1954 n_inv = mulinv(n, m);
88
89
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1954 times.
1954 if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
90 return AVERROR(ENOMEM);
91
92 1954 in_map = s->map;
93 1954 out_map = s->map + len;
94
95 /* Ruritanian map for input, CRT map for output, can be swapped */
96
3/4
✓ Branch 0 taken 274 times.
✓ Branch 1 taken 1680 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 274 times.
1954 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
97 for (int j = 0; j < m; j++) {
98 for (int i = 0; i < n; i++) {
99 in_map[(i*m + j*n) % len] = j*n + i;
100 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
101 }
102 }
103 } else {
104
2/2
✓ Branch 0 taken 69656 times.
✓ Branch 1 taken 1954 times.
71610 for (int j = 0; j < m; j++) {
105
2/2
✓ Branch 0 taken 533784 times.
✓ Branch 1 taken 69656 times.
603440 for (int i = 0; i < n; i++) {
106 533784 in_map[j*n + i] = (i*m + j*n) % len;
107 533784 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
108 }
109 }
110 }
111
112
2/2
✓ Branch 0 taken 1860 times.
✓ Branch 1 taken 94 times.
1954 if (inv) {
113
2/2
✓ Branch 0 taken 64344 times.
✓ Branch 1 taken 1860 times.
66204 for (int i = 0; i < m; i++) {
114 64344 int *in = &in_map[i*n + 1]; /* Skip the DC */
115
2/2
✓ Branch 0 taken 195208 times.
✓ Branch 1 taken 64344 times.
259552 for (int j = 0; j < ((n - 1) >> 1); j++)
116 195208 FFSWAP(int, in[j], in[n - j - 2]);
117 }
118 }
119
120
2/2
✓ Branch 0 taken 274 times.
✓ Branch 1 taken 1680 times.
1954 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
121
122 1954 return 0;
123 }
124
/**
 * Recursively computes the (signed) split-radix permutation value of index i
 * for a transform of length len. Callers in this file negate the result and
 * mask it with (len - 1) to obtain a table index.
 *
 * @param i   index to permute
 * @param len current (sub)transform length
 * @param inv XORed into the sign selection of the odd branches
 */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter;

    if (half <= 1)
        return i & 1;

    /* Lower half: recurse on the half-length transform */
    if ((i & half) == 0)
        return 2 * split_radix_permutation(i, half, inv);

    /* Upper half: recurse on a quarter-length transform */
    quarter = half >> 1;
    return split_radix_permutation(i, quarter, inv) * 4 + 1 -
           2 * (!(i & quarter) ^ inv);
}
135
136 4160 int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
137 {
138 4160 int len = s->len;
139
140
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4160 times.
4160 if (!(s->map = av_malloc(len*sizeof(*s->map))))
141 return AVERROR(ENOMEM);
142
143
3/4
✓ Branch 0 taken 4160 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1842 times.
✓ Branch 3 taken 2318 times.
4160 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
144
2/2
✓ Branch 0 taken 196768 times.
✓ Branch 1 taken 1842 times.
198610 for (int i = 0; i < s->len; i++)
145 196768 s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
146 } else {
147
2/2
✓ Branch 0 taken 970954 times.
✓ Branch 1 taken 2318 times.
973272 for (int i = 0; i < s->len; i++)
148 970954 s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
149 }
150
151
1/2
✓ Branch 0 taken 4160 times.
✗ Branch 1 not taken.
4160 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
152
153 4160 return 0;
154 }
155
156 int ff_tx_gen_inplace_map(AVTXContext *s, int len)
157 {
158 int *src_map, out_map_idx = 0;
159
160 if (!s->sub || !s->sub->map)
161 return AVERROR(EINVAL);
162
163 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
164 return AVERROR(ENOMEM);
165
166 src_map = s->sub->map;
167
168 /* The first coefficient is always already in-place */
169 for (int src = 1; src < s->len; src++) {
170 int dst = src_map[src];
171 int found = 0;
172
173 if (dst <= src)
174 continue;
175
176 /* This just checks if a closed loop has been encountered before,
177 * and if so, skips it, since to fully permute a loop we must only
178 * enter it once. */
179 do {
180 for (int j = 0; j < out_map_idx; j++) {
181 if (dst == s->map[j]) {
182 found = 1;
183 break;
184 }
185 }
186 dst = src_map[dst];
187 } while (dst != src && !found);
188
189 if (!found)
190 s->map[out_map_idx++] = src;
191 }
192
193 s->map[out_map_idx++] = 0;
194
195 return 0;
196 }
197
/**
 * Recursive worker for ff_tx_gen_split_radix_parity_revtab().
 *
 * The current length is halved on entry. While the halved length is still
 * larger than basis, the range is split three ways (one half-length part and
 * two quarter-length "dual" parts, low and high) and each is processed
 * recursively. Otherwise the leaf is filled: the split-radix permutations of
 * the even and odd positions starting at offset are paired with running
 * even/odd output indices, which are interleaved in chunks of `stride` when
 * the leaf is a dual part and dual_stride is non-zero.
 *
 * @param revtab      table being filled
 * @param n           full transform length (used for permutation and mask)
 * @param inv         forwarded to split_radix_permutation()
 * @param offset      first index handled by this call
 * @param is_dual     non-zero if this part is one of the two quarter parts
 * @param dual_high   non-zero for the second of the two quarter parts
 * @param len         length of the range before halving
 * @param basis       largest halved length treated as a leaf
 * @param dual_stride interleaving stride for dual parts, 0 to disable
 * @param inv_lookup  if set, revtab[index] = permutation; otherwise
 *                    revtab[permutation] = index
 */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    int stride, even_idx, odd_idx;

    len >>= 1;

    if (len > basis) {
        const int quarter = len >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, len, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + len,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + len + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Dual handling only applies when a stride was requested */
    is_dual   = is_dual && dual_stride;
    dual_high = is_dual & dual_high;
    stride    = is_dual ? FFMIN(dual_stride, len) : 0;

    even_idx = offset + dual_high*(stride - 2*len);
    /* is_dual and dual_high are both 0 or 1 here, with dual_high implying
     * is_dual, so the odd run starts 2*len after the even one for dual
     * parts and len after it otherwise. */
    odd_idx  = even_idx + (is_dual ? 2*len : len);

    for (int i = 0; i < len; i++) {
        const int k1 = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
        const int k2 = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_idx] = k1;
            revtab[odd_idx]  = k2;
        } else {
            revtab[k1] = even_idx;
            revtab[k2] = odd_idx;
        }
        even_idx++;
        odd_idx++;

        /* Skip over the chunk owned by the other dual part */
        if (stride && !((i + 1) % stride)) {
            even_idx += stride;
            odd_idx  += stride;
        }
    }
}
240
241 806 int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv,
242 FFTXCodeletOptions *opts,
243 int basis, int dual_stride)
244 {
245 806 basis >>= 1;
246
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 806 times.
806 if (len < basis)
247 return AVERROR(EINVAL);
248
249
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 806 times.
806 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
250 return AVERROR(ENOMEM);
251
252
3/4
✓ Branch 0 taken 641 times.
✓ Branch 1 taken 165 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 641 times.
806 av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
253
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 806 times.
806 av_assert0(dual_stride <= basis);
254
255
2/2
✓ Branch 0 taken 783 times.
✓ Branch 1 taken 23 times.
1589 parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
256 basis, dual_stride,
257
2/2
✓ Branch 0 taken 340 times.
✓ Branch 1 taken 443 times.
783 opts ? opts->map_dir == FF_TX_MAP_GATHER : FF_TX_MAP_GATHER);
258
259
2/2
✓ Branch 0 taken 783 times.
✓ Branch 1 taken 23 times.
806 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
260
261 806 return 0;
262 }
263
264 30785 static void reset_ctx(AVTXContext *s, int free_sub)
265 {
266
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30785 times.
30785 if (!s)
267 return;
268
269
2/2
✓ Branch 0 taken 5394 times.
✓ Branch 1 taken 25391 times.
30785 if (s->sub)
270
2/2
✓ Branch 0 taken 21576 times.
✓ Branch 1 taken 5394 times.
26970 for (int i = 0; i < TX_MAX_SUB; i++)
271 21576 reset_ctx(&s->sub[i], free_sub + 1);
272
273
3/4
✓ Branch 0 taken 14694 times.
✓ Branch 1 taken 16091 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14694 times.
30785 if (s->cd_self && s->cd_self->uninit)
274 s->cd_self->uninit(s);
275
276
2/2
✓ Branch 0 taken 26588 times.
✓ Branch 1 taken 4197 times.
30785 if (free_sub)
277 26588 av_freep(&s->sub);
278
279 30785 av_freep(&s->map);
280 30785 av_freep(&s->exp);
281 30785 av_freep(&s->tmp);
282
283 /* Nothing else needs to be reset, it gets overwritten if another
284 * ff_tx_init_subtx() call is made. */
285 30785 s->nb_sub = 0;
286 30785 s->opaque = NULL;
287 30785 memset(s->fn, 0, sizeof(*s->fn));
288 }
289
290 91 void ff_tx_clear_ctx(AVTXContext *s)
291 {
292 91 reset_ctx(s, 0);
293 91 }
294
295 5599 av_cold void av_tx_uninit(AVTXContext **ctx)
296 {
297
2/2
✓ Branch 0 taken 587 times.
✓ Branch 1 taken 5012 times.
5599 if (!(*ctx))
298 587 return;
299
300 5012 reset_ctx(*ctx, 1);
301 5012 av_freep(ctx);
302 }
303
304 13 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
305 uint64_t flags, FFTXCodeletOptions *opts,
306 int len, int inv, const void *scale)
307 {
308 /* Can only handle one sample+type to one sample+type transforms */
309
6/12
✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 13 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 13 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 13 times.
13 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
310 return AVERROR(EINVAL);
311 13 return 0;
312 }
313
314 /* Null transform when the length is 1 */
315 2 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
316 {
317 2 memcpy(_out, _in, stride);
318 2 }
319
320 static const FFTXCodelet ff_tx_null_def = {
321 .name = NULL_IF_CONFIG_SMALL("null"),
322 .function = ff_tx_null,
323 .type = TX_TYPE_ANY,
324 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
325 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
326 .factors[0] = TX_FACTOR_ANY,
327 .min_len = 1,
328 .max_len = 1,
329 .init = ff_tx_null_init,
330 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
331 .prio = FF_TX_PRIO_MAX,
332 };
333
334 static const FFTXCodelet * const ff_tx_null_list[] = {
335 &ff_tx_null_def,
336 NULL,
337 };
338
339 /* Array of all compiled codelet lists. Order is irrelevant. */
340 static const FFTXCodelet * const * const codelet_list[] = {
341 ff_tx_codelet_list_float_c,
342 ff_tx_codelet_list_double_c,
343 ff_tx_codelet_list_int32_c,
344 ff_tx_null_list,
345 #if HAVE_X86ASM
346 ff_tx_codelet_list_float_x86,
347 #endif
348 #if ARCH_AARCH64
349 ff_tx_codelet_list_float_aarch64,
350 #endif
351 };
352 static const int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
353
354 static const int cpu_slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
355 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
356 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
357
358 static const int cpu_slow_penalties[][2] = {
359 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
360 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
361 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
362 { AV_CPU_FLAG_ATOM, 1 + 128 },
363 { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
364 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
365 };
366
367 27759 static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
368 {
369 27759 int prio = cd->prio;
370 27759 int max_factor = 0;
371
372 /* If the CPU has a SLOW flag, and the instruction is also flagged
373 * as being slow for such, reduce its priority */
374
2/2
✓ Branch 0 taken 166554 times.
✓ Branch 1 taken 27759 times.
194313 for (int i = 0; i < FF_ARRAY_ELEMS(cpu_slow_penalties); i++) {
375
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 166554 times.
166554 if ((cpu_flags & cd->cpu_flags) & cpu_slow_penalties[i][0])
376 prio -= cpu_slow_penalties[i][1];
377 }
378
379 /* Prioritize aligned-only codelets */
380
4/4
✓ Branch 0 taken 2839 times.
✓ Branch 1 taken 24920 times.
✓ Branch 2 taken 2826 times.
✓ Branch 3 taken 13 times.
27759 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
381 2826 prio += 64;
382
383 /* Codelets for specific lengths are generally faster */
384
4/4
✓ Branch 0 taken 6345 times.
✓ Branch 1 taken 21414 times.
✓ Branch 2 taken 5836 times.
✓ Branch 3 taken 509 times.
27759 if ((len == cd->min_len) && (len == cd->max_len))
385 5836 prio += 64;
386
387 /* Forward-only or inverse-only transforms are generally better */
388
2/2
✓ Branch 0 taken 14628 times.
✓ Branch 1 taken 13131 times.
27759 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
389 14628 prio += 64;
390
391 /* Larger factors are generally better */
392
2/2
✓ Branch 0 taken 111036 times.
✓ Branch 1 taken 27759 times.
138795 for (int i = 0; i < TX_MAX_SUB; i++)
393 111036 max_factor = FFMAX(cd->factors[i], max_factor);
394
2/2
✓ Branch 0 taken 25980 times.
✓ Branch 1 taken 1779 times.
27759 if (max_factor)
395 25980 prio += 16*max_factor;
396
397 27759 return prio;
398 }
399
400 typedef struct FFTXLenDecomp {
401 int len;
402 int len2;
403 int prio;
404 const FFTXCodelet *cd;
405 } FFTXLenDecomp;
406
407 440 static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
408 {
409 440 return FFDIFFSIGN(b->prio, a->prio);
410 }
411
412 4083 int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
413 int len, int inv)
414 {
415 4083 int nb_decomp = 0;
416 FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
417 4083 int codelet_list_idx = codelet_list_num;
418
419 4083 const int cpu_flags = av_get_cpu_flags();
420
421 /* Loop through all codelets in all codelet lists to find matches
422 * to the requirements */
423
2/2
✓ Branch 0 taken 20415 times.
✓ Branch 1 taken 4083 times.
24498 while (codelet_list_idx--) {
424 20415 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
425 20415 const FFTXCodelet *cd = NULL;
426
427
2/2
✓ Branch 0 taken 930924 times.
✓ Branch 1 taken 20415 times.
951339 while ((cd = *list++)) {
428 930924 int fl = len;
429 930924 int skip = 0, prio;
430 930924 int factors_product = 1, factors_mod = 0;
431
432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 930924 times.
930924 if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
433 goto sort;
434
435 /* Check if the type matches */
436
4/4
✓ Branch 0 taken 926841 times.
✓ Branch 1 taken 4083 times.
✓ Branch 2 taken 632690 times.
✓ Branch 3 taken 294151 times.
930924 if (cd->type != TX_TYPE_ANY && type != cd->type)
437 632690 continue;
438
439 /* Check direction for non-orthogonal codelets */
440
4/4
✓ Branch 0 taken 19909 times.
✓ Branch 1 taken 278325 times.
✓ Branch 2 taken 3528 times.
✓ Branch 3 taken 16381 times.
298234 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
441
4/4
✓ Branch 0 taken 7154 times.
✓ Branch 1 taken 274699 times.
✓ Branch 2 taken 6170 times.
✓ Branch 3 taken 984 times.
281853 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
442
3/4
✓ Branch 0 taken 3528 times.
✓ Branch 1 taken 277341 times.
✓ Branch 2 taken 3528 times.
✗ Branch 3 not taken.
280869 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
443
3/4
✓ Branch 0 taken 3528 times.
✓ Branch 1 taken 277341 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3528 times.
280869 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
444 17365 continue;
445
446 /* Check if the CPU supports the required ISA */
447
2/2
✓ Branch 0 taken 139011 times.
✓ Branch 1 taken 141858 times.
280869 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
448
2/2
✓ Branch 0 taken 137798 times.
✓ Branch 1 taken 1213 times.
139011 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
449 137798 continue;
450
451
1/2
✓ Branch 0 taken 310640 times.
✗ Branch 1 not taken.
310640 for (int i = 0; i < TX_MAX_FACTORS; i++) {
452
4/4
✓ Branch 0 taken 175719 times.
✓ Branch 1 taken 134921 times.
✓ Branch 2 taken 167569 times.
✓ Branch 3 taken 8150 times.
310640 if (!cd->factors[i] || (fl == 1))
453 break;
454
455
2/2
✓ Branch 0 taken 24514 times.
✓ Branch 1 taken 143055 times.
167569 if (cd->factors[i] == TX_FACTOR_ANY) {
456 24514 factors_mod++;
457 24514 factors_product *= fl;
458
2/2
✓ Branch 0 taken 95541 times.
✓ Branch 1 taken 47514 times.
143055 } else if (!(fl % cd->factors[i])) {
459 95541 factors_mod++;
460
2/2
✓ Branch 0 taken 94754 times.
✓ Branch 1 taken 787 times.
95541 if (cd->factors[i] == 2) {
461 94754 int b = ff_ctz(fl);
462 94754 fl >>= b;
463 94754 factors_product <<= b;
464 } else {
465 do {
466 819 fl /= cd->factors[i];
467 819 factors_product *= cd->factors[i];
468
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 787 times.
819 } while (!(fl % cd->factors[i]));
469 }
470 }
471 }
472
473 /* Disqualify if factor requirements are not satisfied or if trivial */
474
4/4
✓ Branch 0 taken 111739 times.
✓ Branch 1 taken 31332 times.
✓ Branch 2 taken 109428 times.
✓ Branch 3 taken 2311 times.
143071 if ((factors_mod < cd->nb_factors) || (len == factors_product))
475 140760 continue;
476
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2311 times.
2311 if (av_gcd(factors_product, fl) != 1)
478 continue;
479
480 /* Check if length is supported and factorization was successful */
481
2/2
✓ Branch 0 taken 1109 times.
✓ Branch 1 taken 1202 times.
2311 if ((factors_product < cd->min_len) ||
482
3/4
✓ Branch 0 taken 1109 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 566 times.
✓ Branch 3 taken 543 times.
1109 (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
483 1768 continue;
484
485 543 prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
486
487 /* Check for duplicates */
488
2/2
✓ Branch 0 taken 910 times.
✓ Branch 1 taken 324 times.
1234 for (int i = 0; i < nb_decomp; i++) {
489
2/2
✓ Branch 0 taken 219 times.
✓ Branch 1 taken 691 times.
910 if (factors_product == ld[i].len) {
490 /* Update priority if new one is higher */
491
2/2
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 28 times.
219 if (prio > ld[i].prio)
492 191 ld[i].prio = prio;
493 219 skip = 1;
494 219 break;
495 }
496 }
497
498 /* Add decomposition if unique */
499
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 219 times.
543 if (!skip) {
500 324 ld[nb_decomp].cd = cd;
501 324 ld[nb_decomp].len = factors_product;
502 324 ld[nb_decomp].len2 = fl;
503 324 ld[nb_decomp].prio = prio;
504 324 nb_decomp++;
505 }
506 }
507 }
508
509
2/2
✓ Branch 0 taken 3992 times.
✓ Branch 1 taken 91 times.
4083 if (!nb_decomp)
510 3992 return AVERROR(EINVAL);
511
512 91 sort:
513
27/44
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 83 times.
✓ Branch 3 taken 25 times.
✓ Branch 4 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 25 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 50 times.
✓ Branch 12 taken 75 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 75 times.
✓ Branch 16 taken 75 times.
✓ Branch 17 taken 18 times.
✓ Branch 19 taken 18 times.
✓ Branch 20 taken 57 times.
✓ Branch 21 taken 57 times.
✓ Branch 22 taken 75 times.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 75 times.
✓ Branch 28 taken 75 times.
✓ Branch 29 taken 75 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 75 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 18 times.
✓ Branch 44 taken 57 times.
✓ Branch 46 taken 8 times.
✓ Branch 47 taken 75 times.
✓ Branch 48 taken 158 times.
✓ Branch 49 taken 83 times.
✓ Branch 50 taken 166 times.
✓ Branch 51 taken 91 times.
557 AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
514
515
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 91 times.
415 for (int i = 0; i < nb_decomp; i++) {
516
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 324 times.
324 if (ld[i].cd->nb_factors > 1)
517 dst[i] = ld[i].len2;
518 else
519 324 dst[i] = ld[i].len;
520 }
521
522 91 return nb_decomp;
523 }
524
525 24 int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
526 {
527 24 s->map = av_malloc(s->len*sizeof(*s->map));
528
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (!s->map)
529 return AVERROR(ENOMEM);
530
531 24 s->map[0] = 0; /* DC is always at the start */
532
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (s->inv) /* Reversing the ACs flips the transform direction */
533 for (int i = 1; i < s->len; i++)
534 s->map[i] = s->len - i;
535 else
536
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
168 for (int i = 1; i < s->len; i++)
537 144 s->map[i] = i;
538
539 24 s->map_dir = FF_TX_MAP_GATHER;
540
541 24 return 0;
542 }
543
544 #if !CONFIG_SMALL
545 48345 static void print_flags(AVBPrint *bp, uint64_t f)
546 {
547 48345 int prev = 0;
548 48345 const char *sep = ", ";
549 48345 av_bprintf(bp, "flags: [");
550
3/4
✓ Branch 0 taken 15137 times.
✓ Branch 1 taken 33208 times.
✓ Branch 2 taken 15137 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_ALIGNED) && ++prev)
551 15137 av_bprintf(bp, "aligned");
552
3/4
✓ Branch 0 taken 33234 times.
✓ Branch 1 taken 15111 times.
✓ Branch 2 taken 33234 times.
✗ Branch 3 not taken.
48345 if ((f & AV_TX_UNALIGNED) && ++prev)
553
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 33208 times.
33234 av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
554
3/4
✓ Branch 0 taken 22759 times.
✓ Branch 1 taken 25586 times.
✓ Branch 2 taken 22759 times.
✗ Branch 3 not taken.
48345 if ((f & AV_TX_INPLACE) && ++prev)
555
1/2
✓ Branch 0 taken 22759 times.
✗ Branch 1 not taken.
22759 av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
556
3/4
✓ Branch 0 taken 42972 times.
✓ Branch 1 taken 5373 times.
✓ Branch 2 taken 42972 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
557
1/2
✓ Branch 0 taken 42972 times.
✗ Branch 1 not taken.
42972 av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
558
3/4
✓ Branch 0 taken 1119 times.
✓ Branch 1 taken 47226 times.
✓ Branch 2 taken 1119 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_FORWARD_ONLY) && ++prev)
559
1/2
✓ Branch 0 taken 1119 times.
✗ Branch 1 not taken.
1119 av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
560
3/4
✓ Branch 0 taken 18060 times.
✓ Branch 1 taken 30285 times.
✓ Branch 2 taken 18060 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_INVERSE_ONLY) && ++prev)
561
1/2
✓ Branch 0 taken 18060 times.
✗ Branch 1 not taken.
18060 av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
562
3/4
✓ Branch 0 taken 21308 times.
✓ Branch 1 taken 27037 times.
✓ Branch 2 taken 21308 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_PRESHUFFLE) && ++prev)
563
1/2
✓ Branch 0 taken 21308 times.
✗ Branch 1 not taken.
21308 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
564
3/4
✓ Branch 0 taken 207 times.
✓ Branch 1 taken 48138 times.
✓ Branch 2 taken 207 times.
✗ Branch 3 not taken.
48345 if ((f & AV_TX_FULL_IMDCT) && ++prev)
565
1/2
✓ Branch 0 taken 207 times.
✗ Branch 1 not taken.
207 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
566
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48321 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
48345 if ((f & AV_TX_REAL_TO_REAL) && ++prev)
567
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
568
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48321 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
48345 if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
569
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
570
3/4
✓ Branch 0 taken 3428 times.
✓ Branch 1 taken 44917 times.
✓ Branch 2 taken 3428 times.
✗ Branch 3 not taken.
48345 if ((f & FF_TX_ASM_CALL) && ++prev)
571
1/2
✓ Branch 0 taken 3428 times.
✗ Branch 1 not taken.
3428 av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
572 48345 av_bprintf(bp, "]");
573 48345 }
574
575 48345 static void print_type(AVBPrint *bp, enum AVTXType type)
576 {
577
2/2
✓ Branch 0 taken 48319 times.
✓ Branch 1 taken 26 times.
96664 av_bprintf(bp, "%s",
578 type == TX_TYPE_ANY ? "any" :
579
2/2
✓ Branch 0 taken 27083 times.
✓ Branch 1 taken 21236 times.
75402 type == AV_TX_FLOAT_FFT ? "fft_float" :
580
2/2
✓ Branch 0 taken 4621 times.
✓ Branch 1 taken 22462 times.
31704 type == AV_TX_FLOAT_MDCT ? "mdct_float" :
581
2/2
✓ Branch 0 taken 4444 times.
✓ Branch 1 taken 177 times.
9065 type == AV_TX_FLOAT_RDFT ? "rdft_float" :
582
2/2
✓ Branch 0 taken 4420 times.
✓ Branch 1 taken 24 times.
8864 type == AV_TX_FLOAT_DCT_I ? "dctI_float" :
583
2/2
✓ Branch 0 taken 4396 times.
✓ Branch 1 taken 24 times.
8816 type == AV_TX_FLOAT_DST_I ? "dstI_float" :
584
2/2
✓ Branch 0 taken 2602 times.
✓ Branch 1 taken 1794 times.
6998 type == AV_TX_DOUBLE_FFT ? "fft_double" :
585
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
586
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
587
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
588
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
589
2/2
✓ Branch 0 taken 1445 times.
✓ Branch 1 taken 1157 times.
4047 type == AV_TX_INT32_FFT ? "fft_int32" :
590
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1433 times.
1457 type == AV_TX_INT32_MDCT ? "mdct_int32" :
591
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_RDFT ? "rdft_int32" :
592
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_DCT_I ? "dctI_int32" :
593
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 type == AV_TX_INT32_DST_I ? "dstI_int32" :
594 "unknown");
595 48345 }
596
597 37713 static void print_cd_info(const FFTXCodelet *cd, int prio, int len, int print_prio,
598 int log_level)
599 {
600 AVBPrint bp;
601 37713 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
602
603 37713 av_bprintf(&bp, "%s - type: ", cd->name);
604
605 37713 print_type(&bp, cd->type);
606
607 37713 av_bprintf(&bp, ", len: ");
608
2/2
✓ Branch 0 taken 27216 times.
✓ Branch 1 taken 10497 times.
37713 if (!len) {
609
2/2
✓ Branch 0 taken 21905 times.
✓ Branch 1 taken 5311 times.
27216 if (cd->min_len != cd->max_len)
610 21905 av_bprintf(&bp, "[%i, ", cd->min_len);
611
612
2/2
✓ Branch 0 taken 20265 times.
✓ Branch 1 taken 6951 times.
27216 if (cd->max_len == TX_LEN_UNLIMITED)
613 20265 av_bprintf(&bp, "∞");
614 else
615 6951 av_bprintf(&bp, "%i", cd->max_len);
616 } else {
617 10497 av_bprintf(&bp, "%i", len);
618 }
619
620
2/2
✓ Branch 0 taken 24333 times.
✓ Branch 1 taken 13380 times.
37713 if (cd->factors[1]) {
621
3/4
✓ Branch 0 taken 19164 times.
✓ Branch 1 taken 5169 times.
✓ Branch 2 taken 19164 times.
✗ Branch 3 not taken.
24333 av_bprintf(&bp, "%s, factors", !len && cd->min_len != cd->max_len ? "]" : "");
622
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 24325 times.
24333 if (!cd->nb_factors)
623 8 av_bprintf(&bp, ": [");
624 else
625 24325 av_bprintf(&bp, "[%i]: [", cd->nb_factors);
626
627
1/2
✓ Branch 0 taken 86343 times.
✗ Branch 1 not taken.
86343 for (int i = 0; i < TX_MAX_FACTORS; i++) {
628
4/4
✓ Branch 0 taken 62010 times.
✓ Branch 1 taken 24333 times.
✓ Branch 2 taken 37677 times.
✓ Branch 3 taken 24333 times.
86343 if (i && cd->factors[i])
629 37677 av_bprintf(&bp, ", ");
630
2/2
✓ Branch 0 taken 23779 times.
✓ Branch 1 taken 62564 times.
86343 if (cd->factors[i] == TX_FACTOR_ANY)
631 23779 av_bprintf(&bp, "any");
632
2/2
✓ Branch 0 taken 38231 times.
✓ Branch 1 taken 24333 times.
62564 else if (cd->factors[i])
633 38231 av_bprintf(&bp, "%i", cd->factors[i]);
634 else
635 24333 break;
636 }
637
638 24333 av_bprintf(&bp, "], ");
639 } else {
640 13380 av_bprintf(&bp, "%s, factor: %i, ",
641
4/4
✓ Branch 0 taken 8052 times.
✓ Branch 1 taken 5328 times.
✓ Branch 2 taken 2741 times.
✓ Branch 3 taken 5311 times.
13380 !len && cd->min_len != cd->max_len ? "]" : "", cd->factors[0]);
642 }
643 37713 print_flags(&bp, cd->flags);
644
645
2/2
✓ Branch 0 taken 27216 times.
✓ Branch 1 taken 10497 times.
37713 if (print_prio)
646 27216 av_bprintf(&bp, ", prio: %i", prio);
647
648 37713 av_log(NULL, log_level, "%s\n", bp.str);
649 37713 }
650
651 10497 static void print_tx_structure(AVTXContext *s, int depth)
652 {
653 10497 const FFTXCodelet *cd = s->cd_self;
654
655
2/2
✓ Branch 0 taken 16456 times.
✓ Branch 1 taken 10497 times.
26953 for (int i = 0; i <= depth; i++)
656 16456 av_log(NULL, AV_LOG_DEBUG, " ");
657
658 10497 print_cd_info(cd, cd->prio, s->len, 0, AV_LOG_DEBUG);
659
660
2/2
✓ Branch 0 taken 5485 times.
✓ Branch 1 taken 10497 times.
15982 for (int i = 0; i < s->nb_sub; i++)
661 5485 print_tx_structure(&s->sub[i], depth + 1);
662 10497 }
663 #endif /* CONFIG_SMALL */
664
665 typedef struct TXCodeletMatch {
666 const FFTXCodelet *cd;
667 int prio;
668 } TXCodeletMatch;
669
670 27007 static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
671 {
672 27007 return FFDIFFSIGN(b->prio, a->prio);
673 }
674
675 /* We want all factors to completely cover the length */
676 47004 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
677 {
678 47004 int matches = 0, any_flag = 0;
679
680
1/2
✓ Branch 0 taken 145250 times.
✗ Branch 1 not taken.
145250 for (int i = 0; i < TX_MAX_FACTORS; i++) {
681 145250 int factor = cd->factors[i];
682
683
2/2
✓ Branch 0 taken 39258 times.
✓ Branch 1 taken 105992 times.
145250 if (factor == TX_FACTOR_ANY) {
684 39258 any_flag = 1;
685 39258 matches++;
686 39258 continue;
687
4/4
✓ Branch 0 taken 88743 times.
✓ Branch 1 taken 17249 times.
✓ Branch 2 taken 58988 times.
✓ Branch 3 taken 29755 times.
105992 } else if (len <= 1 || !factor) {
688 break;
689
2/2
✓ Branch 0 taken 21953 times.
✓ Branch 1 taken 37035 times.
58988 } else if (factor == 2) { /* Fast path */
690 21953 int bits_2 = ff_ctz(len);
691
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 21905 times.
21953 if (!bits_2)
692 48 continue; /* Factor not supported */
693
694 21905 len >>= bits_2;
695 21905 matches++;
696 } else {
697 37035 int res = len % factor;
698
2/2
✓ Branch 0 taken 31874 times.
✓ Branch 1 taken 5161 times.
37035 if (res)
699 31874 continue; /* Factor not supported */
700
701
2/2
✓ Branch 0 taken 5335 times.
✓ Branch 1 taken 5161 times.
10496 while (!res) {
702 5335 len /= factor;
703 5335 res = len % factor;
704 }
705 5161 matches++;
706 }
707 }
708
709
6/6
✓ Branch 0 taken 27951 times.
✓ Branch 1 taken 19053 times.
✓ Branch 2 taken 7368 times.
✓ Branch 3 taken 20583 times.
✓ Branch 4 taken 6633 times.
✓ Branch 5 taken 735 times.
47004 return (cd->nb_factors <= matches) && (any_flag || len == 1);
710 }
711
712 10632 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
713 uint64_t flags, FFTXCodeletOptions *opts,
714 int len, int inv, const void *scale)
715 {
716 10632 int ret = 0;
717 10632 AVTXContext *sub = NULL;
718 10632 TXCodeletMatch *cd_tmp, *cd_matches = NULL;
719 10632 unsigned int cd_matches_size = 0;
720 10632 int codelet_list_idx = codelet_list_num;
721 10632 int nb_cd_matches = 0;
722 #if !CONFIG_SMALL
723 AVBPrint bp;
724 #endif
725
726 /* We still accept functions marked with SLOW, even if the CPU is
727 * marked with the same flag, but we give them lower priority. */
728 10632 const int cpu_flags = av_get_cpu_flags();
729
730 /* Flags the transform wants */
731 10632 uint64_t req_flags = flags;
732
733 /* Flags the codelet may require to be present */
734 10632 uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
735 AV_TX_REAL_TO_REAL |
736 AV_TX_REAL_TO_IMAGINARY |
737 FF_TX_PRESHUFFLE |
738 FF_TX_ASM_CALL;
739
740 /* Unaligned codelets are compatible with the aligned flag */
741
1/2
✓ Branch 0 taken 10632 times.
✗ Branch 1 not taken.
10632 if (req_flags & FF_TX_ALIGNED)
742 10632 req_flags |= AV_TX_UNALIGNED;
743
744 /* If either flag is set, both are okay, so don't check for an exact match */
745
3/4
✓ Branch 0 taken 5373 times.
✓ Branch 1 taken 5259 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5373 times.
10632 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
746 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
747
2/4
✓ Branch 0 taken 10632 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10632 times.
✗ Branch 3 not taken.
10632 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
748 10632 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
749
750 /* Loop through all codelets in all codelet lists to find matches
751 * to the requirements */
752
2/2
✓ Branch 0 taken 53160 times.
✓ Branch 1 taken 10632 times.
63792 while (codelet_list_idx--) {
753 53160 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
754 53160 const FFTXCodelet *cd = NULL;
755
756
2/2
✓ Branch 0 taken 2424096 times.
✓ Branch 1 taken 53160 times.
2477256 while ((cd = *list++)) {
757 /* Check if the type matches */
758
4/4
✓ Branch 0 taken 2413464 times.
✓ Branch 1 taken 10632 times.
✓ Branch 2 taken 1913642 times.
✓ Branch 3 taken 499822 times.
2424096 if (cd->type != TX_TYPE_ANY && type != cd->type)
759 1913642 continue;
760
761 /* Check direction for non-orthogonal codelets */
762
4/4
✓ Branch 0 taken 61888 times.
✓ Branch 1 taken 448566 times.
✓ Branch 2 taken 7611 times.
✓ Branch 3 taken 54277 times.
510454 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
763
4/4
✓ Branch 0 taken 52688 times.
✓ Branch 1 taken 403489 times.
✓ Branch 2 taken 48128 times.
✓ Branch 3 taken 4560 times.
456177 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
764
3/4
✓ Branch 0 taken 7611 times.
✓ Branch 1 taken 444006 times.
✓ Branch 2 taken 7611 times.
✗ Branch 3 not taken.
451617 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
765
3/4
✓ Branch 0 taken 7611 times.
✓ Branch 1 taken 444006 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7611 times.
451617 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
766 58837 continue;
767
768 /* Check if the requested flags match from both sides */
769
2/2
✓ Branch 0 taken 302328 times.
✓ Branch 1 taken 149289 times.
451617 if (((req_flags & cd->flags) != (req_flags)) ||
770
2/2
✓ Branch 0 taken 68799 times.
✓ Branch 1 taken 233529 times.
302328 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
771 218088 continue;
772
773 /* Check if length is supported */
774
6/6
✓ Branch 0 taken 144782 times.
✓ Branch 1 taken 88747 times.
✓ Branch 2 taken 100287 times.
✓ Branch 3 taken 44495 times.
✓ Branch 4 taken 83249 times.
✓ Branch 5 taken 17038 times.
233529 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
775 171996 continue;
776
777 /* Check if the CPU supports the required ISA */
778
2/2
✓ Branch 0 taken 18407 times.
✓ Branch 1 taken 43126 times.
61533 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
779
2/2
✓ Branch 0 taken 14529 times.
✓ Branch 1 taken 3878 times.
18407 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
780 14529 continue;
781
782 /* Check for factors */
783
2/2
✓ Branch 1 taken 19788 times.
✓ Branch 2 taken 27216 times.
47004 if (!check_cd_factors(cd, len))
784 19788 continue;
785
786 /* Realloc array and append */
787 27216 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
788 27216 sizeof(*cd_tmp) * (nb_cd_matches + 1));
789
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 27216 times.
27216 if (!cd_tmp) {
790 av_free(cd_matches);
791 return AVERROR(ENOMEM);
792 }
793
794 27216 cd_matches = cd_tmp;
795 27216 cd_matches[nb_cd_matches].cd = cd;
796 27216 cd_matches[nb_cd_matches].prio = get_codelet_prio(cd, cpu_flags, len);
797 27216 nb_cd_matches++;
798 }
799 }
800
801 #if !CONFIG_SMALL
802 /* Print debugging info */
803 10632 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
804
2/2
✓ Branch 0 taken 9161 times.
✓ Branch 1 taken 1471 times.
10632 av_bprintf(&bp, "For transform of length %i, %s, ", len,
805 inv ? "inverse" : "forward");
806 10632 print_type(&bp, type);
807 10632 av_bprintf(&bp, ", ");
808 10632 print_flags(&bp, flags);
809
2/2
✓ Branch 0 taken 10497 times.
✓ Branch 1 taken 135 times.
10632 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
810 nb_cd_matches ? ":" : ".");
811 #endif
812
813 /* No matches found */
814
2/2
✓ Branch 0 taken 135 times.
✓ Branch 1 taken 10497 times.
10632 if (!nb_cd_matches)
815 135 return AVERROR(ENOSYS);
816
817 /* Sort the list */
818
44/44
✓ Branch 0 taken 4534 times.
✓ Branch 1 taken 7559 times.
✓ Branch 3 taken 1261 times.
✓ Branch 4 taken 3273 times.
✓ Branch 6 taken 43 times.
✓ Branch 7 taken 1218 times.
✓ Branch 9 taken 1661 times.
✓ Branch 10 taken 1612 times.
✓ Branch 12 taken 1572 times.
✓ Branch 13 taken 2962 times.
✓ Branch 14 taken 2644 times.
✓ Branch 15 taken 1890 times.
✓ Branch 16 taken 3560 times.
✓ Branch 17 taken 1385 times.
✓ Branch 19 taken 2543 times.
✓ Branch 20 taken 1017 times.
✓ Branch 21 taken 1063 times.
✓ Branch 22 taken 1808 times.
✓ Branch 24 taken 469 times.
✓ Branch 25 taken 594 times.
✓ Branch 26 taken 1808 times.
✓ Branch 27 taken 594 times.
✓ Branch 28 taken 2402 times.
✓ Branch 29 taken 1890 times.
✓ Branch 30 taken 743 times.
✓ Branch 31 taken 1147 times.
✓ Branch 32 taken 349 times.
✓ Branch 33 taken 394 times.
✓ Branch 34 taken 292 times.
✓ Branch 35 taken 57 times.
✓ Branch 36 taken 1223 times.
✓ Branch 37 taken 92 times.
✓ Branch 39 taken 629 times.
✓ Branch 40 taken 594 times.
✓ Branch 41 taken 92 times.
✓ Branch 42 taken 594 times.
✓ Branch 43 taken 1311 times.
✓ Branch 44 taken 487 times.
✓ Branch 46 taken 4551 times.
✓ Branch 47 taken 3008 times.
✓ Branch 48 taken 12093 times.
✓ Branch 49 taken 2000 times.
✓ Branch 50 taken 12295 times.
✓ Branch 51 taken 10497 times.
32523 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
819
820 #if !CONFIG_SMALL
821 10497 av_log(NULL, AV_LOG_TRACE, "%s\n", bp.str);
822
823
2/2
✓ Branch 0 taken 27216 times.
✓ Branch 1 taken 10497 times.
37713 for (int i = 0; i < nb_cd_matches; i++) {
824 27216 av_log(NULL, AV_LOG_TRACE, " %i: ", i + 1);
825 27216 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 0, 1, AV_LOG_TRACE);
826 }
827 #endif
828
829
2/2
✓ Branch 0 taken 10406 times.
✓ Branch 1 taken 91 times.
10497 if (!s->sub) {
830 10406 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
831
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10406 times.
10406 if (!sub) {
832 ret = AVERROR(ENOMEM);
833 goto end;
834 }
835 }
836
837 /* Attempt to initialize each */
838
1/2
✓ Branch 0 taken 14603 times.
✗ Branch 1 not taken.
14603 for (int i = 0; i < nb_cd_matches; i++) {
839 14603 const FFTXCodelet *cd = cd_matches[i].cd;
840 14603 AVTXContext *sctx = &s->sub[s->nb_sub];
841
842 14603 sctx->len = len;
843 14603 sctx->inv = inv;
844 14603 sctx->type = type;
845 14603 sctx->flags = cd->flags | flags;
846 14603 sctx->cd_self = cd;
847
848 14603 s->fn[s->nb_sub] = cd->function;
849 14603 s->cd[s->nb_sub] = cd;
850
851 14603 ret = 0;
852
2/2
✓ Branch 0 taken 14586 times.
✓ Branch 1 taken 17 times.
14603 if (cd->init)
853 14586 ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
854
855
2/2
✓ Branch 0 taken 10497 times.
✓ Branch 1 taken 4106 times.
14603 if (ret >= 0) {
856
3/4
✓ Branch 0 taken 5337 times.
✓ Branch 1 taken 5160 times.
✓ Branch 2 taken 5337 times.
✗ Branch 3 not taken.
10497 if (opts && opts->map_dir != FF_TX_MAP_NONE &&
857
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 5316 times.
5337 sctx->map_dir == FF_TX_MAP_NONE) {
858 /* If a specific map direction was requested, and it doesn't
859 * exist, create one.*/
860 21 sctx->map = av_malloc(len*sizeof(*sctx->map));
861
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (!sctx->map) {
862 ret = AVERROR(ENOMEM);
863 goto end;
864 }
865
866
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 21 times.
138 for (int i = 0; i < len; i++)
867 117 sctx->map[i] = i;
868
4/4
✓ Branch 0 taken 5316 times.
✓ Branch 1 taken 5160 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 5300 times.
10476 } else if (opts && (opts->map_dir != sctx->map_dir)) {
869 16 int *tmp = av_malloc(len*sizeof(*sctx->map));
870
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (!tmp) {
871 ret = AVERROR(ENOMEM);
872 goto end;
873 }
874
875 16 memcpy(tmp, sctx->map, len*sizeof(*sctx->map));
876
877
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 16 times.
128 for (int i = 0; i < len; i++)
878 112 sctx->map[tmp[i]] = i;
879
880 16 av_free(tmp);
881 }
882
883 10497 s->nb_sub++;
884 10497 goto end;
885 }
886
887 4106 s->fn[s->nb_sub] = NULL;
888 4106 s->cd[s->nb_sub] = NULL;
889
890 4106 reset_ctx(sctx, 0);
891
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4106 times.
4106 if (ret == AVERROR(ENOMEM))
892 break;
893 }
894
895 if (!s->nb_sub)
896 av_freep(&s->sub);
897
898 end:
899 10497 av_free(cd_matches);
900 10497 return ret;
901 }
902
903 5012 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
904 int inv, int len, const void *scale, uint64_t flags)
905 {
906 int ret;
907 5012 AVTXContext tmp = { 0 };
908 5012 const double default_scale_d = 1.0;
909 5012 const float default_scale_f = 1.0f;
910
911
4/8
✓ Branch 0 taken 5012 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5012 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 5012 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 5012 times.
5012 if (!len || type >= AV_TX_NB || !ctx || !tx)
912 return AVERROR(EINVAL);
913
914
1/2
✓ Branch 0 taken 5012 times.
✗ Branch 1 not taken.
5012 if (!(flags & AV_TX_UNALIGNED))
915 5012 flags |= FF_TX_ALIGNED;
916
1/2
✓ Branch 0 taken 5012 times.
✗ Branch 1 not taken.
5012 if (!(flags & AV_TX_INPLACE))
917 5012 flags |= FF_TX_OUT_OF_PLACE;
918
919
5/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5000 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 12 times.
✗ Branch 7 not taken.
5012 if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == AV_TX_DOUBLE_DCT) ||
920
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 (type == AV_TX_DOUBLE_DCT_I) || (type == AV_TX_DOUBLE_DST_I) ||
921 (type == AV_TX_DOUBLE_RDFT)))
922 scale = &default_scale_d;
923
3/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5000 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
5012 else if (!scale && !TYPE_IS(FFT, type))
924 scale = &default_scale_f;
925
926 5012 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
927
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5012 times.
5012 if (ret < 0)
928 return ret;
929
930 5012 *ctx = &tmp.sub[0];
931 5012 *tx = tmp.fn[0];
932
933 #if !CONFIG_SMALL
934 5012 av_log(NULL, AV_LOG_DEBUG, "Transform tree:\n");
935 5012 print_tx_structure(*ctx, 0);
936 #endif
937
938 5012 return ret;
939 }
940