FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavutil/tx.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 431 496 86.9%
Functions: 24 25 96.0%
Branches: 465 612 76.0%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "avassert.h"
20 #include "intmath.h"
21 #include "cpu.h"
22 #include "mem.h"
23 #include "qsort.h"
24 #include "bprint.h"
25
26 #include "tx_priv.h"
27
28 #define TYPE_IS(type, x) \
29 (((x) == AV_TX_FLOAT_ ## type) || \
30 ((x) == AV_TX_DOUBLE_ ## type) || \
31 ((x) == AV_TX_INT32_ ## type))
32
33 /* Calculates the modular multiplicative inverse */
34 3938 static av_always_inline int mulinv(int n, int m)
35 {
36 3938 n = n % m;
37
1/2
✓ Branch 0 taken 41151 times.
✗ Branch 1 not taken.
41151 for (int x = 1; x < m; x++)
38
2/2
✓ Branch 0 taken 3938 times.
✓ Branch 1 taken 37213 times.
41151 if (((n * x) % m) == 1)
39 3938 return x;
40 av_assert0(0); /* Never reached */
41 return 0;
42 }
43
44 75 int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts,
45 int d1, int d2)
46 {
47 75 const int sl = d1*d2;
48
49 75 s->map = av_malloc(s->len*sizeof(*s->map));
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (!s->map)
51 return AVERROR(ENOMEM);
52
53
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 75 times.
150 for (int k = 0; k < s->len; k += sl) {
54
3/6
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 75 times.
75 if (s->inv || (opts && opts->map_dir == FF_TX_MAP_SCATTER)) {
55 for (int m = 0; m < d2; m++)
56 for (int n = 0; n < d1; n++)
57 s->map[k + ((m*d1 + n*d2) % (sl))] = m*d1 + n;
58 } else {
59
2/2
✓ Branch 0 taken 375 times.
✓ Branch 1 taken 75 times.
450 for (int m = 0; m < d2; m++)
60
2/2
✓ Branch 0 taken 1125 times.
✓ Branch 1 taken 375 times.
1500 for (int n = 0; n < d1; n++)
61 1125 s->map[k + m*d1 + n] = (m*d1 + n*d2) % (sl);
62 }
63
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 75 times.
75 if (s->inv)
65 for (int w = 1; w <= ((sl) >> 1); w++)
66 FFSWAP(int, s->map[k + w], s->map[k + sl - w]);
67 }
68
69
1/2
✓ Branch 0 taken 75 times.
✗ Branch 1 not taken.
75 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
70
71 75 return 0;
72 }
73
74 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
75 1969 int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts,
76 int inv, int n, int m)
77 {
78 int *in_map, *out_map;
79 1969 const int len = n*m; /* Will not be equal to s->len for MDCTs */
80 int m_inv, n_inv;
81
82 /* Make sure the numbers are coprime */
83
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1969 times.
1969 if (av_gcd(n, m) != 1)
84 return AVERROR(EINVAL);
85
86 1969 m_inv = mulinv(m, n);
87 1969 n_inv = mulinv(n, m);
88
89
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1969 times.
1969 if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
90 return AVERROR(ENOMEM);
91
92 1969 in_map = s->map;
93 1969 out_map = s->map + len;
94
95 /* Ruritanian map for input, CRT map for output, can be swapped */
96
3/4
✓ Branch 0 taken 280 times.
✓ Branch 1 taken 1689 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 280 times.
1969 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
97 for (int j = 0; j < m; j++) {
98 for (int i = 0; i < n; i++) {
99 in_map[(i*m + j*n) % len] = j*n + i;
100 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
101 }
102 }
103 } else {
104
2/2
✓ Branch 0 taken 70244 times.
✓ Branch 1 taken 1969 times.
72213 for (int j = 0; j < m; j++) {
105
2/2
✓ Branch 0 taken 537420 times.
✓ Branch 1 taken 70244 times.
607664 for (int i = 0; i < n; i++) {
106 537420 in_map[j*n + i] = (i*m + j*n) % len;
107 537420 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
108 }
109 }
110 }
111
112
2/2
✓ Branch 0 taken 1875 times.
✓ Branch 1 taken 94 times.
1969 if (inv) {
113
2/2
✓ Branch 0 taken 64932 times.
✓ Branch 1 taken 1875 times.
66807 for (int i = 0; i < m; i++) {
114 64932 int *in = &in_map[i*n + 1]; /* Skip the DC */
115
2/2
✓ Branch 0 taken 196732 times.
✓ Branch 1 taken 64932 times.
261664 for (int j = 0; j < ((n - 1) >> 1); j++)
116 196732 FFSWAP(int, in[j], in[n - j - 2]);
117 }
118 }
119
120
2/2
✓ Branch 0 taken 280 times.
✓ Branch 1 taken 1689 times.
1969 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
121
122 1969 return 0;
123 }
124
125 10499994 static inline int split_radix_permutation(int i, int len, int inv)
126 {
127 10499994 len >>= 1;
128
2/2
✓ Branch 0 taken 1346618 times.
✓ Branch 1 taken 9153376 times.
10499994 if (len <= 1)
129 1346618 return i & 1;
130
2/2
✓ Branch 0 taken 4576688 times.
✓ Branch 1 taken 4576688 times.
9153376 if (!(i & len))
131 4576688 return split_radix_permutation(i, len, inv) * 2;
132 4576688 len >>= 1;
133 4576688 return split_radix_permutation(i, len, inv) * 4 + 1 - 2*(!(i & len) ^ inv);
134 }
135
136 4183 int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
137 {
138 4183 int len = s->len;
139
140
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4183 times.
4183 if (!(s->map = av_malloc(len*sizeof(*s->map))))
141 return AVERROR(ENOMEM);
142
143
3/4
✓ Branch 0 taken 4183 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1848 times.
✓ Branch 3 taken 2335 times.
4183 if (opts && opts->map_dir == FF_TX_MAP_SCATTER) {
144
2/2
✓ Branch 0 taken 197476 times.
✓ Branch 1 taken 1848 times.
199324 for (int i = 0; i < s->len; i++)
145 197476 s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
146 } else {
147
2/2
✓ Branch 0 taken 973002 times.
✓ Branch 1 taken 2335 times.
975337 for (int i = 0; i < s->len; i++)
148 973002 s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
149 }
150
151
1/2
✓ Branch 0 taken 4183 times.
✗ Branch 1 not taken.
4183 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
152
153 4183 return 0;
154 }
155
156 int ff_tx_gen_inplace_map(AVTXContext *s, int len)
157 {
158 int *src_map, out_map_idx = 0;
159
160 if (!s->sub || !s->sub->map)
161 return AVERROR(EINVAL);
162
163 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
164 return AVERROR(ENOMEM);
165
166 src_map = s->sub->map;
167
168 /* The first coefficient is always already in-place */
169 for (int src = 1; src < s->len; src++) {
170 int dst = src_map[src];
171 int found = 0;
172
173 if (dst <= src)
174 continue;
175
176 /* This just checks if a closed loop has been encountered before,
177 * and if so, skips it, since to fully permute a loop we must only
178 * enter it once. */
179 do {
180 for (int j = 0; j < out_map_idx; j++) {
181 if (dst == s->map[j]) {
182 found = 1;
183 break;
184 }
185 }
186 dst = src_map[dst];
187 } while (dst != src && !found);
188
189 if (!found)
190 s->map[out_map_idx++] = src;
191 }
192
193 s->map[out_map_idx++] = 0;
194
195 return 0;
196 }
197
198 43593 static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
199 int is_dual, int dual_high, int len,
200 int basis, int dual_stride, int inv_lookup)
201 {
202 43593 len >>= 1;
203
204
2/2
✓ Branch 0 taken 29338 times.
✓ Branch 1 taken 14255 times.
43593 if (len <= basis) {
205 int k1, k2, stride, even_idx, odd_idx;
206
207
3/4
✓ Branch 0 taken 21900 times.
✓ Branch 1 taken 7438 times.
✓ Branch 2 taken 21900 times.
✗ Branch 3 not taken.
29338 is_dual = is_dual && dual_stride;
208 29338 dual_high = is_dual & dual_high;
209
2/2
✓ Branch 0 taken 21900 times.
✓ Branch 1 taken 7438 times.
29338 stride = is_dual ? FFMIN(dual_stride, len) : 0;
210
211 29338 even_idx = offset + dual_high*(stride - 2*len);
212
4/4
✓ Branch 0 taken 21900 times.
✓ Branch 1 taken 7438 times.
✓ Branch 2 taken 10950 times.
✓ Branch 3 taken 10950 times.
29338 odd_idx = even_idx + len + (is_dual && !dual_high)*len + dual_high*len;
213
214
2/2
✓ Branch 0 taken 88070 times.
✓ Branch 1 taken 29338 times.
117408 for (int i = 0; i < len; i++) {
215 88070 k1 = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
216 88070 k2 = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);
217
2/2
✓ Branch 0 taken 69898 times.
✓ Branch 1 taken 18172 times.
88070 if (inv_lookup) {
218 69898 revtab[even_idx++] = k1;
219 69898 revtab[odd_idx++] = k2;
220 } else {
221 18172 revtab[k1] = even_idx++;
222 18172 revtab[k2] = odd_idx++;
223 }
224
4/4
✓ Branch 0 taken 58516 times.
✓ Branch 1 taken 29554 times.
✓ Branch 2 taken 29258 times.
✓ Branch 3 taken 29258 times.
88070 if (stride && !((i + 1) % stride)) {
225 29258 even_idx += stride;
226 29258 odd_idx += stride;
227 }
228 }
229
230 29338 return;
231 }
232
233 14255 parity_revtab_generator(revtab, n, inv, offset,
234 0, 0, len >> 0, basis, dual_stride, inv_lookup);
235 14255 parity_revtab_generator(revtab, n, inv, offset + (len >> 0),
236 1, 0, len >> 1, basis, dual_stride, inv_lookup);
237 14255 parity_revtab_generator(revtab, n, inv, offset + (len >> 0) + (len >> 1),
238 1, 1, len >> 1, basis, dual_stride, inv_lookup);
239 }
240
241 828 int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv,
242 FFTXCodeletOptions *opts,
243 int basis, int dual_stride)
244 {
245 828 basis >>= 1;
246
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 828 times.
828 if (len < basis)
247 return AVERROR(EINVAL);
248
249
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 828 times.
828 if (!(s->map = av_mallocz(len*sizeof(*s->map))))
250 return AVERROR(ENOMEM);
251
252
3/4
✓ Branch 0 taken 661 times.
✓ Branch 1 taken 167 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 661 times.
828 av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
253
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 828 times.
828 av_assert0(dual_stride <= basis);
254
255
2/2
✓ Branch 0 taken 805 times.
✓ Branch 1 taken 23 times.
1633 parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
256 basis, dual_stride,
257
2/2
✓ Branch 0 taken 350 times.
✓ Branch 1 taken 455 times.
805 opts ? opts->map_dir == FF_TX_MAP_GATHER : FF_TX_MAP_GATHER);
258
259
2/2
✓ Branch 0 taken 805 times.
✓ Branch 1 taken 23 times.
828 s->map_dir = opts ? opts->map_dir : FF_TX_MAP_GATHER;
260
261 828 return 0;
262 }
263
264 31060 static void reset_ctx(AVTXContext *s, int free_sub)
265 {
266
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 31060 times.
31060 if (!s)
267 return;
268
269
2/2
✓ Branch 0 taken 5445 times.
✓ Branch 1 taken 25615 times.
31060 if (s->sub)
270
2/2
✓ Branch 0 taken 21780 times.
✓ Branch 1 taken 5445 times.
27225 for (int i = 0; i < TX_MAX_SUB; i++)
271 21780 reset_ctx(&s->sub[i], free_sub + 1);
272
273
3/4
✓ Branch 0 taken 14816 times.
✓ Branch 1 taken 16244 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14816 times.
31060 if (s->cd_self && s->cd_self->uninit)
274 s->cd_self->uninit(s);
275
276
2/2
✓ Branch 0 taken 26837 times.
✓ Branch 1 taken 4223 times.
31060 if (free_sub)
277 26837 av_freep(&s->sub);
278
279 31060 av_freep(&s->map);
280 31060 av_freep(&s->exp);
281 31060 av_freep(&s->tmp);
282
283 /* Nothing else needs to be reset, it gets overwritten if another
284 * ff_tx_init_subtx() call is made. */
285 31060 s->nb_sub = 0;
286 31060 s->opaque = NULL;
287 31060 memset(s->fn, 0, sizeof(*s->fn));
288 }
289
290 91 void ff_tx_clear_ctx(AVTXContext *s)
291 {
292 91 reset_ctx(s, 0);
293 91 }
294
295 5644 av_cold void av_tx_uninit(AVTXContext **ctx)
296 {
297
2/2
✓ Branch 0 taken 587 times.
✓ Branch 1 taken 5057 times.
5644 if (!(*ctx))
298 587 return;
299
300 5057 reset_ctx(*ctx, 1);
301 5057 av_freep(ctx);
302 }
303
304 13 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
305 uint64_t flags, FFTXCodeletOptions *opts,
306 int len, int inv, const void *scale)
307 {
308 /* Can only handle one sample+type to one sample+type transforms */
309
6/12
✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 13 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 13 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 13 times.
13 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
310 return AVERROR(EINVAL);
311 13 return 0;
312 }
313
314 /* Null transform when the length is 1 */
315 2 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
316 {
317 2 memcpy(_out, _in, stride);
318 2 }
319
320 static const FFTXCodelet ff_tx_null_def = {
321 .name = NULL_IF_CONFIG_SMALL("null"),
322 .function = ff_tx_null,
323 .type = TX_TYPE_ANY,
324 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
325 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
326 .factors[0] = TX_FACTOR_ANY,
327 .min_len = 1,
328 .max_len = 1,
329 .init = ff_tx_null_init,
330 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
331 .prio = FF_TX_PRIO_MAX,
332 };
333
334 static const FFTXCodelet * const ff_tx_null_list[] = {
335 &ff_tx_null_def,
336 NULL,
337 };
338
339 /* Array of all compiled codelet lists. Order is irrelevant. */
340 static const FFTXCodelet * const * const codelet_list[] = {
341 ff_tx_codelet_list_float_c,
342 ff_tx_codelet_list_double_c,
343 ff_tx_codelet_list_int32_c,
344 ff_tx_null_list,
345 #if HAVE_X86ASM
346 ff_tx_codelet_list_float_x86,
347 #endif
348 #if ARCH_AARCH64
349 ff_tx_codelet_list_float_aarch64,
350 #endif
351 };
352 static const int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
353
354 static const int cpu_slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
355 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
356 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
357
358 static const int cpu_slow_penalties[][2] = {
359 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
360 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
361 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
362 { AV_CPU_FLAG_ATOM, 1 + 128 },
363 { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
364 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
365 };
366
367 28017 static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
368 {
369 28017 int prio = cd->prio;
370 28017 int max_factor = 0;
371
372 /* If the CPU has a SLOW flag, and the instruction is also flagged
373 * as being slow for such, reduce its priority */
374
2/2
✓ Branch 0 taken 168102 times.
✓ Branch 1 taken 28017 times.
196119 for (int i = 0; i < FF_ARRAY_ELEMS(cpu_slow_penalties); i++) {
375
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 168102 times.
168102 if ((cpu_flags & cd->cpu_flags) & cpu_slow_penalties[i][0])
376 prio -= cpu_slow_penalties[i][1];
377 }
378
379 /* Prioritize aligned-only codelets */
380
4/4
✓ Branch 0 taken 2917 times.
✓ Branch 1 taken 25100 times.
✓ Branch 2 taken 2904 times.
✓ Branch 3 taken 13 times.
28017 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
381 2904 prio += 64;
382
383 /* Codelets for specific lengths are generally faster */
384
4/4
✓ Branch 0 taken 6404 times.
✓ Branch 1 taken 21613 times.
✓ Branch 2 taken 5887 times.
✓ Branch 3 taken 517 times.
28017 if ((len == cd->min_len) && (len == cd->max_len))
385 5887 prio += 64;
386
387 /* Forward-only or inverse-only transforms are generally better */
388
2/2
✓ Branch 0 taken 14771 times.
✓ Branch 1 taken 13246 times.
28017 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
389 14771 prio += 64;
390
391 /* Larger factors are generally better */
392
2/2
✓ Branch 0 taken 112068 times.
✓ Branch 1 taken 28017 times.
140085 for (int i = 0; i < TX_MAX_SUB; i++)
393 112068 max_factor = FFMAX(cd->factors[i], max_factor);
394
2/2
✓ Branch 0 taken 26238 times.
✓ Branch 1 taken 1779 times.
28017 if (max_factor)
395 26238 prio += 16*max_factor;
396
397 28017 return prio;
398 }
399
400 typedef struct FFTXLenDecomp {
401 int len;
402 int len2;
403 int prio;
404 const FFTXCodelet *cd;
405 } FFTXLenDecomp;
406
407 440 static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
408 {
409 440 return FFDIFFSIGN(b->prio, a->prio);
410 }
411
412 4105 int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
413 int len, int inv)
414 {
415 4105 int nb_decomp = 0;
416 FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
417 4105 int codelet_list_idx = codelet_list_num;
418
419 4105 const int cpu_flags = av_get_cpu_flags();
420
421 /* Loop through all codelets in all codelet lists to find matches
422 * to the requirements */
423
2/2
✓ Branch 0 taken 20525 times.
✓ Branch 1 taken 4105 times.
24630 while (codelet_list_idx--) {
424 20525 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
425 20525 const FFTXCodelet *cd = NULL;
426
427
2/2
✓ Branch 0 taken 935940 times.
✓ Branch 1 taken 20525 times.
956465 while ((cd = *list++)) {
428 935940 int fl = len;
429 935940 int skip = 0, prio;
430 935940 int factors_product = 1, factors_mod = 0;
431
432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 935940 times.
935940 if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
433 goto sort;
434
435 /* Check if the type matches */
436
4/4
✓ Branch 0 taken 931835 times.
✓ Branch 1 taken 4105 times.
✓ Branch 2 taken 635990 times.
✓ Branch 3 taken 295845 times.
935940 if (cd->type != TX_TYPE_ANY && type != cd->type)
437 635990 continue;
438
439 /* Check direction for non-orthogonal codelets */
440
4/4
✓ Branch 0 taken 20019 times.
✓ Branch 1 taken 279931 times.
✓ Branch 2 taken 3533 times.
✓ Branch 3 taken 16486 times.
299950 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
441
4/4
✓ Branch 0 taken 7198 times.
✓ Branch 1 taken 276266 times.
✓ Branch 2 taken 6212 times.
✓ Branch 3 taken 986 times.
283464 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
442
3/4
✓ Branch 0 taken 3533 times.
✓ Branch 1 taken 278945 times.
✓ Branch 2 taken 3533 times.
✗ Branch 3 not taken.
282478 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
443
3/4
✓ Branch 0 taken 3533 times.
✓ Branch 1 taken 278945 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3533 times.
282478 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
444 17472 continue;
445
446 /* Check if the CPU supports the required ISA */
447
2/2
✓ Branch 0 taken 139868 times.
✓ Branch 1 taken 142610 times.
282478 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
448
2/2
✓ Branch 0 taken 138655 times.
✓ Branch 1 taken 1213 times.
139868 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
449 138655 continue;
450
451
1/2
✓ Branch 0 taken 312276 times.
✗ Branch 1 not taken.
312276 for (int i = 0; i < TX_MAX_FACTORS; i++) {
452
4/4
✓ Branch 0 taken 176647 times.
✓ Branch 1 taken 135629 times.
✓ Branch 2 taken 168453 times.
✓ Branch 3 taken 8194 times.
312276 if (!cd->factors[i] || (fl == 1))
453 break;
454
455
2/2
✓ Branch 0 taken 24646 times.
✓ Branch 1 taken 143807 times.
168453 if (cd->factors[i] == TX_FACTOR_ANY) {
456 24646 factors_mod++;
457 24646 factors_product *= fl;
458
2/2
✓ Branch 0 taken 96047 times.
✓ Branch 1 taken 47760 times.
143807 } else if (!(fl % cd->factors[i])) {
459 96047 factors_mod++;
460
2/2
✓ Branch 0 taken 95260 times.
✓ Branch 1 taken 787 times.
96047 if (cd->factors[i] == 2) {
461 95260 int b = ff_ctz(fl);
462 95260 fl >>= b;
463 95260 factors_product <<= b;
464 } else {
465 do {
466 819 fl /= cd->factors[i];
467 819 factors_product *= cd->factors[i];
468
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 787 times.
819 } while (!(fl % cd->factors[i]));
469 }
470 }
471 }
472
473 /* Disqualify if factor requirements are not satisfied or if trivial */
474
4/4
✓ Branch 0 taken 112333 times.
✓ Branch 1 taken 31490 times.
✓ Branch 2 taken 110022 times.
✓ Branch 3 taken 2311 times.
143823 if ((factors_mod < cd->nb_factors) || (len == factors_product))
475 141512 continue;
476
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2311 times.
2311 if (av_gcd(factors_product, fl) != 1)
478 continue;
479
480 /* Check if length is supported and factorization was successful */
481
2/2
✓ Branch 0 taken 1109 times.
✓ Branch 1 taken 1202 times.
2311 if ((factors_product < cd->min_len) ||
482
3/4
✓ Branch 0 taken 1109 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 566 times.
✓ Branch 3 taken 543 times.
1109 (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
483 1768 continue;
484
485 543 prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
486
487 /* Check for duplicates */
488
2/2
✓ Branch 0 taken 910 times.
✓ Branch 1 taken 324 times.
1234 for (int i = 0; i < nb_decomp; i++) {
489
2/2
✓ Branch 0 taken 219 times.
✓ Branch 1 taken 691 times.
910 if (factors_product == ld[i].len) {
490 /* Update priority if new one is higher */
491
2/2
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 28 times.
219 if (prio > ld[i].prio)
492 191 ld[i].prio = prio;
493 219 skip = 1;
494 219 break;
495 }
496 }
497
498 /* Add decomposition if unique */
499
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 219 times.
543 if (!skip) {
500 324 ld[nb_decomp].cd = cd;
501 324 ld[nb_decomp].len = factors_product;
502 324 ld[nb_decomp].len2 = fl;
503 324 ld[nb_decomp].prio = prio;
504 324 nb_decomp++;
505 }
506 }
507 }
508
509
2/2
✓ Branch 0 taken 4014 times.
✓ Branch 1 taken 91 times.
4105 if (!nb_decomp)
510 4014 return AVERROR(EINVAL);
511
512 91 sort:
513
27/44
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 83 times.
✓ Branch 3 taken 25 times.
✓ Branch 4 taken 50 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 25 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 50 times.
✓ Branch 12 taken 75 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 75 times.
✓ Branch 16 taken 75 times.
✓ Branch 17 taken 18 times.
✓ Branch 19 taken 18 times.
✓ Branch 20 taken 57 times.
✓ Branch 21 taken 57 times.
✓ Branch 22 taken 75 times.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 75 times.
✓ Branch 28 taken 75 times.
✓ Branch 29 taken 75 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 75 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 18 times.
✓ Branch 44 taken 57 times.
✓ Branch 46 taken 8 times.
✓ Branch 47 taken 75 times.
✓ Branch 48 taken 158 times.
✓ Branch 49 taken 83 times.
✓ Branch 50 taken 166 times.
✓ Branch 51 taken 91 times.
557 AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
514
515
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 91 times.
415 for (int i = 0; i < nb_decomp; i++) {
516
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 324 times.
324 if (ld[i].cd->nb_factors > 1)
517 dst[i] = ld[i].len2;
518 else
519 324 dst[i] = ld[i].len;
520 }
521
522 91 return nb_decomp;
523 }
524
525 24 int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
526 {
527 24 s->map = av_malloc(s->len*sizeof(*s->map));
528
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (!s->map)
529 return AVERROR(ENOMEM);
530
531 24 s->map[0] = 0; /* DC is always at the start */
532
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (s->inv) /* Reversing the ACs flips the transform direction */
533 for (int i = 1; i < s->len; i++)
534 s->map[i] = s->len - i;
535 else
536
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 24 times.
168 for (int i = 1; i < s->len; i++)
537 144 s->map[i] = i;
538
539 24 s->map_dir = FF_TX_MAP_GATHER;
540
541 24 return 0;
542 }
543
544 #if !CONFIG_SMALL
545 48799 static void print_flags(AVBPrint *bp, uint64_t f)
546 {
547 48799 int prev = 0;
548 48799 const char *sep = ", ";
549 48799 av_bprintf(bp, "flags: [");
550
3/4
✓ Branch 0 taken 15359 times.
✓ Branch 1 taken 33440 times.
✓ Branch 2 taken 15359 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_ALIGNED) && ++prev)
551 15359 av_bprintf(bp, "aligned");
552
3/4
✓ Branch 0 taken 33466 times.
✓ Branch 1 taken 15333 times.
✓ Branch 2 taken 33466 times.
✗ Branch 3 not taken.
48799 if ((f & AV_TX_UNALIGNED) && ++prev)
553
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 33440 times.
33466 av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
554
3/4
✓ Branch 0 taken 22980 times.
✓ Branch 1 taken 25819 times.
✓ Branch 2 taken 22980 times.
✗ Branch 3 not taken.
48799 if ((f & AV_TX_INPLACE) && ++prev)
555
1/2
✓ Branch 0 taken 22980 times.
✗ Branch 1 not taken.
22980 av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
556
3/4
✓ Branch 0 taken 43371 times.
✓ Branch 1 taken 5428 times.
✓ Branch 2 taken 43371 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
557
1/2
✓ Branch 0 taken 43371 times.
✗ Branch 1 not taken.
43371 av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
558
3/4
✓ Branch 0 taken 1128 times.
✓ Branch 1 taken 47671 times.
✓ Branch 2 taken 1128 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_FORWARD_ONLY) && ++prev)
559
1/2
✓ Branch 0 taken 1128 times.
✗ Branch 1 not taken.
1128 av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
560
3/4
✓ Branch 0 taken 18239 times.
✓ Branch 1 taken 30560 times.
✓ Branch 2 taken 18239 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_INVERSE_ONLY) && ++prev)
561
1/2
✓ Branch 0 taken 18239 times.
✗ Branch 1 not taken.
18239 av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
562
3/4
✓ Branch 0 taken 21529 times.
✓ Branch 1 taken 27270 times.
✓ Branch 2 taken 21529 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_PRESHUFFLE) && ++prev)
563
1/2
✓ Branch 0 taken 21529 times.
✗ Branch 1 not taken.
21529 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
564
3/4
✓ Branch 0 taken 207 times.
✓ Branch 1 taken 48592 times.
✓ Branch 2 taken 207 times.
✗ Branch 3 not taken.
48799 if ((f & AV_TX_FULL_IMDCT) && ++prev)
565
1/2
✓ Branch 0 taken 207 times.
✗ Branch 1 not taken.
207 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
566
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48775 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
48799 if ((f & AV_TX_REAL_TO_REAL) && ++prev)
567
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
568
3/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48775 times.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
48799 if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
569
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
570
3/4
✓ Branch 0 taken 3518 times.
✓ Branch 1 taken 45281 times.
✓ Branch 2 taken 3518 times.
✗ Branch 3 not taken.
48799 if ((f & FF_TX_ASM_CALL) && ++prev)
571
1/2
✓ Branch 0 taken 3518 times.
✗ Branch 1 not taken.
3518 av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
572 48799 av_bprintf(bp, "]");
573 48799 }
574
575 48799 static void print_type(AVBPrint *bp, enum AVTXType type)
576 {
577
2/2
✓ Branch 0 taken 48773 times.
✓ Branch 1 taken 26 times.
97572 av_bprintf(bp, "%s",
578 type == TX_TYPE_ANY ? "any" :
579
2/2
✓ Branch 0 taken 27316 times.
✓ Branch 1 taken 21457 times.
76089 type == AV_TX_FLOAT_FFT ? "fft_float" :
580
2/2
✓ Branch 0 taken 4621 times.
✓ Branch 1 taken 22695 times.
31937 type == AV_TX_FLOAT_MDCT ? "mdct_float" :
581
2/2
✓ Branch 0 taken 4444 times.
✓ Branch 1 taken 177 times.
9065 type == AV_TX_FLOAT_RDFT ? "rdft_float" :
582
2/2
✓ Branch 0 taken 4420 times.
✓ Branch 1 taken 24 times.
8864 type == AV_TX_FLOAT_DCT_I ? "dctI_float" :
583
2/2
✓ Branch 0 taken 4396 times.
✓ Branch 1 taken 24 times.
8816 type == AV_TX_FLOAT_DST_I ? "dstI_float" :
584
2/2
✓ Branch 0 taken 2602 times.
✓ Branch 1 taken 1794 times.
6998 type == AV_TX_DOUBLE_FFT ? "fft_double" :
585
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
586
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
587
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
588
1/2
✓ Branch 0 taken 2602 times.
✗ Branch 1 not taken.
5204 type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
589
2/2
✓ Branch 0 taken 1445 times.
✓ Branch 1 taken 1157 times.
4047 type == AV_TX_INT32_FFT ? "fft_int32" :
590
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1433 times.
1457 type == AV_TX_INT32_MDCT ? "mdct_int32" :
591
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_RDFT ? "rdft_int32" :
592
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
24 type == AV_TX_INT32_DCT_I ? "dctI_int32" :
593
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 type == AV_TX_INT32_DST_I ? "dstI_int32" :
594 "unknown");
595 48799 }
596
597 38067 static void print_cd_info(const FFTXCodelet *cd, int prio, int len, int print_prio,
598 int log_level)
599 {
600 AVBPrint bp;
601 38067 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
602
603 38067 av_bprintf(&bp, "%s - type: ", cd->name);
604
605 38067 print_type(&bp, cd->type);
606
607 38067 av_bprintf(&bp, ", len: ");
608
2/2
✓ Branch 0 taken 27474 times.
✓ Branch 1 taken 10593 times.
38067 if (!len) {
609
2/2
✓ Branch 0 taken 22112 times.
✓ Branch 1 taken 5362 times.
27474 if (cd->min_len != cd->max_len)
610 22112 av_bprintf(&bp, "[%i, ", cd->min_len);
611
612
2/2
✓ Branch 0 taken 20442 times.
✓ Branch 1 taken 7032 times.
27474 if (cd->max_len == TX_LEN_UNLIMITED)
613 20442 av_bprintf(&bp, "∞");
614 else
615 7032 av_bprintf(&bp, "%i", cd->max_len);
616 } else {
617 10593 av_bprintf(&bp, "%i", len);
618 }
619
620
2/2
✓ Branch 0 taken 24561 times.
✓ Branch 1 taken 13506 times.
38067 if (cd->factors[1]) {
621
3/4
✓ Branch 0 taken 19341 times.
✓ Branch 1 taken 5220 times.
✓ Branch 2 taken 19341 times.
✗ Branch 3 not taken.
24561 av_bprintf(&bp, "%s, factors", !len && cd->min_len != cd->max_len ? "]" : "");
622
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 24553 times.
24561 if (!cd->nb_factors)
623 8 av_bprintf(&bp, ": [");
624 else
625 24553 av_bprintf(&bp, "[%i]: [", cd->nb_factors);
626
627
1/2
✓ Branch 0 taken 87111 times.
✗ Branch 1 not taken.
87111 for (int i = 0; i < TX_MAX_FACTORS; i++) {
628
4/4
✓ Branch 0 taken 62550 times.
✓ Branch 1 taken 24561 times.
✓ Branch 2 taken 37989 times.
✓ Branch 3 taken 24561 times.
87111 if (i && cd->factors[i])
629 37989 av_bprintf(&bp, ", ");
630
2/2
✓ Branch 0 taken 23995 times.
✓ Branch 1 taken 63116 times.
87111 if (cd->factors[i] == TX_FACTOR_ANY)
631 23995 av_bprintf(&bp, "any");
632
2/2
✓ Branch 0 taken 38555 times.
✓ Branch 1 taken 24561 times.
63116 else if (cd->factors[i])
633 38555 av_bprintf(&bp, "%i", cd->factors[i]);
634 else
635 24561 break;
636 }
637
638 24561 av_bprintf(&bp, "], ");
639 } else {
640 13506 av_bprintf(&bp, "%s, factor: %i, ",
641
4/4
✓ Branch 0 taken 8133 times.
✓ Branch 1 taken 5373 times.
✓ Branch 2 taken 2771 times.
✓ Branch 3 taken 5362 times.
13506 !len && cd->min_len != cd->max_len ? "]" : "", cd->factors[0]);
642 }
643 38067 print_flags(&bp, cd->flags);
644
645
2/2
✓ Branch 0 taken 27474 times.
✓ Branch 1 taken 10593 times.
38067 if (print_prio)
646 27474 av_bprintf(&bp, ", prio: %i", prio);
647
648 38067 av_log(NULL, log_level, "%s\n", bp.str);
649 38067 }
650
651 10593 static void print_tx_structure(AVTXContext *s, int depth)
652 {
653 10593 const FFTXCodelet *cd = s->cd_self;
654
655
2/2
✓ Branch 0 taken 16609 times.
✓ Branch 1 taken 10593 times.
27202 for (int i = 0; i <= depth; i++)
656 16609 av_log(NULL, AV_LOG_DEBUG, " ");
657
658 10593 print_cd_info(cd, cd->prio, s->len, 0, AV_LOG_DEBUG);
659
660
2/2
✓ Branch 0 taken 5536 times.
✓ Branch 1 taken 10593 times.
16129 for (int i = 0; i < s->nb_sub; i++)
661 5536 print_tx_structure(&s->sub[i], depth + 1);
662 10593 }
663 #endif /* CONFIG_SMALL */
664
665 typedef struct TXCodeletMatch {
666 const FFTXCodelet *cd;
667 int prio;
668 } TXCodeletMatch;
669
670 27279 static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
671 {
672 27279 return FFDIFFSIGN(b->prio, a->prio);
673 }
674
675 /* We want all factors to completely cover the length */
676 47484 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
677 {
678 47484 int matches = 0, any_flag = 0;
679
680
1/2
✓ Branch 0 taken 146675 times.
✗ Branch 1 not taken.
146675 for (int i = 0; i < TX_MAX_FACTORS; i++) {
681 146675 int factor = cd->factors[i];
682
683
2/2
✓ Branch 0 taken 39621 times.
✓ Branch 1 taken 107054 times.
146675 if (factor == TX_FACTOR_ANY) {
684 39621 any_flag = 1;
685 39621 matches++;
686 39621 continue;
687
4/4
✓ Branch 0 taken 89610 times.
✓ Branch 1 taken 17444 times.
✓ Branch 2 taken 59570 times.
✓ Branch 3 taken 30040 times.
107054 } else if (len <= 1 || !factor) {
688 break;
689
2/2
✓ Branch 0 taken 22208 times.
✓ Branch 1 taken 37362 times.
59570 } else if (factor == 2) { /* Fast path */
690 22208 int bits_2 = ff_ctz(len);
691
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 22160 times.
22208 if (!bits_2)
692 48 continue; /* Factor not supported */
693
694 22160 len >>= bits_2;
695 22160 matches++;
696 } else {
697 37362 int res = len % factor;
698
2/2
✓ Branch 0 taken 32162 times.
✓ Branch 1 taken 5200 times.
37362 if (res)
699 32162 continue; /* Factor not supported */
700
701
2/2
✓ Branch 0 taken 5374 times.
✓ Branch 1 taken 5200 times.
10574 while (!res) {
702 5374 len /= factor;
703 5374 res = len % factor;
704 }
705 5200 matches++;
706 }
707 }
708
709
6/6
✓ Branch 0 taken 28227 times.
✓ Branch 1 taken 19257 times.
✓ Branch 2 taken 7473 times.
✓ Branch 3 taken 20754 times.
✓ Branch 4 taken 6720 times.
✓ Branch 5 taken 753 times.
47484 return (cd->nb_factors <= matches) && (any_flag || len == 1);
710 }
711
712 10732 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
713 uint64_t flags, FFTXCodeletOptions *opts,
714 int len, int inv, const void *scale)
715 {
716 10732 int ret = 0;
717 10732 AVTXContext *sub = NULL;
718 10732 TXCodeletMatch *cd_tmp, *cd_matches = NULL;
719 10732 unsigned int cd_matches_size = 0;
720 10732 int codelet_list_idx = codelet_list_num;
721 10732 int nb_cd_matches = 0;
722 #if !CONFIG_SMALL
723 AVBPrint bp;
724 #endif
725
726 /* We still accept functions marked with SLOW, even if the CPU is
727 * marked with the same flag, but we give them lower priority. */
728 10732 const int cpu_flags = av_get_cpu_flags();
729
730 /* Flags the transform wants */
731 10732 uint64_t req_flags = flags;
732
733 /* Flags the codelet may require to be present */
734 10732 uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
735 AV_TX_REAL_TO_REAL |
736 AV_TX_REAL_TO_IMAGINARY |
737 FF_TX_PRESHUFFLE |
738 FF_TX_ASM_CALL;
739
740 /* Unaligned codelets are compatible with the aligned flag */
741
1/2
✓ Branch 0 taken 10732 times.
✗ Branch 1 not taken.
10732 if (req_flags & FF_TX_ALIGNED)
742 10732 req_flags |= AV_TX_UNALIGNED;
743
744 /* If either flag is set, both are okay, so don't check for an exact match */
745
3/4
✓ Branch 0 taken 5428 times.
✓ Branch 1 taken 5304 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5428 times.
10732 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
746 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
747
2/4
✓ Branch 0 taken 10732 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10732 times.
✗ Branch 3 not taken.
10732 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
748 10732 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
749
750 /* Loop through all codelets in all codelet lists to find matches
751 * to the requirements */
752
2/2
✓ Branch 0 taken 53660 times.
✓ Branch 1 taken 10732 times.
64392 while (codelet_list_idx--) {
753 53660 const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
754 53660 const FFTXCodelet *cd = NULL;
755
756
2/2
✓ Branch 0 taken 2446896 times.
✓ Branch 1 taken 53660 times.
2500556 while ((cd = *list++)) {
757 /* Check if the type matches */
758
4/4
✓ Branch 0 taken 2436164 times.
✓ Branch 1 taken 10732 times.
✓ Branch 2 taken 1931387 times.
✓ Branch 3 taken 504777 times.
2446896 if (cd->type != TX_TYPE_ANY && type != cd->type)
759 1931387 continue;
760
761 /* Check direction for non-orthogonal codelets */
762
4/4
✓ Branch 0 taken 62478 times.
✓ Branch 1 taken 453031 times.
✓ Branch 2 taken 7647 times.
✓ Branch 3 taken 54831 times.
515509 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
763
4/4
✓ Branch 0 taken 53203 times.
✓ Branch 1 taken 407475 times.
✓ Branch 2 taken 48610 times.
✓ Branch 3 taken 4593 times.
460678 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
764
3/4
✓ Branch 0 taken 7647 times.
✓ Branch 1 taken 448438 times.
✓ Branch 2 taken 7647 times.
✗ Branch 3 not taken.
456085 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
765
3/4
✓ Branch 0 taken 7647 times.
✓ Branch 1 taken 448438 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7647 times.
456085 ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
766 59424 continue;
767
768 /* Check if the requested flags match from both sides */
769
2/2
✓ Branch 0 taken 304592 times.
✓ Branch 1 taken 151493 times.
456085 if (((req_flags & cd->flags) != (req_flags)) ||
770
2/2
✓ Branch 0 taken 69189 times.
✓ Branch 1 taken 235403 times.
304592 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
771 220682 continue;
772
773 /* Check if length is supported */
774
6/6
✓ Branch 0 taken 146107 times.
✓ Branch 1 taken 89296 times.
✓ Branch 2 taken 101192 times.
✓ Branch 3 taken 44915 times.
✓ Branch 4 taken 83993 times.
✓ Branch 5 taken 17199 times.
235403 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
775 173289 continue;
776
777 /* Check if the CPU supports the required ISA */
778
2/2
✓ Branch 0 taken 18616 times.
✓ Branch 1 taken 43498 times.
62114 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
779
2/2
✓ Branch 0 taken 14630 times.
✓ Branch 1 taken 3986 times.
18616 !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
780 14630 continue;
781
782 /* Check for factors */
783
2/2
✓ Branch 1 taken 20010 times.
✓ Branch 2 taken 27474 times.
47484 if (!check_cd_factors(cd, len))
784 20010 continue;
785
786 /* Realloc array and append */
787 27474 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
788 27474 sizeof(*cd_tmp) * (nb_cd_matches + 1));
789
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 27474 times.
27474 if (!cd_tmp) {
790 av_free(cd_matches);
791 return AVERROR(ENOMEM);
792 }
793
794 27474 cd_matches = cd_tmp;
795 27474 cd_matches[nb_cd_matches].cd = cd;
796 27474 cd_matches[nb_cd_matches].prio = get_codelet_prio(cd, cpu_flags, len);
797 27474 nb_cd_matches++;
798 }
799 }
800
801 #if !CONFIG_SMALL
802 /* Print debugging info */
803 10732 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
804
2/2
✓ Branch 0 taken 9255 times.
✓ Branch 1 taken 1477 times.
10732 av_bprintf(&bp, "For transform of length %i, %s, ", len,
805 inv ? "inverse" : "forward");
806 10732 print_type(&bp, type);
807 10732 av_bprintf(&bp, ", ");
808 10732 print_flags(&bp, flags);
809
2/2
✓ Branch 0 taken 10593 times.
✓ Branch 1 taken 139 times.
10732 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
810 nb_cd_matches ? ":" : ".");
811 #endif
812
813 /* No matches found */
814
2/2
✓ Branch 0 taken 139 times.
✓ Branch 1 taken 10593 times.
10732 if (!nb_cd_matches)
815 139 return AVERROR(ENOSYS);
816
817 /* Sort the list */
818
44/44
✓ Branch 0 taken 4580 times.
✓ Branch 1 taken 7625 times.
✓ Branch 3 taken 1270 times.
✓ Branch 4 taken 3310 times.
✓ Branch 6 taken 43 times.
✓ Branch 7 taken 1227 times.
✓ Branch 9 taken 1672 times.
✓ Branch 10 taken 1638 times.
✓ Branch 12 taken 1581 times.
✓ Branch 13 taken 2999 times.
✓ Branch 14 taken 2671 times.
✓ Branch 15 taken 1909 times.
✓ Branch 16 taken 3592 times.
✓ Branch 17 taken 1398 times.
✓ Branch 19 taken 2563 times.
✓ Branch 20 taken 1029 times.
✓ Branch 21 taken 1075 times.
✓ Branch 22 taken 1827 times.
✓ Branch 24 taken 475 times.
✓ Branch 25 taken 600 times.
✓ Branch 26 taken 1827 times.
✓ Branch 27 taken 600 times.
✓ Branch 28 taken 2427 times.
✓ Branch 29 taken 1909 times.
✓ Branch 30 taken 753 times.
✓ Branch 31 taken 1156 times.
✓ Branch 32 taken 349 times.
✓ Branch 33 taken 404 times.
✓ Branch 34 taken 292 times.
✓ Branch 35 taken 57 times.
✓ Branch 36 taken 1247 times.
✓ Branch 37 taken 96 times.
✓ Branch 39 taken 647 times.
✓ Branch 40 taken 600 times.
✓ Branch 41 taken 96 times.
✓ Branch 42 taken 600 times.
✓ Branch 43 taken 1320 times.
✓ Branch 44 taken 493 times.
✓ Branch 46 taken 4587 times.
✓ Branch 47 taken 3038 times.
✓ Branch 48 taken 12205 times.
✓ Branch 49 taken 2014 times.
✓ Branch 50 taken 12406 times.
✓ Branch 51 taken 10593 times.
32833 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
819
820 #if !CONFIG_SMALL
821 10593 av_log(NULL, AV_LOG_TRACE, "%s\n", bp.str);
822
823
2/2
✓ Branch 0 taken 27474 times.
✓ Branch 1 taken 10593 times.
38067 for (int i = 0; i < nb_cd_matches; i++) {
824 27474 av_log(NULL, AV_LOG_TRACE, " %i: ", i + 1);
825 27474 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 0, 1, AV_LOG_TRACE);
826 }
827 #endif
828
829
2/2
✓ Branch 0 taken 10502 times.
✓ Branch 1 taken 91 times.
10593 if (!s->sub) {
830 10502 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
831
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10502 times.
10502 if (!sub) {
832 ret = AVERROR(ENOMEM);
833 goto end;
834 }
835 }
836
837 /* Attempt to initialize each */
838
1/2
✓ Branch 0 taken 14725 times.
✗ Branch 1 not taken.
14725 for (int i = 0; i < nb_cd_matches; i++) {
839 14725 const FFTXCodelet *cd = cd_matches[i].cd;
840 14725 AVTXContext *sctx = &s->sub[s->nb_sub];
841
842 14725 sctx->len = len;
843 14725 sctx->inv = inv;
844 14725 sctx->type = type;
845 14725 sctx->flags = cd->flags | flags;
846 14725 sctx->cd_self = cd;
847
848 14725 s->fn[s->nb_sub] = cd->function;
849 14725 s->cd[s->nb_sub] = cd;
850
851 14725 ret = 0;
852
2/2
✓ Branch 0 taken 14708 times.
✓ Branch 1 taken 17 times.
14725 if (cd->init)
853 14708 ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
854
855
2/2
✓ Branch 0 taken 10593 times.
✓ Branch 1 taken 4132 times.
14725 if (ret >= 0) {
856
3/4
✓ Branch 0 taken 5388 times.
✓ Branch 1 taken 5205 times.
✓ Branch 2 taken 5388 times.
✗ Branch 3 not taken.
10593 if (opts && opts->map_dir != FF_TX_MAP_NONE &&
857
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 5367 times.
5388 sctx->map_dir == FF_TX_MAP_NONE) {
858 /* If a specific map direction was requested, and it doesn't
859 * exist, create one.*/
860 21 sctx->map = av_malloc(len*sizeof(*sctx->map));
861
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (!sctx->map) {
862 ret = AVERROR(ENOMEM);
863 goto end;
864 }
865
866
2/2
✓ Branch 0 taken 117 times.
✓ Branch 1 taken 21 times.
138 for (int i = 0; i < len; i++)
867 117 sctx->map[i] = i;
868
4/4
✓ Branch 0 taken 5367 times.
✓ Branch 1 taken 5205 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 5351 times.
10572 } else if (opts && (opts->map_dir != sctx->map_dir)) {
869 16 int *tmp = av_malloc(len*sizeof(*sctx->map));
870
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (!tmp) {
871 ret = AVERROR(ENOMEM);
872 goto end;
873 }
874
875 16 memcpy(tmp, sctx->map, len*sizeof(*sctx->map));
876
877
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 16 times.
128 for (int i = 0; i < len; i++)
878 112 sctx->map[tmp[i]] = i;
879
880 16 av_free(tmp);
881 }
882
883 10593 s->nb_sub++;
884 10593 goto end;
885 }
886
887 4132 s->fn[s->nb_sub] = NULL;
888 4132 s->cd[s->nb_sub] = NULL;
889
890 4132 reset_ctx(sctx, 0);
891
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4132 times.
4132 if (ret == AVERROR(ENOMEM))
892 break;
893 }
894
895 if (!s->nb_sub)
896 av_freep(&s->sub);
897
898 end:
899 10593 av_free(cd_matches);
900 10593 return ret;
901 }
902
903 5057 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
904 int inv, int len, const void *scale, uint64_t flags)
905 {
906 int ret;
907 5057 AVTXContext tmp = { 0 };
908 5057 const double default_scale_d = 1.0;
909 5057 const float default_scale_f = 1.0f;
910
911
4/8
✓ Branch 0 taken 5057 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5057 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 5057 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 5057 times.
5057 if (!len || type >= AV_TX_NB || !ctx || !tx)
912 return AVERROR(EINVAL);
913
914
1/2
✓ Branch 0 taken 5057 times.
✗ Branch 1 not taken.
5057 if (!(flags & AV_TX_UNALIGNED))
915 5057 flags |= FF_TX_ALIGNED;
916
1/2
✓ Branch 0 taken 5057 times.
✗ Branch 1 not taken.
5057 if (!(flags & AV_TX_INPLACE))
917 5057 flags |= FF_TX_OUT_OF_PLACE;
918
919
5/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5045 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 12 times.
✗ Branch 7 not taken.
5057 if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == AV_TX_DOUBLE_DCT) ||
920
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 (type == AV_TX_DOUBLE_DCT_I) || (type == AV_TX_DOUBLE_DST_I) ||
921 (type == AV_TX_DOUBLE_RDFT)))
922 scale = &default_scale_d;
923
3/8
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 5045 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
5057 else if (!scale && !TYPE_IS(FFT, type))
924 scale = &default_scale_f;
925
926 5057 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
927
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5057 times.
5057 if (ret < 0)
928 return ret;
929
930 5057 *ctx = &tmp.sub[0];
931 5057 *tx = tmp.fn[0];
932
933 #if !CONFIG_SMALL
934 5057 av_log(NULL, AV_LOG_DEBUG, "Transform tree:\n");
935 5057 print_tx_structure(*ctx, 0);
936 #endif
937
938 5057 return ret;
939 }
940