FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavutil/tx_template.c
Date: 2026-04-24 19:58:39
Exec Total Coverage
Lines: 619 860 72.0%
Functions: 132 269 49.1%
Branches: 197 360 54.7%

Line Branch Exec Source
1 /*
2 * Copyright (c) Lynne
3 *
4 * Power of two FFT:
5 * Copyright (c) Lynne
6 * Copyright (c) 2008 Loren Merritt
7 * Copyright (c) 2002 Fabrice Bellard
8 * Partly based on libdjbfft by D. J. Bernstein
9 *
10 * This file is part of FFmpeg.
11 *
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27 #include "mem.h"
28
/* Declares the TXSample array ff_tx_tab_<name> with `size` entries. The
 * identifier is wrapped in TX_TAB() and the declaration goes through
 * DECLARE_ALIGNED with an alignment argument of 32. */
29 #define TABLE_DEF(name, size) \
30 DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]
31
/* X-macro list: expands SR_TABLE(len) once for each power of two from 8 up to
 * 131072 (15 entries). SR_TABLE has to be defined before every expansion and
 * is #undef'd again afterwards by each user below. */
32 #define SR_POW2_TABLES \
33 SR_TABLE(8) \
34 SR_TABLE(16) \
35 SR_TABLE(32) \
36 SR_TABLE(64) \
37 SR_TABLE(128) \
38 SR_TABLE(256) \
39 SR_TABLE(512) \
40 SR_TABLE(1024) \
41 SR_TABLE(2048) \
42 SR_TABLE(4096) \
43 SR_TABLE(8192) \
44 SR_TABLE(16384) \
45 SR_TABLE(32768) \
46 SR_TABLE(65536) \
47 SR_TABLE(131072) \
48
/* Declare one table per length listed in SR_POW2_TABLES; the table for
 * length `len` has len/4 + 1 entries. */
49 #define SR_TABLE(len) \
50 TABLE_DEF(len, len/4 + 1);
51 /* Power of two tables */
52 SR_POW2_TABLES
53 #undef SR_TABLE
54
55 /* Other factors' tables */
/* Sizes equal the number of entries written by the matching initialisers:
 * ff_tx_init_tab_53 writes 12, ff_tx_init_tab_7 writes 6 and
 * ff_tx_init_tab_9 writes 8. */
56 TABLE_DEF(53, 12);
57 TABLE_DEF( 7, 6);
58 TABLE_DEF( 9, 8);
59
/* Pairs a table initialiser with the transform factors it serves.
 * ff_tx_init_tabs() reads `factors` until it hits a zero entry, so the list
 * must be zero-terminated (assumes TX_MAX_SUB is larger than the number of
 * factors listed in an initialiser - TODO confirm). */
60 typedef struct FFTabInitData {
61 void (*func)(void);
62 int factors[TX_MAX_SUB]; /* Must be sorted high -> low */
63 } FFTabInitData;
64
/* Generates ff_tx_init_tab_<len>() for every length in SR_POW2_TABLES.
 * Each function stores RESCALE(cos(2*pi*i/len)) for i = 0 .. len/4 - 1 into
 * ff_tx_tab_<len> and then writes 0 into the final entry (index len/4). */
65 #define SR_TABLE(len) \
66 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void) \
67 { \
68 double freq = 2*M_PI/len; \
69 TXSample *tab = TX_TAB(ff_tx_tab_ ##len); \
70 \
71 for (int i = 0; i < len/4; i++) \
72 *tab++ = RESCALE(cos(i*freq)); \
73 \
74 *tab = 0; \
75 }
76
2/2
✓ Branch 0 taken 112320 times.
✓ Branch 1 taken 2798 times.
230236 SR_POW2_TABLES
77 #undef SR_TABLE
78
/* Power-of-two table initialisers in SR_POW2_TABLES order, so index 0 is the
 * length-8 table and index 14 the length-131072 table. */
79 static void (*const sr_tabs_init_funcs[])(void) = {
80 #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),
81 SR_POW2_TABLES
82 #undef SR_TABLE
83 };
84
/* One AVOnce control per entry of sr_tabs_init_funcs (same order); passed to
 * ff_thread_once() by ff_tx_init_tabs(). */
85 static AVOnce sr_tabs_init_once[] = {
86 #define SR_TABLE(len) AV_ONCE_INIT,
87 SR_POW2_TABLES
88 #undef SR_TABLE
89 };
90
/* Fills ff_tx_tab_53, the constant table shared by the 3- and 5-point
 * kernels. In this file the fft5 variants read entries 0, 2, 4 and 6 and
 * fft3 reads entries 8, 9 and 10. Every value is passed through RESCALE(). */
91 235 static av_cold void TX_TAB(ff_tx_init_tab_53)(void)
92 {
93 /* 5pt, doubled to eliminate AVX lane shuffles */
94 235 TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI / 5));
95 235 TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI / 5));
96 235 TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 10));
97 235 TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 10));
98 235 TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * M_PI / 5));
99 235 TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI / 5));
100 235 TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * M_PI / 10));
101 235 TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));
102
103 /* 3pt */
104 235 TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * M_PI / 12));
105 235 TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * M_PI / 12));
106 235 TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * M_PI / 6));
107 235 TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * M_PI / 6));
108 235 }
109
/* Fills the six entries of ff_tx_tab_7 with rescaled sine/cosine constants.
 * fft7() reads this table through a TXComplex pointer, elements 0..2, so
 * consecutive entry pairs are consumed as (.re, .im). */
110 5 static av_cold void TX_TAB(ff_tx_init_tab_7)(void)
111 {
112 5 TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI / 7));
113 5 TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI / 7));
114 5 TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
115 5 TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
116 5 TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
117 5 TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
118 5 }
119
/* Fills the eight entries of ff_tx_tab_9. Entries 0..5 are rescaled
 * sine/cosine constants; entries 6 and 7 are derived from the already
 * rescaled entries (2 + 5 and 3 - 4). fft9() reads the table through a
 * TXComplex pointer, elements 0..3. */
120 5 static av_cold void TX_TAB(ff_tx_init_tab_9)(void)
121 {
122 5 TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI / 3));
123 5 TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI / 3));
124 5 TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI / 9));
125 5 TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI / 9));
126 5 TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
127 5 TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
128 5 TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
129 5 TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
130 5 }
131
/* Non-power-of-two table initialisers together with the factors each one
 * covers. ff_tx_init_tabs() walks the entries in this order. */
132 static const FFTabInitData nptwo_tabs_init_data[] = {
133 { TX_TAB(ff_tx_init_tab_53), { 15, 5, 3 } },
134 { TX_TAB(ff_tx_init_tab_9), { 9 } },
135 { TX_TAB(ff_tx_init_tab_7), { 7 } },
136 };
137
/* One AVOnce control per entry of nptwo_tabs_init_data (same order); passed
 * to ff_thread_once() by ff_tx_init_tabs(). */
138 static AVOnce nptwo_tabs_init_once[] = {
139 AV_ONCE_INIT,
140 AV_ONCE_INIT,
141 AV_ONCE_INIT,
142 };
143
/* Runs the table initialisers required for a transform of length `len`, each
 * through ff_thread_once() with its own AVOnce control.
 *
 * Power-of-two part: factor_2 = ff_ctz(len) (presumably the number of
 * trailing zero bits). When it is non-zero, the initialisers with index
 * 0 .. factor_2 - 3 of sr_tabs_init_funcs are run (nothing runs when
 * factor_2 < 3) and len is shifted right by factor_2.
 *
 * Remaining part: for each nptwo_tabs_init_data entry the factor list is
 * scanned; at the first factor f with f % len == 0 the entry's initialiser is
 * run, len is divided by f and the scan moves on to the next entry. The
 * function returns as soon as len <= 1.
 *
 * NOTE(review): idx is not checked against the 15 entries of
 * sr_tabs_init_funcs; callers presumably keep len <= 131072 - confirm. */
144 7781 av_cold void TX_TAB(ff_tx_init_tabs)(int len)
145 {
146 7781 int factor_2 = ff_ctz(len);
147
2/2
✓ Branch 0 taken 5544 times.
✓ Branch 1 taken 2237 times.
7781 if (factor_2) {
/* Index 0 of the power-of-two lists is length 8, hence the offset of 3. */
148 5544 int idx = factor_2 - 3;
149
2/2
✓ Branch 0 taken 20181 times.
✓ Branch 1 taken 5544 times.
25725 for (int i = 0; i <= idx; i++)
150 20181 ff_thread_once(&sr_tabs_init_once[i],
151 sr_tabs_init_funcs[i]);
152 5544 len >>= factor_2;
153 }
154
155
2/2
✓ Branch 0 taken 10034 times.
✓ Branch 1 taken 8 times.
10042 for (int i = 0; i < FF_ARRAY_ELEMS(nptwo_tabs_init_data); i++) {
156 10034 int f, f_idx = 0;
157
158
2/2
✓ Branch 0 taken 7773 times.
✓ Branch 1 taken 2261 times.
10034 if (len <= 1)
159 7773 return;
160
161
2/2
✓ Branch 0 taken 2293 times.
✓ Branch 1 taken 24 times.
2317 while ((f = nptwo_tabs_init_data[i].factors[f_idx++])) {
162
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 2237 times.
2293 if (f % len)
163 56 continue;
164
165 2237 ff_thread_once(&nptwo_tabs_init_once[i],
166 nptwo_tabs_init_data[i].func);
167 2237 len /= f;
168 2237 break;
169 }
170 }
171 }
172
/* 3-point transform kernel.
 *
 * Reads in[0], in[1], in[2] and writes out[0], out[stride], out[2*stride]
 * (stride counted in TXComplex elements). Uses entries 8, 9 and 10 of
 * ff_tx_tab_53.
 *
 * With TX_INT32 defined, the products are formed in int64_t and reduced with
 * "+ 0x40000000 >> 31"; since + binds tighter than >>, the constant is added
 * to the whole sum before the shift. The other branch multiplies directly in
 * TXSample arithmetic. */
173 4466460 static av_always_inline void fft3(TXComplex *out, TXComplex *in,
174 ptrdiff_t stride)
175 {
176 TXComplex tmp[3];
177 4466460 const TXSample *tab = TX_TAB(ff_tx_tab_53);
178 #ifdef TX_INT32
179 int64_t mtmp[4];
180 #endif
181
182 4466460 tmp[0] = in[0];
183 4466460 BF(tmp[1].re, tmp[2].im, in[1].im, in[2].im);
184 4466460 BF(tmp[1].im, tmp[2].re, in[1].re, in[2].re);
185
186 #ifdef TX_INT32
187 out[0*stride].re = (int64_t)tmp[0].re + tmp[2].re;
188 out[0*stride].im = (int64_t)tmp[0].im + tmp[2].im;
189 mtmp[0] = (int64_t)tab[ 8] * tmp[1].re;
190 mtmp[1] = (int64_t)tab[ 9] * tmp[1].im;
191 mtmp[2] = (int64_t)tab[10] * tmp[2].re;
192 mtmp[3] = (int64_t)tab[10] * tmp[2].im;
193 out[1*stride].re = tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
194 out[1*stride].im = tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
195 out[2*stride].re = tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
196 out[2*stride].im = tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
197 #else
198 4466460 out[0*stride].re = tmp[0].re + tmp[2].re;
199 4466460 out[0*stride].im = tmp[0].im + tmp[2].im;
200 4466460 tmp[1].re = tab[ 8] * tmp[1].re;
201 4466460 tmp[1].im = tab[ 9] * tmp[1].im;
202 4466460 tmp[2].re = tab[10] * tmp[2].re;
203 4466460 tmp[2].im = tab[10] * tmp[2].im;
204 4466460 out[1*stride].re = tmp[0].re - tmp[2].re + tmp[1].re;
205 4466460 out[1*stride].im = tmp[0].im - tmp[2].im - tmp[1].im;
206 4466460 out[2*stride].re = tmp[0].re - tmp[2].re - tmp[1].re;
207 4466460 out[2*stride].im = tmp[0].im - tmp[2].im + tmp[1].im;
208 #endif
209 4466460 }
210
/* Generates a 5-point transform kernel called NAME.
 *
 * The kernel reads in[0..4] and stores its five results at out[D0*stride],
 * out[D1*stride], ..., out[D4*stride], so the D arguments select the output
 * slots. It uses entries 0, 2, 4 and 6 of ff_tx_tab_53 through the SMUL and
 * CMUL helper macros (defined outside this view). The TXUSample casts make
 * the final additions happen in that type (presumably the unsigned
 * counterpart of TXSample - confirm against its definition). */
211 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
212 static av_always_inline void NAME(TXComplex *out, TXComplex *in, \
213 ptrdiff_t stride) \
214 { \
215 TXComplex dc, z0[4], t[6]; \
216 const TXSample *tab = TX_TAB(ff_tx_tab_53); \
217 \
218 dc = in[0]; \
219 BF(t[1].im, t[0].re, in[1].re, in[4].re); \
220 BF(t[1].re, t[0].im, in[1].im, in[4].im); \
221 BF(t[3].im, t[2].re, in[2].re, in[3].re); \
222 BF(t[3].re, t[2].im, in[2].im, in[3].im); \
223 \
224 out[D0*stride].re = dc.re + (TXUSample)t[0].re + t[2].re; \
225 out[D0*stride].im = dc.im + (TXUSample)t[0].im + t[2].im; \
226 \
227 SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re); \
228 SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im); \
229 CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re); \
230 CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im); \
231 \
232 BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
233 BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
234 BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
235 BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
236 \
237 out[D1*stride].re = dc.re + (TXUSample)z0[3].re; \
238 out[D1*stride].im = dc.im + (TXUSample)z0[0].im; \
239 out[D2*stride].re = dc.re + (TXUSample)z0[2].re; \
240 out[D2*stride].im = dc.im + (TXUSample)z0[1].im; \
241 out[D3*stride].re = dc.re + (TXUSample)z0[1].re; \
242 out[D3*stride].im = dc.im + (TXUSample)z0[2].im; \
243 out[D4*stride].re = dc.re + (TXUSample)z0[0].re; \
244 out[D4*stride].im = dc.im + (TXUSample)z0[3].im; \
245 }
246
/* fft5 writes its outputs in natural order (slots 0..4). fft5_m1, fft5_m2 and
 * fft5_m3 are the three kernels called by fft15(); together their slot lists
 * cover each index 0..14 exactly once. */
247 79542 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
248 893292 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
249 893292 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
250 893292 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
251
/* 7-point transform kernel.
 *
 * Reads in[0..6] and writes out[k*stride] for k = 0..6 (stride counted in
 * TXComplex elements). ff_tx_tab_7 is accessed through a TXComplex pointer,
 * elements 0..2.
 *
 * With TX_INT32 defined, the products are accumulated in int64_t and reduced
 * with "+ 0x40000000 >> 31" (the constant is added before the shift). The
 * other branch evaluates the same expressions directly in TXSample
 * arithmetic. After either branch, t[0], t[2] and t[4] have been overwritten
 * with new intermediate values that the final BF stage consumes. */
252 50526 static av_always_inline void fft7(TXComplex *out, TXComplex *in,
253 ptrdiff_t stride)
254 {
255 TXComplex dc, t[6], z[3];
256 50526 const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_7);
257 #ifdef TX_INT32
258 int64_t mtmp[12];
259 #endif
260
261 50526 dc = in[0];
262 50526 BF(t[1].re, t[0].re, in[1].re, in[6].re);
263 50526 BF(t[1].im, t[0].im, in[1].im, in[6].im);
264 50526 BF(t[3].re, t[2].re, in[2].re, in[5].re);
265 50526 BF(t[3].im, t[2].im, in[2].im, in[5].im);
266 50526 BF(t[5].re, t[4].re, in[3].re, in[4].re);
267 50526 BF(t[5].im, t[4].im, in[3].im, in[4].im);
268
269 50526 out[0*stride].re = dc.re + t[0].re + t[2].re + t[4].re;
270 50526 out[0*stride].im = dc.im + t[0].im + t[2].im + t[4].im;
271
272 #ifdef TX_INT32 /* NOTE: it's possible to do this with 16 mults but 72 adds */
273 mtmp[ 0] = ((int64_t)tab[0].re)*t[0].re - ((int64_t)tab[2].re)*t[4].re;
274 mtmp[ 1] = ((int64_t)tab[0].re)*t[4].re - ((int64_t)tab[1].re)*t[0].re;
275 mtmp[ 2] = ((int64_t)tab[0].re)*t[2].re - ((int64_t)tab[2].re)*t[0].re;
276 mtmp[ 3] = ((int64_t)tab[0].re)*t[0].im - ((int64_t)tab[1].re)*t[2].im;
277 mtmp[ 4] = ((int64_t)tab[0].re)*t[4].im - ((int64_t)tab[1].re)*t[0].im;
278 mtmp[ 5] = ((int64_t)tab[0].re)*t[2].im - ((int64_t)tab[2].re)*t[0].im;
279
280 mtmp[ 6] = ((int64_t)tab[2].im)*t[1].im + ((int64_t)tab[1].im)*t[5].im;
281 mtmp[ 7] = ((int64_t)tab[0].im)*t[5].im + ((int64_t)tab[2].im)*t[3].im;
282 mtmp[ 8] = ((int64_t)tab[2].im)*t[5].im + ((int64_t)tab[1].im)*t[3].im;
283 mtmp[ 9] = ((int64_t)tab[0].im)*t[1].re + ((int64_t)tab[1].im)*t[3].re;
284 mtmp[10] = ((int64_t)tab[2].im)*t[3].re + ((int64_t)tab[0].im)*t[5].re;
285 mtmp[11] = ((int64_t)tab[2].im)*t[1].re + ((int64_t)tab[1].im)*t[5].re;
286
287 z[0].re = (int32_t)(mtmp[ 0] - ((int64_t)tab[1].re)*t[2].re + 0x40000000 >> 31);
288 z[1].re = (int32_t)(mtmp[ 1] - ((int64_t)tab[2].re)*t[2].re + 0x40000000 >> 31);
289 z[2].re = (int32_t)(mtmp[ 2] - ((int64_t)tab[1].re)*t[4].re + 0x40000000 >> 31);
290 z[0].im = (int32_t)(mtmp[ 3] - ((int64_t)tab[2].re)*t[4].im + 0x40000000 >> 31);
291 z[1].im = (int32_t)(mtmp[ 4] - ((int64_t)tab[2].re)*t[2].im + 0x40000000 >> 31);
292 z[2].im = (int32_t)(mtmp[ 5] - ((int64_t)tab[1].re)*t[4].im + 0x40000000 >> 31);
293
294 t[0].re = (int32_t)(mtmp[ 6] - ((int64_t)tab[0].im)*t[3].im + 0x40000000 >> 31);
295 t[2].re = (int32_t)(mtmp[ 7] - ((int64_t)tab[1].im)*t[1].im + 0x40000000 >> 31);
296 t[4].re = (int32_t)(mtmp[ 8] + ((int64_t)tab[0].im)*t[1].im + 0x40000000 >> 31);
297 t[0].im = (int32_t)(mtmp[ 9] + ((int64_t)tab[2].im)*t[5].re + 0x40000000 >> 31);
298 t[2].im = (int32_t)(mtmp[10] - ((int64_t)tab[1].im)*t[1].re + 0x40000000 >> 31);
299 t[4].im = (int32_t)(mtmp[11] - ((int64_t)tab[0].im)*t[3].re + 0x40000000 >> 31);
300 #else
301 50526 z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
302 50526 z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
303 50526 z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
304 50526 z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
305 50526 z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
306 50526 z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
307
308 /* It's possible to do t[4].re and t[0].im with 2 multiplies only by
309 * multiplying the sum of all with the average of the twiddles */
310
311 50526 t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
312 50526 t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
313 50526 t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
314 50526 t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
315 50526 t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
316 50526 t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
317 #endif
318
319 50526 BF(t[1].re, z[0].re, z[0].re, t[4].re);
320 50526 BF(t[3].re, z[1].re, z[1].re, t[2].re);
321 50526 BF(t[5].re, z[2].re, z[2].re, t[0].re);
322 50526 BF(t[1].im, z[0].im, z[0].im, t[0].im);
323 50526 BF(t[3].im, z[1].im, z[1].im, t[2].im);
324 50526 BF(t[5].im, z[2].im, z[2].im, t[4].im);
325
326 50526 out[1*stride].re = dc.re + z[0].re;
327 50526 out[1*stride].im = dc.im + t[1].im;
328 50526 out[2*stride].re = dc.re + t[3].re;
329 50526 out[2*stride].im = dc.im + z[1].im;
330 50526 out[3*stride].re = dc.re + z[2].re;
331 50526 out[3*stride].im = dc.im + t[5].im;
332 50526 out[4*stride].re = dc.re + t[5].re;
333 50526 out[4*stride].im = dc.im + z[2].im;
334 50526 out[5*stride].re = dc.re + z[1].re;
335 50526 out[5*stride].im = dc.im + t[3].im;
336 50526 out[6*stride].re = dc.re + t[1].re;
337 50526 out[6*stride].im = dc.im + z[0].im;
338 50526 }
339
/* 9-point transform kernel.
 *
 * Reads in[0..8] and writes out[k*stride] for k = 0..8 (stride counted in
 * TXComplex elements). ff_tx_tab_9 is accessed through a TXComplex pointer,
 * elements 0..3.
 *
 * With TX_INT32 defined, the products are formed in int64_t and reduced with
 * "+ 0x40000000 >> 31" (the constant is added before the shift); the other
 * branch evaluates the same expressions directly. The TX_INT32 branch reads
 * in[0] where the other branch reads dc; both hold the same value because of
 * dc = in[0] at the top.
 *
 * Outputs 1..8 are assembled from the x[] and y[] intermediates: output k and
 * output 9-k use the same x/y pair with the sign of the y terms flipped. */
340 39298 static av_always_inline void fft9(TXComplex *out, TXComplex *in,
341 ptrdiff_t stride)
342 {
343 39298 const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_9);
344 TXComplex dc, t[16], w[4], x[5], y[5], z[2];
345 #ifdef TX_INT32
346 int64_t mtmp[12];
347 #endif
348
349 39298 dc = in[0];
350 39298 BF(t[1].re, t[0].re, in[1].re, in[8].re);
351 39298 BF(t[1].im, t[0].im, in[1].im, in[8].im);
352 39298 BF(t[3].re, t[2].re, in[2].re, in[7].re);
353 39298 BF(t[3].im, t[2].im, in[2].im, in[7].im);
354 39298 BF(t[5].re, t[4].re, in[3].re, in[6].re);
355 39298 BF(t[5].im, t[4].im, in[3].im, in[6].im);
356 39298 BF(t[7].re, t[6].re, in[4].re, in[5].re);
357 39298 BF(t[7].im, t[6].im, in[4].im, in[5].im);
358
359 39298 w[0].re = t[0].re - t[6].re;
360 39298 w[0].im = t[0].im - t[6].im;
361 39298 w[1].re = t[2].re - t[6].re;
362 39298 w[1].im = t[2].im - t[6].im;
363 39298 w[2].re = t[1].re - t[7].re;
364 39298 w[2].im = t[1].im - t[7].im;
365 39298 w[3].re = t[3].re + t[7].re;
366 39298 w[3].im = t[3].im + t[7].im;
367
368 39298 z[0].re = dc.re + t[4].re;
369 39298 z[0].im = dc.im + t[4].im;
370
371 39298 z[1].re = t[0].re + t[2].re + t[6].re;
372 39298 z[1].im = t[0].im + t[2].im + t[6].im;
373
374 39298 out[0*stride].re = z[0].re + z[1].re;
375 39298 out[0*stride].im = z[0].im + z[1].im;
376
377 #ifdef TX_INT32
378 mtmp[0] = t[1].re - t[3].re + t[7].re;
379 mtmp[1] = t[1].im - t[3].im + t[7].im;
380
381 y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
382 y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
383
384 mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
385 mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
386 mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
387 mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);
388
389 x[3].re = z[0].re + (int32_t)mtmp[0];
390 x[3].im = z[0].im + (int32_t)mtmp[1];
391 z[0].re = in[0].re + (int32_t)mtmp[2];
392 z[0].im = in[0].im + (int32_t)mtmp[3];
393
394 mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
395 mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
396 mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
397 mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
398 mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
399 mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
400 mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
401 mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
402
403 x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
404 x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
405 x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
406 x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
407 y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
408 y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
409 y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
410 y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
411
412 y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
413 y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
414
415 #else
416 39298 y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
417 39298 y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
418
419 39298 x[3].re = z[0].re + tab[0].re*z[1].re;
420 39298 x[3].im = z[0].im + tab[0].re*z[1].im;
421 39298 z[0].re = dc.re + tab[0].re*t[4].re;
422 39298 z[0].im = dc.im + tab[0].re*t[4].im;
423
424 39298 x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
425 39298 x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
426 39298 x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
427 39298 x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
428 39298 y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
429 39298 y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
430 39298 y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
431 39298 y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
432
433 39298 y[0].re = tab[0].im*t[5].re;
434 39298 y[0].im = tab[0].im*t[5].im;
435 #endif
436
437 39298 x[4].re = x[1].re + x[2].re;
438 39298 x[4].im = x[1].im + x[2].im;
439
440 39298 y[4].re = y[1].re - y[2].re;
441 39298 y[4].im = y[1].im - y[2].im;
442 39298 x[1].re = z[0].re + x[1].re;
443 39298 x[1].im = z[0].im + x[1].im;
444 39298 y[1].re = y[0].re + y[1].re;
445 39298 y[1].im = y[0].im + y[1].im;
446 39298 x[2].re = z[0].re + x[2].re;
447 39298 x[2].im = z[0].im + x[2].im;
448 39298 y[2].re = y[2].re - y[0].re;
449 39298 y[2].im = y[2].im - y[0].im;
450 39298 x[4].re = z[0].re - x[4].re;
451 39298 x[4].im = z[0].im - x[4].im;
452 39298 y[4].re = y[0].re - y[4].re;
453 39298 y[4].im = y[0].im - y[4].im;
454
455 39298 out[1*stride] = (TXComplex){ x[1].re + y[1].im, x[1].im - y[1].re };
456 39298 out[2*stride] = (TXComplex){ x[2].re + y[2].im, x[2].im - y[2].re };
457 39298 out[3*stride] = (TXComplex){ x[3].re + y[3].im, x[3].im - y[3].re };
458 39298 out[4*stride] = (TXComplex){ x[4].re + y[4].im, x[4].im - y[4].re };
459 39298 out[5*stride] = (TXComplex){ x[4].re - y[4].im, x[4].im + y[4].re };
460 39298 out[6*stride] = (TXComplex){ x[3].re - y[3].im, x[3].im + y[3].re };
461 39298 out[7*stride] = (TXComplex){ x[2].re - y[2].im, x[2].im + y[2].re };
462 39298 out[8*stride] = (TXComplex){ x[1].re - y[1].im, x[1].im + y[1].re };
463 39298 }
464
/* 15-point transform kernel built from the 3- and 5-point kernels.
 *
 * Step 1: five 3-point transforms, each reading three consecutive inputs
 * (in + 3*i) and writing into tmp with a stride of 5, i.e. to tmp[i],
 * tmp[i + 5] and tmp[i + 10].
 * Step 2: three 5-point transforms over tmp[0..4], tmp[5..9] and tmp[10..14];
 * the fft5_m1/m2/m3 variants decide which of the 15 output slots (scaled by
 * `stride`) each result lands in. */
465 893292 static av_always_inline void fft15(TXComplex *out, TXComplex *in,
466 ptrdiff_t stride)
467 {
468 TXComplex tmp[15];
469
470
2/2
✓ Branch 0 taken 4466460 times.
✓ Branch 1 taken 893292 times.
5359752 for (int i = 0; i < 5; i++)
471 4466460 fft3(tmp + i, in + i*3, 5);
472
473 893292 fft5_m1(out, tmp + 0, stride);
474 893292 fft5_m2(out, tmp + 5, stride);
475 893292 fft5_m3(out, tmp + 10, stride);
476 893292 }
477
/* Init callback shared by the fixed-length factor codelets (fft3/5/7/9/15).
 *
 * Initialises the constant tables for `len`, then builds an input map:
 *  - len == 15: ff_tx_gen_pfa_input_map(s, opts, 3, 5);
 *  - otherwise, only if FF_TX_PRESHUFFLE is set in flags:
 *    ff_tx_gen_default_map(s, opts).
 *
 * Returns the map generator's return value, or 0 when no map is generated.
 * The cd, inv and scale parameters are not used here. */
478 105 static av_cold int TX_NAME(ff_tx_fft_factor_init)(AVTXContext *s,
479 const FFTXCodelet *cd,
480 uint64_t flags,
481 FFTXCodeletOptions *opts,
482 int len, int inv,
483 const void *scale)
484 {
485 105 int ret = 0;
486 105 TX_TAB(ff_tx_init_tabs)(len);
487
488
2/2
✓ Branch 0 taken 81 times.
✓ Branch 1 taken 24 times.
105 if (len == 15)
489 81 ret = ff_tx_gen_pfa_input_map(s, opts, 3, 5);
490
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 else if (flags & FF_TX_PRESHUFFLE)
491 24 ret = ff_tx_gen_default_map(s, opts);
492
493 105 return ret;
494 }
495
/* For factor n, generates:
 *  - the transform entry point ff_tx_fft<n>, which casts dst/src to
 *    TXComplex pointers, divides the stride by sizeof(TXComplex) and calls
 *    the fft<n> kernel (the context argument s is unused);
 *  - the codelet descriptor ff_tx_fft<n>_ns_def for exactly length n
 *    (min_len == max_len == n), flagged FF_TX_PRESHUFFLE and initialised by
 *    ff_tx_fft_factor_init. */
496 #define DECL_FACTOR_S(n) \
497 static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst, \
498 void *src, ptrdiff_t stride) \
499 { \
500 fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex)); \
501 } \
502 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
503 .name = TX_NAME_STR("fft" #n "_ns"), \
504 .function = TX_NAME(ff_tx_fft##n), \
505 .type = TX_TYPE(FFT), \
506 .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
507 AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
508 .factors[0] = n, \
509 .nb_factors = 1, \
510 .min_len = n, \
511 .max_len = n, \
512 .init = TX_NAME(ff_tx_fft_factor_init), \
513 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
514 .prio = FF_TX_PRIO_BASE, \
515 };
516
/* Expands DECL_FACTOR_S(n) and additionally declares ff_tx_fft<n>_fwd_def, a
 * second descriptor for the same ff_tx_fft<n> function. It differs from the
 * "_ns" descriptor only in its name and in carrying FF_TX_FORWARD_ONLY
 * instead of FF_TX_PRESHUFFLE. */
517 #define DECL_FACTOR_F(n) \
518 DECL_FACTOR_S(n) \
519 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = { \
520 .name = TX_NAME_STR("fft" #n "_fwd"), \
521 .function = TX_NAME(ff_tx_fft##n), \
522 .type = TX_TYPE(FFT), \
523 .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
524 AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \
525 .factors[0] = n, \
526 .nb_factors = 1, \
527 .min_len = n, \
528 .max_len = n, \
529 .init = TX_NAME(ff_tx_fft_factor_init), \
530 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
531 .prio = FF_TX_PRIO_BASE, \
532 };
533
/* Lengths 3, 5, 7 and 9 get both the "_ns" and the "_fwd" descriptor;
 * length 15 gets the "_ns" descriptor only. */
534 DECL_FACTOR_F(3)
535 72982 DECL_FACTOR_F(5)
536 50526 DECL_FACTOR_F(7)
537 39298 DECL_FACTOR_F(9)
538 1000 DECL_FACTOR_S(15)
539
/* Butterfly stage over four complex values, updated in place.
 *
 * Relies on variables from the enclosing scope: t1, t2, t5 and t6 are inputs
 * prepared by the caller; t3, t4, r0, i0, r1 and i1 are scratch. a0 and a1
 * are first copied into r0/i0/r1/i1, then all four arguments are rewritten
 * through BF. Each argument is expanded several times, so callers must pass
 * expressions without side effects. */
540 #define BUTTERFLIES(a0, a1, a2, a3) \
541 do { \
542 r0=a0.re; \
543 i0=a0.im; \
544 r1=a1.re; \
545 i1=a1.im; \
546 BF(t3, t5, t5, t1); \
547 BF(a2.re, a0.re, r0, t5); \
548 BF(a3.im, a1.im, i1, t3); \
549 BF(t4, t6, t2, t6); \
550 BF(a3.re, a1.re, r1, t4); \
551 BF(a2.im, a0.im, i0, t6); \
552 } while (0)
553
/* Twiddle-then-butterfly step: CMUL combines a2 with (wre, -wim) into t1/t2
 * and a3 with (wre, wim) into t5/t6 (CMUL is defined outside this view,
 * presumably a complex multiply), then BUTTERFLIES(a0, a1, a2, a3) consumes
 * those temporaries. Needs the same enclosing-scope variables as
 * BUTTERFLIES. */
554 #define TRANSFORM(a0, a1, a2, a3, wre, wim) \
555 do { \
556 CMUL(t1, t2, a2.re, a2.im, wre, -wim); \
557 CMUL(t5, t6, a3.re, a3.im, wre, wim); \
558 BUTTERFLIES(a0, a1, a2, a3); \
559 } while (0)
560
/* Combine step used by the generated split-radix codelets.
 *
 * Applies TRANSFORM to the element quadruples z[k], z[o1 + k], z[o2 + k],
 * z[o3 + k] with o1 = 2*len, o2 = 4*len and o3 = 6*len. Each loop iteration
 * handles eight consecutive k (in the order 0, 2, 4, 6, 1, 3, 5, 7), then
 * advances z and cos by 8 and moves wim back by 8; the counter i advances by
 * 4 per iteration.
 *
 * `cos` is read forwards. `wim` starts at cos + 2*len - 7 and is read
 * backwards, so the first access wim[7] is cos[2*len]. */
561 /* z[0...8n-1], w[1...2n-1] */
562 5102419 static inline void TX_NAME(ff_tx_fft_sr_combine)(TXComplex *z,
563 const TXSample *cos, int len)
564 {
565 5102419 int o1 = 2*len;
566 5102419 int o2 = 4*len;
567 5102419 int o3 = 6*len;
568 5102419 const TXSample *wim = cos + o1 - 7;
569 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
570
571
2/2
✓ Branch 0 taken 10306356 times.
✓ Branch 1 taken 5102419 times.
15408775 for (int i = 0; i < len; i += 4) {
572 10306356 TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
573 10306356 TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
574 10306356 TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
575 10306356 TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
576
577 10306356 TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
578 10306356 TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
579 10306356 TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
580 10306356 TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
581
582 10306356 z += 2*4;
583 10306356 cos += 2*4;
584 10306356 wim -= 2*4;
585 }
586 5102419 }
587
/* Init callback for the power-of-two "_ns" codelets: initialises the constant
 * tables for `len` and returns the result of ff_tx_gen_ptwo_revtab(s, opts).
 * The cd, flags, inv and scale parameters are not used here. */
588 4352 static av_cold int TX_NAME(ff_tx_fft_sr_codelet_init)(AVTXContext *s,
589 const FFTXCodelet *cd,
590 uint64_t flags,
591 FFTXCodeletOptions *opts,
592 int len, int inv,
593 const void *scale)
594 {
595 4352 TX_TAB(ff_tx_init_tabs)(len);
596 4352 return ff_tx_gen_ptwo_revtab(s, opts);
597 }
598
/* Declares the codelet descriptor ff_tx_fft<n>_ns_def for the function
 * ff_tx_fft<n>_ns: factor 2, exactly length n (min_len == max_len == n),
 * flagged FF_TX_PRESHUFFLE, initialised by ff_tx_fft_sr_codelet_init. */
599 #define DECL_SR_CODELET_DEF(n) \
600 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
601 .name = TX_NAME_STR("fft" #n "_ns"), \
602 .function = TX_NAME(ff_tx_fft##n##_ns), \
603 .type = TX_TYPE(FFT), \
604 .flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \
605 AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
606 .factors[0] = 2, \
607 .nb_factors = 1, \
608 .min_len = n, \
609 .max_len = n, \
610 .init = TX_NAME(ff_tx_fft_sr_codelet_init), \
611 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
612 .prio = FF_TX_PRIO_BASE, \
613 };
614
/* Generates ff_tx_fft<n>_ns and its descriptor from smaller codelets.
 *
 * The generated function runs one length-n2 transform at offset 0 and two
 * length-n4 transforms at offsets n4*2 and n4*3 (each from src to dst at the
 * same offset), then merges the results in dst with ff_tx_fft_sr_combine()
 * using the table ff_tx_tab_<n> and a len argument of n4 >> 1. Every
 * instantiation in this file passes n2 = n/2 and n4 = n/4. */
615 #define DECL_SR_CODELET(n, n2, n4) \
616 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst, \
617 void *_src, ptrdiff_t stride) \
618 { \
619 TXComplex *src = _src; \
620 TXComplex *dst = _dst; \
621 const TXSample *cos = TX_TAB(ff_tx_tab_##n); \
622 \
623 TX_NAME(ff_tx_fft##n2##_ns)(s, dst, src, stride); \
624 TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride); \
625 TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride); \
626 TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1); \
627 } \
628 \
629 DECL_SR_CODELET_DEF(n)
630
/* 2-point transform: reads src[0] and src[1], writes dst[0] and dst[1].
 * The value destined for dst[1] is kept in tmp and stored last, after both
 * BF calls have read src[1] (presumably so that dst may alias src - confirm).
 * The s and stride parameters are not used. */
631 7 static void TX_NAME(ff_tx_fft2_ns)(AVTXContext *s, void *_dst,
632 void *_src, ptrdiff_t stride)
633 {
634 7 TXComplex *src = _src;
635 7 TXComplex *dst = _dst;
636 TXComplex tmp;
637
638 7 BF(tmp.re, dst[0].re, src[0].re, src[1].re);
639 7 BF(tmp.im, dst[0].im, src[0].im, src[1].im);
640 7 dst[1] = tmp;
641 7 }
642
/* 4-point transform: reads src[0..3] and writes dst[0..3] using eight BF
 * operations and the temporaries t1..t8; no table constants are involved.
 * The s and stride parameters are not used. */
643 25111017 static void TX_NAME(ff_tx_fft4_ns)(AVTXContext *s, void *_dst,
644 void *_src, ptrdiff_t stride)
645 {
646 25111017 TXComplex *src = _src;
647 25111017 TXComplex *dst = _dst;
648 TXSample t1, t2, t3, t4, t5, t6, t7, t8;
649
650 25111017 BF(t3, t1, src[0].re, src[1].re);
651 25111017 BF(t8, t6, src[3].re, src[2].re);
652 25111017 BF(dst[2].re, dst[0].re, t1, t6);
653 25111017 BF(t4, t2, src[0].im, src[1].im);
654 25111017 BF(t7, t5, src[2].im, src[3].im);
655 25111017 BF(dst[3].im, dst[1].im, t4, t8);
656 25111017 BF(dst[3].re, dst[1].re, t3, t7);
657 25111017 BF(dst[2].im, dst[0].im, t2, t5);
658 25111017 }
659
/* 8-point transform.
 *
 * Runs the 4-point transform on elements 0..3, then uses BF on src[4..7] to
 * produce dst[5], dst[7] and the temporaries t1, t2, t5, t6. BUTTERFLIES then
 * merges dst[0], dst[2], dst[4], dst[6], and TRANSFORM merges dst[1], dst[3],
 * dst[5], dst[7] with entry 1 of ff_tx_tab_8 (RESCALE(cos(2*pi/8)) per the
 * table initialiser) as both twiddle components. */
660 13088075 static void TX_NAME(ff_tx_fft8_ns)(AVTXContext *s, void *_dst,
661 void *_src, ptrdiff_t stride)
662 {
663 13088075 TXComplex *src = _src;
664 13088075 TXComplex *dst = _dst;
665 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
666 13088075 const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
667
668 13088075 TX_NAME(ff_tx_fft4_ns)(s, dst, src, stride);
669
670 13088075 BF(t1, dst[5].re, src[4].re, -src[5].re);
671 13088075 BF(t2, dst[5].im, src[4].im, -src[5].im);
672 13088075 BF(t5, dst[7].re, src[6].re, -src[7].re);
673 13088075 BF(t6, dst[7].im, src[6].im, -src[7].im);
674
675 13088075 BUTTERFLIES(dst[0], dst[2], dst[4], dst[6]);
676 13088075 TRANSFORM(dst[1], dst[3], dst[5], dst[7], cos, cos);
677 13088075 }
678
/* 16-point transform.
 *
 * Runs the 8-point transform on elements 0..7 and the 4-point transform on
 * elements 8..11 and 12..15. The first quadruple (0, 4, 8, 12) needs no
 * multiplication: t1/t2 and t5/t6 are loaded straight from dst[8] and dst[12]
 * before BUTTERFLIES. The other three quadruples go through TRANSFORM with
 * entries 1..3 of ff_tx_tab_16 as twiddles. */
679 5496690 static void TX_NAME(ff_tx_fft16_ns)(AVTXContext *s, void *_dst,
680 void *_src, ptrdiff_t stride)
681 {
682 5496690 TXComplex *src = _src;
683 5496690 TXComplex *dst = _dst;
684 5496690 const TXSample *cos = TX_TAB(ff_tx_tab_16);
685
686 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
687 5496690 TXSample cos_16_1 = cos[1];
688 5496690 TXSample cos_16_2 = cos[2];
689 5496690 TXSample cos_16_3 = cos[3];
690
691 5496690 TX_NAME(ff_tx_fft8_ns)(s, dst + 0, src + 0, stride);
692 5496690 TX_NAME(ff_tx_fft4_ns)(s, dst + 8, src + 8, stride);
693 5496690 TX_NAME(ff_tx_fft4_ns)(s, dst + 12, src + 12, stride);
694
695 5496690 t1 = dst[ 8].re;
696 5496690 t2 = dst[ 8].im;
697 5496690 t5 = dst[12].re;
698 5496690 t6 = dst[12].im;
699 5496690 BUTTERFLIES(dst[0], dst[4], dst[8], dst[12]);
700
701 5496690 TRANSFORM(dst[ 2], dst[ 6], dst[10], dst[14], cos_16_2, cos_16_2);
702 5496690 TRANSFORM(dst[ 1], dst[ 5], dst[ 9], dst[13], cos_16_1, cos_16_3);
703 5496690 TRANSFORM(dst[ 3], dst[ 7], dst[11], dst[15], cos_16_3, cos_16_1);
704 5496690 }
705
/* Lengths 2..16 have hand-written functions above and only need a
 * descriptor. Lengths 32..131072 are generated, each from the codelets of
 * half and a quarter of its length. */
706 DECL_SR_CODELET_DEF(2)
707 DECL_SR_CODELET_DEF(4)
708 DECL_SR_CODELET_DEF(8)
709 DECL_SR_CODELET_DEF(16)
710 3641792 DECL_SR_CODELET(32,16,8)
711 787694 DECL_SR_CODELET(64,32,16)
712 412074 DECL_SR_CODELET(128,64,32)
713 146322 DECL_SR_CODELET(256,128,64)
714 103104 DECL_SR_CODELET(512,256,128)
715 7655 DECL_SR_CODELET(1024,512,256)
716 2582 DECL_SR_CODELET(2048,1024,512)
717 758 DECL_SR_CODELET(4096,2048,1024)
718 433 DECL_SR_CODELET(8192,4096,2048)
719 5 DECL_SR_CODELET(16384,8192,4096)
720 DECL_SR_CODELET(32768,16384,8192)
721 DECL_SR_CODELET(65536,32768,16384)
722 DECL_SR_CODELET(131072,65536,32768)
723
/* Init callback for the generic "fft" and "fft_inplace" codelets (and the
 * tail of ff_tx_fft_inplace_small_init).
 *
 * Records whether the incoming flags request AV_TX_INPLACE and picks the
 * sub-transform map direction from that: FF_TX_MAP_SCATTER when in-place,
 * FF_TX_MAP_GATHER otherwise. The sub-transform itself is always requested
 * in-place and pre-shuffled (FF_TX_OUT_OF_PLACE cleared, AV_TX_INPLACE and
 * FF_TX_PRESHUFFLE set). When the original request was in-place, an in-place
 * map is additionally generated with ff_tx_gen_inplace_map(s, len).
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * ff_tx_init_subtx() or ff_tx_gen_inplace_map(). cd and opts are not used. */
724 262 static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s,
725 const FFTXCodelet *cd,
726 uint64_t flags,
727 FFTXCodeletOptions *opts,
728 int len, int inv,
729 const void *scale)
730 {
731 int ret;
732 262 int is_inplace = !!(flags & AV_TX_INPLACE);
733 262 FFTXCodeletOptions sub_opts = {
734
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 262 times.
262 .map_dir = is_inplace ? FF_TX_MAP_SCATTER : FF_TX_MAP_GATHER,
735 };
736
737 262 flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
738 262 flags |= AV_TX_INPLACE; /* in-place */
739 262 flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
740
741
2/2
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 238 times.
262 if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len, inv, scale)))
742 24 return ret;
743
744
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 238 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
238 if (is_inplace && (ret = ff_tx_gen_inplace_map(s, len)))
745 return ret;
746
747 238 return 0;
748 }
749
/* Init callback for the "fft_inplace_small" codelet.
 *
 * Allocates s->tmp with room for `len` elements, then clears AV_TX_INPLACE
 * from the local flags and hands over to ff_tx_fft_init(), which therefore
 * takes its gather (non-in-place) path.
 *
 * Returns AVERROR(ENOMEM) if the allocation fails, otherwise the result of
 * ff_tx_fft_init(). s->tmp is not released here if that call fails
 * (presumably the context teardown frees it - TODO confirm). */
750 8 static av_cold int TX_NAME(ff_tx_fft_inplace_small_init)(AVTXContext *s,
751 const FFTXCodelet *cd,
752 uint64_t flags,
753 FFTXCodeletOptions *opts,
754 int len, int inv,
755 const void *scale)
756 {
757
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
8 if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
758 return AVERROR(ENOMEM);
759 8 flags &= ~AV_TX_INPLACE;
760 8 return TX_NAME(ff_tx_fft_init)(s, cd, flags, opts, len, inv, scale);
761 }
762
/* Transform function of the "fft" and "fft_inplace_small" codelets.
 *
 * Step 1 (permute): copies src[map[i]] to dst1[i] for i = 0 .. len-1, where
 * map is the sub-transform's map and dst1 is s->tmp when s->flags has
 * AV_TX_INPLACE set, otherwise the caller's output buffer.
 * Step 2: runs the sub-transform s->fn[0] with dst1 as input and the caller's
 * output buffer as output. */
763 35616 static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
764 void *_src, ptrdiff_t stride)
765 {
766 35616 TXComplex *src = _src;
767
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 35616 times.
35616 TXComplex *dst1 = s->flags & AV_TX_INPLACE ? s->tmp : _dst;
768 35616 TXComplex *dst2 = _dst;
769 35616 int *map = s->sub[0].map;
770 35616 int len = s->len;
771
772 /* Compilers can't vectorize this anyway without assuming AVX2, which they
773 * generally don't, at least without -march=native -mtune=native */
774
2/2
✓ Branch 0 taken 10978294 times.
✓ Branch 1 taken 35616 times.
11013910 for (int i = 0; i < len; i++)
775 10978294 dst1[i] = src[map[i]];
776
777 35616 s->fn[0](&s->sub[0], dst2, dst1, stride);
778 35616 }
779
/* Transform function of the "fft_inplace" codelet.
 *
 * Step 1 (permute in place): s->map holds a list of starting indices; the
 * first one is consumed unconditionally and the list ends at the first zero
 * entry read after that. For each start, the inner loop follows the
 * sub-transform's map from that index, swapping the carried element `tmp`
 * with src[dst_idx] at every hop until the walk returns to the start, where
 * the carried element is stored.
 * Step 2: runs the sub-transform s->fn[0] from src to dst. */
780 static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
781 void *_src, ptrdiff_t stride)
782 {
783 TXComplex *src = _src;
784 TXComplex *dst = _dst;
785 TXComplex tmp;
786 const int *map = s->sub->map;
787 const int *inplace_idx = s->map;
788 int src_idx, dst_idx;
789
790 src_idx = *inplace_idx++;
791 do {
792 tmp = src[src_idx];
793 dst_idx = map[src_idx];
794 do {
795 FFSWAP(TXComplex, tmp, src[dst_idx]);
796 dst_idx = map[dst_idx];
797 } while (dst_idx != src_idx); /* Can be > as well, but was less predictable */
798 src[dst_idx] = tmp;
799 } while ((src_idx = *inplace_idx++));
800
801 s->fn[0](&s->sub[0], dst, src, stride);
802 }
803
/* Generic out-of-place FFT codelet: any factor, length 2 and up with no upper
 * limit (TX_LEN_UNLIMITED), base priority. Uses ff_tx_fft() with
 * ff_tx_fft_init(). */
804 static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
805 .name = TX_NAME_STR("fft"),
806 .function = TX_NAME(ff_tx_fft),
807 .type = TX_TYPE(FFT),
808 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
809 .factors[0] = TX_FACTOR_ANY,
810 .nb_factors = 1,
811 .min_len = 2,
812 .max_len = TX_LEN_UNLIMITED,
813 .init = TX_NAME(ff_tx_fft_init),
814 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
815 .prio = FF_TX_PRIO_BASE,
816 };
817
/* In-place-capable FFT codelet for lengths 2..65536. It reuses ff_tx_fft()
 * and gets its temporary buffer from ff_tx_fft_inplace_small_init().
 * Priority is FF_TX_PRIO_BASE - 256. */
818 static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
819 .name = TX_NAME_STR("fft_inplace_small"),
820 .function = TX_NAME(ff_tx_fft),
821 .type = TX_TYPE(FFT),
822 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
823 .factors[0] = TX_FACTOR_ANY,
824 .nb_factors = 1,
825 .min_len = 2,
826 .max_len = 65536,
827 .init = TX_NAME(ff_tx_fft_inplace_small_init),
828 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
829 .prio = FF_TX_PRIO_BASE - 256,
830 };
831
832 static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
833 .name = TX_NAME_STR("fft_inplace"),
834 .function = TX_NAME(ff_tx_fft_inplace),
835 .type = TX_TYPE(FFT),
836 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
837 .factors[0] = TX_FACTOR_ANY,
838 .nb_factors = 1,
839 .min_len = 2,
840 .max_len = TX_LEN_UNLIMITED,
841 .init = TX_NAME(ff_tx_fft_init),
842 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
843 .prio = FF_TX_PRIO_BASE - 512,
844 };
845
846 8 static av_cold int TX_NAME(ff_tx_fft_init_naive_small)(AVTXContext *s,
847 const FFTXCodelet *cd,
848 uint64_t flags,
849 FFTXCodeletOptions *opts,
850 int len, int inv,
851 const void *scale)
852 {
853
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 const double phase = s->inv ? 2.0*M_PI/len : -2.0*M_PI/len;
854
855
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
8 if (!(s->exp = av_malloc(len*len*sizeof(*s->exp))))
856 return AVERROR(ENOMEM);
857
858
2/2
✓ Branch 0 taken 104 times.
✓ Branch 1 taken 8 times.
112 for (int i = 0; i < len; i++) {
859
2/2
✓ Branch 0 taken 1352 times.
✓ Branch 1 taken 104 times.
1456 for (int j = 0; j < len; j++) {
860 1352 const double factor = phase*i*j;
861 1352 s->exp[i*j] = (TXComplex){
862 1352 RESCALE(cos(factor)),
863 1352 RESCALE(sin(factor)),
864 };
865 }
866 }
867
868 8 return 0;
869 }
870
871 static void TX_NAME(ff_tx_fft_naive)(AVTXContext *s, void *_dst, void *_src,
872 ptrdiff_t stride)
873 {
874 TXComplex *src = _src;
875 TXComplex *dst = _dst;
876 const int n = s->len;
877 double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
878
879 stride /= sizeof(*dst);
880
881 for (int i = 0; i < n; i++) {
882 TXComplex tmp = { 0 };
883 for (int j = 0; j < n; j++) {
884 const double factor = phase*i*j;
885 const TXComplex mult = {
886 RESCALE(cos(factor)),
887 RESCALE(sin(factor)),
888 };
889 TXComplex res;
890 CMUL3(res, src[j], mult);
891 tmp.re += res.re;
892 tmp.im += res.im;
893 }
894 dst[i*stride] = tmp;
895 }
896 }
897
898 28070 static void TX_NAME(ff_tx_fft_naive_small)(AVTXContext *s, void *_dst, void *_src,
899 ptrdiff_t stride)
900 {
901 28070 TXComplex *src = _src;
902 28070 TXComplex *dst = _dst;
903 28070 const int n = s->len;
904
905 28070 stride /= sizeof(*dst);
906
907
2/2
✓ Branch 0 taken 364910 times.
✓ Branch 1 taken 28070 times.
392980 for (int i = 0; i < n; i++) {
908 364910 TXComplex tmp = { 0 };
909
2/2
✓ Branch 0 taken 4743830 times.
✓ Branch 1 taken 364910 times.
5108740 for (int j = 0; j < n; j++) {
910 TXComplex res;
911 4743830 const TXComplex mult = s->exp[i*j];
912 4743830 CMUL3(res, src[j], mult);
913 4743830 tmp.re += res.re;
914 4743830 tmp.im += res.im;
915 }
916 364910 dst[i*stride] = tmp;
917 }
918 28070 }
919
920 static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
921 .name = TX_NAME_STR("fft_naive_small"),
922 .function = TX_NAME(ff_tx_fft_naive_small),
923 .type = TX_TYPE(FFT),
924 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
925 .factors[0] = TX_FACTOR_ANY,
926 .nb_factors = 1,
927 .min_len = 2,
928 .max_len = 1024,
929 .init = TX_NAME(ff_tx_fft_init_naive_small),
930 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
931 .prio = FF_TX_PRIO_MIN/2,
932 };
933
934 static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
935 .name = TX_NAME_STR("fft_naive"),
936 .function = TX_NAME(ff_tx_fft_naive),
937 .type = TX_TYPE(FFT),
938 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
939 .factors[0] = TX_FACTOR_ANY,
940 .nb_factors = 1,
941 .min_len = 2,
942 .max_len = TX_LEN_UNLIMITED,
943 .init = NULL,
944 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
945 .prio = FF_TX_PRIO_MIN,
946 };
947
948 4273 static av_cold int TX_NAME(ff_tx_fft_pfa_init)(AVTXContext *s,
949 const FFTXCodelet *cd,
950 uint64_t flags,
951 FFTXCodeletOptions *opts,
952 int len, int inv,
953 const void *scale)
954 {
955 4273 int ret, *tmp, ps = flags & FF_TX_PRESHUFFLE;
956 4273 FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_GATHER };
957 4273 size_t extra_tmp_len = 0;
958 int len_list[TX_MAX_DECOMPOSITIONS];
959
960
2/2
✓ Branch 1 taken 4176 times.
✓ Branch 2 taken 97 times.
4273 if ((ret = ff_tx_decompose_length(len_list, TX_TYPE(FFT), len, inv)) < 0)
961 4176 return ret;
962
963 /* Two iterations to test both orderings. */
964
1/2
✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
97 for (int i = 0; i < ret; i++) {
965 97 int len1 = len_list[i];
966 97 int len2 = len / len1;
967
968 /* Our ptwo transforms don't support striding the output. */
969
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 27 times.
97 if (len2 & (len2 - 1))
970 70 FFSWAP(int, len1, len2);
971
972 97 ff_tx_clear_ctx(s);
973
974 /* First transform */
975 97 sub_opts.map_dir = FF_TX_MAP_GATHER;
976 97 flags &= ~AV_TX_INPLACE;
977 97 flags |= FF_TX_OUT_OF_PLACE;
978 97 flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
979 97 ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
980 len1, inv, scale);
981
982
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 if (ret == AVERROR(ENOMEM)) {
983 return ret;
984
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 89 times.
97 } else if (ret < 0) { /* Try again without a preshuffle flag */
985 8 flags &= ~FF_TX_PRESHUFFLE;
986 8 ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
987 len1, inv, scale);
988
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (ret == AVERROR(ENOMEM))
989 return ret;
990
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 else if (ret < 0)
991 continue;
992 }
993
994 /* Second transform. */
995 97 sub_opts.map_dir = FF_TX_MAP_SCATTER;
996 97 flags |= FF_TX_PRESHUFFLE;
997 97 retry:
998 97 flags &= ~FF_TX_OUT_OF_PLACE;
999 97 flags |= AV_TX_INPLACE;
1000 97 ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1001 len2, inv, scale);
1002
1003
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 if (ret == AVERROR(ENOMEM)) {
1004 return ret;
1005
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 } else if (ret < 0) { /* Try again with an out-of-place transform */
1006 flags |= FF_TX_OUT_OF_PLACE;
1007 flags &= ~AV_TX_INPLACE;
1008 ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1009 len2, inv, scale);
1010 if (ret == AVERROR(ENOMEM)) {
1011 return ret;
1012 } else if (ret < 0) {
1013 if (flags & FF_TX_PRESHUFFLE) { /* Retry again without a preshuf flag */
1014 flags &= ~FF_TX_PRESHUFFLE;
1015 goto retry;
1016 } else {
1017 continue;
1018 }
1019 }
1020 }
1021
1022 /* Success */
1023 97 break;
1024 }
1025
1026 /* If nothing was successful, error out */
1027
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 if (ret < 0)
1028 return ret;
1029
1030 /* Generate PFA map */
1031
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 if ((ret = ff_tx_gen_compound_mapping(s, opts, 0,
1032 97 s->sub[0].len, s->sub[1].len)))
1033 return ret;
1034
1035
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 97 times.
97 if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
1036 return AVERROR(ENOMEM);
1037
1038 /* Flatten input map */
1039 97 tmp = (int *)s->tmp;
1040
2/2
✓ Branch 0 taken 5512 times.
✓ Branch 1 taken 97 times.
5609 for (int k = 0; k < len; k += s->sub[0].len) {
1041 5512 memcpy(tmp, &s->map[k], s->sub[0].len*sizeof(*tmp));
1042
2/2
✓ Branch 0 taken 82024 times.
✓ Branch 1 taken 5512 times.
87536 for (int i = 0; i < s->sub[0].len; i++)
1043 82024 s->map[k + i] = tmp[s->sub[0].map[i]];
1044 }
1045
1046 /* Only allocate extra temporary memory if we need it */
1047
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
97 if (!(s->sub[1].flags & AV_TX_INPLACE))
1048 extra_tmp_len = len;
1049
1/2
✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
97 else if (!ps)
1050 97 extra_tmp_len = s->sub[0].len;
1051
1052
2/4
✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 97 times.
97 if (extra_tmp_len && !(s->exp = av_malloc(extra_tmp_len*sizeof(*s->exp))))
1053 return AVERROR(ENOMEM);
1054
1055 97 return 0;
1056 }
1057
1058 11243 static void TX_NAME(ff_tx_fft_pfa)(AVTXContext *s, void *_out,
1059 void *_in, ptrdiff_t stride)
1060 {
1061 11243 const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
1062 11243 const int *in_map = s->map, *out_map = in_map + l;
1063 11243 const int *sub_map = s->sub[1].map;
1064
1/2
✓ Branch 0 taken 11243 times.
✗ Branch 1 not taken.
11243 TXComplex *tmp1 = s->sub[1].flags & AV_TX_INPLACE ? s->tmp : s->exp;
1065 11243 TXComplex *in = _in, *out = _out;
1066
1067 11243 stride /= sizeof(*out);
1068
1069
2/2
✓ Branch 0 taken 79596 times.
✓ Branch 1 taken 11243 times.
90839 for (int i = 0; i < m; i++) {
1070
2/2
✓ Branch 0 taken 733592 times.
✓ Branch 1 taken 79596 times.
813188 for (int j = 0; j < n; j++)
1071 733592 s->exp[j] = in[in_map[i*n + j]];
1072 79596 s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], s->exp, m*sizeof(TXComplex));
1073 }
1074
1075
2/2
✓ Branch 0 taken 112505 times.
✓ Branch 1 taken 11243 times.
123748 for (int i = 0; i < n; i++)
1076 112505 s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
1077
1078
2/2
✓ Branch 0 taken 733592 times.
✓ Branch 1 taken 11243 times.
744835 for (int i = 0; i < l; i++)
1079 733592 out[i*stride] = tmp1[out_map[i]];
1080 11243 }
1081
1082 static void TX_NAME(ff_tx_fft_pfa_ns)(AVTXContext *s, void *_out,
1083 void *_in, ptrdiff_t stride)
1084 {
1085 const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
1086 const int *in_map = s->map, *out_map = in_map + l;
1087 const int *sub_map = s->sub[1].map;
1088 TXComplex *tmp1 = s->sub[1].flags & AV_TX_INPLACE ? s->tmp : s->exp;
1089 TXComplex *in = _in, *out = _out;
1090
1091 stride /= sizeof(*out);
1092
1093 for (int i = 0; i < m; i++)
1094 s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], &in[i*n], m*sizeof(TXComplex));
1095
1096 for (int i = 0; i < n; i++)
1097 s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
1098
1099 for (int i = 0; i < l; i++)
1100 out[i*stride] = tmp1[out_map[i]];
1101 }
1102
1103 static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_def) = {
1104 .name = TX_NAME_STR("fft_pfa"),
1105 .function = TX_NAME(ff_tx_fft_pfa),
1106 .type = TX_TYPE(FFT),
1107 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
1108 .factors = { 7, 5, 3, 2, TX_FACTOR_ANY },
1109 .nb_factors = 2,
1110 .min_len = 2*3,
1111 .max_len = TX_LEN_UNLIMITED,
1112 .init = TX_NAME(ff_tx_fft_pfa_init),
1113 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1114 .prio = FF_TX_PRIO_BASE,
1115 };
1116
1117 static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_ns_def) = {
1118 .name = TX_NAME_STR("fft_pfa_ns"),
1119 .function = TX_NAME(ff_tx_fft_pfa_ns),
1120 .type = TX_TYPE(FFT),
1121 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |
1122 FF_TX_PRESHUFFLE,
1123 .factors = { 7, 5, 3, 2, TX_FACTOR_ANY },
1124 .nb_factors = 2,
1125 .min_len = 2*3,
1126 .max_len = TX_LEN_UNLIMITED,
1127 .init = TX_NAME(ff_tx_fft_pfa_init),
1128 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1129 .prio = FF_TX_PRIO_BASE,
1130 };
1131
1132 static av_cold int TX_NAME(ff_tx_mdct_naive_init)(AVTXContext *s,
1133 const FFTXCodelet *cd,
1134 uint64_t flags,
1135 FFTXCodeletOptions *opts,
1136 int len, int inv,
1137 const void *scale)
1138 {
1139 s->scale_d = *((SCALE_TYPE *)scale);
1140 s->scale_f = s->scale_d;
1141 return 0;
1142 }
1143
1144 static void TX_NAME(ff_tx_mdct_naive_fwd)(AVTXContext *s, void *_dst,
1145 void *_src, ptrdiff_t stride)
1146 {
1147 TXSample *src = _src;
1148 TXSample *dst = _dst;
1149 double scale = s->scale_d;
1150 int len = s->len;
1151 const double phase = M_PI/(4.0*len);
1152
1153 stride /= sizeof(*dst);
1154
1155 for (int i = 0; i < len; i++) {
1156 double sum = 0.0;
1157 for (int j = 0; j < len*2; j++) {
1158 int a = (2*j + 1 + len) * (2*i + 1);
1159 sum += UNSCALE(src[j]) * cos(a * phase);
1160 }
1161 dst[i*stride] = RESCALE(sum*scale);
1162 }
1163 }
1164
1165 static void TX_NAME(ff_tx_mdct_naive_inv)(AVTXContext *s, void *_dst,
1166 void *_src, ptrdiff_t stride)
1167 {
1168 TXSample *src = _src;
1169 TXSample *dst = _dst;
1170 double scale = s->scale_d;
1171 int len = s->len >> 1;
1172 int len2 = len*2;
1173 const double phase = M_PI/(4.0*len2);
1174
1175 stride /= sizeof(*src);
1176
1177 for (int i = 0; i < len; i++) {
1178 double sum_d = 0.0;
1179 double sum_u = 0.0;
1180 double i_d = phase * (4*len - 2*i - 1);
1181 double i_u = phase * (3*len2 + 2*i + 1);
1182 for (int j = 0; j < len2; j++) {
1183 double a = (2 * j + 1);
1184 double a_d = cos(a * i_d);
1185 double a_u = cos(a * i_u);
1186 double val = UNSCALE(src[j*stride]);
1187 sum_d += a_d * val;
1188 sum_u += a_u * val;
1189 }
1190 dst[i + 0] = RESCALE( sum_d*scale);
1191 dst[i + len] = RESCALE(-sum_u*scale);
1192 }
1193 }
1194
1195 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
1196 .name = TX_NAME_STR("mdct_naive_fwd"),
1197 .function = TX_NAME(ff_tx_mdct_naive_fwd),
1198 .type = TX_TYPE(MDCT),
1199 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1200 .factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
1201 .nb_factors = 2,
1202 .min_len = 2,
1203 .max_len = TX_LEN_UNLIMITED,
1204 .init = TX_NAME(ff_tx_mdct_naive_init),
1205 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1206 .prio = FF_TX_PRIO_MIN,
1207 };
1208
1209 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
1210 .name = TX_NAME_STR("mdct_naive_inv"),
1211 .function = TX_NAME(ff_tx_mdct_naive_inv),
1212 .type = TX_TYPE(MDCT),
1213 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1214 .factors = { 2, TX_FACTOR_ANY },
1215 .nb_factors = 2,
1216 .min_len = 2,
1217 .max_len = TX_LEN_UNLIMITED,
1218 .init = TX_NAME(ff_tx_mdct_naive_init),
1219 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1220 .prio = FF_TX_PRIO_MIN,
1221 };
1222
1223 2566 static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s,
1224 const FFTXCodelet *cd,
1225 uint64_t flags,
1226 FFTXCodeletOptions *opts,
1227 int len, int inv,
1228 const void *scale)
1229 {
1230 int ret;
1231 2566 FFTXCodeletOptions sub_opts = {
1232
2/2
✓ Branch 0 taken 361 times.
✓ Branch 1 taken 2205 times.
2566 .map_dir = !inv ? FF_TX_MAP_SCATTER : FF_TX_MAP_GATHER,
1233 };
1234
1235 2566 s->scale_d = *((SCALE_TYPE *)scale);
1236 2566 s->scale_f = s->scale_d;
1237
1238 2566 flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
1239 2566 flags |= AV_TX_INPLACE; /* in-place */
1240 2566 flags |= FF_TX_PRESHUFFLE; /* First try with an in-place transform */
1241
1242
2/2
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 2552 times.
2566 if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
1243 inv, scale))) {
1244 14 flags &= ~FF_TX_PRESHUFFLE; /* Now try with a generic FFT */
1245
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
14 if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
1246 inv, scale)))
1247 return ret;
1248 }
1249
1250 2566 s->map = av_malloc((len >> 1)*sizeof(*s->map));
1251
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2566 times.
2566 if (!s->map)
1252 return AVERROR(ENOMEM);
1253
1254 /* If we need to preshuffle copy the map from the subcontext */
1255
2/2
✓ Branch 0 taken 2552 times.
✓ Branch 1 taken 14 times.
2566 if (s->sub[0].flags & FF_TX_PRESHUFFLE) {
1256 2552 memcpy(s->map, s->sub->map, (len >> 1)*sizeof(*s->map));
1257 } else {
1258
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 for (int i = 0; i < len >> 1; i++)
1259 14 s->map[i] = i;
1260 }
1261
1262
3/4
✓ Branch 0 taken 2205 times.
✓ Branch 1 taken 361 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2566 times.
2566 if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL)))
1263 return ret;
1264
1265 /* Saves a multiply in a hot path. */
1266
2/2
✓ Branch 0 taken 2205 times.
✓ Branch 1 taken 361 times.
2566 if (inv)
1267
2/2
✓ Branch 0 taken 496714 times.
✓ Branch 1 taken 2205 times.
498919 for (int i = 0; i < (s->len >> 1); i++)
1268 496714 s->map[i] <<= 1;
1269
1270 2566 return 0;
1271 }
1272
1273 26730 static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src,
1274 ptrdiff_t stride)
1275 {
1276 26730 TXSample *src = _src, *dst = _dst;
1277 26730 TXComplex *exp = s->exp, tmp, *z = _dst;
1278 26730 const int len2 = s->len >> 1;
1279 26730 const int len4 = s->len >> 2;
1280 26730 const int len3 = len2 * 3;
1281 26730 const int *sub_map = s->map;
1282
1283 26730 stride /= sizeof(*dst);
1284
1285
2/2
✓ Branch 0 taken 7080192 times.
✓ Branch 1 taken 26730 times.
7106922 for (int i = 0; i < len2; i++) { /* Folding and pre-reindexing */
1286 7080192 const int k = 2*i;
1287 7080192 const int idx = sub_map[i];
1288
2/2
✓ Branch 0 taken 3540096 times.
✓ Branch 1 taken 3540096 times.
7080192 if (k < len2) {
1289 3540096 tmp.re = FOLD(-src[ len2 + k], src[1*len2 - 1 - k]);
1290 3540096 tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
1291 } else {
1292 3540096 tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
1293 3540096 tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);
1294 }
1295 7080192 CMUL(z[idx].im, z[idx].re, tmp.re, tmp.im, exp[i].re, exp[i].im);
1296 }
1297
1298 26730 s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
1299
1300
2/2
✓ Branch 0 taken 3540096 times.
✓ Branch 1 taken 26730 times.
3566826 for (int i = 0; i < len4; i++) {
1301 3540096 const int i0 = len4 + i, i1 = len4 - i - 1;
1302 3540096 TXComplex src1 = { z[i1].re, z[i1].im };
1303 3540096 TXComplex src0 = { z[i0].re, z[i0].im };
1304
1305 3540096 CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
1306 exp[i0].im, exp[i0].re);
1307 3540096 CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
1308 exp[i1].im, exp[i1].re);
1309 }
1310 26730 }
1311
1312 2341147 static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src,
1313 ptrdiff_t stride)
1314 {
1315 2341147 TXComplex *z = _dst, *exp = s->exp;
1316 2341147 const TXSample *src = _src, *in1, *in2;
1317 2341147 const int len2 = s->len >> 1;
1318 2341147 const int len4 = s->len >> 2;
1319 2341147 const int *sub_map = s->map;
1320
1321 2341147 stride /= sizeof(*src);
1322 2341147 in1 = src;
1323 2341147 in2 = src + ((len2*2) - 1) * stride;
1324
1325
2/2
✓ Branch 0 taken 121306358 times.
✓ Branch 1 taken 2341147 times.
123647505 for (int i = 0; i < len2; i++) {
1326 121306358 int k = sub_map[i];
1327 121306358 TXComplex tmp = { in2[-k*stride], in1[k*stride] };
1328 121306358 CMUL3(z[i], tmp, exp[i]);
1329 }
1330
1331 2341147 s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
1332
1333 2341147 exp += len2;
1334
2/2
✓ Branch 0 taken 60653178 times.
✓ Branch 1 taken 2341147 times.
62994325 for (int i = 0; i < len4; i++) {
1335 60653178 const int i0 = len4 + i, i1 = len4 - i - 1;
1336 60653178 TXComplex src1 = { z[i1].im, z[i1].re };
1337 60653178 TXComplex src0 = { z[i0].im, z[i0].re };
1338
1339 60653178 CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
1340 60653178 CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
1341 }
1342 2341147 }
1343
1344 static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
1345 .name = TX_NAME_STR("mdct_fwd"),
1346 .function = TX_NAME(ff_tx_mdct_fwd),
1347 .type = TX_TYPE(MDCT),
1348 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1349 .factors = { 2, TX_FACTOR_ANY },
1350 .nb_factors = 2,
1351 .min_len = 2,
1352 .max_len = TX_LEN_UNLIMITED,
1353 .init = TX_NAME(ff_tx_mdct_init),
1354 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1355 .prio = FF_TX_PRIO_BASE,
1356 };
1357
1358 static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
1359 .name = TX_NAME_STR("mdct_inv"),
1360 .function = TX_NAME(ff_tx_mdct_inv),
1361 .type = TX_TYPE(MDCT),
1362 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1363 .factors = { 2, TX_FACTOR_ANY },
1364 .nb_factors = 2,
1365 .min_len = 2,
1366 .max_len = TX_LEN_UNLIMITED,
1367 .init = TX_NAME(ff_tx_mdct_init),
1368 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1369 .prio = FF_TX_PRIO_BASE,
1370 };
1371
1372 69 static av_cold int TX_NAME(ff_tx_mdct_inv_full_init)(AVTXContext *s,
1373 const FFTXCodelet *cd,
1374 uint64_t flags,
1375 FFTXCodeletOptions *opts,
1376 int len, int inv,
1377 const void *scale)
1378 {
1379 int ret;
1380
1381 69 s->scale_d = *((SCALE_TYPE *)scale);
1382 69 s->scale_f = s->scale_d;
1383
1384 69 flags &= ~AV_TX_FULL_IMDCT;
1385
1386
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 69 times.
69 if ((ret = ff_tx_init_subtx(s, TX_TYPE(MDCT), flags, NULL, len, 1, scale)))
1387 return ret;
1388
1389 69 return 0;
1390 }
1391
1392 26238 static void TX_NAME(ff_tx_mdct_inv_full)(AVTXContext *s, void *_dst,
1393 void *_src, ptrdiff_t stride)
1394 {
1395 26238 int len = s->len << 1;
1396 26238 int len2 = len >> 1;
1397 26238 int len4 = len >> 2;
1398 26238 TXSample *dst = _dst;
1399
1400 26238 s->fn[0](&s->sub[0], dst + len4, _src, stride);
1401
1402 26238 stride /= sizeof(*dst);
1403
1404
2/2
✓ Branch 0 taken 3164800 times.
✓ Branch 1 taken 26238 times.
3191038 for (int i = 0; i < len4; i++) {
1405 3164800 dst[ i*stride] = -dst[(len2 - i - 1)*stride];
1406 3164800 dst[(len - i - 1)*stride] = dst[(len2 + i + 0)*stride];
1407 }
1408 26238 }
1409
1410 static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
1411 .name = TX_NAME_STR("mdct_inv_full"),
1412 .function = TX_NAME(ff_tx_mdct_inv_full),
1413 .type = TX_TYPE(MDCT),
1414 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1415 FF_TX_OUT_OF_PLACE | AV_TX_FULL_IMDCT,
1416 .factors = { 2, TX_FACTOR_ANY },
1417 .nb_factors = 2,
1418 .min_len = 2,
1419 .max_len = TX_LEN_UNLIMITED,
1420 .init = TX_NAME(ff_tx_mdct_inv_full_init),
1421 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1422 .prio = FF_TX_PRIO_BASE,
1423 };
1424
1425 1712 static av_cold int TX_NAME(ff_tx_mdct_pfa_init)(AVTXContext *s,
1426 const FFTXCodelet *cd,
1427 uint64_t flags,
1428 FFTXCodeletOptions *opts,
1429 int len, int inv,
1430 const void *scale)
1431 {
1432 int ret, sub_len;
1433 1712 FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_SCATTER };
1434
1435 1712 len >>= 1;
1436 1712 sub_len = len / cd->factors[0];
1437
1438 1712 s->scale_d = *((SCALE_TYPE *)scale);
1439 1712 s->scale_f = s->scale_d;
1440
1441 1712 flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
1442 1712 flags |= AV_TX_INPLACE; /* in-place */
1443 1712 flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
1444
1445
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1712 times.
1712 if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1446 sub_len, inv, scale)))
1447 return ret;
1448
1449
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1712 times.
1712 if ((ret = ff_tx_gen_compound_mapping(s, opts, s->inv, cd->factors[0], sub_len)))
1450 return ret;
1451
1452 /* Our 15-point transform is also a compound one, so embed its input map */
1453
2/2
✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 620 times.
1712 if (cd->factors[0] == 15)
1454
6/6
✓ Branch 0 taken 283200 times.
✓ Branch 1 taken 94400 times.
✓ Branch 2 taken 94400 times.
✓ Branch 3 taken 18880 times.
✓ Branch 4 taken 18880 times.
✓ Branch 5 taken 1092 times.
397572 TX_EMBED_INPUT_PFA_MAP(s->map, len, 3, 5);
1455
1456
2/4
✓ Branch 0 taken 1712 times.
✗ Branch 1 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1712 times.
1712 if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL)))
1457 return ret;
1458
1459 /* Saves multiplies in loops. */
1460
2/2
✓ Branch 0 taken 417008 times.
✓ Branch 1 taken 1712 times.
418720 for (int i = 0; i < len; i++)
1461 417008 s->map[i] <<= 1;
1462
1463
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1712 times.
1712 if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
1464 return AVERROR(ENOMEM);
1465
1466 1712 TX_TAB(ff_tx_init_tabs)(len / sub_len);
1467
1468 1712 return 0;
1469 }
1470
1471 #define DECL_COMP_IMDCT(N) \
1472 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst, \
1473 void *_src, ptrdiff_t stride) \
1474 { \
1475 TXComplex fft##N##in[N]; \
1476 TXComplex *z = _dst, *exp = s->exp; \
1477 const TXSample *src = _src, *in1, *in2; \
1478 const int len4 = s->len >> 2; \
1479 const int len2 = s->len >> 1; \
1480 const int m = s->sub->len; \
1481 const int *in_map = s->map, *out_map = in_map + N*m; \
1482 const int *sub_map = s->sub->map; \
1483 \
1484 stride /= sizeof(*src); /* To convert it from bytes */ \
1485 in1 = src; \
1486 in2 = src + ((N*m*2) - 1) * stride; \
1487 \
1488 for (int i = 0; i < len2; i += N) { \
1489 for (int j = 0; j < N; j++) { \
1490 const int k = in_map[j]; \
1491 TXComplex tmp = { in2[-k*stride], in1[k*stride] }; \
1492 CMUL3(fft##N##in[j], tmp, exp[j]); \
1493 } \
1494 fft##N(s->tmp + *(sub_map++), fft##N##in, m); \
1495 exp += N; \
1496 in_map += N; \
1497 } \
1498 \
1499 for (int i = 0; i < N; i++) \
1500 s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1501 \
1502 for (int i = 0; i < len4; i++) { \
1503 const int i0 = len4 + i, i1 = len4 - i - 1; \
1504 const int s0 = out_map[i0], s1 = out_map[i1]; \
1505 TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
1506 TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
1507 \
1508 CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
1509 CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
1510 } \
1511 } \
1512 \
1513 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \
1514 .name = TX_NAME_STR("mdct_pfa_" #N "xM_inv"), \
1515 .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv), \
1516 .type = TX_TYPE(MDCT), \
1517 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
1518 .factors = { N, TX_FACTOR_ANY }, \
1519 .nb_factors = 2, \
1520 .min_len = N*2, \
1521 .max_len = TX_LEN_UNLIMITED, \
1522 .init = TX_NAME(ff_tx_mdct_pfa_init), \
1523 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1524 .prio = FF_TX_PRIO_BASE, \
1525 };
1526
1527 DECL_COMP_IMDCT(3)
1528
8/8
✓ Branch 0 taken 32800 times.
✓ Branch 1 taken 6560 times.
✓ Branch 3 taken 6560 times.
✓ Branch 4 taken 205 times.
✓ Branch 6 taken 1025 times.
✓ Branch 7 taken 205 times.
✓ Branch 8 taken 16400 times.
✓ Branch 9 taken 205 times.
56990 DECL_COMP_IMDCT(5)
1529 DECL_COMP_IMDCT(7)
1530 DECL_COMP_IMDCT(9)
1531
8/8
✓ Branch 0 taken 13384380 times.
✓ Branch 1 taken 892292 times.
✓ Branch 3 taken 892292 times.
✓ Branch 4 taken 100543 times.
✓ Branch 6 taken 1508145 times.
✓ Branch 7 taken 100543 times.
✓ Branch 8 taken 6692190 times.
✓ Branch 9 taken 100543 times.
22577550 DECL_COMP_IMDCT(15)
1532
1533 #define DECL_COMP_MDCT(N) \
1534 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst, \
1535 void *_src, ptrdiff_t stride) \
1536 { \
1537 TXComplex fft##N##in[N]; \
1538 TXSample *src = _src, *dst = _dst; \
1539 TXComplex *exp = s->exp, tmp; \
1540 const int m = s->sub->len; \
1541 const int len4 = N*m; \
1542 const int len3 = len4 * 3; \
1543 const int len8 = s->len >> 2; \
1544 const int *in_map = s->map, *out_map = in_map + N*m; \
1545 const int *sub_map = s->sub->map; \
1546 \
1547 stride /= sizeof(*dst); \
1548 \
1549 for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
1550 for (int j = 0; j < N; j++) { \
1551 const int k = in_map[i*N + j]; \
1552 if (k < len4) { \
1553 tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
1554 tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
1555 } else { \
1556 tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
1557 tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
1558 } \
1559 CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
1560 exp[k >> 1].re, exp[k >> 1].im); \
1561 } \
1562 fft##N(s->tmp + sub_map[i], fft##N##in, m); \
1563 } \
1564 \
1565 for (int i = 0; i < N; i++) \
1566 s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1567 \
1568 for (int i = 0; i < len8; i++) { \
1569 const int i0 = len8 + i, i1 = len8 - i - 1; \
1570 const int s0 = out_map[i0], s1 = out_map[i1]; \
1571 TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
1572 TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
1573 \
1574 CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
1575 exp[i0].im, exp[i0].re); \
1576 CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
1577 exp[i1].im, exp[i1].re); \
1578 } \
1579 } \
1580 \
1581 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \
1582 .name = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"), \
1583 .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd), \
1584 .type = TX_TYPE(MDCT), \
1585 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1586 .factors = { N, TX_FACTOR_ANY }, \
1587 .nb_factors = 2, \
1588 .min_len = N*2, \
1589 .max_len = TX_LEN_UNLIMITED, \
1590 .init = TX_NAME(ff_tx_mdct_pfa_init), \
1591 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1592 .prio = FF_TX_PRIO_BASE, \
1593 };
1594
1595 DECL_COMP_MDCT(3)
1596 DECL_COMP_MDCT(5)
1597 DECL_COMP_MDCT(7)
1598 DECL_COMP_MDCT(9)
1599 DECL_COMP_MDCT(15)
1600
1601 59 static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
1602 const FFTXCodelet *cd,
1603 uint64_t flags,
1604 FFTXCodeletOptions *opts,
1605 int len, int inv,
1606 const void *scale)
1607 {
1608 int ret;
1609 double f, m;
1610 TXSample *tab;
1611 59 uint64_t r2r = flags & AV_TX_REAL_TO_REAL;
1612 59 int len4 = FFALIGN(len, 4) / 4;
1613
1614 59 s->scale_d = *((SCALE_TYPE *)scale);
1615 59 s->scale_f = s->scale_d;
1616
1617 59 flags &= ~(AV_TX_REAL_TO_REAL | AV_TX_REAL_TO_IMAGINARY);
1618
1619
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 59 times.
59 if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
1620 return ret;
1621
1622
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 59 times.
59 if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
1623 return AVERROR(ENOMEM);
1624
1625 59 tab = (TXSample *)s->exp;
1626
1627 59 f = 2*M_PI/len;
1628
1629
2/2
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 30 times.
59 m = (inv ? 2*s->scale_d : s->scale_d);
1630
1631
2/2
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 30 times.
59 *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
1632
2/2
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 30 times.
59 *tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
1633 59 *tab++ = RESCALE( m);
1634 59 *tab++ = RESCALE(-m);
1635
1636 59 *tab++ = RESCALE( (0.5 - 0.0) * m);
1637
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 51 times.
59 if (r2r)
1638 8 *tab++ = 1 / s->scale_f;
1639 else
1640 51 *tab++ = RESCALE( (0.0 - 0.5) * m);
1641 59 *tab++ = RESCALE( (0.5 - inv) * m);
1642 59 *tab++ = RESCALE(-(0.5 - inv) * m);
1643
1644
2/2
✓ Branch 0 taken 46088 times.
✓ Branch 1 taken 59 times.
46147 for (int i = 0; i < len4; i++)
1645 46088 *tab++ = RESCALE(cos(i*f));
1646
1647 59 tab = ((TXSample *)s->exp) + len4 + 8;
1648
1649
2/2
✓ Branch 0 taken 46088 times.
✓ Branch 1 taken 59 times.
46147 for (int i = 0; i < len4; i++)
1650
2/2
✓ Branch 0 taken 35072 times.
✓ Branch 1 taken 11016 times.
46088 *tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);
1651
1652 59 return 0;
1653 }
1654
/*
 * DECL_RDFT(n, inv) expands to one transform function, ff_tx_rdft_<n>, and
 * the FFTXCodelet (ff_tx_rdft_<n>_def) describing it.
 *
 * Table layout read from s->exp, viewed as TXSample:
 *   fact[0..7]  - eight scaling constants
 *   tcos[]      - starts at fact + 8
 *   tsin[]      - starts len4 entries after tcos (len4 = s->len >> 2)
 *
 * inv == 0: the sub-transform s->fn[0] is run from _src into _dst first, the
 *           result is then reworked in place in _dst, and finally data[0].im
 *           is moved to data[len2].re with both imaginary parts zeroed.
 * inv != 0: _src is reworked in place (after copying data[len2].re into
 *           data[0].im) and the sub-transform is then run from _src to _dst.
 *
 * The `stride` argument is not read by the generated function; the
 * sub-transform is always called with sizeof(TXComplex).
 *
 * The codelet is flagged FF_TX_INVERSE_ONLY or FF_TX_FORWARD_ONLY depending
 * on `inv`, and uses ff_tx_rdft_init as its init callback.
 *
 * Instantiated below as r2c (inv = 0) and c2r (inv = 1).
 */
1655 #define DECL_RDFT(n, inv) \
1656 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1657 void *_src, ptrdiff_t stride) \
1658 { \
1659 const int len2 = s->len >> 1; \
1660 const int len4 = s->len >> 2; \
1661 const TXSample *fact = (void *)s->exp; \
1662 const TXSample *tcos = fact + 8; \
1663 const TXSample *tsin = tcos + len4; \
1664 TXComplex *data = inv ? _src : _dst; \
1665 TXComplex t[3]; \
1666 \
1667 if (!inv) \
1668 s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex)); \
1669 else \
1670 data[0].im = data[len2].re; \
1671 \
1672 /* The DC value's both components are real, but we need to change them \
1673 * into complex values. Also, the middle of the array is special-cased. \
1674 * These operations can be done before or after the loop. */ \
1675 t[0].re = data[0].re; \
1676 data[0].re = t[0].re + data[0].im; \
1677 data[0].im = t[0].re - data[0].im; \
1678 data[ 0].re = MULT(fact[0], data[ 0].re); \
1679 data[ 0].im = MULT(fact[1], data[ 0].im); \
1680 data[len4].re = MULT(fact[2], data[len4].re); \
1681 data[len4].im = MULT(fact[3], data[len4].im); \
1682 \
1683 for (int i = 1; i < len4; i++) { \
1684 /* Separate even and odd FFTs */ \
1685 t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re)); \
1686 t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im)); \
1687 t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im)); \
1688 t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re)); \
1689 \
1690 /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
1691 CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]); \
1692 \
1693 data[ i].re = t[0].re + t[2].re; \
1694 data[ i].im = t[2].im - t[0].im; \
1695 data[len2 - i].re = t[0].re - t[2].re; \
1696 data[len2 - i].im = t[2].im + t[0].im; \
1697 } \
1698 \
1699 if (inv) { \
1700 s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex)); \
1701 } else { \
1702 /* Move [0].im to the last position, as convention requires */ \
1703 data[len2].re = data[0].im; \
1704 data[ 0].im = data[len2].im = 0; \
1705 } \
1706 } \
1707 \
1708 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1709 .name = TX_NAME_STR("rdft_" #n), \
1710 .function = TX_NAME(ff_tx_rdft_ ##n), \
1711 .type = TX_TYPE(RDFT), \
1712 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
1713 (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), \
1714 .factors = { 4, TX_FACTOR_ANY }, \
1715 .nb_factors = 2, \
1716 .min_len = 4, \
1717 .max_len = TX_LEN_UNLIMITED, \
1718 .init = TX_NAME(ff_tx_rdft_init), \
1719 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1720 .prio = FF_TX_PRIO_BASE, \
1721 };
1722
1723
2/2
✓ Branch 1 taken 1701749 times.
✓ Branch 2 taken 17867 times.
1719616 DECL_RDFT(r2c, 0)
1724
2/2
✓ Branch 0 taken 2096993 times.
✓ Branch 1 taken 16927 times.
2113920 DECL_RDFT(c2r, 1)
1725
/*
 * DECL_RDFT_HALF(n, mode, mod2) expands to a forward-only transform function,
 * ff_tx_rdft_<n>, and its FFTXCodelet (ff_tx_rdft_<n>_def).
 *
 * The sub-transform s->fn[0] is run from _src into _dst, after which _dst is
 * reworked in place. The same buffer is read as TXComplex through `data` and
 * written as TXSample through `out`.
 *
 *   mode - AV_TX_REAL_TO_REAL or AV_TX_REAL_TO_IMAGINARY; selects which
 *          combination of the butterfly terms is stored and at which
 *          output index. The value is also OR-ed into the codelet flags.
 *   mod2 - when non-zero, the pair data[len4] / data[len4 + 1] is combined
 *          separately into tmp_mid before the main loop and stored after it;
 *          the codelet then registers factor and min_len 2 instead of 4.
 *          tmp_mid is only assigned and only read when mod2 is non-zero,
 *          hence the av_unused marker.
 *
 * Table layout read from s->exp, viewed as TXSample: fact[0..7], then tcos[],
 * then tsin[] starting FFALIGN(len, 4)/4 entries after tcos.
 *
 * The main loop declares its own tmp/sf/sl, which shadow the function-scope
 * variables of the same names used by the mod2 pre-computation.
 *
 * NOTE(review): the twiddle products here use plain `*` rather than MULT() as
 * DECL_RDFT does - presumably these codelets are only built for
 * floating-point sample types; confirm before enabling a fixed-point build.
 *
 * The `stride` argument is not read by the generated function.
 */
1726 #define DECL_RDFT_HALF(n, mode, mod2) \
1727 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1728 void *_src, ptrdiff_t stride) \
1729 { \
1730 const int len = s->len; \
1731 const int len2 = len >> 1; \
1732 const int len4 = len >> 2; \
1733 const int aligned_len4 = FFALIGN(len, 4)/4; \
1734 const TXSample *fact = (void *)s->exp; \
1735 const TXSample *tcos = fact + 8; \
1736 const TXSample *tsin = tcos + aligned_len4; \
1737 TXComplex *data = _dst; \
1738 TXSample *out = _dst; /* Half-complex is forward-only */ \
1739 TXSample tmp_dc; \
1740 av_unused TXSample tmp_mid; \
1741 TXSample tmp[4]; \
1742 TXComplex sf, sl; \
1743 \
1744 s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); \
1745 \
1746 tmp_dc = data[0].re; \
1747 data[ 0].re = tmp_dc + data[0].im; \
1748 tmp_dc = tmp_dc - data[0].im; \
1749 \
1750 data[ 0].re = MULT(fact[0], data[ 0].re); \
1751 tmp_dc = MULT(fact[1], tmp_dc); \
1752 data[len4].re = MULT(fact[2], data[len4].re); \
1753 \
1754 if (!mod2) { \
1755 data[len4].im = MULT(fact[3], data[len4].im); \
1756 } else { \
1757 sf = data[len4]; \
1758 sl = data[len4 + 1]; \
1759 if (mode == AV_TX_REAL_TO_REAL) \
1760 tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1761 else \
1762 tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1763 tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1764 tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1765 \
1766 if (mode == AV_TX_REAL_TO_REAL) { \
1767 tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
1768 tmp_mid = (tmp[0] - tmp[3]); \
1769 } else { \
1770 tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
1771 tmp_mid = (tmp[0] + tmp[3]); \
1772 } \
1773 } \
1774 \
1775 /* NOTE: unrolling this breaks non-mod8 lengths */ \
1776 for (int i = 1; i <= len4; i++) { \
1777 TXSample tmp[4]; \
1778 TXComplex sf = data[i]; \
1779 TXComplex sl = data[len2 - i]; \
1780 \
1781 if (mode == AV_TX_REAL_TO_REAL) \
1782 tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1783 else \
1784 tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1785 \
1786 tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1787 tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1788 \
1789 if (mode == AV_TX_REAL_TO_REAL) { \
1790 tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
1791 out[i] = (tmp[0] + tmp[3]); \
1792 out[len - i] = (tmp[0] - tmp[3]); \
1793 } else { \
1794 tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
1795 out[i - 1] = (tmp[3] - tmp[0]); \
1796 out[len - i - 1] = (tmp[0] + tmp[3]); \
1797 } \
1798 } \
1799 \
1800 for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
1801 out[len2 - i] = out[len - i]; \
1802 \
1803 if (mode == AV_TX_REAL_TO_REAL) { \
1804 out[len2] = tmp_dc; \
1805 if (mod2) \
1806 out[len4 + 1] = tmp_mid * fact[5]; \
1807 } else if (mod2) { \
1808 out[len4] = tmp_mid; \
1809 } \
1810 } \
1811 \
1812 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1813 .name = TX_NAME_STR("rdft_" #n), \
1814 .function = TX_NAME(ff_tx_rdft_ ##n), \
1815 .type = TX_TYPE(RDFT), \
1816 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | \
1817 FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1818 .factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, \
1819 .nb_factors = 2, \
1820 .min_len = 2 + 2*(!mod2), \
1821 .max_len = TX_LEN_UNLIMITED, \
1822 .init = TX_NAME(ff_tx_rdft_init), \
1823 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1824 .prio = FF_TX_PRIO_BASE, \
1825 };
1826
1827 DECL_RDFT_HALF(r2r, AV_TX_REAL_TO_REAL, 0)
1828
4/4
✓ Branch 1 taken 174034 times.
✓ Branch 2 taken 5614 times.
✓ Branch 3 taken 168420 times.
✓ Branch 4 taken 5614 times.
348068 DECL_RDFT_HALF(r2r_mod2, AV_TX_REAL_TO_REAL, 1)
1829 DECL_RDFT_HALF(r2i, AV_TX_REAL_TO_IMAGINARY, 0)
1830
4/4
✓ Branch 1 taken 179648 times.
✓ Branch 2 taken 5614 times.
✓ Branch 3 taken 179648 times.
✓ Branch 4 taken 5614 times.
364910 DECL_RDFT_HALF(r2i_mod2, AV_TX_REAL_TO_IMAGINARY, 1)
1831
1832 4 static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
1833 const FFTXCodelet *cd,
1834 uint64_t flags,
1835 FFTXCodeletOptions *opts,
1836 int len, int inv,
1837 const void *scale)
1838 {
1839 int ret;
1840 double freq;
1841 TXSample *tab;
1842 4 SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
1843
1844
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (inv) {
1845 4 len *= 2;
1846 4 s->len *= 2;
1847 4 rsc *= 0.5;
1848 }
1849
1850
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL, len, inv, &rsc)))
1851 return ret;
1852
1853 4 s->exp = av_malloc((len/2)*3*sizeof(TXSample));
1854
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (!s->exp)
1855 return AVERROR(ENOMEM);
1856
1857 4 tab = (TXSample *)s->exp;
1858
1859 4 freq = M_PI/(len*2);
1860
1861
2/2
✓ Branch 0 taken 6144 times.
✓ Branch 1 taken 4 times.
6148 for (int i = 0; i < len; i++)
1862
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6144 times.
6144 tab[i] = RESCALE(cos(i*freq)*(!inv + 1));
1863
1864
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (inv) {
1865
2/2
✓ Branch 0 taken 3072 times.
✓ Branch 1 taken 4 times.
3076 for (int i = 0; i < len/2; i++)
1866 3072 tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));
1867 } else {
1868 for (int i = 0; i < len/2; i++)
1869 tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
1870 }
1871
1872 4 return 0;
1873 }
1874
1875 static void TX_NAME(ff_tx_dctII)(AVTXContext *s, void *_dst,
1876 void *_src, ptrdiff_t stride)
1877 {
1878 TXSample *dst = _dst;
1879 TXSample *src = _src;
1880 const int len = s->len;
1881 const int len2 = len >> 1;
1882 const TXSample *exp = (void *)s->exp;
1883 TXSample next;
1884 #ifdef TX_INT32
1885 int64_t tmp1, tmp2;
1886 #else
1887 TXSample tmp1, tmp2;
1888 #endif
1889
1890 for (int i = 0; i < len2; i++) {
1891 TXSample in1 = src[i];
1892 TXSample in2 = src[len - i - 1];
1893 TXSample s = exp[len + i];
1894
1895 #ifdef TX_INT32
1896 tmp1 = in1 + in2;
1897 tmp2 = in1 - in2;
1898
1899 tmp1 >>= 1;
1900 tmp2 *= s;
1901
1902 tmp2 = (tmp2 + 0x40000000) >> 31;
1903 #else
1904 tmp1 = (in1 + in2)*0.5;
1905 tmp2 = (in1 - in2)*s;
1906 #endif
1907
1908 src[i] = tmp1 + tmp2;
1909 src[len - i - 1] = tmp1 - tmp2;
1910 }
1911
1912 s->fn[0](&s->sub[0], dst, src, sizeof(TXComplex));
1913
1914 next = dst[len];
1915
1916 for (int i = len - 2; i > 0; i -= 2) {
1917 TXSample tmp;
1918
1919 CMUL(tmp, dst[i], exp[len - i], exp[i], dst[i + 0], dst[i + 1]);
1920
1921 dst[i + 1] = next;
1922
1923 next += tmp;
1924 }
1925
1926 #ifdef TX_INT32
1927 tmp1 = ((int64_t)exp[0]) * ((int64_t)dst[0]);
1928 dst[0] = (tmp1 + 0x40000000) >> 31;
1929 #else
1930 dst[0] = exp[0] * dst[0];
1931 #endif
1932 dst[1] = next;
1933 }
1934
1935 238 static void TX_NAME(ff_tx_dctIII)(AVTXContext *s, void *_dst,
1936 void *_src, ptrdiff_t stride)
1937 {
1938 238 TXSample *dst = _dst;
1939 238 TXSample *src = _src;
1940 238 const int len = s->len;
1941 238 const int len2 = len >> 1;
1942 238 const TXSample *exp = (void *)s->exp;
1943 #ifdef TX_INT32
1944 int64_t tmp1, tmp2 = src[len - 1];
1945 tmp2 = (2*tmp2 + 0x40000000) >> 31;
1946 #else
1947 238 TXSample tmp1, tmp2 = 2*src[len - 1];
1948 #endif
1949
1950 238 src[len] = tmp2;
1951
1952
2/2
✓ Branch 0 taken 121618 times.
✓ Branch 1 taken 238 times.
121856 for (int i = len - 2; i >= 2; i -= 2) {
1953 121618 TXSample val1 = src[i - 0];
1954 121618 TXSample val2 = src[i - 1] - src[i + 1];
1955
1956 121618 CMUL(src[i + 1], src[i], exp[len - i], exp[i], val1, val2);
1957 }
1958
1959 238 s->fn[0](&s->sub[0], dst, src, sizeof(float));
1960
1961
2/2
✓ Branch 0 taken 121856 times.
✓ Branch 1 taken 238 times.
122094 for (int i = 0; i < len2; i++) {
1962 121856 TXSample in1 = dst[i];
1963 121856 TXSample in2 = dst[len - i - 1];
1964 121856 TXSample c = exp[len + i];
1965
1966 121856 tmp1 = in1 + in2;
1967 121856 tmp2 = in1 - in2;
1968 121856 tmp2 *= c;
1969 #ifdef TX_INT32
1970 tmp2 = (tmp2 + 0x40000000) >> 31;
1971 #endif
1972
1973 121856 dst[i] = tmp1 + tmp2;
1974 121856 dst[len - i - 1] = tmp1 - tmp2;
1975 }
1976 238 }
1977
1978 static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
1979 .name = TX_NAME_STR("dctII"),
1980 .function = TX_NAME(ff_tx_dctII),
1981 .type = TX_TYPE(DCT),
1982 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1983 FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1984 .factors = { 2, TX_FACTOR_ANY },
1985 .min_len = 2,
1986 .max_len = TX_LEN_UNLIMITED,
1987 .init = TX_NAME(ff_tx_dct_init),
1988 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1989 .prio = FF_TX_PRIO_BASE,
1990 };
1991
1992 static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
1993 .name = TX_NAME_STR("dctIII"),
1994 .function = TX_NAME(ff_tx_dctIII),
1995 .type = TX_TYPE(DCT),
1996 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1997 FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1998 .factors = { 2, TX_FACTOR_ANY },
1999 .min_len = 2,
2000 .max_len = TX_LEN_UNLIMITED,
2001 .init = TX_NAME(ff_tx_dct_init),
2002 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2003 .prio = FF_TX_PRIO_BASE,
2004 };
2005
2006 16 static av_cold int TX_NAME(ff_tx_dcstI_init)(AVTXContext *s,
2007 const FFTXCodelet *cd,
2008 uint64_t flags,
2009 FFTXCodeletOptions *opts,
2010 int len, int inv,
2011 const void *scale)
2012 {
2013 int ret;
2014 16 SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
2015
2016
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (inv) {
2017 len *= 2;
2018 s->len *= 2;
2019 rsc *= 0.5;
2020 }
2021
2022 /* We want a half-complex RDFT */
2023
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 flags |= cd->type == TX_TYPE(DCT_I) ? AV_TX_REAL_TO_REAL :
2024 AV_TX_REAL_TO_IMAGINARY;
2025
2026
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
2027
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 (len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
2028 0, &rsc)))
2029 return ret;
2030
2031 16 s->tmp = av_mallocz((len + 1)*2*sizeof(TXSample));
2032
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (!s->tmp)
2033 return AVERROR(ENOMEM);
2034
2035 16 return 0;
2036 }
2037
2038 5614 static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst,
2039 void *_src, ptrdiff_t stride)
2040 {
2041 5614 TXSample *dst = _dst;
2042 5614 TXSample *src = _src;
2043 5614 const int len = s->len - 1;
2044 5614 TXSample *tmp = (TXSample *)s->tmp;
2045
2046 5614 stride /= sizeof(TXSample);
2047
2048
2/2
✓ Branch 0 taken 353682 times.
✓ Branch 1 taken 5614 times.
359296 for (int i = 0; i < len; i++)
2049 353682 tmp[i] = tmp[2*len - i] = src[i * stride];
2050
2051 5614 tmp[len] = src[len * stride]; /* Middle */
2052
2053 5614 s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
2054 5614 }
2055
2056 5614 static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst,
2057 void *_src, ptrdiff_t stride)
2058 {
2059 5614 TXSample *dst = _dst;
2060 5614 TXSample *src = _src;
2061 5614 const int len = s->len + 1;
2062 5614 TXSample *tmp = (void *)s->tmp;
2063
2064 5614 stride /= sizeof(TXSample);
2065
2066 5614 tmp[0] = 0;
2067
2068
2/2
✓ Branch 0 taken 359296 times.
✓ Branch 1 taken 5614 times.
364910 for (int i = 1; i < len; i++) {
2069 359296 TXSample a = src[(i - 1) * stride];
2070 359296 tmp[i] = -a;
2071 359296 tmp[2*len - i] = a;
2072 }
2073
2074 5614 tmp[len] = 0; /* i == n, Nyquist */
2075
2076 5614 s->fn[0](&s->sub[0], dst, tmp, sizeof(float));
2077 5614 }
2078
2079 static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
2080 .name = TX_NAME_STR("dctI"),
2081 .function = TX_NAME(ff_tx_dctI),
2082 .type = TX_TYPE(DCT_I),
2083 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
2084 .factors = { 2, TX_FACTOR_ANY },
2085 .nb_factors = 2,
2086 .min_len = 2,
2087 .max_len = TX_LEN_UNLIMITED,
2088 .init = TX_NAME(ff_tx_dcstI_init),
2089 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2090 .prio = FF_TX_PRIO_BASE,
2091 };
2092
2093 static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
2094 .name = TX_NAME_STR("dstI"),
2095 .function = TX_NAME(ff_tx_dstI),
2096 .type = TX_TYPE(DST_I),
2097 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
2098 .factors = { 2, TX_FACTOR_ANY },
2099 .nb_factors = 2,
2100 .min_len = 2,
2101 .max_len = TX_LEN_UNLIMITED,
2102 .init = TX_NAME(ff_tx_dcstI_init),
2103 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2104 .prio = FF_TX_PRIO_BASE,
2105 };
2106
2107 5212 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
2108 {
2109 5212 int off = 0;
2110 5212 int len4 = s->len >> 1;
2111 5212 double scale = s->scale_d;
2112
2/2
✓ Branch 0 taken 1459 times.
✓ Branch 1 taken 3753 times.
5212 const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
2113
2/2
✓ Branch 0 taken 4851 times.
✓ Branch 1 taken 361 times.
5212 size_t alloc = pre_tab ? 2*len4 : len4;
2114
2115
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 5212 times.
5212 if (!(s->exp = av_malloc_array(alloc, sizeof(*s->exp))))
2116 return AVERROR(ENOMEM);
2117
2118 5212 scale = sqrt(fabs(scale));
2119
2120
2/2
✓ Branch 0 taken 4851 times.
✓ Branch 1 taken 361 times.
5212 if (pre_tab)
2121 4851 off = len4;
2122
2123
2/2
✓ Branch 0 taken 1265938 times.
✓ Branch 1 taken 5212 times.
1271150 for (int i = 0; i < len4; i++) {
2124 1265938 const double alpha = M_PI_2 * (i + theta) / len4;
2125 1265938 s->exp[off + i] = (TXComplex){ RESCALE(cos(alpha) * scale),
2126 1265938 RESCALE(sin(alpha) * scale) };
2127 }
2128
2129
2/2
✓ Branch 0 taken 4851 times.
✓ Branch 1 taken 361 times.
5212 if (pre_tab)
2130
2/2
✓ Branch 0 taken 1097058 times.
✓ Branch 1 taken 4851 times.
1101909 for (int i = 0; i < len4; i++)
2131 1097058 s->exp[i] = s->exp[len4 + pre_tab[i]];
2132
2133 5212 return 0;
2134 }
2135
2136 const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
2137 /* Split-Radix codelets */
2138 &TX_NAME(ff_tx_fft2_ns_def),
2139 &TX_NAME(ff_tx_fft4_ns_def),
2140 &TX_NAME(ff_tx_fft8_ns_def),
2141 &TX_NAME(ff_tx_fft16_ns_def),
2142 &TX_NAME(ff_tx_fft32_ns_def),
2143 &TX_NAME(ff_tx_fft64_ns_def),
2144 &TX_NAME(ff_tx_fft128_ns_def),
2145 &TX_NAME(ff_tx_fft256_ns_def),
2146 &TX_NAME(ff_tx_fft512_ns_def),
2147 &TX_NAME(ff_tx_fft1024_ns_def),
2148 &TX_NAME(ff_tx_fft2048_ns_def),
2149 &TX_NAME(ff_tx_fft4096_ns_def),
2150 &TX_NAME(ff_tx_fft8192_ns_def),
2151 &TX_NAME(ff_tx_fft16384_ns_def),
2152 &TX_NAME(ff_tx_fft32768_ns_def),
2153 &TX_NAME(ff_tx_fft65536_ns_def),
2154 &TX_NAME(ff_tx_fft131072_ns_def),
2155
2156 /* Prime factor codelets */
2157 &TX_NAME(ff_tx_fft3_ns_def),
2158 &TX_NAME(ff_tx_fft5_ns_def),
2159 &TX_NAME(ff_tx_fft7_ns_def),
2160 &TX_NAME(ff_tx_fft9_ns_def),
2161 &TX_NAME(ff_tx_fft15_ns_def),
2162
2163 /* We get these for free */
2164 &TX_NAME(ff_tx_fft3_fwd_def),
2165 &TX_NAME(ff_tx_fft5_fwd_def),
2166 &TX_NAME(ff_tx_fft7_fwd_def),
2167 &TX_NAME(ff_tx_fft9_fwd_def),
2168
2169 /* Standalone transforms */
2170 &TX_NAME(ff_tx_fft_def),
2171 &TX_NAME(ff_tx_fft_inplace_def),
2172 &TX_NAME(ff_tx_fft_inplace_small_def),
2173 &TX_NAME(ff_tx_fft_pfa_def),
2174 &TX_NAME(ff_tx_fft_pfa_ns_def),
2175 &TX_NAME(ff_tx_fft_naive_def),
2176 &TX_NAME(ff_tx_fft_naive_small_def),
2177 &TX_NAME(ff_tx_mdct_fwd_def),
2178 &TX_NAME(ff_tx_mdct_inv_def),
2179 &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
2180 &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
2181 &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
2182 &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
2183 &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
2184 &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
2185 &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
2186 &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
2187 &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
2188 &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
2189 &TX_NAME(ff_tx_mdct_naive_fwd_def),
2190 &TX_NAME(ff_tx_mdct_naive_inv_def),
2191 &TX_NAME(ff_tx_mdct_inv_full_def),
2192 &TX_NAME(ff_tx_rdft_r2c_def),
2193 &TX_NAME(ff_tx_rdft_r2r_def),
2194 &TX_NAME(ff_tx_rdft_r2r_mod2_def),
2195 &TX_NAME(ff_tx_rdft_r2i_def),
2196 &TX_NAME(ff_tx_rdft_r2i_mod2_def),
2197 &TX_NAME(ff_tx_rdft_c2r_def),
2198 &TX_NAME(ff_tx_dctII_def),
2199 &TX_NAME(ff_tx_dctIII_def),
2200 &TX_NAME(ff_tx_dctI_def),
2201 &TX_NAME(ff_tx_dstI_def),
2202
2203 NULL,
2204 };
2205