1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2013-2014 Mozilla Corporation |
3 |
|
|
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> |
4 |
|
|
* |
5 |
|
|
* This file is part of FFmpeg. |
6 |
|
|
* |
7 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
8 |
|
|
* modify it under the terms of the GNU Lesser General Public |
9 |
|
|
* License as published by the Free Software Foundation; either |
10 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 |
|
|
* Lesser General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU Lesser General Public |
18 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
/** |
23 |
|
|
* @file |
24 |
|
|
* Celt non-power of 2 iMDCT |
25 |
|
|
*/ |
26 |
|
|
|
27 |
|
|
#include <float.h> |
28 |
|
|
#include <math.h> |
29 |
|
|
#include <stddef.h> |
30 |
|
|
|
31 |
|
|
#include "config.h" |
32 |
|
|
|
33 |
|
|
#include "libavutil/attributes.h" |
34 |
|
|
#include "libavutil/common.h" |
35 |
|
|
|
36 |
|
|
#include "mdct15.h" |
37 |
|
|
|
38 |
|
|
#define FFT_FLOAT 1 |
39 |
|
|
#include "fft-internal.h" |
40 |
|
|
|
41 |
|
|
#define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) |
42 |
|
|
|
43 |
|
914 |
av_cold void ff_mdct15_uninit(MDCT15Context **ps) |
44 |
|
|
{ |
45 |
|
914 |
MDCT15Context *s = *ps; |
46 |
|
|
|
47 |
✗✓ |
914 |
if (!s) |
48 |
|
|
return; |
49 |
|
|
|
50 |
|
914 |
ff_fft_end(&s->ptwo_fft); |
51 |
|
|
|
52 |
|
914 |
av_freep(&s->pfa_prereindex); |
53 |
|
914 |
av_freep(&s->pfa_postreindex); |
54 |
|
914 |
av_freep(&s->twiddle_exptab); |
55 |
|
914 |
av_freep(&s->tmp); |
56 |
|
|
|
57 |
|
914 |
av_freep(ps); |
58 |
|
|
} |
59 |
|
|
|
60 |
|
914 |
static inline int init_pfa_reindex_tabs(MDCT15Context *s) |
61 |
|
|
{ |
62 |
|
|
int i, j; |
63 |
|
914 |
const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ |
64 |
|
914 |
const int l_ptwo = 1 << b_ptwo; /* Total length for the power of two FFTs */ |
65 |
|
914 |
const int inv_1 = l_ptwo << ((4 - b_ptwo) & 3); /* (2^b_ptwo)^-1 mod 15 */ |
66 |
|
914 |
const int inv_2 = 0xeeeeeeef & ((1U << b_ptwo) - 1); /* 15^-1 mod 2^b_ptwo */ |
67 |
|
|
|
68 |
|
914 |
s->pfa_prereindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_prereindex)); |
69 |
✗✓ |
914 |
if (!s->pfa_prereindex) |
70 |
|
|
return 1; |
71 |
|
|
|
72 |
|
914 |
s->pfa_postreindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_postreindex)); |
73 |
✗✓ |
914 |
if (!s->pfa_postreindex) |
74 |
|
|
return 1; |
75 |
|
|
|
76 |
|
|
/* Pre/Post-reindex */ |
77 |
✓✓ |
16374 |
for (i = 0; i < l_ptwo; i++) { |
78 |
✓✓ |
247360 |
for (j = 0; j < 15; j++) { |
79 |
|
231900 |
const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; |
80 |
|
231900 |
const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; |
81 |
|
231900 |
const int k_pre = 15*i + (j - q_pre*15)*(1 << b_ptwo); |
82 |
|
231900 |
const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; |
83 |
|
231900 |
s->pfa_prereindex[i*15 + j] = k_pre << 1; |
84 |
|
231900 |
s->pfa_postreindex[k_post] = l_ptwo*j + i; |
85 |
|
|
} |
86 |
|
|
} |
87 |
|
|
|
88 |
|
914 |
return 0; |
89 |
|
|
} |
90 |
|
|
|
91 |
|
|
/* Stride is hardcoded to 3 */ |
92 |
|
2665416 |
static inline void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2]) |
93 |
|
|
{ |
94 |
|
|
FFTComplex z0[4], t[6]; |
95 |
|
|
|
96 |
|
2665416 |
t[0].re = in[3].re + in[12].re; |
97 |
|
2665416 |
t[0].im = in[3].im + in[12].im; |
98 |
|
2665416 |
t[1].im = in[3].re - in[12].re; |
99 |
|
2665416 |
t[1].re = in[3].im - in[12].im; |
100 |
|
2665416 |
t[2].re = in[6].re + in[ 9].re; |
101 |
|
2665416 |
t[2].im = in[6].im + in[ 9].im; |
102 |
|
2665416 |
t[3].im = in[6].re - in[ 9].re; |
103 |
|
2665416 |
t[3].re = in[6].im - in[ 9].im; |
104 |
|
|
|
105 |
|
2665416 |
out[0].re = in[0].re + in[3].re + in[6].re + in[9].re + in[12].re; |
106 |
|
2665416 |
out[0].im = in[0].im + in[3].im + in[6].im + in[9].im + in[12].im; |
107 |
|
|
|
108 |
|
2665416 |
t[4].re = exptab[0].re * t[2].re - exptab[1].re * t[0].re; |
109 |
|
2665416 |
t[4].im = exptab[0].re * t[2].im - exptab[1].re * t[0].im; |
110 |
|
2665416 |
t[0].re = exptab[0].re * t[0].re - exptab[1].re * t[2].re; |
111 |
|
2665416 |
t[0].im = exptab[0].re * t[0].im - exptab[1].re * t[2].im; |
112 |
|
2665416 |
t[5].re = exptab[0].im * t[3].re - exptab[1].im * t[1].re; |
113 |
|
2665416 |
t[5].im = exptab[0].im * t[3].im - exptab[1].im * t[1].im; |
114 |
|
2665416 |
t[1].re = exptab[0].im * t[1].re + exptab[1].im * t[3].re; |
115 |
|
2665416 |
t[1].im = exptab[0].im * t[1].im + exptab[1].im * t[3].im; |
116 |
|
|
|
117 |
|
2665416 |
z0[0].re = t[0].re - t[1].re; |
118 |
|
2665416 |
z0[0].im = t[0].im - t[1].im; |
119 |
|
2665416 |
z0[1].re = t[4].re + t[5].re; |
120 |
|
2665416 |
z0[1].im = t[4].im + t[5].im; |
121 |
|
|
|
122 |
|
2665416 |
z0[2].re = t[4].re - t[5].re; |
123 |
|
2665416 |
z0[2].im = t[4].im - t[5].im; |
124 |
|
2665416 |
z0[3].re = t[0].re + t[1].re; |
125 |
|
2665416 |
z0[3].im = t[0].im + t[1].im; |
126 |
|
|
|
127 |
|
2665416 |
out[1].re = in[0].re + z0[3].re; |
128 |
|
2665416 |
out[1].im = in[0].im + z0[0].im; |
129 |
|
2665416 |
out[2].re = in[0].re + z0[2].re; |
130 |
|
2665416 |
out[2].im = in[0].im + z0[1].im; |
131 |
|
2665416 |
out[3].re = in[0].re + z0[1].re; |
132 |
|
2665416 |
out[3].im = in[0].im + z0[2].im; |
133 |
|
2665416 |
out[4].re = in[0].re + z0[0].re; |
134 |
|
2665416 |
out[4].im = in[0].im + z0[3].im; |
135 |
|
2665416 |
} |
136 |
|
|
|
137 |
|
888472 |
static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride) |
138 |
|
|
{ |
139 |
|
|
int k; |
140 |
|
|
FFTComplex tmp1[5], tmp2[5], tmp3[5]; |
141 |
|
|
|
142 |
|
888472 |
fft5(tmp1, in + 0, exptab + 19); |
143 |
|
888472 |
fft5(tmp2, in + 1, exptab + 19); |
144 |
|
888472 |
fft5(tmp3, in + 2, exptab + 19); |
145 |
|
|
|
146 |
✓✓ |
5330832 |
for (k = 0; k < 5; k++) { |
147 |
|
|
FFTComplex t[2]; |
148 |
|
|
|
149 |
|
4442360 |
CMUL3(t[0], tmp2[k], exptab[k]); |
150 |
|
4442360 |
CMUL3(t[1], tmp3[k], exptab[2 * k]); |
151 |
|
4442360 |
out[stride*k].re = tmp1[k].re + t[0].re + t[1].re; |
152 |
|
4442360 |
out[stride*k].im = tmp1[k].im + t[0].im + t[1].im; |
153 |
|
|
|
154 |
|
4442360 |
CMUL3(t[0], tmp2[k], exptab[k + 5]); |
155 |
|
4442360 |
CMUL3(t[1], tmp3[k], exptab[2 * (k + 5)]); |
156 |
|
4442360 |
out[stride*(k + 5)].re = tmp1[k].re + t[0].re + t[1].re; |
157 |
|
4442360 |
out[stride*(k + 5)].im = tmp1[k].im + t[0].im + t[1].im; |
158 |
|
|
|
159 |
|
4442360 |
CMUL3(t[0], tmp2[k], exptab[k + 10]); |
160 |
|
4442360 |
CMUL3(t[1], tmp3[k], exptab[2 * k + 5]); |
161 |
|
4442360 |
out[stride*(k + 10)].re = tmp1[k].re + t[0].re + t[1].re; |
162 |
|
4442360 |
out[stride*(k + 10)].im = tmp1[k].im + t[0].im + t[1].im; |
163 |
|
|
} |
164 |
|
888472 |
} |
165 |
|
|
|
166 |
|
|
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride) |
167 |
|
|
{ |
168 |
|
|
int i, j; |
169 |
|
|
const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1; |
170 |
|
|
const int l_ptwo = 1 << s->ptwo_fft.nbits; |
171 |
|
|
FFTComplex fft15in[15]; |
172 |
|
|
|
173 |
|
|
/* Folding and pre-reindexing */ |
174 |
|
|
for (i = 0; i < l_ptwo; i++) { |
175 |
|
|
for (j = 0; j < 15; j++) { |
176 |
|
|
const int k = s->pfa_prereindex[i*15 + j]; |
177 |
|
|
FFTComplex tmp, exp = s->twiddle_exptab[k >> 1]; |
178 |
|
|
if (k < len4) { |
179 |
|
|
tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; |
180 |
|
|
tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; |
181 |
|
|
} else { |
182 |
|
|
tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; |
183 |
|
|
tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; |
184 |
|
|
} |
185 |
|
|
CMUL(fft15in[j].im, fft15in[j].re, tmp.re, tmp.im, exp.re, exp.im); |
186 |
|
|
} |
187 |
|
|
s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo); |
188 |
|
|
} |
189 |
|
|
|
190 |
|
|
/* Then a 15xN FFT (where N is a power of two) */ |
191 |
|
|
for (i = 0; i < 15; i++) |
192 |
|
|
s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); |
193 |
|
|
|
194 |
|
|
/* Reindex again, apply twiddles and output */ |
195 |
|
|
for (i = 0; i < len8; i++) { |
196 |
|
|
const int i0 = len8 + i, i1 = len8 - i - 1; |
197 |
|
|
const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1]; |
198 |
|
|
|
199 |
|
|
CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], s->tmp[s0].re, s->tmp[s0].im, |
200 |
|
|
s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re); |
201 |
|
|
CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], s->tmp[s1].re, s->tmp[s1].im, |
202 |
|
|
s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re); |
203 |
|
|
} |
204 |
|
|
} |
205 |
|
|
|
206 |
|
100096 |
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, |
207 |
|
|
ptrdiff_t stride) |
208 |
|
|
{ |
209 |
|
|
FFTComplex fft15in[15]; |
210 |
|
100096 |
FFTComplex *z = (FFTComplex *)dst; |
211 |
|
100096 |
int i, j, len8 = s->len4 >> 1, l_ptwo = 1 << s->ptwo_fft.nbits; |
212 |
|
100096 |
const float *in1 = src, *in2 = src + (s->len2 - 1) * stride; |
213 |
|
|
|
214 |
|
|
/* Reindex input, putting it into a buffer and doing an Nx15 FFT */ |
215 |
✓✓ |
988600 |
for (i = 0; i < l_ptwo; i++) { |
216 |
✓✓ |
14216064 |
for (j = 0; j < 15; j++) { |
217 |
|
13327560 |
const int k = s->pfa_prereindex[i*15 + j]; |
218 |
|
13327560 |
FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; |
219 |
|
13327560 |
CMUL3(fft15in[j], tmp, s->twiddle_exptab[k >> 1]); |
220 |
|
|
} |
221 |
|
888504 |
s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo); |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
/* Then a 15xN FFT (where N is a power of two) */ |
225 |
✓✓ |
1601536 |
for (i = 0; i < 15; i++) |
226 |
|
1501440 |
s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); |
227 |
|
|
|
228 |
|
|
/* Reindex again, apply twiddles and output */ |
229 |
|
100096 |
s->postreindex(z, s->tmp, s->twiddle_exptab, s->pfa_postreindex, len8); |
230 |
|
100096 |
} |
231 |
|
|
|
232 |
|
100088 |
static void postrotate_c(FFTComplex *out, FFTComplex *in, FFTComplex *exp, |
233 |
|
|
int *lut, ptrdiff_t len8) |
234 |
|
|
{ |
235 |
|
|
int i; |
236 |
|
|
|
237 |
|
|
/* Reindex again, apply twiddles and output */ |
238 |
✓✓ |
6763628 |
for (i = 0; i < len8; i++) { |
239 |
|
6663540 |
const int i0 = len8 + i, i1 = len8 - i - 1; |
240 |
|
6663540 |
const int s0 = lut[i0], s1 = lut[i1]; |
241 |
|
|
|
242 |
|
6663540 |
CMUL(out[i1].re, out[i0].im, in[s1].im, in[s1].re, exp[i1].im, exp[i1].re); |
243 |
|
6663540 |
CMUL(out[i0].re, out[i1].im, in[s0].im, in[s0].re, exp[i0].im, exp[i0].re); |
244 |
|
|
} |
245 |
|
100088 |
} |
246 |
|
|
|
247 |
|
914 |
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale) |
248 |
|
|
{ |
249 |
|
|
MDCT15Context *s; |
250 |
|
|
double alpha, theta; |
251 |
|
914 |
int len2 = 15 * (1 << N); |
252 |
|
914 |
int len = 2 * len2; |
253 |
|
|
int i; |
254 |
|
|
|
255 |
|
|
/* Tested and verified to work on everything in between */ |
256 |
✓✗✗✓
|
914 |
if ((N < 2) || (N > 13)) |
257 |
|
|
return AVERROR(EINVAL); |
258 |
|
|
|
259 |
|
914 |
s = av_mallocz(sizeof(*s)); |
260 |
✗✓ |
914 |
if (!s) |
261 |
|
|
return AVERROR(ENOMEM); |
262 |
|
|
|
263 |
|
914 |
s->fft_n = N - 1; |
264 |
|
914 |
s->len4 = len2 / 2; |
265 |
|
914 |
s->len2 = len2; |
266 |
|
914 |
s->inverse = inverse; |
267 |
|
914 |
s->fft15 = fft15_c; |
268 |
|
914 |
s->mdct = mdct15; |
269 |
|
914 |
s->imdct_half = imdct15_half; |
270 |
|
914 |
s->postreindex = postrotate_c; |
271 |
|
|
|
272 |
✗✓ |
914 |
if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0) |
273 |
|
|
goto fail; |
274 |
|
|
|
275 |
✗✓ |
914 |
if (init_pfa_reindex_tabs(s)) |
276 |
|
|
goto fail; |
277 |
|
|
|
278 |
|
914 |
s->tmp = av_malloc_array(len, 2 * sizeof(*s->tmp)); |
279 |
✗✓ |
914 |
if (!s->tmp) |
280 |
|
|
goto fail; |
281 |
|
|
|
282 |
|
914 |
s->twiddle_exptab = av_malloc_array(s->len4, sizeof(*s->twiddle_exptab)); |
283 |
✗✓ |
914 |
if (!s->twiddle_exptab) |
284 |
|
|
goto fail; |
285 |
|
|
|
286 |
✓✓ |
914 |
theta = 0.125f + (scale < 0 ? s->len4 : 0); |
287 |
|
914 |
scale = sqrt(fabs(scale)); |
288 |
✓✓ |
232814 |
for (i = 0; i < s->len4; i++) { |
289 |
|
231900 |
alpha = 2 * M_PI * (i + theta) / len; |
290 |
|
231900 |
s->twiddle_exptab[i].re = cosf(alpha) * scale; |
291 |
|
231900 |
s->twiddle_exptab[i].im = sinf(alpha) * scale; |
292 |
|
|
} |
293 |
|
|
|
294 |
|
|
/* 15-point FFT exptab */ |
295 |
✓✓ |
18280 |
for (i = 0; i < 19; i++) { |
296 |
✓✓ |
17366 |
if (i < 15) { |
297 |
|
13710 |
double theta = (2.0f * M_PI * i) / 15.0f; |
298 |
✗✓ |
13710 |
if (!s->inverse) |
299 |
|
|
theta *= -1; |
300 |
|
13710 |
s->exptab[i].re = cosf(theta); |
301 |
|
13710 |
s->exptab[i].im = sinf(theta); |
302 |
|
|
} else { /* Wrap around to simplify fft15 */ |
303 |
|
3656 |
s->exptab[i] = s->exptab[i - 15]; |
304 |
|
|
} |
305 |
|
|
} |
306 |
|
|
|
307 |
|
|
/* 5-point FFT exptab */ |
308 |
|
914 |
s->exptab[19].re = cosf(2.0f * M_PI / 5.0f); |
309 |
|
914 |
s->exptab[19].im = sinf(2.0f * M_PI / 5.0f); |
310 |
|
914 |
s->exptab[20].re = cosf(1.0f * M_PI / 5.0f); |
311 |
|
914 |
s->exptab[20].im = sinf(1.0f * M_PI / 5.0f); |
312 |
|
|
|
313 |
|
|
/* Invert the phase for an inverse transform, do nothing for a forward transform */ |
314 |
✓✗ |
914 |
if (s->inverse) { |
315 |
|
914 |
s->exptab[19].im *= -1; |
316 |
|
914 |
s->exptab[20].im *= -1; |
317 |
|
|
} |
318 |
|
|
|
319 |
|
|
if (ARCH_X86) |
320 |
|
914 |
ff_mdct15_init_x86(s); |
321 |
|
|
|
322 |
|
914 |
*ps = s; |
323 |
|
|
|
324 |
|
914 |
return 0; |
325 |
|
|
|
326 |
|
|
fail: |
327 |
|
|
ff_mdct15_uninit(&s); |
328 |
|
|
return AVERROR(ENOMEM); |
329 |
|
|
} |