Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* This file is part of FFmpeg. |
3 |
|
|
* |
4 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
5 |
|
|
* modify it under the terms of the GNU Lesser General Public |
6 |
|
|
* License as published by the Free Software Foundation; either |
7 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
8 |
|
|
* |
9 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
10 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 |
|
|
* Lesser General Public License for more details. |
13 |
|
|
* |
14 |
|
|
* You should have received a copy of the GNU Lesser General Public |
15 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
16 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 |
|
|
*/ |
18 |
|
|
|
19 |
|
|
#include "libavutil/mem.h" |
20 |
|
|
#include "libavutil/tx.h" |
21 |
|
|
#include "avfilter.h" |
22 |
|
|
#include "audio.h" |
23 |
|
|
|
24 |
|
|
/*
 * Per-precision configuration of this template.
 *
 * Every helper macro is dropped first so that the definitions below always
 * take effect, then redefined for single precision when DEPTH == 32 and for
 * double precision otherwise.
 */
#undef SAMPLE_FORMAT
#undef ftype
#undef ctype
#undef TX_TYPE
#undef ZERO
#undef HALF
#undef ONE
#undef EPSILON
#undef SQRT
#undef HYPOT
#undef SIN
#undef CLIP

#if DEPTH == 32
/* sample types and transform kind */
#define SAMPLE_FORMAT float
#define ftype         float
#define ctype         AVComplexFloat
#define TX_TYPE       AV_TX_FLOAT_RDFT
/* numeric constants */
#define ZERO          0.f
#define HALF          0.5f
#define ONE           1.f
#define EPSILON       FLT_EPSILON
/* math helpers */
#define SQRT          sqrtf
#define HYPOT         hypotf
#define SIN           sinf
#define CLIP          av_clipf
#else
/* sample types and transform kind */
#define SAMPLE_FORMAT double
#define ftype         double
#define ctype         AVComplexDouble
#define TX_TYPE       AV_TX_DOUBLE_RDFT
/* numeric constants */
#define ZERO          0.0
#define HALF          0.5
#define ONE           1.0
#define EPSILON       DBL_EPSILON
/* math helpers */
#define SQRT          sqrt
#define HYPOT         hypot
#define SIN           sin
#define CLIP          av_clipd
#endif

/*
 * fn(name) expands to name_<SAMPLE_FORMAT>, e.g. name_float or name_double.
 * The extra fn2 level forces SAMPLE_FORMAT to be expanded before pasting.
 */
#define fn3(a,b)   a##_##b
#define fn2(a,b)   fn3(a,b)
#define fn(a)      fn2(a, SAMPLE_FORMAT)
67 |
|
|
|
68 |
|
✗ |
/**
 * Allocate the window and create the transform contexts.
 *
 * The window is filled with half a sine period sampled over fft_size points.
 * Two transform contexts are then created with the inverse flag cleared
 * (tx_ctx[0] and tx_ctx[1], both storing their callback in tx_fn) and one
 * with the inverse flag set (itx_ctx / itx_fn), whose scale is
 * 1 / (fft_size * 1.5).
 *
 * @param ctx filter context; ctx->priv is the AudioDialogueEnhanceContext
 * @return 0 on success, AVERROR(ENOMEM) if the window allocation fails,
 *         otherwise the negative value returned by av_tx_init()
 */
static int fn(de_tx_init)(AVFilterContext *ctx)
{
    AudioDialogueEnhanceContext *s = ctx->priv;
    ftype fwd_scale = ONE;
    ftype inv_scale = ONE / (s->fft_size * 1.5f);
    int err;

    s->window = av_calloc(s->fft_size, sizeof(ftype));
    if (!s->window)
        return AVERROR(ENOMEM);

    /* expose the same buffer through the precision-specific pointer */
    fn(s->window) = s->window;
    for (int k = 0; k < s->fft_size; k++)
        fn(s->window)[k] = SIN(M_PI*k/(s->fft_size-1));

    /* one non-inverse context per tx_ctx slot, in index order */
    for (int ch = 0; ch < 2; ch++) {
        err = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, TX_TYPE, 0, s->fft_size, &fwd_scale, 0);
        if (err < 0)
            return err;
    }

    err = av_tx_init(&s->itx_ctx, &s->itx_fn, TX_TYPE, 1, s->fft_size, &inv_scale, 0);
    if (err < 0)
        return err;

    return 0;
}
95 |
|
|
|
96 |
|
✗ |
/**
 * Multiply a frame by the window, sample by sample.
 *
 * @param s                context providing the window and fft_size
 * @param in_frame         fft_size input samples
 * @param out_frame        fft_size output samples
 * @param add_to_out_frame non-zero: accumulate the windowed samples into
 *                         out_frame; zero: overwrite out_frame
 */
static void fn(apply_window)(AudioDialogueEnhanceContext *s,
                             const ftype *in_frame, ftype *out_frame, const int add_to_out_frame)
{
    const ftype *win = fn(s->window);
    const int len = s->fft_size;

    if (!add_to_out_frame) {
        for (int k = 0; k < len; k++)
            out_frame[k] = in_frame[k] * win[k];
        return;
    }

    for (int k = 0; k < len; k++)
        out_frame[k] += in_frame[k] * win[k];
}
110 |
|
|
|
111 |
|
✗ |
/**
 * Square a value.
 *
 * @param x input value
 * @return x multiplied by itself
 */
static ftype fn(sqr)(ftype x)
{
    const ftype squared = x * x;

    return squared;
}
115 |
|
|
|
116 |
|
✗ |
/**
 * Derive a center spectrum from the left and right spectra.
 *
 * For every bin the sum (L + R) is scaled by
 * 0.5 * (1 - sqrt(|L - R|^2 / (|L + R|^2 + EPSILON))).
 *
 * @param left   left spectrum, N bins
 * @param right  right spectrum, N bins
 * @param center output spectrum, N bins
 * @param N      number of complex bins to process
 */
static void fn(get_centere)(ctype *left, ctype *right,
                            ctype *center, int N)
{
    for (int k = 0; k < N; k++) {
        const ftype l_re = left[k].re;
        const ftype l_im = left[k].im;
        const ftype r_re = right[k].re;
        const ftype r_im = right[k].im;
        /* squared magnitude of the difference and of the sum */
        const ftype diff_pow = fn(sqr)(l_re - r_re) + fn(sqr)(l_im - r_im);
        const ftype sum_pow  = fn(sqr)(l_re + r_re) + fn(sqr)(l_im + r_im);
        /* EPSILON keeps the division defined when the sum is zero */
        const ftype a = HALF * (ONE - SQRT(diff_pow / (sum_pow + EPSILON)));

        center[k].re = a * (l_re + r_re);
        center[k].im = a * (l_im + r_im);
    }
}
131 |
|
|
|
132 |
|
✗ |
/**
 * Sum of squared magnitude differences between two spectra.
 *
 * Both buffers are read as arrays of N complex values.
 *
 * @param curf  current spectrum
 * @param prevf previous spectrum
 * @param N     number of complex bins
 * @return sum over all bins of (|cur| - |prev|)^2
 */
static ftype fn(flux)(ftype *curf, ftype *prevf, int N)
{
    ctype *now    = (ctype *)curf;
    ctype *before = (ctype *)prevf;
    ftype acc = ZERO;

    for (int k = 0; k < N; k++) {
        const ftype mag_now    = HYPOT(now[k].re, now[k].im);
        const ftype mag_before = HYPOT(before[k].re, before[k].im);

        acc += fn(sqr)(mag_now - mag_before);
    }

    return acc;
}
149 |
|
|
|
150 |
|
✗ |
/**
 * Sum of squared magnitude differences of the left-minus-right spectrum
 * between the current and the previous frame.
 *
 * All four buffers are read as arrays of N complex values.
 *
 * @param lf  current left spectrum
 * @param lpf previous left spectrum
 * @param rf  current right spectrum
 * @param rpf previous right spectrum
 * @param N   number of complex bins
 * @return sum over all bins of (|L - R| - |Lprev - Rprev|)^2
 */
static ftype fn(fluxlr)(ftype *lf, ftype *lpf,
                        ftype *rf, ftype *rpf,
                        int N)
{
    ctype *l  = (ctype *)lf;
    ctype *lp = (ctype *)lpf;
    ctype *r  = (ctype *)rf;
    ctype *rp = (ctype *)rpf;
    ftype acc = ZERO;

    for (int k = 0; k < N; k++) {
        /* side (L - R) component of the current frame */
        const ftype cur_re = l[k].re - r[k].re;
        const ftype cur_im = l[k].im - r[k].im;
        /* side (L - R) component of the previous frame */
        const ftype old_re = lp[k].re - rp[k].re;
        const ftype old_im = lp[k].im - rp[k].im;

        acc += fn(sqr)(HYPOT(cur_re, cur_im) - HYPOT(old_re, old_im));
    }

    return acc;
}
171 |
|
|
|
172 |
|
✗ |
/**
 * Turn the two flux measures into an activity value in [0, 1].
 *
 * The value is a * (fc / (fc + flr) - 0.5), clipped to [0, 1].
 *
 * @param fc  first flux measure (the caller in this file passes the flux of
 *            the center spectrum)
 * @param flr second flux measure (the caller in this file passes the flux of
 *            the left-minus-right spectrum)
 * @param a   scale factor applied before clipping
 * @return clipped activity value; ZERO when fc + flr is not a positive number
 */
static ftype fn(calc_vad)(ftype fc, ftype flr, ftype a)
{
    const ftype total = fc + flr;
    ftype vad;

    /*
     * When both fluxes are zero the ratio below would be 0/0 = NaN and the
     * result would depend on how CLIP treats NaN. Report "no activity"
     * explicitly instead; the test is written so that a NaN total is
     * caught as well.
     */
    if (!(total > ZERO))
        return ZERO;

    vad = a * (fc / total - HALF);

    return CLIP(vad, ZERO, ONE);
}
178 |
|
|
|
179 |
|
✗ |
/**
 * Apply the final per-bin gain to the center spectrum, in place.
 *
 * For every bin, G = |C|^2 / (|C|^2 + |L - R|^2 + EPSILON) and the center
 * bin is multiplied by (original + vad * G * enhance).
 *
 * @param c        center spectrum, read and overwritten (N complex bins)
 * @param l        left spectrum (N complex bins)
 * @param r        right spectrum (N complex bins)
 * @param vad      activity value used to weight the enhancement
 * @param N        number of complex bins
 * @param original constant part of the gain
 * @param enhance  weight of the vad * G part of the gain
 */
static void fn(get_final)(ftype *c, ftype *l,
                          ftype *r, ftype vad, int N,
                          ftype original, ftype enhance)
{
    ctype *center = (ctype *)c;
    ctype *left   = (ctype *)l;
    ctype *right  = (ctype *)r;

    for (int k = 0; k < N; k++) {
        const ftype c_re = center[k].re;
        const ftype c_im = center[k].im;
        /* power of the center bin and of the side (L - R) bin */
        const ftype cP  = fn(sqr)(c_re) + fn(sqr)(c_im);
        const ftype lrP = fn(sqr)(left[k].re - right[k].re) + fn(sqr)(left[k].im - right[k].im);
        const ftype G = cP / (cP + lrP + EPSILON);
        /* same real gain for both components */
        const ftype gain = original + vad * G * enhance;

        center[k].re = c_re * gain;
        center[k].im = c_im * gain;
    }
}
200 |
|
|
|
201 |
|
✗ |
/**
 * Process one block of stereo input and write left, right and center output.
 *
 * Steps, in order:
 *  - shift the input/output history buffers by `overlap` samples and append
 *    the new input samples (zero-padded when the frame is short);
 *  - window both channels and transform them;
 *  - derive the center spectrum, compute the activity value from the spectral
 *    fluxes and smooth it with the previous value (0.1 new / 0.9 old);
 *  - remember the current spectra for the next call;
 *  - apply the final gain to the center spectrum, transform it back and
 *    window-add it into the output history;
 *  - copy `overlap` samples to each of the three output channels.
 *
 * Output channels 0 and 1 receive the oldest `overlap` samples of the input
 * history; channel 2 receives the accumulated center signal, or zeros when
 * the filter is disabled.
 *
 * @param ctx filter context; ctx->priv is the AudioDialogueEnhanceContext
 * @param out output frame, accessed through extended_data[0..2]
 * @return 0
 */
static int fn(de_stereo)(AVFilterContext *ctx, AVFrame *out)
{
    AudioDialogueEnhanceContext *s = ctx->priv;
    ftype *center          = (ftype *)s->center_frame->extended_data[0];
    ftype *center_prev     = (ftype *)s->center_frame->extended_data[1];
    ftype *left_in         = (ftype *)s->in_frame->extended_data[0];
    ftype *right_in        = (ftype *)s->in_frame->extended_data[1];
    ftype *left_out        = (ftype *)s->out_dist_frame->extended_data[0];
    ftype *right_out       = (ftype *)s->out_dist_frame->extended_data[1];
    ftype *left_samples    = (ftype *)s->in->extended_data[0];
    ftype *right_samples   = (ftype *)s->in->extended_data[1];
    ftype *windowed_left   = (ftype *)s->windowed_frame->extended_data[0];
    ftype *windowed_right  = (ftype *)s->windowed_frame->extended_data[1];
    ftype *windowed_oleft  = (ftype *)s->windowed_out->extended_data[0];
    ftype *windowed_oright = (ftype *)s->windowed_out->extended_data[1];
    ftype *windowed_pleft  = (ftype *)s->windowed_prev->extended_data[0];
    ftype *windowed_pright = (ftype *)s->windowed_prev->extended_data[1];
    ftype *left_osamples   = (ftype *)out->extended_data[0];
    ftype *right_osamples  = (ftype *)out->extended_data[1];
    ftype *center_osamples = (ftype *)out->extended_data[2];
    const int overlap = s->overlap;
    const int offset = s->fft_size - overlap;
    const int nb_samples = FFMIN(overlap, s->in->nb_samples);
    /* number of complex bins handled by every spectral helper below */
    const int nb_bins = s->fft_size / 2 + 1;
    ftype vad;

    // shift in/out buffers
    memmove(left_in, &left_in[overlap], offset * sizeof(ftype));
    memmove(right_in, &right_in[overlap], offset * sizeof(ftype));
    memmove(left_out, &left_out[overlap], offset * sizeof(ftype));
    memmove(right_out, &right_out[overlap], offset * sizeof(ftype));

    memcpy(&left_in[offset], left_samples, nb_samples * sizeof(ftype));
    memcpy(&right_in[offset], right_samples, nb_samples * sizeof(ftype));
    if (nb_samples < overlap) {
        // short input frame: the rest of the new region still holds samples
        // of the previous block, so pad it with silence instead
        memset(&left_in[offset + nb_samples], 0, (overlap - nb_samples) * sizeof(ftype));
        memset(&right_in[offset + nb_samples], 0, (overlap - nb_samples) * sizeof(ftype));
    }
    memset(&left_out[offset], 0, overlap * sizeof(ftype));
    memset(&right_out[offset], 0, overlap * sizeof(ftype));

    fn(apply_window)(s, left_in, windowed_left, 0);
    fn(apply_window)(s, right_in, windowed_right, 0);

    s->tx_fn(s->tx_ctx[0], windowed_oleft, windowed_left, sizeof(ftype));
    s->tx_fn(s->tx_ctx[1], windowed_oright, windowed_right, sizeof(ftype));

    fn(get_centere)((ctype *)windowed_oleft,
                    (ctype *)windowed_oright,
                    (ctype *)center,
                    nb_bins);

    vad = fn(calc_vad)(fn(flux)(center, center_prev, nb_bins),
                       fn(fluxlr)(windowed_oleft, windowed_pleft,
                                  windowed_oright, windowed_pright, nb_bins), s->voice);
    // smooth the activity value over time
    vad = vad * 0.1 + 0.9 * fn(s->prev_vad);
    fn(s->prev_vad) = vad;

    // Keep the current spectra for the next call. flux()/fluxlr() read
    // nb_bins complex values from the "previous" buffers, so copy exactly
    // that many; copying only fft_size scalars would leave the last bin
    // of the previous spectra never updated.
    memcpy(center_prev, center, nb_bins * sizeof(ctype));
    memcpy(windowed_pleft, windowed_oleft, nb_bins * sizeof(ctype));
    memcpy(windowed_pright, windowed_oright, nb_bins * sizeof(ctype));

    fn(get_final)(center, windowed_oleft, windowed_oright, vad, nb_bins,
                  s->original, s->enhance);

    // back to the time domain; windowed_oleft is reused as the output buffer
    s->itx_fn(s->itx_ctx, windowed_oleft, center, sizeof(ctype));

    // overlap-add the windowed center signal into the output history
    fn(apply_window)(s, windowed_oleft, left_out, 1);

    memcpy(left_osamples, left_in, overlap * sizeof(ftype));
    memcpy(right_osamples, right_in, overlap * sizeof(ftype));

    if (ctx->is_disabled)
        memset(center_osamples, 0, overlap * sizeof(ftype));
    else
        memcpy(center_osamples, left_out, overlap * sizeof(ftype));

    return 0;
}
275 |
|
|
|