FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/dialoguenhance_template.c
Date: 2024-05-03 15:42:48
Exec Total Coverage
Lines: 0 134 0.0%
Functions: 0 16 0.0%
Branches: 0 24 0.0%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "libavutil/mem.h"
20 #include "libavutil/tx.h"
21 #include "avfilter.h"
22 #include "internal.h"
23 #include "audio.h"
24
/*
 * Drop any earlier definitions of the type-dependent macros so that they
 * can be (re)defined below for the requested DEPTH.
 */
#undef ctype
#undef ftype
#undef SQRT
#undef HYPOT
#undef SAMPLE_FORMAT
#undef TX_TYPE
#undef ONE
#undef ZERO
#undef HALF
#undef SIN
#undef CLIP
#undef EPSILON
/*
 * DEPTH == 32 selects the single precision variants (float samples,
 * AVComplexFloat bins, float RDFT); any other value selects the double
 * precision variants.
 */
#if DEPTH == 32
#define SAMPLE_FORMAT float
#define SQRT sqrtf
#define HYPOT hypotf
#define ctype AVComplexFloat
#define ftype float
#define TX_TYPE AV_TX_FLOAT_RDFT
#define ONE 1.f
#define ZERO 0.f
#define HALF 0.5f
#define SIN sinf
#define CLIP av_clipf
#define EPSILON FLT_EPSILON
#else
#define SAMPLE_FORMAT double
#define SQRT sqrt
#define HYPOT hypot
#define ctype AVComplexDouble
#define ftype double
#define TX_TYPE AV_TX_DOUBLE_RDFT
#define ONE 1.0
#define ZERO 0.0
#define HALF 0.5
#define SIN sin
#define CLIP av_clipd
#define EPSILON DBL_EPSILON
#endif

/*
 * fn(name) appends "_" and the current SAMPLE_FORMAT to name, giving
 * name_float or name_double. The fn2 -> fn3 indirection makes the
 * preprocessor expand SAMPLE_FORMAT before the tokens are pasted.
 */
#define fn3(a,b) a##_##b
#define fn2(a,b) fn3(a,b)
#define fn(a) fn2(a, SAMPLE_FORMAT)
68
69 static int fn(de_tx_init)(AVFilterContext *ctx)
70 {
71 AudioDialogueEnhanceContext *s = ctx->priv;
72 ftype scale = ONE, iscale = ONE / (s->fft_size * 1.5f);
73 int ret;
74
75 s->window = av_calloc(s->fft_size, sizeof(ftype));
76 if (!s->window)
77 return AVERROR(ENOMEM);
78 fn(s->window) = s->window;
79 for (int n = 0; n < s->fft_size; n++)
80 fn(s->window)[n] = SIN(M_PI*n/(s->fft_size-1));
81
82 ret = av_tx_init(&s->tx_ctx[0], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0);
83 if (ret < 0)
84 return ret;
85
86 ret = av_tx_init(&s->tx_ctx[1], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0);
87 if (ret < 0)
88 return ret;
89
90 ret = av_tx_init(&s->itx_ctx, &s->itx_fn, TX_TYPE, 1, s->fft_size, &iscale, 0);
91 if (ret < 0)
92 return ret;
93
94 return 0;
95 }
96
97 static void fn(apply_window)(AudioDialogueEnhanceContext *s,
98 const ftype *in_frame, ftype *out_frame, const int add_to_out_frame)
99 {
100 const ftype *window = fn(s->window);
101 const int fft_size = s->fft_size;
102
103 if (add_to_out_frame) {
104 for (int i = 0; i < fft_size; i++)
105 out_frame[i] += in_frame[i] * window[i];
106 } else {
107 for (int i = 0; i < fft_size; i++)
108 out_frame[i] = in_frame[i] * window[i];
109 }
110 }
111
112 static ftype fn(sqr)(ftype x)
113 {
114 return x * x;
115 }
116
117 static void fn(get_centere)(ctype *left, ctype *right,
118 ctype *center, int N)
119 {
120 for (int i = 0; i < N; i++) {
121 const ftype l_re = left[i].re;
122 const ftype l_im = left[i].im;
123 const ftype r_re = right[i].re;
124 const ftype r_im = right[i].im;
125 const ftype a = HALF * (ONE - SQRT((fn(sqr)(l_re - r_re) + fn(sqr)(l_im - r_im))/
126 (fn(sqr)(l_re + r_re) + fn(sqr)(l_im + r_im) + EPSILON)));
127
128 center[i].re = a * (l_re + r_re);
129 center[i].im = a * (l_im + r_im);
130 }
131 }
132
133 static ftype fn(flux)(ftype *curf, ftype *prevf, int N)
134 {
135 ctype *cur = (ctype *)curf;
136 ctype *prev = (ctype *)prevf;
137 ftype sum = ZERO;
138
139 for (int i = 0; i < N; i++) {
140 ftype c_re = cur[i].re;
141 ftype c_im = cur[i].im;
142 ftype p_re = prev[i].re;
143 ftype p_im = prev[i].im;
144
145 sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im));
146 }
147
148 return sum;
149 }
150
151 static ftype fn(fluxlr)(ftype *lf, ftype *lpf,
152 ftype *rf, ftype *rpf,
153 int N)
154 {
155 ctype *l = (ctype *)lf;
156 ctype *lp = (ctype *)lpf;
157 ctype *r = (ctype *)rf;
158 ctype *rp = (ctype *)rpf;
159 ftype sum = ZERO;
160
161 for (int i = 0; i < N; i++) {
162 ftype c_re = l[i].re - r[i].re;
163 ftype c_im = l[i].im - r[i].im;
164 ftype p_re = lp[i].re - rp[i].re;
165 ftype p_im = lp[i].im - rp[i].im;
166
167 sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im));
168 }
169
170 return sum;
171 }
172
173 static ftype fn(calc_vad)(ftype fc, ftype flr, ftype a)
174 {
175 const ftype vad = a * (fc / (fc + flr) - HALF);
176
177 return CLIP(vad, ZERO, ONE);
178 }
179
180 static void fn(get_final)(ftype *c, ftype *l,
181 ftype *r, ftype vad, int N,
182 ftype original, ftype enhance)
183 {
184 ctype *center = (ctype *)c;
185 ctype *left = (ctype *)l;
186 ctype *right = (ctype *)r;
187
188 for (int i = 0; i < N; i++) {
189 ftype cP = fn(sqr)(center[i].re) + fn(sqr)(center[i].im);
190 ftype lrP = fn(sqr)(left[i].re - right[i].re) + fn(sqr)(left[i].im - right[i].im);
191 ftype G = cP / (cP + lrP + EPSILON);
192 ftype re, im;
193
194 re = center[i].re * (original + vad * G * enhance);
195 im = center[i].im * (original + vad * G * enhance);
196
197 center[i].re = re;
198 center[i].im = im;
199 }
200 }
201
202 static int fn(de_stereo)(AVFilterContext *ctx, AVFrame *out)
203 {
204 AudioDialogueEnhanceContext *s = ctx->priv;
205 ftype *center = (ftype *)s->center_frame->extended_data[0];
206 ftype *center_prev = (ftype *)s->center_frame->extended_data[1];
207 ftype *left_in = (ftype *)s->in_frame->extended_data[0];
208 ftype *right_in = (ftype *)s->in_frame->extended_data[1];
209 ftype *left_out = (ftype *)s->out_dist_frame->extended_data[0];
210 ftype *right_out = (ftype *)s->out_dist_frame->extended_data[1];
211 ftype *left_samples = (ftype *)s->in->extended_data[0];
212 ftype *right_samples = (ftype *)s->in->extended_data[1];
213 ftype *windowed_left = (ftype *)s->windowed_frame->extended_data[0];
214 ftype *windowed_right = (ftype *)s->windowed_frame->extended_data[1];
215 ftype *windowed_oleft = (ftype *)s->windowed_out->extended_data[0];
216 ftype *windowed_oright = (ftype *)s->windowed_out->extended_data[1];
217 ftype *windowed_pleft = (ftype *)s->windowed_prev->extended_data[0];
218 ftype *windowed_pright = (ftype *)s->windowed_prev->extended_data[1];
219 ftype *left_osamples = (ftype *)out->extended_data[0];
220 ftype *right_osamples = (ftype *)out->extended_data[1];
221 ftype *center_osamples = (ftype *)out->extended_data[2];
222 const int overlap = s->overlap;
223 const int offset = s->fft_size - overlap;
224 const int nb_samples = FFMIN(overlap, s->in->nb_samples);
225 ftype vad;
226
227 // shift in/out buffers
228 memmove(left_in, &left_in[overlap], offset * sizeof(ftype));
229 memmove(right_in, &right_in[overlap], offset * sizeof(ftype));
230 memmove(left_out, &left_out[overlap], offset * sizeof(ftype));
231 memmove(right_out, &right_out[overlap], offset * sizeof(ftype));
232
233 memcpy(&left_in[offset], left_samples, nb_samples * sizeof(ftype));
234 memcpy(&right_in[offset], right_samples, nb_samples * sizeof(ftype));
235 memset(&left_out[offset], 0, overlap * sizeof(ftype));
236 memset(&right_out[offset], 0, overlap * sizeof(ftype));
237
238 fn(apply_window)(s, left_in, windowed_left, 0);
239 fn(apply_window)(s, right_in, windowed_right, 0);
240
241 s->tx_fn(s->tx_ctx[0], windowed_oleft, windowed_left, sizeof(ftype));
242 s->tx_fn(s->tx_ctx[1], windowed_oright, windowed_right, sizeof(ftype));
243
244 fn(get_centere)((ctype *)windowed_oleft,
245 (ctype *)windowed_oright,
246 (ctype *)center,
247 s->fft_size / 2 + 1);
248
249 vad = fn(calc_vad)(fn(flux)(center, center_prev, s->fft_size / 2 + 1),
250 fn(fluxlr)(windowed_oleft, windowed_pleft,
251 windowed_oright, windowed_pright, s->fft_size / 2 + 1), s->voice);
252 vad = vad * 0.1 + 0.9 * fn(s->prev_vad);
253 fn(s->prev_vad) = vad;
254
255 memcpy(center_prev, center, s->fft_size * sizeof(ftype));
256 memcpy(windowed_pleft, windowed_oleft, s->fft_size * sizeof(ftype));
257 memcpy(windowed_pright, windowed_oright, s->fft_size * sizeof(ftype));
258
259 fn(get_final)(center, windowed_oleft, windowed_oright, vad, s->fft_size / 2 + 1,
260 s->original, s->enhance);
261
262 s->itx_fn(s->itx_ctx, windowed_oleft, center, sizeof(ctype));
263
264 fn(apply_window)(s, windowed_oleft, left_out, 1);
265
266 memcpy(left_osamples, left_in, overlap * sizeof(ftype));
267 memcpy(right_osamples, right_in, overlap * sizeof(ftype));
268
269 if (ctx->is_disabled)
270 memset(center_osamples, 0, overlap * sizeof(ftype));
271 else
272 memcpy(center_osamples, left_out, overlap * sizeof(ftype));
273
274 return 0;
275 }
276