FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_afftdn.c
Date: 2024-07-26 21:54:09
Exec Total Coverage
Lines: 0 772 0.0%
Functions: 0 25 0.0%
Branches: 0 417 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/avassert.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/channel_layout.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/tx.h"
29 #include "avfilter.h"
30 #include "audio.h"
31 #include "filters.h"
32
/* ln(10) / 10: exp(x * C) equals 10^(x / 10), i.e. a dB value as a power ratio. */
#define C (0.1 * M_LN10)
/* Order of the square linear system handled by factor() and solve(). */
#define SOLVE_SIZE 5
/* Number of entries in each per-channel noise profile array. */
#define NB_PROFILE_BANDS 15
36
/* Values accepted by the "sample_noise" option. */
enum SampleNoiseModes {
    SAMPLE_NONE  = 0,
    SAMPLE_START = 1,
    SAMPLE_STOP  = 2,
    NB_SAMPLEMODES      /* number of modes, used as the option's upper bound + 1 */
};
43
/* Values accepted by the "output_mode" option. */
enum OutModes {
    IN_MODE    = 0,
    OUT_MODE   = 1,
    NOISE_MODE = 2,
    NB_MODES            /* number of modes, used as the option's upper bound + 1 */
};
50
/* Values accepted by the "noise_link" option. */
enum NoiseLinkType {
    NONE_LINK    = 0,
    MIN_LINK     = 1,
    MAX_LINK     = 2,
    AVERAGE_LINK = 3,
    NB_LINK             /* number of link types, used as the option's upper bound + 1 */
};
58
/* Values accepted by the "noise_type" option; selects how band_noise[] is seeded. */
enum NoiseType {
    WHITE_NOISE   = 0,
    VINYL_NOISE   = 1,
    SHELLAC_NOISE = 2,
    CUSTOM_NOISE  = 3,
    NB_NOISE            /* number of noise types, used as the option's upper bound + 1 */
};
66
67 typedef struct DeNoiseChannel {
68 double band_noise[NB_PROFILE_BANDS];
69 double noise_band_auto_var[NB_PROFILE_BANDS];
70 double noise_band_sample[NB_PROFILE_BANDS];
71 double *amt;
72 double *band_amt;
73 double *band_excit;
74 double *gain;
75 double *smoothed_gain;
76 double *prior;
77 double *prior_band_excit;
78 double *clean_data;
79 double *noisy_data;
80 double *out_samples;
81 double *spread_function;
82 double *abs_var;
83 double *rel_var;
84 double *min_abs_var;
85 void *fft_in;
86 void *fft_out;
87 AVTXContext *fft, *ifft;
88 av_tx_fn tx_fn, itx_fn;
89
90 double noise_band_norm[NB_PROFILE_BANDS];
91 double noise_band_avr[NB_PROFILE_BANDS];
92 double noise_band_avi[NB_PROFILE_BANDS];
93 double noise_band_var[NB_PROFILE_BANDS];
94
95 double noise_reduction;
96 double last_noise_reduction;
97 double noise_floor;
98 double last_noise_floor;
99 double residual_floor;
100 double last_residual_floor;
101 double max_gain;
102 double max_var;
103 double gain_scale;
104 } DeNoiseChannel;
105
106 typedef struct AudioFFTDeNoiseContext {
107 const AVClass *class;
108
109 int format;
110 size_t sample_size;
111 size_t complex_sample_size;
112
113 float noise_reduction;
114 float noise_floor;
115 int noise_type;
116 char *band_noise_str;
117 float residual_floor;
118 int track_noise;
119 int track_residual;
120 int output_mode;
121 int noise_floor_link;
122 float ratio;
123 int gain_smooth;
124 float band_multiplier;
125 float floor_offset;
126
127 int channels;
128 int sample_noise;
129 int sample_noise_blocks;
130 int sample_noise_mode;
131 float sample_rate;
132 int buffer_length;
133 int fft_length;
134 int fft_length2;
135 int bin_count;
136 int window_length;
137 int sample_advance;
138 int number_of_bands;
139
140 int band_centre[NB_PROFILE_BANDS];
141
142 int *bin2band;
143 double *window;
144 double *band_alpha;
145 double *band_beta;
146
147 DeNoiseChannel *dnch;
148
149 AVFrame *winframe;
150
151 double window_weight;
152 double floor;
153 double sample_floor;
154
155 int noise_band_edge[NB_PROFILE_BANDS + 2];
156 int noise_band_count;
157 double matrix_a[SOLVE_SIZE * SOLVE_SIZE];
158 double vector_b[SOLVE_SIZE];
159 double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS];
160 double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS];
161 } AudioFFTDeNoiseContext;
162
163 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
164 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
165 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
166
167 static const AVOption afftdn_options[] = {
168 { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
169 { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
170 { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
171 { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
172 { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
173 { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
174 { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
175 { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
176 { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
177 { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
178 { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
179 { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
180 { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
181 { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
182 { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
183 { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
184 { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
185 { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
186 { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
187 { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
188 { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
189 { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
190 { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
191 { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
192 { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
193 { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
194 { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
195 { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
196 { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
197 { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
198 { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
199 { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
200 { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
201 { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
202 { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
203 { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
204 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" },
205 { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" },
206 { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" },
207 { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" },
208 { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
209 { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
210 { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
211 { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
212 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" },
213 { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
214 { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
215 { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
216 { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
217 { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
218 { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
219 { NULL }
220 };
221
222 AVFILTER_DEFINE_CLASS(afftdn);
223
224 static double get_band_noise(AudioFFTDeNoiseContext *s,
225 int band, double a,
226 double b, double c)
227 {
228 double d1, d2, d3;
229
230 d1 = a / s->band_centre[band];
231 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
232 d2 = b / s->band_centre[band];
233 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
234 d3 = s->band_centre[band] / c;
235 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
236
237 return -d1 + d2 - d3;
238 }
239
/**
 * Factor a size x size matrix in place by elimination without pivoting.
 *
 * Element (p, q) is stored at array[q + p * size]. For every pivot p the
 * multipliers array[q + p * size] / array[p + p * size] (q > p) overwrite
 * the eliminated entries, and the remaining entries are reduced accordingly.
 * The result is the form expected by solve().
 *
 * @param array matrix of size * size doubles, modified in place
 * @param size  order of the matrix
 */
static void factor(double *array, int size)
{
    for (int p = 0; p + 1 < size; p++) {
        for (int q = p + 1; q < size; q++) {
            const double mult = array[q + p * size] / array[p + p * size];

            array[q + p * size] = mult;
            for (int r = p + 1; r < size; r++)
                array[q + r * size] -= mult * array[p + r * size];
        }
    }
}
253
/**
 * Solve a linear system whose matrix was previously processed by factor().
 *
 * The stored multipliers are applied to the right-hand side first, then the
 * unknowns are recovered from the last one backwards.
 *
 * @param matrix factored matrix of size * size doubles, read only
 * @param vector right-hand side on entry, solution on return
 * @param size   order of the system
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Apply the stored multipliers to the right-hand side. */
    for (int p = 0; p < last; p++) {
        for (int q = p + 1; q < size; q++) {
            const double mult = matrix[q + p * size];

            vector[q] -= mult * vector[p];
        }
    }

    vector[last] /= matrix[size * size - 1];

    /* Back substitution for the remaining unknowns. */
    for (int p = size - 2; p >= 0; p--) {
        double acc = vector[p];

        for (int q = p + 1; q < size; q++)
            acc -= matrix[p + q * size] * vector[q];
        vector[p] = acc / matrix[p + p * size];
    }
}
272
273 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
274 DeNoiseChannel *dnch,
275 int band)
276 {
277 double product, sum, f;
278 int i = 0;
279
280 if (band < NB_PROFILE_BANDS)
281 return dnch->band_noise[band];
282
283 for (int j = 0; j < SOLVE_SIZE; j++) {
284 sum = 0.0;
285 for (int k = 0; k < NB_PROFILE_BANDS; k++)
286 sum += s->matrix_b[i++] * dnch->band_noise[k];
287 s->vector_b[j] = sum;
288 }
289
290 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
291 f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
292 f = 15.0 + log(f / 1.5) / log(1.5);
293 sum = 0.0;
294 product = 1.0;
295 for (int j = 0; j < SOLVE_SIZE; j++) {
296 sum += product * s->vector_b[j];
297 product *= f;
298 }
299
300 return sum;
301 }
302
/**
 * Map a gain through one of two rational functions of the limit b.
 *
 * @param a gain to map
 * @param b limit parameter
 * @return (b*a - 1) / (b + a - 2) when a > 1,
 *         (b*a - 2*a + 1) / (b - a) when a < 1,
 *         1.0 otherwise
 */
static double limit_gain(double a, double b)
{
    double limited = 1.0;

    if (a > 1.0)
        limited = (b * a - 1.0) / (b + a - 2.0);
    else if (a < 1.0)
        limited = (b * a - 2.0 * a + 1.0) / (b - a);

    return limited;
}
311
312 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
313 double floor, int len, double *rnum, double *rden)
314 {
315 double num = 0., den = 0.;
316 int size = 0;
317
318 for (int n = 0; n < len; n++) {
319 const double v = spectral[n];
320 if (v > floor) {
321 num += log(v);
322 den += v;
323 size++;
324 }
325 }
326
327 size = FFMAX(size, 1);
328
329 num /= size;
330 den /= size;
331
332 num = exp(num);
333
334 *rnum = num;
335 *rden = den;
336 }
337
338 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
339
340 static double floor_offset(const double *S, int size, double mean)
341 {
342 double offset = 0.0;
343
344 for (int n = 0; n < size; n++) {
345 const double p = S[n] - mean;
346
347 offset = fmax(offset, fabs(p));
348 }
349
350 return offset / mean;
351 }
352
353 static void process_frame(AVFilterContext *ctx,
354 AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
355 double *prior, double *prior_band_excit, int track_noise)
356 {
357 AVFilterLink *outlink = ctx->outputs[0];
358 const double *abs_var = dnch->abs_var;
359 const double ratio = outlink->frame_count_out ? s->ratio : 1.0;
360 const double rratio = 1. - ratio;
361 const int *bin2band = s->bin2band;
362 double *noisy_data = dnch->noisy_data;
363 double *band_excit = dnch->band_excit;
364 double *band_amt = dnch->band_amt;
365 double *smoothed_gain = dnch->smoothed_gain;
366 AVComplexDouble *fft_data_dbl = dnch->fft_out;
367 AVComplexFloat *fft_data_flt = dnch->fft_out;
368 double *gain = dnch->gain;
369
370 for (int i = 0; i < s->bin_count; i++) {
371 double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
372
373 switch (s->format) {
374 case AV_SAMPLE_FMT_FLTP:
375 noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
376 break;
377 case AV_SAMPLE_FMT_DBLP:
378 noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
379 break;
380 default:
381 av_assert2(0);
382 }
383
384 power = mag * mag;
385 mag_abs_var = power / abs_var[i];
386 new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
387 new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
388 sqr_new_gain = new_gain * new_gain;
389 prior[i] = mag_abs_var * sqr_new_gain;
390 dnch->clean_data[i] = power * sqr_new_gain;
391 gain[i] = new_gain;
392 }
393
394 if (track_noise) {
395 double flatness, num, den;
396
397 spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
398
399 flatness = num / den;
400 if (flatness > 0.8) {
401 const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
402 const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
403
404 dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
405 set_parameters(s, dnch, 1, 1);
406 }
407 }
408
409 for (int i = 0; i < s->number_of_bands; i++) {
410 band_excit[i] = 0.0;
411 band_amt[i] = 0.0;
412 }
413
414 for (int i = 0; i < s->bin_count; i++)
415 band_excit[bin2band[i]] += dnch->clean_data[i];
416
417 for (int i = 0; i < s->number_of_bands; i++) {
418 band_excit[i] = fmax(band_excit[i],
419 s->band_alpha[i] * band_excit[i] +
420 s->band_beta[i] * prior_band_excit[i]);
421 prior_band_excit[i] = band_excit[i];
422 }
423
424 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
425 for (int k = 0; k < s->number_of_bands; k++) {
426 band_amt[j] += dnch->spread_function[i++] * band_excit[k];
427 }
428 }
429
430 for (int i = 0; i < s->bin_count; i++)
431 dnch->amt[i] = band_amt[bin2band[i]];
432
433 for (int i = 0; i < s->bin_count; i++) {
434 if (dnch->amt[i] > abs_var[i]) {
435 gain[i] = 1.0;
436 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
437 const double limit = sqrt(abs_var[i] / dnch->amt[i]);
438
439 gain[i] = limit_gain(gain[i], limit);
440 } else {
441 gain[i] = limit_gain(gain[i], dnch->max_gain);
442 }
443 }
444
445 memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
446 if (s->gain_smooth > 0) {
447 const int r = s->gain_smooth;
448
449 for (int i = r; i < s->bin_count - r; i++) {
450 const double gc = gain[i];
451 double num = 0., den = 0.;
452
453 for (int j = -r; j <= r; j++) {
454 const double g = gain[i + j];
455 const double d = 1. - fabs(g - gc);
456
457 num += g * d;
458 den += d;
459 }
460
461 smoothed_gain[i] = num / den;
462 }
463 }
464
465 switch (s->format) {
466 case AV_SAMPLE_FMT_FLTP:
467 for (int i = 0; i < s->bin_count; i++) {
468 const float new_gain = smoothed_gain[i];
469
470 fft_data_flt[i].re *= new_gain;
471 fft_data_flt[i].im *= new_gain;
472 }
473 break;
474 case AV_SAMPLE_FMT_DBLP:
475 for (int i = 0; i < s->bin_count; i++) {
476 const double new_gain = smoothed_gain[i];
477
478 fft_data_dbl[i].re *= new_gain;
479 fft_data_dbl[i].im *= new_gain;
480 }
481 break;
482 }
483 }
484
485 static double freq2bark(double x)
486 {
487 double d = x / 7500.0;
488
489 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
490 }
491
492 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
493 {
494 if (band == -1)
495 return lrint(s->band_centre[0] / 1.5);
496
497 return s->band_centre[band];
498 }
499
500 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
501 {
502 int i;
503
504 if (band == NB_PROFILE_BANDS) {
505 i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
506 } else {
507 i = lrint(s->band_centre[band] / 1.224745);
508 }
509
510 return FFMIN(i, s->sample_rate / 2);
511 }
512
513 static void set_band_parameters(AudioFFTDeNoiseContext *s,
514 DeNoiseChannel *dnch)
515 {
516 double band_noise, d2, d3, d4, d5;
517 int i = 0, j = 0, k = 0;
518
519 d5 = 0.0;
520 band_noise = process_get_band_noise(s, dnch, 0);
521 for (int m = j; m < s->bin_count; m++) {
522 if (m == j) {
523 i = j;
524 d5 = band_noise;
525 if (k >= NB_PROFILE_BANDS) {
526 j = s->bin_count;
527 } else {
528 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
529 }
530 d2 = j - i;
531 band_noise = process_get_band_noise(s, dnch, k);
532 k++;
533 }
534 d3 = (j - m) / d2;
535 d4 = (m - i) / d2;
536 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
537 }
538
539 for (i = 0; i < NB_PROFILE_BANDS; i++)
540 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
541 }
542
543 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
544 {
545 DeNoiseChannel *dnch = &s->dnch[ch];
546 char *custom_noise_str, *p, *arg, *saveptr = NULL;
547 double band_noise[NB_PROFILE_BANDS] = { 0.f };
548 int ret;
549
550 if (!s->band_noise_str)
551 return;
552
553 custom_noise_str = p = av_strdup(s->band_noise_str);
554 if (!p)
555 return;
556
557 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
558 float noise;
559
560 if (!(arg = av_strtok(p, "| ", &saveptr)))
561 break;
562
563 p = NULL;
564
565 ret = av_sscanf(arg, "%f", &noise);
566 if (ret != 1) {
567 av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
568 break;
569 }
570
571 band_noise[i] = av_clipd(noise, -24., 24.);
572 }
573
574 av_free(custom_noise_str);
575 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
576 }
577
578 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
579 {
580 if (dnch->last_noise_floor != dnch->noise_floor)
581 dnch->last_noise_floor = dnch->noise_floor;
582
583 if (s->track_residual)
584 dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
585
586 dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
587 if (update_auto_var) {
588 for (int i = 0; i < NB_PROFILE_BANDS; i++)
589 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
590 }
591
592 if (s->track_residual) {
593 if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
594 update_var = 1;
595 dnch->last_residual_floor = dnch->residual_floor;
596 dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
597 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
598 }
599 } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
600 update_var = 1;
601 dnch->last_noise_reduction = dnch->noise_reduction;
602 dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
603 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
604 }
605
606 dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
607
608 if (update_var) {
609 set_band_parameters(s, dnch);
610
611 for (int i = 0; i < s->bin_count; i++) {
612 dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
613 dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
614 }
615 }
616 }
617
618 static void reduce_mean(double *band_noise)
619 {
620 double mean = 0.f;
621
622 for (int i = 0; i < NB_PROFILE_BANDS; i++)
623 mean += band_noise[i];
624 mean /= NB_PROFILE_BANDS;
625
626 for (int i = 0; i < NB_PROFILE_BANDS; i++)
627 band_noise[i] -= mean;
628 }
629
630 static int config_input(AVFilterLink *inlink)
631 {
632 AVFilterContext *ctx = inlink->dst;
633 AudioFFTDeNoiseContext *s = ctx->priv;
634 double wscale, sar, sum, sdiv;
635 int i, j, k, m, n, ret, tx_type;
636 double dscale = 1.;
637 float fscale = 1.f;
638 void *scale;
639
640 s->format = inlink->format;
641
642 switch (s->format) {
643 case AV_SAMPLE_FMT_FLTP:
644 s->sample_size = sizeof(float);
645 s->complex_sample_size = sizeof(AVComplexFloat);
646 tx_type = AV_TX_FLOAT_RDFT;
647 scale = &fscale;
648 break;
649 case AV_SAMPLE_FMT_DBLP:
650 s->sample_size = sizeof(double);
651 s->complex_sample_size = sizeof(AVComplexDouble);
652 tx_type = AV_TX_DOUBLE_RDFT;
653 scale = &dscale;
654 break;
655 }
656
657 s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
658 if (!s->dnch)
659 return AVERROR(ENOMEM);
660
661 s->channels = inlink->ch_layout.nb_channels;
662 s->sample_rate = inlink->sample_rate;
663 s->sample_advance = s->sample_rate / 80;
664 s->window_length = 3 * s->sample_advance;
665 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
666 s->fft_length = s->fft_length2;
667 s->buffer_length = s->fft_length * 2;
668 s->bin_count = s->fft_length2 / 2 + 1;
669
670 s->band_centre[0] = 80;
671 for (i = 1; i < NB_PROFILE_BANDS; i++) {
672 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
673 if (s->band_centre[i] < 1000) {
674 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
675 } else if (s->band_centre[i] < 5000) {
676 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
677 } else if (s->band_centre[i] < 15000) {
678 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
679 } else {
680 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
681 }
682 }
683
684 for (j = 0; j < SOLVE_SIZE; j++) {
685 for (k = 0; k < SOLVE_SIZE; k++) {
686 s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
687 for (m = 0; m < NB_PROFILE_BANDS; m++)
688 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
689 }
690 }
691
692 factor(s->matrix_a, SOLVE_SIZE);
693
694 i = 0;
695 for (j = 0; j < SOLVE_SIZE; j++)
696 for (k = 0; k < NB_PROFILE_BANDS; k++)
697 s->matrix_b[i++] = pow(k, j);
698
699 i = 0;
700 for (j = 0; j < NB_PROFILE_BANDS; j++)
701 for (k = 0; k < SOLVE_SIZE; k++)
702 s->matrix_c[i++] = pow(j, k);
703
704 s->window = av_calloc(s->window_length, sizeof(*s->window));
705 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
706 if (!s->window || !s->bin2band)
707 return AVERROR(ENOMEM);
708
709 sdiv = s->band_multiplier;
710 for (i = 0; i < s->bin_count; i++)
711 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
712
713 s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
714
715 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
716 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
717 if (!s->band_alpha || !s->band_beta)
718 return AVERROR(ENOMEM);
719
720 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
721 DeNoiseChannel *dnch = &s->dnch[ch];
722
723 switch (s->noise_type) {
724 case WHITE_NOISE:
725 for (i = 0; i < NB_PROFILE_BANDS; i++)
726 dnch->band_noise[i] = 0.;
727 break;
728 case VINYL_NOISE:
729 for (i = 0; i < NB_PROFILE_BANDS; i++)
730 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
731 break;
732 case SHELLAC_NOISE:
733 for (i = 0; i < NB_PROFILE_BANDS; i++)
734 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
735 break;
736 case CUSTOM_NOISE:
737 read_custom_noise(s, ch);
738 break;
739 default:
740 return AVERROR_BUG;
741 }
742
743 reduce_mean(dnch->band_noise);
744
745 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
746 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
747 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
748 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
749 dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
750 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
751 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
752 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
753 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
754 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
755 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
756 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
757 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
758 dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
759 dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size);
760 ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
761 if (ret < 0)
762 return ret;
763 ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
764 if (ret < 0)
765 return ret;
766 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
767 sizeof(*dnch->spread_function));
768
769 if (!dnch->amt ||
770 !dnch->band_amt ||
771 !dnch->band_excit ||
772 !dnch->gain ||
773 !dnch->smoothed_gain ||
774 !dnch->prior ||
775 !dnch->prior_band_excit ||
776 !dnch->clean_data ||
777 !dnch->noisy_data ||
778 !dnch->out_samples ||
779 !dnch->fft_in ||
780 !dnch->fft_out ||
781 !dnch->abs_var ||
782 !dnch->rel_var ||
783 !dnch->min_abs_var ||
784 !dnch->spread_function ||
785 !dnch->fft ||
786 !dnch->ifft)
787 return AVERROR(ENOMEM);
788 }
789
790 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
791 DeNoiseChannel *dnch = &s->dnch[ch];
792 double *prior_band_excit = dnch->prior_band_excit;
793 double min, max;
794 double p1, p2;
795
796 p1 = pow(0.1, 2.5 / sdiv);
797 p2 = pow(0.1, 1.0 / sdiv);
798 j = 0;
799 for (m = 0; m < s->number_of_bands; m++) {
800 for (n = 0; n < s->number_of_bands; n++) {
801 if (n < m) {
802 dnch->spread_function[j++] = pow(p2, m - n);
803 } else if (n > m) {
804 dnch->spread_function[j++] = pow(p1, n - m);
805 } else {
806 dnch->spread_function[j++] = 1.0;
807 }
808 }
809 }
810
811 for (m = 0; m < s->number_of_bands; m++) {
812 dnch->band_excit[m] = 0.0;
813 prior_band_excit[m] = 0.0;
814 }
815
816 for (m = 0; m < s->bin_count; m++)
817 dnch->band_excit[s->bin2band[m]] += 1.0;
818
819 j = 0;
820 for (m = 0; m < s->number_of_bands; m++) {
821 for (n = 0; n < s->number_of_bands; n++)
822 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
823 }
824
825 min = pow(0.1, 2.5);
826 max = pow(0.1, 1.0);
827 for (int i = 0; i < s->number_of_bands; i++) {
828 if (i < lrint(12.0 * sdiv)) {
829 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
830 } else {
831 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
832 }
833 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
834 }
835
836 for (int i = 0; i < s->buffer_length; i++)
837 dnch->out_samples[i] = 0;
838
839 j = 0;
840 for (int i = 0; i < s->number_of_bands; i++)
841 for (int k = 0; k < s->number_of_bands; k++)
842 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
843 }
844
845 j = 0;
846 sar = s->sample_advance / s->sample_rate;
847 for (int i = 0; i < s->bin_count; i++) {
848 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
849 double d6 = (i - 1) * s->sample_rate / s->fft_length;
850 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
851 s->band_alpha[j] = exp(-sar / d7);
852 s->band_beta[j] = 1.0 - s->band_alpha[j];
853 j = s->bin2band[i];
854 }
855 }
856
857 s->winframe = ff_get_audio_buffer(inlink, s->window_length);
858 if (!s->winframe)
859 return AVERROR(ENOMEM);
860
861 wscale = sqrt(8.0 / (9.0 * s->fft_length));
862 sum = 0.0;
863 for (int i = 0; i < s->window_length; i++) {
864 double d10 = sin(i * M_PI / s->window_length);
865 d10 *= wscale * d10;
866 s->window[i] = d10;
867 sum += d10 * d10;
868 }
869
870 s->window_weight = 0.5 * sum;
871 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
872 s->sample_floor = s->floor * exp(4.144600506562284);
873
874 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
875 DeNoiseChannel *dnch = &s->dnch[ch];
876
877 dnch->noise_reduction = s->noise_reduction;
878 dnch->noise_floor = s->noise_floor;
879 dnch->residual_floor = s->residual_floor;
880
881 set_parameters(s, dnch, 1, 1);
882 }
883
884 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
885 i = 0;
886 for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
887 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
888 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
889 i++;
890 s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
891 }
892 s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
893
894 return 0;
895 }
896
897 static void init_sample_noise(DeNoiseChannel *dnch)
898 {
899 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
900 dnch->noise_band_norm[i] = 0.0;
901 dnch->noise_band_avr[i] = 0.0;
902 dnch->noise_band_avi[i] = 0.0;
903 dnch->noise_band_var[i] = 0.0;
904 }
905 }
906
/**
 * Accumulate noise statistics from one analysis window of a channel.
 *
 * The first s->window_length samples of in->extended_data[ch] are multiplied
 * by s->window and by (1LL << 23), stored in dnch->fft_in, zero-padded up to
 * s->fft_length2 and transformed with dnch->tx_fn into dnch->fft_out.
 * The bins from s->noise_band_edge[0] up to and including edgemax are then
 * walked once. For every band the sums of the real parts, of the imaginary
 * parts and of the floored squared magnitudes are added to noise_band_avr,
 * noise_band_avi and noise_band_var, and the band's bin count is added to
 * noise_band_norm.
 *
 * @param s    filter context (window, band edges, sample format, floor)
 * @param dnch per-channel state holding the FFT buffers and accumulators
 * @param in   frame whose channel data is analysed
 * @param ch   index of the channel inside in->extended_data
 */
907 static void sample_noise_block(AudioFFTDeNoiseContext *s,
908 DeNoiseChannel *dnch,
909 AVFrame *in, int ch)
910 {
/* The same buffers are viewed as float or double depending on s->format. */
911 double *src_dbl = (double *)in->extended_data[ch];
912 float *src_flt = (float *)in->extended_data[ch];
913 double mag2, var = 0.0, avr = 0.0, avi = 0.0;
914 AVComplexDouble *fft_out_dbl = dnch->fft_out;
915 AVComplexFloat *fft_out_flt = dnch->fft_out;
916 double *fft_in_dbl = dnch->fft_in;
917 float *fft_in_flt = dnch->fft_in;
918 int edge, j, k, n, edgemax;
919
/* Window and scale the input, then zero the rest of the transform input. */
920 switch (s->format) {
921 case AV_SAMPLE_FMT_FLTP:
922 for (int i = 0; i < s->window_length; i++)
923 fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);
924
925 for (int i = s->window_length; i < s->fft_length2; i++)
926 fft_in_flt[i] = 0.f;
927 break;
928 case AV_SAMPLE_FMT_DBLP:
929 for (int i = 0; i < s->window_length; i++)
930 fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);
931
932 for (int i = s->window_length; i < s->fft_length2; i++)
933 fft_in_dbl[i] = 0.;
934 break;
935 }
936
937 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
938
/*
 * edge: first bin of the band being accumulated
 * j:    bin at which the next band starts
 * k:    number of band edges consumed so far (current band is k - 1)
 * n:    bin currently read from fft_out
 */
939 edge = s->noise_band_edge[0];
940 j = edge;
941 k = 0;
942 n = j;
943 edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
944 for (int i = j; i <= edgemax; i++) {
/* A new band starts here: flush the finished band (if any) and reset the sums. */
945 if ((i == j) && (i < edgemax)) {
946 if (j > edge) {
947 dnch->noise_band_norm[k - 1] += j - edge;
948 dnch->noise_band_avr[k - 1] += avr;
949 dnch->noise_band_avi[k - 1] += avi;
950 dnch->noise_band_var[k - 1] += var;
951 }
952 k++;
953 edge = j;
954 j = s->noise_band_edge[k];
/*
 * Once the last edge has been loaded, bump j past it: i can then no longer
 * equal j inside the loop, and the final (j - edge) count taken after the
 * loop includes the bin at the upper edge.
 */
955 if (k == NB_PROFILE_BANDS) {
956 j++;
957 }
958 var = 0.0;
959 avr = 0.0;
960 avi = 0.0;
961 }
962
963 switch (s->format) {
964 case AV_SAMPLE_FMT_FLTP:
965 avr += fft_out_flt[n].re;
966 avi += fft_out_flt[n].im;
967 mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
968 fft_out_flt[n].im * fft_out_flt[n].im;
969 break;
970 case AV_SAMPLE_FMT_DBLP:
971 avr += fft_out_dbl[n].re;
972 avi += fft_out_dbl[n].im;
973 mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
974 fft_out_dbl[n].im * fft_out_dbl[n].im;
975 break;
/* mag2 is not assigned on this path; only the two formats above are expected. */
976 default:
977 av_assert2(0);
978 }
979
/* Clamp the squared magnitude from below before adding it to the band sum. */
980 mag2 = fmax(mag2, s->sample_floor);
981
982 var += mag2;
983 n++;
984 }
985
/*
 * Flush the band that was still being accumulated when the loop ended.
 * NOTE(review): if the band-start branch never ran (noise_band_edge[0] >=
 * edgemax), k is still 0 and k - 1 indexes before the arrays. Confirm that
 * the configuration code rules this out.
 */
986 dnch->noise_band_norm[k - 1] += j - edge;
987 dnch->noise_band_avr[k - 1] += avr;
988 dnch->noise_band_avi[k - 1] += avi;
989 dnch->noise_band_var[k - 1] += var;
990 }
991
/**
 * Turn the accumulated per-band sums into a noise level per band.
 *
 * For each of the first s->noise_band_count bands the accumulators are divided
 * by noise_band_norm to get the mean real part, the mean imaginary part and
 * the mean squared magnitude. The squared magnitude of the mean is subtracted
 * from the mean squared magnitude, and the result is stored in noise_band_var
 * and noise_band_auto_var. sample_noise[i] receives
 * 10 * log10(noise_band_var[i] / s->floor) - 100.
 * Bands from noise_band_count up to NB_PROFILE_BANDS copy the preceding
 * sample_noise entry.
 *
 * @param s            filter context (noise_band_count, floor)
 * @param dnch         per-channel accumulators, normalised in place
 * @param sample_noise output array of NB_PROFILE_BANDS values
 */
992 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
993 DeNoiseChannel *dnch,
994 double *sample_noise)
995 {
996 for (int i = 0; i < s->noise_band_count; i++) {
/*
 * NOTE(review): a band whose noise_band_norm is still 0 divides by zero
 * here. Confirm that every band below noise_band_count received samples.
 */
997 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
998 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
999 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
/* Subtract the squared magnitude of the mean from the mean squared magnitude. */
1000 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
1001 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
1002 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
1003 sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
1004 }
/*
 * Fill the bands that were not measured with the previous band's value.
 * NOTE(review): with noise_band_count == 0 the first iteration reads
 * sample_noise[-1]. Confirm that noise_band_count is always >= 1.
 */
1005 if (s->noise_band_count < NB_PROFILE_BANDS) {
1006 for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
1007 sample_noise[i] = sample_noise[i - 1];
1008 }
1009 }
1010
/**
 * Derive a channel's band noise profile from measured per-band noise levels.
 *
 * The NB_PROFILE_BANDS input values are multiplied by s->matrix_b into the
 * SOLVE_SIZE-element s->vector_b, which is passed to solve() together with
 * s->matrix_a. The result is multiplied by s->matrix_c back into
 * NB_PROFILE_BANDS values, passed through reduce_mean(), clipped to
 * [-24, 24], logged at AV_LOG_INFO and copied to dnch->band_noise.
 *
 * @param s            filter context holding matrix_a, matrix_b, matrix_c
 *                     and vector_b (vector_b is overwritten)
 * @param dnch         channel whose band_noise is replaced
 * @param sample_noise NB_PROFILE_BANDS measured values; not modified
 */
1011 static void set_noise_profile(AudioFFTDeNoiseContext *s,
1012 DeNoiseChannel *dnch,
1013 double *sample_noise)
1014 {
1015 double new_band_noise[NB_PROFILE_BANDS];
1016 double temp[NB_PROFILE_BANDS];
1017 double sum = 0.0;
1018
1019 for (int m = 0; m < NB_PROFILE_BANDS; m++)
1020 temp[m] = sample_noise[m];
1021
/* vector_b = matrix_b * temp; matrix_b is read sequentially, one row of NB_PROFILE_BANDS per m. */
1022 for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
1023 sum = 0.0;
1024 for (int n = 0; n < NB_PROFILE_BANDS; n++)
1025 sum += s->matrix_b[i++] * temp[n];
1026 s->vector_b[m] = sum;
1027 }
/* Presumably solves the SOLVE_SIZE linear system in place, leaving the result in vector_b. Verify against solve(). */
1028 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
/* temp = matrix_c * vector_b; matrix_c is read sequentially, one row of SOLVE_SIZE per m. */
1029 for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
1030 sum = 0.0;
1031 for (int n = 0; n < SOLVE_SIZE; n++)
1032 sum += s->matrix_c[i++] * s->vector_b[n];
1033 temp[m] = sum;
1034 }
1035
1036 reduce_mean(temp);
1037
/* Clip each band, log the profile as a single "bn=" line and store it. */
1038 av_log(s, AV_LOG_INFO, "bn=");
1039 for (int m = 0; m < NB_PROFILE_BANDS; m++) {
1040 new_band_noise[m] = temp[m];
1041 new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
1042 av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
1043 }
1044 av_log(s, AV_LOG_INFO, "\n");
1045 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1046 }
1047
/**
 * Slice job: denoise one analysis window for a range of channels.
 *
 * The channels of the frame are split evenly between nb_jobs jobs; this job
 * handles channels [start, end). For each channel the windowed, scaled input
 * is transformed with tx_fn, handed to process_frame(), transformed back with
 * itx_fn into fft_in, then windowed again, scaled back and added onto
 * dnch->out_samples.
 *
 * @param ctx     filter context
 * @param arg     AVFrame holding the analysis window for every channel
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs
 * @return always 0
 */
1048 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1049 {
1050 AudioFFTDeNoiseContext *s = ctx->priv;
1051 AVFrame *in = arg;
1052 const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
1053 const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1054 const int window_length = s->window_length;
1055 const double *window = s->window;
1056
1057 for (int ch = start; ch < end; ch++) {
1058 DeNoiseChannel *dnch = &s->dnch[ch];
/* The same buffers are viewed as float or double depending on s->format. */
1059 const double *src_dbl = (const double *)in->extended_data[ch];
1060 const float *src_flt = (const float *)in->extended_data[ch];
1061 double *dst = dnch->out_samples;
1062 double *fft_in_dbl = dnch->fft_in;
1063 float *fft_in_flt = dnch->fft_in;
1064
/* Window and scale the input, then zero the rest of the transform input. */
1065 switch (s->format) {
1066 case AV_SAMPLE_FMT_FLTP:
1067 for (int m = 0; m < window_length; m++)
1068 fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
1069
1070 for (int m = window_length; m < s->fft_length2; m++)
1071 fft_in_flt[m] = 0.f;
1072 break;
1073 case AV_SAMPLE_FMT_DBLP:
1074 for (int m = 0; m < window_length; m++)
1075 fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
1076
1077 for (int m = window_length; m < s->fft_length2; m++)
1078 fft_in_dbl[m] = 0.;
1079 break;
1080 }
1081
1082 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
1083
1084 process_frame(ctx, s, dnch,
1085 dnch->prior,
1086 dnch->prior_band_excit,
1087 s->track_noise);
1088
/* Inverse transform: fft_out is the input and fft_in is reused for the output. */
1089 dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);
1090
/* Window again, undo the (1LL << 23) scaling and add onto out_samples. */
1091 switch (s->format) {
1092 case AV_SAMPLE_FMT_FLTP:
1093 for (int m = 0; m < window_length; m++)
1094 dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
1095 break;
1096 case AV_SAMPLE_FMT_DBLP:
1097 for (int m = 0; m < window_length; m++)
1098 dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
1099 break;
1100 }
1101 }
1102
1103 return 0;
1104 }
1105
/**
 * Push one block of input samples through the filter and emit one frame.
 *
 * Steps, in order:
 *  - slide the window buffer (s->winframe) by s->sample_advance samples and
 *    append the samples of in;
 *  - if s->track_noise is set, link the per-channel noise floors according to
 *    s->noise_floor_link and refresh parameters for channels whose floor
 *    changed;
 *  - handle start, accumulation and stop of noise sampling;
 *  - run filter_channel() over all channels;
 *  - write the output selected by the output mode (IN_MODE when the filter is
 *    disabled) and slide each channel's out_samples buffer.
 *
 * @param inlink input link of the filter
 * @param in     input frame; reused as the output frame when it is writable,
 *               otherwise freed before returning
 * @return the result of ff_filter_frame(), AVERROR(ENOMEM) if the output
 *         frame cannot be allocated, or AVERROR_BUG for an unknown output mode
 */
1106 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1107 {
1108 AVFilterContext *ctx = inlink->dst;
1109 AVFilterLink *outlink = ctx->outputs[0];
1110 AudioFFTDeNoiseContext *s = ctx->priv;
1111 const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
/* Number of samples kept in the window buffer from the previous call. */
1112 const int offset = s->window_length - s->sample_advance;
1113 AVFrame *out;
1114
1115 for (int ch = 0; ch < s->channels; ch++) {
1116 uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
1117
/* Drop the oldest sample_advance samples, append the new ones, and zero
 * whatever part of the advance the input frame did not fill. */
1118 memmove(src, src + s->sample_advance * s->sample_size,
1119 offset * s->sample_size);
1120 memcpy(src + offset * s->sample_size, in->extended_data[ch],
1121 in->nb_samples * s->sample_size);
1122 memset(src + s->sample_size * (offset + in->nb_samples), 0,
1123 (s->sample_advance - in->nb_samples) * s->sample_size);
1124 }
1125
1126 if (s->track_noise) {
1127 double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1128
/* Collect the minimum, maximum and mean noise floor over all channels. */
1129 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1130 DeNoiseChannel *dnch = &s->dnch[ch];
1131
1132 average += dnch->noise_floor;
1133 max = fmax(max, dnch->noise_floor);
1134 min = fmin(min, dnch->noise_floor);
1135 }
1136
1137 average /= inlink->ch_layout.nb_channels;
1138
1139 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1140 DeNoiseChannel *dnch = &s->dnch[ch];
1141
/* Replace the channel's floor by the linked value; NONE_LINK keeps it. */
1142 switch (s->noise_floor_link) {
1143 case MIN_LINK: dnch->noise_floor = min; break;
1144 case MAX_LINK: dnch->noise_floor = max; break;
1145 case AVERAGE_LINK: dnch->noise_floor = average; break;
1146 case NONE_LINK:
1147 default:
1148 break;
1149 }
1150
1151 if (dnch->noise_floor != dnch->last_noise_floor)
1152 set_parameters(s, dnch, 1, 0);
1153 }
1154 }
1155
/* Start request: clear the accumulators and begin sampling with this block. */
1156 if (s->sample_noise_mode == SAMPLE_START) {
1157 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1158 DeNoiseChannel *dnch = &s->dnch[ch];
1159
1160 init_sample_noise(dnch);
1161 }
1162 s->sample_noise_mode = SAMPLE_NONE;
1163 s->sample_noise = 1;
1164 s->sample_noise_blocks = 0;
1165 }
1166
/* While sampling is active, accumulate statistics from the current window. */
1167 if (s->sample_noise) {
1168 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1169 DeNoiseChannel *dnch = &s->dnch[ch];
1170
1171 sample_noise_block(s, dnch, s->winframe, ch);
1172 }
1173 s->sample_noise_blocks++;
1174 }
1175
/* Stop request: convert the accumulated statistics into a new noise profile. */
1176 if (s->sample_noise_mode == SAMPLE_STOP) {
1177 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1178 DeNoiseChannel *dnch = &s->dnch[ch];
1179 double sample_noise[NB_PROFILE_BANDS];
1180
/* No block was sampled: leave the existing profiles untouched. */
1181 if (s->sample_noise_blocks <= 0)
1182 break;
1183 finish_sample_noise(s, dnch, sample_noise);
1184 set_noise_profile(s, dnch, sample_noise);
1185 set_parameters(s, dnch, 1, 1);
1186 }
1187 s->sample_noise = 0;
1188 s->sample_noise_blocks = 0;
1189 s->sample_noise_mode = SAMPLE_NONE;
1190 }
1191
/* Use at most one job per channel. NOTE(review): the return value is not checked. */
1192 ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1193 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1194
/* Write into the input frame when possible, otherwise allocate a new one. */
1195 if (av_frame_is_writable(in)) {
1196 out = in;
1197 } else {
1198 out = ff_get_audio_buffer(outlink, in->nb_samples);
1199 if (!out) {
1200 av_frame_free(&in);
1201 return AVERROR(ENOMEM);
1202 }
1203
1204 av_frame_copy_props(out, in);
1205 }
1206
1207 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1208 DeNoiseChannel *dnch = &s->dnch[ch];
/* src: this channel's out_samples buffer; orig_*: the start of the window buffer. */
1209 double *src = dnch->out_samples;
1210 const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
1211 const float *orig_flt = (const float *)s->winframe->extended_data[ch];
1212 double *dst_dbl = (double *)out->extended_data[ch];
1213 float *dst_flt = (float *)out->extended_data[ch];
1214
1215 switch (output_mode) {
/* IN_MODE: output the samples at the start of the window buffer. */
1216 case IN_MODE:
1217 switch (s->format) {
1218 case AV_SAMPLE_FMT_FLTP:
1219 for (int m = 0; m < out->nb_samples; m++)
1220 dst_flt[m] = orig_flt[m];
1221 break;
1222 case AV_SAMPLE_FMT_DBLP:
1223 for (int m = 0; m < out->nb_samples; m++)
1224 dst_dbl[m] = orig_dbl[m];
1225 break;
1226 }
1227 break;
/* OUT_MODE: output the samples from out_samples. */
1228 case OUT_MODE:
1229 switch (s->format) {
1230 case AV_SAMPLE_FMT_FLTP:
1231 for (int m = 0; m < out->nb_samples; m++)
1232 dst_flt[m] = src[m];
1233 break;
1234 case AV_SAMPLE_FMT_DBLP:
1235 for (int m = 0; m < out->nb_samples; m++)
1236 dst_dbl[m] = src[m];
1237 break;
1238 }
1239 break;
/* NOISE_MODE: output the window-buffer samples minus out_samples. */
1240 case NOISE_MODE:
1241 switch (s->format) {
1242 case AV_SAMPLE_FMT_FLTP:
1243 for (int m = 0; m < out->nb_samples; m++)
1244 dst_flt[m] = orig_flt[m] - src[m];
1245 break;
1246 case AV_SAMPLE_FMT_DBLP:
1247 for (int m = 0; m < out->nb_samples; m++)
1248 dst_dbl[m] = orig_dbl[m] - src[m];
1249 break;
1250 }
1251 break;
/* Unknown mode: release both frames (only once when they are the same). */
1252 default:
1253 if (in != out)
1254 av_frame_free(&in);
1255 av_frame_free(&out);
1256 return AVERROR_BUG;
1257 }
1258
/* Slide out_samples by sample_advance and clear the freed tail. */
1259 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1260 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1261 }
1262
1263 if (out != in)
1264 av_frame_free(&in);
1265 return ff_filter_frame(outlink, out);
1266 }
1267
/**
 * Activation callback: consume input in blocks of s->sample_advance samples.
 *
 * Requests a frame with both the minimum and the maximum sample count set to
 * s->sample_advance and, when one is obtained, passes it to output_frame().
 * If no frame was obtained but enough samples are queued, the filter marks
 * itself ready again and returns 0. Otherwise status and frame requests are
 * forwarded between the links through the FF_FILTER_FORWARD_* macros.
 *
 * @param ctx filter context
 * @return the result of output_frame(), a negative error code from
 *         ff_inlink_consume_samples(), 0 after rescheduling, or
 *         FFERROR_NOT_READY when nothing could be done (the forwarding
 *         macros may presumably return earlier; see their definitions)
 */
1268 static int activate(AVFilterContext *ctx)
1269 {
1270 AVFilterLink *inlink = ctx->inputs[0];
1271 AVFilterLink *outlink = ctx->outputs[0];
1272 AudioFFTDeNoiseContext *s = ctx->priv;
1273 AVFrame *in = NULL;
1274 int ret;
1275
1276 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1277
1278 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1279 if (ret < 0)
1280 return ret;
1281 if (ret > 0)
1282 return output_frame(inlink, in);
1283
/* More full blocks are already queued: ask to be activated again. */
1284 if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1285 ff_filter_set_ready(ctx, 10);
1286 return 0;
1287 }
1288
1289 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1290 FF_FILTER_FORWARD_WANTED(outlink, inlink);
1291
1292 return FFERROR_NOT_READY;
1293 }
1294
/**
 * Release everything owned by the filter instance.
 *
 * Frees the shared buffers (window, bin2band, band_alpha, band_beta) and the
 * window frame. If the per-channel array exists, it then frees each channel's
 * buffers and both transform contexts before freeing the array itself.
 *
 * @param ctx filter context
 */
1295 static av_cold void uninit(AVFilterContext *ctx)
1296 {
1297 AudioFFTDeNoiseContext *s = ctx->priv;
1298
1299 av_freep(&s->window);
1300 av_freep(&s->bin2band);
1301 av_freep(&s->band_alpha);
1302 av_freep(&s->band_beta);
1303 av_frame_free(&s->winframe);
1304
/* s->dnch is only walked when it is non-NULL. */
1305 if (s->dnch) {
1306 for (int ch = 0; ch < s->channels; ch++) {
1307 DeNoiseChannel *dnch = &s->dnch[ch];
1308 av_freep(&dnch->amt);
1309 av_freep(&dnch->band_amt);
1310 av_freep(&dnch->band_excit);
1311 av_freep(&dnch->gain);
1312 av_freep(&dnch->smoothed_gain);
1313 av_freep(&dnch->prior);
1314 av_freep(&dnch->prior_band_excit);
1315 av_freep(&dnch->clean_data);
1316 av_freep(&dnch->noisy_data);
1317 av_freep(&dnch->out_samples);
1318 av_freep(&dnch->spread_function);
1319 av_freep(&dnch->abs_var);
1320 av_freep(&dnch->rel_var);
1321 av_freep(&dnch->min_abs_var);
1322 av_freep(&dnch->fft_in);
1323 av_freep(&dnch->fft_out);
1324 av_tx_uninit(&dnch->fft);
1325 av_tx_uninit(&dnch->ifft);
1326 }
1327 av_freep(&s->dnch);
1328 }
1329 }
1330
/**
 * Handle a runtime command sent to the filter.
 *
 * The command is first passed to ff_filter_process_command(). For the
 * "sample_noise" / "sn" command nothing else is done here. For any other
 * command that succeeded, the context's noise_reduction, noise_floor and
 * residual_floor are copied to every channel and set_parameters() is called
 * for each of them.
 *
 * @param ctx     filter context
 * @param cmd     command name
 * @param args    command arguments
 * @param res     buffer for a textual result
 * @param res_len size of res
 * @param flags   command flags
 * @return 0 on success, or the negative error code from
 *         ff_filter_process_command()
 */
1331 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1332 char *res, int res_len, int flags)
1333 {
1334 AudioFFTDeNoiseContext *s = ctx->priv;
1335 int ret = 0;
1336
1337 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1338 if (ret < 0)
1339 return ret;
1340
/* Noise sampling commands skip the per-channel parameter refresh below. */
1341 if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1342 return 0;
1343
1344 for (int ch = 0; ch < s->channels; ch++) {
1345 DeNoiseChannel *dnch = &s->dnch[ch];
1346
1347 dnch->noise_reduction = s->noise_reduction;
1348 dnch->noise_floor = s->noise_floor;
1349 dnch->residual_floor = s->residual_floor;
1350
1351 set_parameters(s, dnch, 1, 1);
1352 }
1353
1354 return 0;
1355 }
1356
/* Single audio input pad named "default"; config_input is its config_props callback. */
1357 static const AVFilterPad inputs[] = {
1358 {
1359 .name = "default",
1360 .type = AVMEDIA_TYPE_AUDIO,
1361 .config_props = config_input,
1362 },
1363 };
1364
/*
 * Definition of the "afftdn" filter: wires up the activate, uninit and
 * process_command callbacks, uses the input pad table above and the default
 * audio output pad, and lists planar float and planar double as its sample
 * formats. The flags request internal timeline support and slice threading.
 */
1365 const AVFilter ff_af_afftdn = {
1366 .name = "afftdn",
1367 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1368 .priv_size = sizeof(AudioFFTDeNoiseContext),
1369 .priv_class = &afftdn_class,
1370 .activate = activate,
1371 .uninit = uninit,
1372 FILTER_INPUTS(inputs),
1373 FILTER_OUTPUTS(ff_audio_default_filterpad),
1374 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
1375 .process_command = process_command,
1376 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1377 AVFILTER_FLAG_SLICE_THREADS,
1378 };
1379