FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_afftdn.c
Date: 2024-04-19 17:50:32
Exec Total Coverage
Lines: 0 770 0.0%
Functions: 0 25 0.0%
Branches: 0 417 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/avstring.h"
24 #include "libavutil/channel_layout.h"
25 #include "libavutil/mem.h"
26 #include "libavutil/opt.h"
27 #include "libavutil/tx.h"
28 #include "avfilter.h"
29 #include "audio.h"
30 #include "filters.h"
31
/* Scale factor applied inside exp() when converting dB-style values: ln(10) / 10. */
#define C (M_LN10 * 0.1)
/* Dimension of the square system handled by factor()/solve(). */
#define SOLVE_SIZE (5)
/* Number of entries in every per-channel noise profile array. */
#define NB_PROFILE_BANDS (15)
35
/* Values accepted by the "sample_noise" option. */
enum SampleNoiseModes {
    SAMPLE_NONE  = 0,
    SAMPLE_START = 1,
    SAMPLE_STOP  = 2,
    NB_SAMPLEMODES          /* number of modes, keep last */
};
42
/* Values accepted by the "output_mode" option. */
enum OutModes {
    IN_MODE    = 0,
    OUT_MODE   = 1,
    NOISE_MODE = 2,
    NB_MODES                /* number of modes, keep last */
};
49
/* Values accepted by the "noise_link" option. */
enum NoiseLinkType {
    NONE_LINK    = 0,
    MIN_LINK     = 1,
    MAX_LINK     = 2,
    AVERAGE_LINK = 3,
    NB_LINK                 /* number of link types, keep last */
};
57
/* Values accepted by the "noise_type" option. */
enum NoiseType {
    WHITE_NOISE   = 0,
    VINYL_NOISE   = 1,
    SHELLAC_NOISE = 2,
    CUSTOM_NOISE  = 3,
    NB_NOISE                /* number of noise types, keep last */
};
65
66 typedef struct DeNoiseChannel {
67 double band_noise[NB_PROFILE_BANDS];
68 double noise_band_auto_var[NB_PROFILE_BANDS];
69 double noise_band_sample[NB_PROFILE_BANDS];
70 double *amt;
71 double *band_amt;
72 double *band_excit;
73 double *gain;
74 double *smoothed_gain;
75 double *prior;
76 double *prior_band_excit;
77 double *clean_data;
78 double *noisy_data;
79 double *out_samples;
80 double *spread_function;
81 double *abs_var;
82 double *rel_var;
83 double *min_abs_var;
84 void *fft_in;
85 void *fft_out;
86 AVTXContext *fft, *ifft;
87 av_tx_fn tx_fn, itx_fn;
88
89 double noise_band_norm[NB_PROFILE_BANDS];
90 double noise_band_avr[NB_PROFILE_BANDS];
91 double noise_band_avi[NB_PROFILE_BANDS];
92 double noise_band_var[NB_PROFILE_BANDS];
93
94 double noise_reduction;
95 double last_noise_reduction;
96 double noise_floor;
97 double last_noise_floor;
98 double residual_floor;
99 double last_residual_floor;
100 double max_gain;
101 double max_var;
102 double gain_scale;
103 } DeNoiseChannel;
104
105 typedef struct AudioFFTDeNoiseContext {
106 const AVClass *class;
107
108 int format;
109 size_t sample_size;
110 size_t complex_sample_size;
111
112 float noise_reduction;
113 float noise_floor;
114 int noise_type;
115 char *band_noise_str;
116 float residual_floor;
117 int track_noise;
118 int track_residual;
119 int output_mode;
120 int noise_floor_link;
121 float ratio;
122 int gain_smooth;
123 float band_multiplier;
124 float floor_offset;
125
126 int channels;
127 int sample_noise;
128 int sample_noise_blocks;
129 int sample_noise_mode;
130 float sample_rate;
131 int buffer_length;
132 int fft_length;
133 int fft_length2;
134 int bin_count;
135 int window_length;
136 int sample_advance;
137 int number_of_bands;
138
139 int band_centre[NB_PROFILE_BANDS];
140
141 int *bin2band;
142 double *window;
143 double *band_alpha;
144 double *band_beta;
145
146 DeNoiseChannel *dnch;
147
148 AVFrame *winframe;
149
150 double window_weight;
151 double floor;
152 double sample_floor;
153
154 int noise_band_edge[NB_PROFILE_BANDS + 2];
155 int noise_band_count;
156 double matrix_a[SOLVE_SIZE * SOLVE_SIZE];
157 double vector_b[SOLVE_SIZE];
158 double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS];
159 double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS];
160 } AudioFFTDeNoiseContext;
161
162 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
163 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
164 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
165
166 static const AVOption afftdn_options[] = {
167 { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
168 { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
169 { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
170 { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
171 { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
172 { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
173 { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
174 { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
175 { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
176 { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
177 { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
178 { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
179 { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
180 { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
181 { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
182 { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
183 { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
184 { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
185 { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
186 { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
187 { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
188 { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
189 { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
190 { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
191 { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
192 { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
193 { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
194 { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
195 { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
196 { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
197 { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
198 { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
199 { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
200 { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
201 { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
202 { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
203 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" },
204 { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" },
205 { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" },
206 { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" },
207 { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
208 { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
209 { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
210 { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
211 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" },
212 { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
213 { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
214 { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
215 { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
216 { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
217 { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
218 { NULL }
219 };
220
221 AVFILTER_DEFINE_CLASS(afftdn);
222
223 static double get_band_noise(AudioFFTDeNoiseContext *s,
224 int band, double a,
225 double b, double c)
226 {
227 double d1, d2, d3;
228
229 d1 = a / s->band_centre[band];
230 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
231 d2 = b / s->band_centre[band];
232 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
233 d3 = s->band_centre[band] / c;
234 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
235
236 return -d1 + d2 - d3;
237 }
238
/**
 * In-place elimination of a size x size matrix, without pivoting.
 *
 * Element (r, c) lives at array[r + c * size]. For each pivot index p the
 * multipliers array[r + p*size] / array[p + p*size] (r > p) overwrite the
 * entries below the pivot, and the remaining columns are reduced with them.
 * The result is the layout solve() expects.
 *
 * @param array matrix storage, size * size doubles, modified in place
 * @param size  matrix dimension
 */
static void factor(double *array, int size)
{
    for (int pivot = 0; pivot < size - 1; pivot++) {
        double *pivot_col = array + pivot * size;

        for (int row = pivot + 1; row < size; row++) {
            const double mult = pivot_col[row] / pivot_col[pivot];

            pivot_col[row] = mult;
            for (int col = pivot + 1; col < size; col++) {
                double *column = array + col * size;

                column[row] -= mult * column[pivot];
            }
        }
    }
}
252
/**
 * Solve a linear system using a matrix previously processed by factor().
 *
 * @param matrix factored matrix, element (r, c) at matrix[r + c * size];
 *               it is only read
 * @param vector right-hand side on entry, solution on return
 * @param size   dimension of the system
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Forward pass: apply the stored multipliers to the right-hand side. */
    for (int col = 0; col < last; col++) {
        const double head = vector[col];

        for (int row = col + 1; row < size; row++)
            vector[row] -= matrix[row + col * size] * head;
    }

    vector[last] /= matrix[size * size - 1];

    /* Backward pass: substitute already-solved unknowns, bottom row first. */
    for (int row = last - 1; row >= 0; row--) {
        double acc = vector[row];

        for (int col = row + 1; col < size; col++)
            acc -= matrix[row + col * size] * vector[col];
        vector[row] = acc / matrix[row + row * size];
    }
}
271
272 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
273 DeNoiseChannel *dnch,
274 int band)
275 {
276 double product, sum, f;
277 int i = 0;
278
279 if (band < NB_PROFILE_BANDS)
280 return dnch->band_noise[band];
281
282 for (int j = 0; j < SOLVE_SIZE; j++) {
283 sum = 0.0;
284 for (int k = 0; k < NB_PROFILE_BANDS; k++)
285 sum += s->matrix_b[i++] * dnch->band_noise[k];
286 s->vector_b[j] = sum;
287 }
288
289 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
290 f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
291 f = 15.0 + log(f / 1.5) / log(1.5);
292 sum = 0.0;
293 product = 1.0;
294 for (int j = 0; j < SOLVE_SIZE; j++) {
295 sum += product * s->vector_b[j];
296 product *= f;
297 }
298
299 return sum;
300 }
301
/**
 * Combine a gain @p a with a limit @p b.
 *
 * Returns (b*a - 1) / (b + a - 2) when a > 1,
 * (b*a - 2*a + 1) / (b - a) when a < 1, and 1.0 otherwise
 * (which includes a == 1 and a being NaN).
 */
static double limit_gain(double a, double b)
{
    return a > 1.0 ? (b * a - 1.0) / (b + a - 2.0)
         : a < 1.0 ? (b * a - 2.0 * a + 1.0) / (b - a)
         :           1.0;
}
310
311 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
312 double floor, int len, double *rnum, double *rden)
313 {
314 double num = 0., den = 0.;
315 int size = 0;
316
317 for (int n = 0; n < len; n++) {
318 const double v = spectral[n];
319 if (v > floor) {
320 num += log(v);
321 den += v;
322 size++;
323 }
324 }
325
326 size = FFMAX(size, 1);
327
328 num /= size;
329 den /= size;
330
331 num = exp(num);
332
333 *rnum = num;
334 *rden = den;
335 }
336
337 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
338
339 static double floor_offset(const double *S, int size, double mean)
340 {
341 double offset = 0.0;
342
343 for (int n = 0; n < size; n++) {
344 const double p = S[n] - mean;
345
346 offset = fmax(offset, fabs(p));
347 }
348
349 return offset / mean;
350 }
351
352 static void process_frame(AVFilterContext *ctx,
353 AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
354 double *prior, double *prior_band_excit, int track_noise)
355 {
356 AVFilterLink *outlink = ctx->outputs[0];
357 const double *abs_var = dnch->abs_var;
358 const double ratio = outlink->frame_count_out ? s->ratio : 1.0;
359 const double rratio = 1. - ratio;
360 const int *bin2band = s->bin2band;
361 double *noisy_data = dnch->noisy_data;
362 double *band_excit = dnch->band_excit;
363 double *band_amt = dnch->band_amt;
364 double *smoothed_gain = dnch->smoothed_gain;
365 AVComplexDouble *fft_data_dbl = dnch->fft_out;
366 AVComplexFloat *fft_data_flt = dnch->fft_out;
367 double *gain = dnch->gain;
368
369 for (int i = 0; i < s->bin_count; i++) {
370 double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
371
372 switch (s->format) {
373 case AV_SAMPLE_FMT_FLTP:
374 noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
375 break;
376 case AV_SAMPLE_FMT_DBLP:
377 noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
378 break;
379 }
380
381 power = mag * mag;
382 mag_abs_var = power / abs_var[i];
383 new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
384 new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
385 sqr_new_gain = new_gain * new_gain;
386 prior[i] = mag_abs_var * sqr_new_gain;
387 dnch->clean_data[i] = power * sqr_new_gain;
388 gain[i] = new_gain;
389 }
390
391 if (track_noise) {
392 double flatness, num, den;
393
394 spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
395
396 flatness = num / den;
397 if (flatness > 0.8) {
398 const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
399 const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
400
401 dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
402 set_parameters(s, dnch, 1, 1);
403 }
404 }
405
406 for (int i = 0; i < s->number_of_bands; i++) {
407 band_excit[i] = 0.0;
408 band_amt[i] = 0.0;
409 }
410
411 for (int i = 0; i < s->bin_count; i++)
412 band_excit[bin2band[i]] += dnch->clean_data[i];
413
414 for (int i = 0; i < s->number_of_bands; i++) {
415 band_excit[i] = fmax(band_excit[i],
416 s->band_alpha[i] * band_excit[i] +
417 s->band_beta[i] * prior_band_excit[i]);
418 prior_band_excit[i] = band_excit[i];
419 }
420
421 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
422 for (int k = 0; k < s->number_of_bands; k++) {
423 band_amt[j] += dnch->spread_function[i++] * band_excit[k];
424 }
425 }
426
427 for (int i = 0; i < s->bin_count; i++)
428 dnch->amt[i] = band_amt[bin2band[i]];
429
430 for (int i = 0; i < s->bin_count; i++) {
431 if (dnch->amt[i] > abs_var[i]) {
432 gain[i] = 1.0;
433 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
434 const double limit = sqrt(abs_var[i] / dnch->amt[i]);
435
436 gain[i] = limit_gain(gain[i], limit);
437 } else {
438 gain[i] = limit_gain(gain[i], dnch->max_gain);
439 }
440 }
441
442 memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
443 if (s->gain_smooth > 0) {
444 const int r = s->gain_smooth;
445
446 for (int i = r; i < s->bin_count - r; i++) {
447 const double gc = gain[i];
448 double num = 0., den = 0.;
449
450 for (int j = -r; j <= r; j++) {
451 const double g = gain[i + j];
452 const double d = 1. - fabs(g - gc);
453
454 num += g * d;
455 den += d;
456 }
457
458 smoothed_gain[i] = num / den;
459 }
460 }
461
462 switch (s->format) {
463 case AV_SAMPLE_FMT_FLTP:
464 for (int i = 0; i < s->bin_count; i++) {
465 const float new_gain = smoothed_gain[i];
466
467 fft_data_flt[i].re *= new_gain;
468 fft_data_flt[i].im *= new_gain;
469 }
470 break;
471 case AV_SAMPLE_FMT_DBLP:
472 for (int i = 0; i < s->bin_count; i++) {
473 const double new_gain = smoothed_gain[i];
474
475 fft_data_dbl[i].re *= new_gain;
476 fft_data_dbl[i].im *= new_gain;
477 }
478 break;
479 }
480 }
481
482 static double freq2bark(double x)
483 {
484 double d = x / 7500.0;
485
486 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
487 }
488
489 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
490 {
491 if (band == -1)
492 return lrint(s->band_centre[0] / 1.5);
493
494 return s->band_centre[band];
495 }
496
497 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
498 {
499 int i;
500
501 if (band == NB_PROFILE_BANDS) {
502 i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
503 } else {
504 i = lrint(s->band_centre[band] / 1.224745);
505 }
506
507 return FFMIN(i, s->sample_rate / 2);
508 }
509
510 static void set_band_parameters(AudioFFTDeNoiseContext *s,
511 DeNoiseChannel *dnch)
512 {
513 double band_noise, d2, d3, d4, d5;
514 int i = 0, j = 0, k = 0;
515
516 d5 = 0.0;
517 band_noise = process_get_band_noise(s, dnch, 0);
518 for (int m = j; m < s->bin_count; m++) {
519 if (m == j) {
520 i = j;
521 d5 = band_noise;
522 if (k >= NB_PROFILE_BANDS) {
523 j = s->bin_count;
524 } else {
525 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
526 }
527 d2 = j - i;
528 band_noise = process_get_band_noise(s, dnch, k);
529 k++;
530 }
531 d3 = (j - m) / d2;
532 d4 = (m - i) / d2;
533 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
534 }
535
536 for (i = 0; i < NB_PROFILE_BANDS; i++)
537 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
538 }
539
540 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
541 {
542 DeNoiseChannel *dnch = &s->dnch[ch];
543 char *custom_noise_str, *p, *arg, *saveptr = NULL;
544 double band_noise[NB_PROFILE_BANDS] = { 0.f };
545 int ret;
546
547 if (!s->band_noise_str)
548 return;
549
550 custom_noise_str = p = av_strdup(s->band_noise_str);
551 if (!p)
552 return;
553
554 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
555 float noise;
556
557 if (!(arg = av_strtok(p, "| ", &saveptr)))
558 break;
559
560 p = NULL;
561
562 ret = av_sscanf(arg, "%f", &noise);
563 if (ret != 1) {
564 av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
565 break;
566 }
567
568 band_noise[i] = av_clipd(noise, -24., 24.);
569 }
570
571 av_free(custom_noise_str);
572 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
573 }
574
575 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
576 {
577 if (dnch->last_noise_floor != dnch->noise_floor)
578 dnch->last_noise_floor = dnch->noise_floor;
579
580 if (s->track_residual)
581 dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
582
583 dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
584 if (update_auto_var) {
585 for (int i = 0; i < NB_PROFILE_BANDS; i++)
586 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
587 }
588
589 if (s->track_residual) {
590 if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
591 update_var = 1;
592 dnch->last_residual_floor = dnch->residual_floor;
593 dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
594 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
595 }
596 } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
597 update_var = 1;
598 dnch->last_noise_reduction = dnch->noise_reduction;
599 dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
600 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
601 }
602
603 dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
604
605 if (update_var) {
606 set_band_parameters(s, dnch);
607
608 for (int i = 0; i < s->bin_count; i++) {
609 dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
610 dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
611 }
612 }
613 }
614
615 static void reduce_mean(double *band_noise)
616 {
617 double mean = 0.f;
618
619 for (int i = 0; i < NB_PROFILE_BANDS; i++)
620 mean += band_noise[i];
621 mean /= NB_PROFILE_BANDS;
622
623 for (int i = 0; i < NB_PROFILE_BANDS; i++)
624 band_noise[i] -= mean;
625 }
626
627 static int config_input(AVFilterLink *inlink)
628 {
629 AVFilterContext *ctx = inlink->dst;
630 AudioFFTDeNoiseContext *s = ctx->priv;
631 double wscale, sar, sum, sdiv;
632 int i, j, k, m, n, ret, tx_type;
633 double dscale = 1.;
634 float fscale = 1.f;
635 void *scale;
636
637 s->format = inlink->format;
638
639 switch (s->format) {
640 case AV_SAMPLE_FMT_FLTP:
641 s->sample_size = sizeof(float);
642 s->complex_sample_size = sizeof(AVComplexFloat);
643 tx_type = AV_TX_FLOAT_RDFT;
644 scale = &fscale;
645 break;
646 case AV_SAMPLE_FMT_DBLP:
647 s->sample_size = sizeof(double);
648 s->complex_sample_size = sizeof(AVComplexDouble);
649 tx_type = AV_TX_DOUBLE_RDFT;
650 scale = &dscale;
651 break;
652 }
653
654 s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
655 if (!s->dnch)
656 return AVERROR(ENOMEM);
657
658 s->channels = inlink->ch_layout.nb_channels;
659 s->sample_rate = inlink->sample_rate;
660 s->sample_advance = s->sample_rate / 80;
661 s->window_length = 3 * s->sample_advance;
662 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
663 s->fft_length = s->fft_length2;
664 s->buffer_length = s->fft_length * 2;
665 s->bin_count = s->fft_length2 / 2 + 1;
666
667 s->band_centre[0] = 80;
668 for (i = 1; i < NB_PROFILE_BANDS; i++) {
669 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
670 if (s->band_centre[i] < 1000) {
671 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
672 } else if (s->band_centre[i] < 5000) {
673 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
674 } else if (s->band_centre[i] < 15000) {
675 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
676 } else {
677 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
678 }
679 }
680
681 for (j = 0; j < SOLVE_SIZE; j++) {
682 for (k = 0; k < SOLVE_SIZE; k++) {
683 s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
684 for (m = 0; m < NB_PROFILE_BANDS; m++)
685 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
686 }
687 }
688
689 factor(s->matrix_a, SOLVE_SIZE);
690
691 i = 0;
692 for (j = 0; j < SOLVE_SIZE; j++)
693 for (k = 0; k < NB_PROFILE_BANDS; k++)
694 s->matrix_b[i++] = pow(k, j);
695
696 i = 0;
697 for (j = 0; j < NB_PROFILE_BANDS; j++)
698 for (k = 0; k < SOLVE_SIZE; k++)
699 s->matrix_c[i++] = pow(j, k);
700
701 s->window = av_calloc(s->window_length, sizeof(*s->window));
702 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
703 if (!s->window || !s->bin2band)
704 return AVERROR(ENOMEM);
705
706 sdiv = s->band_multiplier;
707 for (i = 0; i < s->bin_count; i++)
708 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
709
710 s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
711
712 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
713 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
714 if (!s->band_alpha || !s->band_beta)
715 return AVERROR(ENOMEM);
716
717 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
718 DeNoiseChannel *dnch = &s->dnch[ch];
719
720 switch (s->noise_type) {
721 case WHITE_NOISE:
722 for (i = 0; i < NB_PROFILE_BANDS; i++)
723 dnch->band_noise[i] = 0.;
724 break;
725 case VINYL_NOISE:
726 for (i = 0; i < NB_PROFILE_BANDS; i++)
727 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
728 break;
729 case SHELLAC_NOISE:
730 for (i = 0; i < NB_PROFILE_BANDS; i++)
731 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
732 break;
733 case CUSTOM_NOISE:
734 read_custom_noise(s, ch);
735 break;
736 default:
737 return AVERROR_BUG;
738 }
739
740 reduce_mean(dnch->band_noise);
741
742 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
743 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
744 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
745 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
746 dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
747 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
748 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
749 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
750 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
751 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
752 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
753 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
754 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
755 dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
756 dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size);
757 ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
758 if (ret < 0)
759 return ret;
760 ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
761 if (ret < 0)
762 return ret;
763 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
764 sizeof(*dnch->spread_function));
765
766 if (!dnch->amt ||
767 !dnch->band_amt ||
768 !dnch->band_excit ||
769 !dnch->gain ||
770 !dnch->smoothed_gain ||
771 !dnch->prior ||
772 !dnch->prior_band_excit ||
773 !dnch->clean_data ||
774 !dnch->noisy_data ||
775 !dnch->out_samples ||
776 !dnch->fft_in ||
777 !dnch->fft_out ||
778 !dnch->abs_var ||
779 !dnch->rel_var ||
780 !dnch->min_abs_var ||
781 !dnch->spread_function ||
782 !dnch->fft ||
783 !dnch->ifft)
784 return AVERROR(ENOMEM);
785 }
786
787 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
788 DeNoiseChannel *dnch = &s->dnch[ch];
789 double *prior_band_excit = dnch->prior_band_excit;
790 double min, max;
791 double p1, p2;
792
793 p1 = pow(0.1, 2.5 / sdiv);
794 p2 = pow(0.1, 1.0 / sdiv);
795 j = 0;
796 for (m = 0; m < s->number_of_bands; m++) {
797 for (n = 0; n < s->number_of_bands; n++) {
798 if (n < m) {
799 dnch->spread_function[j++] = pow(p2, m - n);
800 } else if (n > m) {
801 dnch->spread_function[j++] = pow(p1, n - m);
802 } else {
803 dnch->spread_function[j++] = 1.0;
804 }
805 }
806 }
807
808 for (m = 0; m < s->number_of_bands; m++) {
809 dnch->band_excit[m] = 0.0;
810 prior_band_excit[m] = 0.0;
811 }
812
813 for (m = 0; m < s->bin_count; m++)
814 dnch->band_excit[s->bin2band[m]] += 1.0;
815
816 j = 0;
817 for (m = 0; m < s->number_of_bands; m++) {
818 for (n = 0; n < s->number_of_bands; n++)
819 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
820 }
821
822 min = pow(0.1, 2.5);
823 max = pow(0.1, 1.0);
824 for (int i = 0; i < s->number_of_bands; i++) {
825 if (i < lrint(12.0 * sdiv)) {
826 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
827 } else {
828 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
829 }
830 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
831 }
832
833 for (int i = 0; i < s->buffer_length; i++)
834 dnch->out_samples[i] = 0;
835
836 j = 0;
837 for (int i = 0; i < s->number_of_bands; i++)
838 for (int k = 0; k < s->number_of_bands; k++)
839 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
840 }
841
842 j = 0;
843 sar = s->sample_advance / s->sample_rate;
844 for (int i = 0; i < s->bin_count; i++) {
845 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
846 double d6 = (i - 1) * s->sample_rate / s->fft_length;
847 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
848 s->band_alpha[j] = exp(-sar / d7);
849 s->band_beta[j] = 1.0 - s->band_alpha[j];
850 j = s->bin2band[i];
851 }
852 }
853
854 s->winframe = ff_get_audio_buffer(inlink, s->window_length);
855 if (!s->winframe)
856 return AVERROR(ENOMEM);
857
858 wscale = sqrt(8.0 / (9.0 * s->fft_length));
859 sum = 0.0;
860 for (int i = 0; i < s->window_length; i++) {
861 double d10 = sin(i * M_PI / s->window_length);
862 d10 *= wscale * d10;
863 s->window[i] = d10;
864 sum += d10 * d10;
865 }
866
867 s->window_weight = 0.5 * sum;
868 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
869 s->sample_floor = s->floor * exp(4.144600506562284);
870
871 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
872 DeNoiseChannel *dnch = &s->dnch[ch];
873
874 dnch->noise_reduction = s->noise_reduction;
875 dnch->noise_floor = s->noise_floor;
876 dnch->residual_floor = s->residual_floor;
877
878 set_parameters(s, dnch, 1, 1);
879 }
880
881 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
882 i = 0;
883 for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
884 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
885 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
886 i++;
887 s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
888 }
889 s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
890
891 return 0;
892 }
893
894 static void init_sample_noise(DeNoiseChannel *dnch)
895 {
896 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
897 dnch->noise_band_norm[i] = 0.0;
898 dnch->noise_band_avr[i] = 0.0;
899 dnch->noise_band_avi[i] = 0.0;
900 dnch->noise_band_var[i] = 0.0;
901 }
902 }
903
904 static void sample_noise_block(AudioFFTDeNoiseContext *s,
905 DeNoiseChannel *dnch,
906 AVFrame *in, int ch)
907 {
908 double *src_dbl = (double *)in->extended_data[ch];
909 float *src_flt = (float *)in->extended_data[ch];
910 double mag2, var = 0.0, avr = 0.0, avi = 0.0;
911 AVComplexDouble *fft_out_dbl = dnch->fft_out;
912 AVComplexFloat *fft_out_flt = dnch->fft_out;
913 double *fft_in_dbl = dnch->fft_in;
914 float *fft_in_flt = dnch->fft_in;
915 int edge, j, k, n, edgemax;
916
917 switch (s->format) {
918 case AV_SAMPLE_FMT_FLTP:
919 for (int i = 0; i < s->window_length; i++)
920 fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);
921
922 for (int i = s->window_length; i < s->fft_length2; i++)
923 fft_in_flt[i] = 0.f;
924 break;
925 case AV_SAMPLE_FMT_DBLP:
926 for (int i = 0; i < s->window_length; i++)
927 fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);
928
929 for (int i = s->window_length; i < s->fft_length2; i++)
930 fft_in_dbl[i] = 0.;
931 break;
932 }
933
934 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
935
936 edge = s->noise_band_edge[0];
937 j = edge;
938 k = 0;
939 n = j;
940 edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
941 for (int i = j; i <= edgemax; i++) {
942 if ((i == j) && (i < edgemax)) {
943 if (j > edge) {
944 dnch->noise_band_norm[k - 1] += j - edge;
945 dnch->noise_band_avr[k - 1] += avr;
946 dnch->noise_band_avi[k - 1] += avi;
947 dnch->noise_band_var[k - 1] += var;
948 }
949 k++;
950 edge = j;
951 j = s->noise_band_edge[k];
952 if (k == NB_PROFILE_BANDS) {
953 j++;
954 }
955 var = 0.0;
956 avr = 0.0;
957 avi = 0.0;
958 }
959
960 switch (s->format) {
961 case AV_SAMPLE_FMT_FLTP:
962 avr += fft_out_flt[n].re;
963 avi += fft_out_flt[n].im;
964 mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
965 fft_out_flt[n].im * fft_out_flt[n].im;
966 break;
967 case AV_SAMPLE_FMT_DBLP:
968 avr += fft_out_dbl[n].re;
969 avi += fft_out_dbl[n].im;
970 mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
971 fft_out_dbl[n].im * fft_out_dbl[n].im;
972 break;
973 }
974
975 mag2 = fmax(mag2, s->sample_floor);
976
977 var += mag2;
978 n++;
979 }
980
981 dnch->noise_band_norm[k - 1] += j - edge;
982 dnch->noise_band_avr[k - 1] += avr;
983 dnch->noise_band_avi[k - 1] += avi;
984 dnch->noise_band_var[k - 1] += var;
985 }
986
987 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
988 DeNoiseChannel *dnch,
989 double *sample_noise)
990 {
991 for (int i = 0; i < s->noise_band_count; i++) {
992 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
993 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
994 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
995 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
996 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
997 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
998 sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
999 }
1000 if (s->noise_band_count < NB_PROFILE_BANDS) {
1001 for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
1002 sample_noise[i] = sample_noise[i - 1];
1003 }
1004 }
1005
1006 static void set_noise_profile(AudioFFTDeNoiseContext *s,
1007 DeNoiseChannel *dnch,
1008 double *sample_noise)
1009 {
1010 double new_band_noise[NB_PROFILE_BANDS];
1011 double temp[NB_PROFILE_BANDS];
1012 double sum = 0.0;
1013
1014 for (int m = 0; m < NB_PROFILE_BANDS; m++)
1015 temp[m] = sample_noise[m];
1016
1017 for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
1018 sum = 0.0;
1019 for (int n = 0; n < NB_PROFILE_BANDS; n++)
1020 sum += s->matrix_b[i++] * temp[n];
1021 s->vector_b[m] = sum;
1022 }
1023 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
1024 for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
1025 sum = 0.0;
1026 for (int n = 0; n < SOLVE_SIZE; n++)
1027 sum += s->matrix_c[i++] * s->vector_b[n];
1028 temp[m] = sum;
1029 }
1030
1031 reduce_mean(temp);
1032
1033 av_log(s, AV_LOG_INFO, "bn=");
1034 for (int m = 0; m < NB_PROFILE_BANDS; m++) {
1035 new_band_noise[m] = temp[m];
1036 new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
1037 av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
1038 }
1039 av_log(s, AV_LOG_INFO, "\n");
1040 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1041 }
1042
1043 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1044 {
1045 AudioFFTDeNoiseContext *s = ctx->priv;
1046 AVFrame *in = arg;
1047 const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
1048 const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1049 const int window_length = s->window_length;
1050 const double *window = s->window;
1051
1052 for (int ch = start; ch < end; ch++) {
1053 DeNoiseChannel *dnch = &s->dnch[ch];
1054 const double *src_dbl = (const double *)in->extended_data[ch];
1055 const float *src_flt = (const float *)in->extended_data[ch];
1056 double *dst = dnch->out_samples;
1057 double *fft_in_dbl = dnch->fft_in;
1058 float *fft_in_flt = dnch->fft_in;
1059
1060 switch (s->format) {
1061 case AV_SAMPLE_FMT_FLTP:
1062 for (int m = 0; m < window_length; m++)
1063 fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
1064
1065 for (int m = window_length; m < s->fft_length2; m++)
1066 fft_in_flt[m] = 0.f;
1067 break;
1068 case AV_SAMPLE_FMT_DBLP:
1069 for (int m = 0; m < window_length; m++)
1070 fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
1071
1072 for (int m = window_length; m < s->fft_length2; m++)
1073 fft_in_dbl[m] = 0.;
1074 break;
1075 }
1076
1077 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
1078
1079 process_frame(ctx, s, dnch,
1080 dnch->prior,
1081 dnch->prior_band_excit,
1082 s->track_noise);
1083
1084 dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);
1085
1086 switch (s->format) {
1087 case AV_SAMPLE_FMT_FLTP:
1088 for (int m = 0; m < window_length; m++)
1089 dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
1090 break;
1091 case AV_SAMPLE_FMT_DBLP:
1092 for (int m = 0; m < window_length; m++)
1093 dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
1094 break;
1095 }
1096 }
1097
1098 return 0;
1099 }
1100
1101 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1102 {
1103 AVFilterContext *ctx = inlink->dst;
1104 AVFilterLink *outlink = ctx->outputs[0];
1105 AudioFFTDeNoiseContext *s = ctx->priv;
1106 const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1107 const int offset = s->window_length - s->sample_advance;
1108 AVFrame *out;
1109
1110 for (int ch = 0; ch < s->channels; ch++) {
1111 uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
1112
1113 memmove(src, src + s->sample_advance * s->sample_size,
1114 offset * s->sample_size);
1115 memcpy(src + offset * s->sample_size, in->extended_data[ch],
1116 in->nb_samples * s->sample_size);
1117 memset(src + s->sample_size * (offset + in->nb_samples), 0,
1118 (s->sample_advance - in->nb_samples) * s->sample_size);
1119 }
1120
1121 if (s->track_noise) {
1122 double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1123
1124 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1125 DeNoiseChannel *dnch = &s->dnch[ch];
1126
1127 average += dnch->noise_floor;
1128 max = fmax(max, dnch->noise_floor);
1129 min = fmin(min, dnch->noise_floor);
1130 }
1131
1132 average /= inlink->ch_layout.nb_channels;
1133
1134 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1135 DeNoiseChannel *dnch = &s->dnch[ch];
1136
1137 switch (s->noise_floor_link) {
1138 case MIN_LINK: dnch->noise_floor = min; break;
1139 case MAX_LINK: dnch->noise_floor = max; break;
1140 case AVERAGE_LINK: dnch->noise_floor = average; break;
1141 case NONE_LINK:
1142 default:
1143 break;
1144 }
1145
1146 if (dnch->noise_floor != dnch->last_noise_floor)
1147 set_parameters(s, dnch, 1, 0);
1148 }
1149 }
1150
1151 if (s->sample_noise_mode == SAMPLE_START) {
1152 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1153 DeNoiseChannel *dnch = &s->dnch[ch];
1154
1155 init_sample_noise(dnch);
1156 }
1157 s->sample_noise_mode = SAMPLE_NONE;
1158 s->sample_noise = 1;
1159 s->sample_noise_blocks = 0;
1160 }
1161
1162 if (s->sample_noise) {
1163 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1164 DeNoiseChannel *dnch = &s->dnch[ch];
1165
1166 sample_noise_block(s, dnch, s->winframe, ch);
1167 }
1168 s->sample_noise_blocks++;
1169 }
1170
1171 if (s->sample_noise_mode == SAMPLE_STOP) {
1172 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1173 DeNoiseChannel *dnch = &s->dnch[ch];
1174 double sample_noise[NB_PROFILE_BANDS];
1175
1176 if (s->sample_noise_blocks <= 0)
1177 break;
1178 finish_sample_noise(s, dnch, sample_noise);
1179 set_noise_profile(s, dnch, sample_noise);
1180 set_parameters(s, dnch, 1, 1);
1181 }
1182 s->sample_noise = 0;
1183 s->sample_noise_blocks = 0;
1184 s->sample_noise_mode = SAMPLE_NONE;
1185 }
1186
1187 ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1188 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1189
1190 if (av_frame_is_writable(in)) {
1191 out = in;
1192 } else {
1193 out = ff_get_audio_buffer(outlink, in->nb_samples);
1194 if (!out) {
1195 av_frame_free(&in);
1196 return AVERROR(ENOMEM);
1197 }
1198
1199 av_frame_copy_props(out, in);
1200 }
1201
1202 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1203 DeNoiseChannel *dnch = &s->dnch[ch];
1204 double *src = dnch->out_samples;
1205 const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
1206 const float *orig_flt = (const float *)s->winframe->extended_data[ch];
1207 double *dst_dbl = (double *)out->extended_data[ch];
1208 float *dst_flt = (float *)out->extended_data[ch];
1209
1210 switch (output_mode) {
1211 case IN_MODE:
1212 switch (s->format) {
1213 case AV_SAMPLE_FMT_FLTP:
1214 for (int m = 0; m < out->nb_samples; m++)
1215 dst_flt[m] = orig_flt[m];
1216 break;
1217 case AV_SAMPLE_FMT_DBLP:
1218 for (int m = 0; m < out->nb_samples; m++)
1219 dst_dbl[m] = orig_dbl[m];
1220 break;
1221 }
1222 break;
1223 case OUT_MODE:
1224 switch (s->format) {
1225 case AV_SAMPLE_FMT_FLTP:
1226 for (int m = 0; m < out->nb_samples; m++)
1227 dst_flt[m] = src[m];
1228 break;
1229 case AV_SAMPLE_FMT_DBLP:
1230 for (int m = 0; m < out->nb_samples; m++)
1231 dst_dbl[m] = src[m];
1232 break;
1233 }
1234 break;
1235 case NOISE_MODE:
1236 switch (s->format) {
1237 case AV_SAMPLE_FMT_FLTP:
1238 for (int m = 0; m < out->nb_samples; m++)
1239 dst_flt[m] = orig_flt[m] - src[m];
1240 break;
1241 case AV_SAMPLE_FMT_DBLP:
1242 for (int m = 0; m < out->nb_samples; m++)
1243 dst_dbl[m] = orig_dbl[m] - src[m];
1244 break;
1245 }
1246 break;
1247 default:
1248 if (in != out)
1249 av_frame_free(&in);
1250 av_frame_free(&out);
1251 return AVERROR_BUG;
1252 }
1253
1254 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1255 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1256 }
1257
1258 if (out != in)
1259 av_frame_free(&in);
1260 return ff_filter_frame(outlink, out);
1261 }
1262
1263 static int activate(AVFilterContext *ctx)
1264 {
1265 AVFilterLink *inlink = ctx->inputs[0];
1266 AVFilterLink *outlink = ctx->outputs[0];
1267 AudioFFTDeNoiseContext *s = ctx->priv;
1268 AVFrame *in = NULL;
1269 int ret;
1270
1271 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1272
1273 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1274 if (ret < 0)
1275 return ret;
1276 if (ret > 0)
1277 return output_frame(inlink, in);
1278
1279 if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1280 ff_filter_set_ready(ctx, 10);
1281 return 0;
1282 }
1283
1284 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1285 FF_FILTER_FORWARD_WANTED(outlink, inlink);
1286
1287 return FFERROR_NOT_READY;
1288 }
1289
1290 static av_cold void uninit(AVFilterContext *ctx)
1291 {
1292 AudioFFTDeNoiseContext *s = ctx->priv;
1293
1294 av_freep(&s->window);
1295 av_freep(&s->bin2band);
1296 av_freep(&s->band_alpha);
1297 av_freep(&s->band_beta);
1298 av_frame_free(&s->winframe);
1299
1300 if (s->dnch) {
1301 for (int ch = 0; ch < s->channels; ch++) {
1302 DeNoiseChannel *dnch = &s->dnch[ch];
1303 av_freep(&dnch->amt);
1304 av_freep(&dnch->band_amt);
1305 av_freep(&dnch->band_excit);
1306 av_freep(&dnch->gain);
1307 av_freep(&dnch->smoothed_gain);
1308 av_freep(&dnch->prior);
1309 av_freep(&dnch->prior_band_excit);
1310 av_freep(&dnch->clean_data);
1311 av_freep(&dnch->noisy_data);
1312 av_freep(&dnch->out_samples);
1313 av_freep(&dnch->spread_function);
1314 av_freep(&dnch->abs_var);
1315 av_freep(&dnch->rel_var);
1316 av_freep(&dnch->min_abs_var);
1317 av_freep(&dnch->fft_in);
1318 av_freep(&dnch->fft_out);
1319 av_tx_uninit(&dnch->fft);
1320 av_tx_uninit(&dnch->ifft);
1321 }
1322 av_freep(&s->dnch);
1323 }
1324 }
1325
1326 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1327 char *res, int res_len, int flags)
1328 {
1329 AudioFFTDeNoiseContext *s = ctx->priv;
1330 int ret = 0;
1331
1332 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1333 if (ret < 0)
1334 return ret;
1335
1336 if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1337 return 0;
1338
1339 for (int ch = 0; ch < s->channels; ch++) {
1340 DeNoiseChannel *dnch = &s->dnch[ch];
1341
1342 dnch->noise_reduction = s->noise_reduction;
1343 dnch->noise_floor = s->noise_floor;
1344 dnch->residual_floor = s->residual_floor;
1345
1346 set_parameters(s, dnch, 1, 1);
1347 }
1348
1349 return 0;
1350 }
1351
1352 static const AVFilterPad inputs[] = {
1353 {
1354 .name = "default",
1355 .type = AVMEDIA_TYPE_AUDIO,
1356 .config_props = config_input,
1357 },
1358 };
1359
1360 const AVFilter ff_af_afftdn = {
1361 .name = "afftdn",
1362 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1363 .priv_size = sizeof(AudioFFTDeNoiseContext),
1364 .priv_class = &afftdn_class,
1365 .activate = activate,
1366 .uninit = uninit,
1367 FILTER_INPUTS(inputs),
1368 FILTER_OUTPUTS(ff_audio_default_filterpad),
1369 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
1370 .process_command = process_command,
1371 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1372 AVFILTER_FLAG_SLICE_THREADS,
1373 };
1374