FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_afftdn.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 0 774 0.0%
Functions: 0 25 0.0%
Branches: 0 417 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/avassert.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/channel_layout.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/tx.h"
29 #include "avfilter.h"
30 #include "audio.h"
31 #include "filters.h"
32
/* Exponent scale: exp(x * C) equals 10^(x / 10). */
#define C (M_LN10 * 0.1)
/* Order of the square system stored in matrix_a and handled by factor()/solve(). */
#define SOLVE_SIZE (5)
/* Number of entries in each per-band noise profile array. */
#define NB_PROFILE_BANDS (15)
36
/* Values of the "sample_noise"/"sn" option (stored in sample_noise_mode). */
enum SampleNoiseModes {
    SAMPLE_NONE,    /* option constant "none" */
    SAMPLE_START,   /* option constants "start" and "begin" */
    SAMPLE_STOP,    /* option constants "stop" and "end" */
    NB_SAMPLEMODES  /* number of modes; NB_SAMPLEMODES-1 is the option's upper bound */
};
43
/* Values of the "output_mode"/"om" option. */
enum OutModes {
    IN_MODE,    /* option constants "input" and "i" */
    OUT_MODE,   /* option constants "output" and "o"; the default */
    NOISE_MODE, /* option constants "noise" and "n" */
    NB_MODES    /* number of modes; NB_MODES-1 is the option's upper bound */
};
50
/* Values of the "noise_link"/"nl" option (stored in noise_floor_link). */
enum NoiseLinkType {
    NONE_LINK,    /* option constant "none" */
    MIN_LINK,     /* option constant "min"; the default */
    MAX_LINK,     /* option constant "max" */
    AVERAGE_LINK, /* option constant "average" */
    NB_LINK       /* number of link types; NB_LINK-1 is the option's upper bound */
};
58
/*
 * Values of the "noise_type"/"nt" option. config_input() selects the initial
 * per-band noise profile from this value.
 */
enum NoiseType {
    WHITE_NOISE,   /* all profile bands set to 0 */
    VINYL_NOISE,   /* profile from get_band_noise(s, i, 50.0, 500.5, 2125.0) */
    SHELLAC_NOISE, /* profile from get_band_noise(s, i, 1.0, 500.0, 1.0E10) */
    CUSTOM_NOISE,  /* profile parsed from the "band_noise" option string */
    NB_NOISE       /* number of noise types; NB_NOISE-1 is the option's upper bound */
};
66
/*
 * Per-channel denoiser state. One instance per input channel is allocated
 * in config_input(); the pointer members are allocated there as well.
 */
typedef struct DeNoiseChannel {
    double band_noise[NB_PROFILE_BANDS];          /* noise profile; mean-removed by reduce_mean() */
    double noise_band_auto_var[NB_PROFILE_BANDS]; /* written by set_parameters(), set_band_parameters() and finish_sample_noise() */
    double noise_band_sample[NB_PROFILE_BANDS];
    double *amt;              /* bin_count entries; per-bin copy of band_amt */
    double *band_amt;         /* number_of_bands entries; spread_function applied to band_excit */
    double *band_excit;       /* number_of_bands entries */
    double *gain;             /* bin_count entries; per-bin gain */
    double *smoothed_gain;    /* bin_count entries; gain after optional smoothing */
    double *prior;            /* bin_count entries */
    double *prior_band_excit; /* number_of_bands entries */
    double *clean_data;       /* bin_count entries; power * gain^2 per bin */
    double *noisy_data;       /* bin_count entries; spectrum magnitude per bin */
    double *out_samples;      /* buffer_length entries, zeroed in config_input() */
    double *spread_function;  /* number_of_bands * number_of_bands weights */
    double *abs_var;          /* bin_count entries; fmax(max_var * rel_var, 1.0) */
    double *rel_var;          /* bin_count entries; filled by set_band_parameters() */
    double *min_abs_var;      /* bin_count entries; gain_scale * abs_var */
    void *fft_in;             /* fft_length2 real samples (float or double) */
    void *fft_out;            /* fft_length2 + 1 complex samples (float or double) */
    AVTXContext *fft, *ifft;  /* transforms created with inv = 0 and inv = 1 respectively */
    av_tx_fn tx_fn, itx_fn;   /* transform functions returned by av_tx_init() for fft / ifft */

    /* Accumulators filled by sample_noise_block() and normalised by finish_sample_noise(). */
    double noise_band_norm[NB_PROFILE_BANDS];
    double noise_band_avr[NB_PROFILE_BANDS];
    double noise_band_avi[NB_PROFILE_BANDS];
    double noise_band_var[NB_PROFILE_BANDS];

    /* Current values and the values last applied by set_parameters(). */
    double noise_reduction;
    double last_noise_reduction;
    double noise_floor;
    double last_noise_floor;
    double residual_floor;
    double last_residual_floor;
    double max_gain;          /* exp(last_noise_reduction * 0.5 * C) */
    double max_var;           /* floor * exp((100 + last_noise_floor) * C) */
    double gain_scale;        /* 1 / (max_gain * max_gain) */
} DeNoiseChannel;
105
/*
 * Filter private context: user options, dimensions derived from the input
 * link in config_input(), shared lookup tables and the per-channel state.
 */
typedef struct AudioFFTDeNoiseContext {
    const AVClass *class;

    int format;                 /* sample format copied from the input link */
    size_t sample_size;         /* sizeof(float) or sizeof(double) */
    size_t complex_sample_size; /* sizeof(AVComplexFloat) or sizeof(AVComplexDouble) */

    /* Fields set through afftdn_options. */
    float noise_reduction;
    float noise_floor;
    int noise_type;
    char *band_noise_str;
    float residual_floor;
    int track_noise;
    int track_residual;
    int output_mode;
    int noise_floor_link;
    float ratio;                /* "adaptivity" option */
    int gain_smooth;            /* gain smoothing radius, in bins */
    float band_multiplier;
    float floor_offset;

    int channels;
    int sample_noise;
    int sample_noise_blocks;
    int sample_noise_mode;
    float sample_rate;
    int buffer_length;          /* 2 * fft_length */
    int fft_length;             /* same value as fft_length2 */
    int fft_length2;            /* 1 << (32 - ff_clz(window_length)) */
    int bin_count;              /* fft_length2 / 2 + 1 */
    int window_length;          /* 3 * sample_advance */
    int sample_advance;         /* sample_rate / 80 */
    int number_of_bands;        /* bin2band[bin_count - 1] + 1 */

    int band_centre[NB_PROFILE_BANDS]; /* starts at 80, grows roughly 1.5x per band, rounded */

    int *bin2band;              /* bin_count entries: band index of every bin */
    double *window;             /* window_length entries */
    double *band_alpha;         /* number_of_bands entries */
    double *band_beta;          /* number_of_bands entries; 1 - band_alpha where set */

    DeNoiseChannel *dnch;       /* one entry per channel */

    AVFrame *winframe;          /* audio buffer of window_length samples */

    double window_weight;       /* 0.5 * sum of squared window values */
    double floor;
    double sample_floor;        /* lower clamp for bin power in sample_noise_block() */

    int noise_band_edge[NB_PROFILE_BANDS + 2]; /* band edges in bins; last entry holds the band count */
    int noise_band_count;
    double matrix_a[SOLVE_SIZE * SOLVE_SIZE];  /* factored in place by factor() */
    double vector_b[SOLVE_SIZE];               /* right-hand side / solution scratch for solve() */
    double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS]; /* pow(k, j), j-major */
    double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS]; /* pow(j, k), j-major */
} AudioFFTDeNoiseContext;
162
/* Byte offset of field x inside the filter context, for the option table. */
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
/* Flags for audio filtering options. */
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
/* Same as AF, plus the runtime-parameter flag. */
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
166
/*
 * Option table. Most options appear twice, once under a long name and once
 * under a short alias, both writing the same context field. Named constants
 * are grouped with their option through the shared .unit string.
 */
static const AVOption afftdn_options[] = {
    { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
    { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
    { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
    { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
    { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
    { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
    { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
    { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
    { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
    { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
    { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
    { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
    { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
    { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
    { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
    { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
    { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
    { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
    { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
    { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
    { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
    { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
    { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
    { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
    { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
    { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
    { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
    { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
    { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
    { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
    { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
    { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
    { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" },
    { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" },
    { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" },
    { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" },
    { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
    { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
    { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
    { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
    { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" },
    { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
    { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
    { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
    { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
    { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
    { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
    { NULL }
};

AVFILTER_DEFINE_CLASS(afftdn);
223
224 static double get_band_noise(AudioFFTDeNoiseContext *s,
225 int band, double a,
226 double b, double c)
227 {
228 double d1, d2, d3;
229
230 d1 = a / s->band_centre[band];
231 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
232 d2 = b / s->band_centre[band];
233 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
234 d3 = s->band_centre[band] / c;
235 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
236
237 return -d1 + d2 - d3;
238 }
239
/**
 * In-place LU factorisation without pivoting of a size x size matrix.
 *
 * Element (row, col) is stored at array[row + col * size]. On return the
 * multipliers used for elimination sit below the diagonal and the reduced
 * rows sit on and above it, which is the layout solve() consumes.
 *
 * @param array matrix storage, overwritten with the factors
 * @param size  matrix order
 */
static void factor(double *array, int size)
{
    for (int pivot = 0; pivot + 1 < size; pivot++) {
        /* The diagonal entry is not touched while rows below it are reduced. */
        const double diag = array[pivot + pivot * size];

        for (int row = pivot + 1; row < size; row++) {
            const double mult = array[row + pivot * size] / diag;

            array[row + pivot * size] = mult;
            for (int col = pivot + 1; col < size; col++)
                array[row + col * size] -= mult * array[pivot + col * size];
        }
    }
}
253
/**
 * Solve a linear system whose matrix was previously processed by factor().
 *
 * Forward substitution applies the stored multipliers to the right-hand
 * side, then back substitution divides through the upper triangle.
 *
 * @param matrix factored matrix, element (row, col) at matrix[row + col * size]
 * @param vector right-hand side on entry, solution on return
 * @param size   system order
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    for (int col = 0; col < last; col++) {
        const double known = vector[col];

        for (int row = col + 1; row < size; row++)
            vector[row] -= matrix[row + col * size] * known;
    }

    vector[last] /= matrix[size * size - 1];

    for (int row = last - 1; row >= 0; row--) {
        double acc = vector[row];

        for (int col = row + 1; col < size; col++)
            acc -= matrix[row + col * size] * vector[col];
        vector[row] = acc / matrix[row + row * size];
    }
}
272
273 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
274 DeNoiseChannel *dnch,
275 int band)
276 {
277 double product, sum, f;
278 int i = 0;
279
280 if (band < NB_PROFILE_BANDS)
281 return dnch->band_noise[band];
282
283 for (int j = 0; j < SOLVE_SIZE; j++) {
284 sum = 0.0;
285 for (int k = 0; k < NB_PROFILE_BANDS; k++)
286 sum += s->matrix_b[i++] * dnch->band_noise[k];
287 s->vector_b[j] = sum;
288 }
289
290 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
291 f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
292 f = 15.0 + log(f / 1.5) / log(1.5);
293 sum = 0.0;
294 product = 1.0;
295 for (int j = 0; j < SOLVE_SIZE; j++) {
296 sum += product * s->vector_b[j];
297 product *= f;
298 }
299
300 return sum;
301 }
302
/**
 * Map gain a through a rational function parameterised by b.
 *
 * @param a input gain
 * @param b limit parameter
 * @return  (b*a - 1) / (b + a - 2) when a > 1,
 *          (b*a - 2*a + 1) / (b - a) when a < 1,
 *          1.0 otherwise
 */
static double limit_gain(double a, double b)
{
    double num, den;

    if (a > 1.0) {
        num = b * a - 1.0;
        den = b + a - 2.0;
    } else if (a < 1.0) {
        num = b * a - 2.0 * a + 1.0;
        den = b - a;
    } else {
        return 1.0;
    }

    return num / den;
}
311
312 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
313 double floor, int len, double *rnum, double *rden)
314 {
315 double num = 0., den = 0.;
316 int size = 0;
317
318 for (int n = 0; n < len; n++) {
319 const double v = spectral[n];
320 if (v > floor) {
321 num += log(v);
322 den += v;
323 size++;
324 }
325 }
326
327 size = FFMAX(size, 1);
328
329 num /= size;
330 den /= size;
331
332 num = exp(num);
333
334 *rnum = num;
335 *rden = den;
336 }
337
/* Forward declaration: process_frame() calls set_parameters() before its definition. */
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
339
/**
 * Largest absolute deviation of S[] from mean, relative to mean.
 *
 * @param S    input values
 * @param size number of input values
 * @param mean reference value the deviations are measured against
 * @return     max(|S[n] - mean|) / mean, or 0.0 when size is 0 or mean is 0
 *
 * A zero mean is reported as "no offset" instead of dividing by zero. The
 * caller passes the mean of the bins above the floor, which is 0 when no
 * bin qualifies; the resulting NaN/inf would otherwise reach the tracked
 * noise floor.
 */
static double floor_offset(const double *S, int size, double mean)
{
    double offset = 0.0;

    if (mean == 0.0)
        return 0.0;

    for (int n = 0; n < size; n++) {
        const double p = fabs(S[n] - mean);

        /* A NaN deviation compares false and is skipped. */
        if (p > offset)
            offset = p;
    }

    return offset / mean;
}
352
353 static void process_frame(AVFilterContext *ctx,
354 AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
355 double *prior, double *prior_band_excit, int track_noise)
356 {
357 AVFilterLink *outlink = ctx->outputs[0];
358 FilterLink *outl = ff_filter_link(outlink);
359 const double *abs_var = dnch->abs_var;
360 const double ratio = outl->frame_count_out ? s->ratio : 1.0;
361 const double rratio = 1. - ratio;
362 const int *bin2band = s->bin2band;
363 double *noisy_data = dnch->noisy_data;
364 double *band_excit = dnch->band_excit;
365 double *band_amt = dnch->band_amt;
366 double *smoothed_gain = dnch->smoothed_gain;
367 AVComplexDouble *fft_data_dbl = dnch->fft_out;
368 AVComplexFloat *fft_data_flt = dnch->fft_out;
369 double *gain = dnch->gain;
370
371 for (int i = 0; i < s->bin_count; i++) {
372 double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
373
374 switch (s->format) {
375 case AV_SAMPLE_FMT_FLTP:
376 noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
377 break;
378 case AV_SAMPLE_FMT_DBLP:
379 noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
380 break;
381 default:
382 av_assert0(0);
383 }
384
385 power = mag * mag;
386 mag_abs_var = power / abs_var[i];
387 new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
388 new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
389 sqr_new_gain = new_gain * new_gain;
390 prior[i] = mag_abs_var * sqr_new_gain;
391 dnch->clean_data[i] = power * sqr_new_gain;
392 gain[i] = new_gain;
393 }
394
395 if (track_noise) {
396 double flatness, num, den;
397
398 spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
399
400 flatness = num / den;
401 if (flatness > 0.8) {
402 const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
403 const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
404
405 dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
406 set_parameters(s, dnch, 1, 1);
407 }
408 }
409
410 for (int i = 0; i < s->number_of_bands; i++) {
411 band_excit[i] = 0.0;
412 band_amt[i] = 0.0;
413 }
414
415 for (int i = 0; i < s->bin_count; i++)
416 band_excit[bin2band[i]] += dnch->clean_data[i];
417
418 for (int i = 0; i < s->number_of_bands; i++) {
419 band_excit[i] = fmax(band_excit[i],
420 s->band_alpha[i] * band_excit[i] +
421 s->band_beta[i] * prior_band_excit[i]);
422 prior_band_excit[i] = band_excit[i];
423 }
424
425 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
426 for (int k = 0; k < s->number_of_bands; k++) {
427 band_amt[j] += dnch->spread_function[i++] * band_excit[k];
428 }
429 }
430
431 for (int i = 0; i < s->bin_count; i++)
432 dnch->amt[i] = band_amt[bin2band[i]];
433
434 for (int i = 0; i < s->bin_count; i++) {
435 if (dnch->amt[i] > abs_var[i]) {
436 gain[i] = 1.0;
437 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
438 const double limit = sqrt(abs_var[i] / dnch->amt[i]);
439
440 gain[i] = limit_gain(gain[i], limit);
441 } else {
442 gain[i] = limit_gain(gain[i], dnch->max_gain);
443 }
444 }
445
446 memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
447 if (s->gain_smooth > 0) {
448 const int r = s->gain_smooth;
449
450 for (int i = r; i < s->bin_count - r; i++) {
451 const double gc = gain[i];
452 double num = 0., den = 0.;
453
454 for (int j = -r; j <= r; j++) {
455 const double g = gain[i + j];
456 const double d = 1. - fabs(g - gc);
457
458 num += g * d;
459 den += d;
460 }
461
462 smoothed_gain[i] = num / den;
463 }
464 }
465
466 switch (s->format) {
467 case AV_SAMPLE_FMT_FLTP:
468 for (int i = 0; i < s->bin_count; i++) {
469 const float new_gain = smoothed_gain[i];
470
471 fft_data_flt[i].re *= new_gain;
472 fft_data_flt[i].im *= new_gain;
473 }
474 break;
475 case AV_SAMPLE_FMT_DBLP:
476 for (int i = 0; i < s->bin_count; i++) {
477 const double new_gain = smoothed_gain[i];
478
479 fft_data_dbl[i].re *= new_gain;
480 fft_data_dbl[i].im *= new_gain;
481 }
482 break;
483 }
484 }
485
486 static double freq2bark(double x)
487 {
488 double d = x / 7500.0;
489
490 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
491 }
492
493 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
494 {
495 if (band == -1)
496 return lrint(s->band_centre[0] / 1.5);
497
498 return s->band_centre[band];
499 }
500
501 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
502 {
503 int i;
504
505 if (band == NB_PROFILE_BANDS) {
506 i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
507 } else {
508 i = lrint(s->band_centre[band] / 1.224745);
509 }
510
511 return FFMIN(i, s->sample_rate / 2);
512 }
513
514 static void set_band_parameters(AudioFFTDeNoiseContext *s,
515 DeNoiseChannel *dnch)
516 {
517 double band_noise, d2, d3, d4, d5;
518 int i = 0, j = 0, k = 0;
519
520 d5 = 0.0;
521 band_noise = process_get_band_noise(s, dnch, 0);
522 for (int m = j; m < s->bin_count; m++) {
523 if (m == j) {
524 i = j;
525 d5 = band_noise;
526 if (k >= NB_PROFILE_BANDS) {
527 j = s->bin_count;
528 } else {
529 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
530 }
531 d2 = j - i;
532 band_noise = process_get_band_noise(s, dnch, k);
533 k++;
534 }
535 d3 = (j - m) / d2;
536 d4 = (m - i) / d2;
537 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
538 }
539
540 for (i = 0; i < NB_PROFILE_BANDS; i++)
541 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
542 }
543
544 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
545 {
546 DeNoiseChannel *dnch = &s->dnch[ch];
547 char *custom_noise_str, *p, *arg, *saveptr = NULL;
548 double band_noise[NB_PROFILE_BANDS] = { 0.f };
549 int ret;
550
551 if (!s->band_noise_str)
552 return;
553
554 custom_noise_str = p = av_strdup(s->band_noise_str);
555 if (!p)
556 return;
557
558 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
559 float noise;
560
561 if (!(arg = av_strtok(p, "| ", &saveptr)))
562 break;
563
564 p = NULL;
565
566 ret = av_sscanf(arg, "%f", &noise);
567 if (ret != 1) {
568 av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
569 break;
570 }
571
572 band_noise[i] = av_clipd(noise, -24., 24.);
573 }
574
575 av_free(custom_noise_str);
576 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
577 }
578
579 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
580 {
581 if (dnch->last_noise_floor != dnch->noise_floor)
582 dnch->last_noise_floor = dnch->noise_floor;
583
584 if (s->track_residual)
585 dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
586
587 dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
588 if (update_auto_var) {
589 for (int i = 0; i < NB_PROFILE_BANDS; i++)
590 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
591 }
592
593 if (s->track_residual) {
594 if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
595 update_var = 1;
596 dnch->last_residual_floor = dnch->residual_floor;
597 dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
598 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
599 }
600 } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
601 update_var = 1;
602 dnch->last_noise_reduction = dnch->noise_reduction;
603 dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
604 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
605 }
606
607 dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
608
609 if (update_var) {
610 set_band_parameters(s, dnch);
611
612 for (int i = 0; i < s->bin_count; i++) {
613 dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
614 dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
615 }
616 }
617 }
618
619 static void reduce_mean(double *band_noise)
620 {
621 double mean = 0.f;
622
623 for (int i = 0; i < NB_PROFILE_BANDS; i++)
624 mean += band_noise[i];
625 mean /= NB_PROFILE_BANDS;
626
627 for (int i = 0; i < NB_PROFILE_BANDS; i++)
628 band_noise[i] -= mean;
629 }
630
631 static int config_input(AVFilterLink *inlink)
632 {
633 AVFilterContext *ctx = inlink->dst;
634 AudioFFTDeNoiseContext *s = ctx->priv;
635 double wscale, sar, sum, sdiv;
636 int i, j, k, m, n, ret, tx_type;
637 double dscale = 1.;
638 float fscale = 1.f;
639 void *scale;
640
641 s->format = inlink->format;
642
643 switch (s->format) {
644 case AV_SAMPLE_FMT_FLTP:
645 s->sample_size = sizeof(float);
646 s->complex_sample_size = sizeof(AVComplexFloat);
647 tx_type = AV_TX_FLOAT_RDFT;
648 scale = &fscale;
649 break;
650 case AV_SAMPLE_FMT_DBLP:
651 s->sample_size = sizeof(double);
652 s->complex_sample_size = sizeof(AVComplexDouble);
653 tx_type = AV_TX_DOUBLE_RDFT;
654 scale = &dscale;
655 break;
656 }
657
658 s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
659 if (!s->dnch)
660 return AVERROR(ENOMEM);
661
662 s->channels = inlink->ch_layout.nb_channels;
663 s->sample_rate = inlink->sample_rate;
664 s->sample_advance = s->sample_rate / 80;
665 s->window_length = 3 * s->sample_advance;
666 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
667 s->fft_length = s->fft_length2;
668 s->buffer_length = s->fft_length * 2;
669 s->bin_count = s->fft_length2 / 2 + 1;
670
671 s->band_centre[0] = 80;
672 for (i = 1; i < NB_PROFILE_BANDS; i++) {
673 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
674 if (s->band_centre[i] < 1000) {
675 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
676 } else if (s->band_centre[i] < 5000) {
677 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
678 } else if (s->band_centre[i] < 15000) {
679 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
680 } else {
681 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
682 }
683 }
684
685 for (j = 0; j < SOLVE_SIZE; j++) {
686 for (k = 0; k < SOLVE_SIZE; k++) {
687 s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
688 for (m = 0; m < NB_PROFILE_BANDS; m++)
689 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
690 }
691 }
692
693 factor(s->matrix_a, SOLVE_SIZE);
694
695 i = 0;
696 for (j = 0; j < SOLVE_SIZE; j++)
697 for (k = 0; k < NB_PROFILE_BANDS; k++)
698 s->matrix_b[i++] = pow(k, j);
699
700 i = 0;
701 for (j = 0; j < NB_PROFILE_BANDS; j++)
702 for (k = 0; k < SOLVE_SIZE; k++)
703 s->matrix_c[i++] = pow(j, k);
704
705 s->window = av_calloc(s->window_length, sizeof(*s->window));
706 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
707 if (!s->window || !s->bin2band)
708 return AVERROR(ENOMEM);
709
710 sdiv = s->band_multiplier;
711 for (i = 0; i < s->bin_count; i++)
712 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
713
714 s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
715
716 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
717 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
718 if (!s->band_alpha || !s->band_beta)
719 return AVERROR(ENOMEM);
720
721 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
722 DeNoiseChannel *dnch = &s->dnch[ch];
723
724 switch (s->noise_type) {
725 case WHITE_NOISE:
726 for (i = 0; i < NB_PROFILE_BANDS; i++)
727 dnch->band_noise[i] = 0.;
728 break;
729 case VINYL_NOISE:
730 for (i = 0; i < NB_PROFILE_BANDS; i++)
731 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
732 break;
733 case SHELLAC_NOISE:
734 for (i = 0; i < NB_PROFILE_BANDS; i++)
735 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
736 break;
737 case CUSTOM_NOISE:
738 read_custom_noise(s, ch);
739 break;
740 default:
741 return AVERROR_BUG;
742 }
743
744 reduce_mean(dnch->band_noise);
745
746 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
747 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
748 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
749 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
750 dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
751 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
752 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
753 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
754 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
755 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
756 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
757 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
758 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
759 dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
760 dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size);
761 ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
762 if (ret < 0)
763 return ret;
764 ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
765 if (ret < 0)
766 return ret;
767 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
768 sizeof(*dnch->spread_function));
769
770 if (!dnch->amt ||
771 !dnch->band_amt ||
772 !dnch->band_excit ||
773 !dnch->gain ||
774 !dnch->smoothed_gain ||
775 !dnch->prior ||
776 !dnch->prior_band_excit ||
777 !dnch->clean_data ||
778 !dnch->noisy_data ||
779 !dnch->out_samples ||
780 !dnch->fft_in ||
781 !dnch->fft_out ||
782 !dnch->abs_var ||
783 !dnch->rel_var ||
784 !dnch->min_abs_var ||
785 !dnch->spread_function ||
786 !dnch->fft ||
787 !dnch->ifft)
788 return AVERROR(ENOMEM);
789 }
790
791 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
792 DeNoiseChannel *dnch = &s->dnch[ch];
793 double *prior_band_excit = dnch->prior_band_excit;
794 double min, max;
795 double p1, p2;
796
797 p1 = pow(0.1, 2.5 / sdiv);
798 p2 = pow(0.1, 1.0 / sdiv);
799 j = 0;
800 for (m = 0; m < s->number_of_bands; m++) {
801 for (n = 0; n < s->number_of_bands; n++) {
802 if (n < m) {
803 dnch->spread_function[j++] = pow(p2, m - n);
804 } else if (n > m) {
805 dnch->spread_function[j++] = pow(p1, n - m);
806 } else {
807 dnch->spread_function[j++] = 1.0;
808 }
809 }
810 }
811
812 for (m = 0; m < s->number_of_bands; m++) {
813 dnch->band_excit[m] = 0.0;
814 prior_band_excit[m] = 0.0;
815 }
816
817 for (m = 0; m < s->bin_count; m++)
818 dnch->band_excit[s->bin2band[m]] += 1.0;
819
820 j = 0;
821 for (m = 0; m < s->number_of_bands; m++) {
822 for (n = 0; n < s->number_of_bands; n++)
823 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
824 }
825
826 min = pow(0.1, 2.5);
827 max = pow(0.1, 1.0);
828 for (int i = 0; i < s->number_of_bands; i++) {
829 if (i < lrint(12.0 * sdiv)) {
830 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
831 } else {
832 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
833 }
834 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
835 }
836
837 for (int i = 0; i < s->buffer_length; i++)
838 dnch->out_samples[i] = 0;
839
840 j = 0;
841 for (int i = 0; i < s->number_of_bands; i++)
842 for (int k = 0; k < s->number_of_bands; k++)
843 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
844 }
845
846 j = 0;
847 sar = s->sample_advance / s->sample_rate;
848 for (int i = 0; i < s->bin_count; i++) {
849 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
850 double d6 = (i - 1) * s->sample_rate / s->fft_length;
851 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
852 s->band_alpha[j] = exp(-sar / d7);
853 s->band_beta[j] = 1.0 - s->band_alpha[j];
854 j = s->bin2band[i];
855 }
856 }
857
858 s->winframe = ff_get_audio_buffer(inlink, s->window_length);
859 if (!s->winframe)
860 return AVERROR(ENOMEM);
861
862 wscale = sqrt(8.0 / (9.0 * s->fft_length));
863 sum = 0.0;
864 for (int i = 0; i < s->window_length; i++) {
865 double d10 = sin(i * M_PI / s->window_length);
866 d10 *= wscale * d10;
867 s->window[i] = d10;
868 sum += d10 * d10;
869 }
870
871 s->window_weight = 0.5 * sum;
872 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
873 s->sample_floor = s->floor * exp(4.144600506562284);
874
875 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
876 DeNoiseChannel *dnch = &s->dnch[ch];
877
878 dnch->noise_reduction = s->noise_reduction;
879 dnch->noise_floor = s->noise_floor;
880 dnch->residual_floor = s->residual_floor;
881
882 set_parameters(s, dnch, 1, 1);
883 }
884
885 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
886 i = 0;
887 for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
888 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
889 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
890 i++;
891 s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
892 }
893 s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
894
895 return 0;
896 }
897
898 static void init_sample_noise(DeNoiseChannel *dnch)
899 {
900 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
901 dnch->noise_band_norm[i] = 0.0;
902 dnch->noise_band_avr[i] = 0.0;
903 dnch->noise_band_avi[i] = 0.0;
904 dnch->noise_band_var[i] = 0.0;
905 }
906 }
907
/**
 * Transform one windowed block of channel ch and add its per-band
 * statistics to the accumulators in dnch.
 *
 * For every noise band between noise_band_edge[0] and the last edge, the
 * sums of the real parts, the imaginary parts and the (floored) squared
 * magnitudes of the bins are added to noise_band_avr/avi/var, and the band
 * width in bins is added to noise_band_norm.
 *
 * @param in frame whose first window_length samples of channel ch are read
 */
static void sample_noise_block(AudioFFTDeNoiseContext *s,
                               DeNoiseChannel *dnch,
                               AVFrame *in, int ch)
{
    /* The same buffers are viewed as float or double; only the view that
     * matches s->format is dereferenced below. */
    double *src_dbl = (double *)in->extended_data[ch];
    float *src_flt = (float *)in->extended_data[ch];
    double mag2, var = 0.0, avr = 0.0, avi = 0.0;
    AVComplexDouble *fft_out_dbl = dnch->fft_out;
    AVComplexFloat *fft_out_flt = dnch->fft_out;
    double *fft_in_dbl = dnch->fft_in;
    float *fft_in_flt = dnch->fft_in;
    int edge, j, k, n, edgemax;

    /* Window the input, scale it by 2^23 and zero-pad up to fft_length2. */
    switch (s->format) {
    case AV_SAMPLE_FMT_FLTP:
        for (int i = 0; i < s->window_length; i++)
            fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);

        for (int i = s->window_length; i < s->fft_length2; i++)
            fft_in_flt[i] = 0.f;
        break;
    case AV_SAMPLE_FMT_DBLP:
        for (int i = 0; i < s->window_length; i++)
            fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);

        for (int i = s->window_length; i < s->fft_length2; i++)
            fft_in_dbl[i] = 0.;
        break;
    }

    dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);

    /* edge is the start of the current band, j the start of the next one,
     * k the 1-based index of the current band. n is the bin being read; it
     * starts at the same value as i and advances with it. */
    edge = s->noise_band_edge[0];
    j = edge;
    k = 0;
    n = j;
    edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
    for (int i = j; i <= edgemax; i++) {
        if ((i == j) && (i < edgemax)) {
            /* Band boundary: flush the finished band (if it had any width)
             * and restart the running sums. */
            if (j > edge) {
                dnch->noise_band_norm[k - 1] += j - edge;
                dnch->noise_band_avr[k - 1] += avr;
                dnch->noise_band_avi[k - 1] += avi;
                dnch->noise_band_var[k - 1] += var;
            }
            k++;
            edge = j;
            j = s->noise_band_edge[k];
            /* The last band also takes in the bin at its upper edge. */
            if (k == NB_PROFILE_BANDS) {
                j++;
            }
            var = 0.0;
            avr = 0.0;
            avi = 0.0;
        }

        switch (s->format) {
        case AV_SAMPLE_FMT_FLTP:
            avr += fft_out_flt[n].re;
            avi += fft_out_flt[n].im;
            mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
                   fft_out_flt[n].im * fft_out_flt[n].im;
            break;
        case AV_SAMPLE_FMT_DBLP:
            avr += fft_out_dbl[n].re;
            avi += fft_out_dbl[n].im;
            mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
                   fft_out_dbl[n].im * fft_out_dbl[n].im;
            break;
        default:
            av_assert2(0);
        }

        /* Bin power is never counted below sample_floor. */
        mag2 = fmax(mag2, s->sample_floor);

        var += mag2;
        n++;
    }

    /* Flush the band that was still open when the loop ended. */
    dnch->noise_band_norm[k - 1] += j - edge;
    dnch->noise_band_avr[k - 1] += avr;
    dnch->noise_band_avi[k - 1] += avi;
    dnch->noise_band_var[k - 1] += var;
}
992
993 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
994 DeNoiseChannel *dnch,
995 double *sample_noise)
996 {
997 for (int i = 0; i < s->noise_band_count; i++) {
998 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
999 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
1000 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
1001 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
1002 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
1003 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
1004 sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
1005 }
1006 if (s->noise_band_count < NB_PROFILE_BANDS) {
1007 for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
1008 sample_noise[i] = sample_noise[i - 1];
1009 }
1010 }
1011
1012 static void set_noise_profile(AudioFFTDeNoiseContext *s,
1013 DeNoiseChannel *dnch,
1014 double *sample_noise)
1015 {
1016 double new_band_noise[NB_PROFILE_BANDS];
1017 double temp[NB_PROFILE_BANDS];
1018 double sum = 0.0;
1019
1020 for (int m = 0; m < NB_PROFILE_BANDS; m++)
1021 temp[m] = sample_noise[m];
1022
1023 for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
1024 sum = 0.0;
1025 for (int n = 0; n < NB_PROFILE_BANDS; n++)
1026 sum += s->matrix_b[i++] * temp[n];
1027 s->vector_b[m] = sum;
1028 }
1029 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
1030 for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
1031 sum = 0.0;
1032 for (int n = 0; n < SOLVE_SIZE; n++)
1033 sum += s->matrix_c[i++] * s->vector_b[n];
1034 temp[m] = sum;
1035 }
1036
1037 reduce_mean(temp);
1038
1039 av_log(s, AV_LOG_INFO, "bn=");
1040 for (int m = 0; m < NB_PROFILE_BANDS; m++) {
1041 new_band_noise[m] = temp[m];
1042 new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
1043 av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
1044 }
1045 av_log(s, AV_LOG_INFO, "\n");
1046 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1047 }
1048
1049 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1050 {
1051 AudioFFTDeNoiseContext *s = ctx->priv;
1052 AVFrame *in = arg;
1053 const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
1054 const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1055 const int window_length = s->window_length;
1056 const double *window = s->window;
1057
1058 for (int ch = start; ch < end; ch++) {
1059 DeNoiseChannel *dnch = &s->dnch[ch];
1060 const double *src_dbl = (const double *)in->extended_data[ch];
1061 const float *src_flt = (const float *)in->extended_data[ch];
1062 double *dst = dnch->out_samples;
1063 double *fft_in_dbl = dnch->fft_in;
1064 float *fft_in_flt = dnch->fft_in;
1065
1066 switch (s->format) {
1067 case AV_SAMPLE_FMT_FLTP:
1068 for (int m = 0; m < window_length; m++)
1069 fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
1070
1071 for (int m = window_length; m < s->fft_length2; m++)
1072 fft_in_flt[m] = 0.f;
1073 break;
1074 case AV_SAMPLE_FMT_DBLP:
1075 for (int m = 0; m < window_length; m++)
1076 fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
1077
1078 for (int m = window_length; m < s->fft_length2; m++)
1079 fft_in_dbl[m] = 0.;
1080 break;
1081 }
1082
1083 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
1084
1085 process_frame(ctx, s, dnch,
1086 dnch->prior,
1087 dnch->prior_band_excit,
1088 s->track_noise);
1089
1090 dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);
1091
1092 switch (s->format) {
1093 case AV_SAMPLE_FMT_FLTP:
1094 for (int m = 0; m < window_length; m++)
1095 dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
1096 break;
1097 case AV_SAMPLE_FMT_DBLP:
1098 for (int m = 0; m < window_length; m++)
1099 dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
1100 break;
1101 }
1102 }
1103
1104 return 0;
1105 }
1106
1107 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1108 {
1109 AVFilterContext *ctx = inlink->dst;
1110 AVFilterLink *outlink = ctx->outputs[0];
1111 AudioFFTDeNoiseContext *s = ctx->priv;
1112 const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1113 const int offset = s->window_length - s->sample_advance;
1114 AVFrame *out;
1115
1116 for (int ch = 0; ch < s->channels; ch++) {
1117 uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
1118
1119 memmove(src, src + s->sample_advance * s->sample_size,
1120 offset * s->sample_size);
1121 memcpy(src + offset * s->sample_size, in->extended_data[ch],
1122 in->nb_samples * s->sample_size);
1123 memset(src + s->sample_size * (offset + in->nb_samples), 0,
1124 (s->sample_advance - in->nb_samples) * s->sample_size);
1125 }
1126
1127 if (s->track_noise) {
1128 double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1129
1130 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1131 DeNoiseChannel *dnch = &s->dnch[ch];
1132
1133 average += dnch->noise_floor;
1134 max = fmax(max, dnch->noise_floor);
1135 min = fmin(min, dnch->noise_floor);
1136 }
1137
1138 average /= inlink->ch_layout.nb_channels;
1139
1140 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1141 DeNoiseChannel *dnch = &s->dnch[ch];
1142
1143 switch (s->noise_floor_link) {
1144 case MIN_LINK: dnch->noise_floor = min; break;
1145 case MAX_LINK: dnch->noise_floor = max; break;
1146 case AVERAGE_LINK: dnch->noise_floor = average; break;
1147 case NONE_LINK:
1148 default:
1149 break;
1150 }
1151
1152 if (dnch->noise_floor != dnch->last_noise_floor)
1153 set_parameters(s, dnch, 1, 0);
1154 }
1155 }
1156
1157 if (s->sample_noise_mode == SAMPLE_START) {
1158 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1159 DeNoiseChannel *dnch = &s->dnch[ch];
1160
1161 init_sample_noise(dnch);
1162 }
1163 s->sample_noise_mode = SAMPLE_NONE;
1164 s->sample_noise = 1;
1165 s->sample_noise_blocks = 0;
1166 }
1167
1168 if (s->sample_noise) {
1169 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1170 DeNoiseChannel *dnch = &s->dnch[ch];
1171
1172 sample_noise_block(s, dnch, s->winframe, ch);
1173 }
1174 s->sample_noise_blocks++;
1175 }
1176
1177 if (s->sample_noise_mode == SAMPLE_STOP) {
1178 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1179 DeNoiseChannel *dnch = &s->dnch[ch];
1180 double sample_noise[NB_PROFILE_BANDS];
1181
1182 if (s->sample_noise_blocks <= 0)
1183 break;
1184 finish_sample_noise(s, dnch, sample_noise);
1185 set_noise_profile(s, dnch, sample_noise);
1186 set_parameters(s, dnch, 1, 1);
1187 }
1188 s->sample_noise = 0;
1189 s->sample_noise_blocks = 0;
1190 s->sample_noise_mode = SAMPLE_NONE;
1191 }
1192
1193 ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1194 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1195
1196 if (av_frame_is_writable(in)) {
1197 out = in;
1198 } else {
1199 out = ff_get_audio_buffer(outlink, in->nb_samples);
1200 if (!out) {
1201 av_frame_free(&in);
1202 return AVERROR(ENOMEM);
1203 }
1204
1205 av_frame_copy_props(out, in);
1206 }
1207
1208 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1209 DeNoiseChannel *dnch = &s->dnch[ch];
1210 double *src = dnch->out_samples;
1211 const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
1212 const float *orig_flt = (const float *)s->winframe->extended_data[ch];
1213 double *dst_dbl = (double *)out->extended_data[ch];
1214 float *dst_flt = (float *)out->extended_data[ch];
1215
1216 switch (output_mode) {
1217 case IN_MODE:
1218 switch (s->format) {
1219 case AV_SAMPLE_FMT_FLTP:
1220 for (int m = 0; m < out->nb_samples; m++)
1221 dst_flt[m] = orig_flt[m];
1222 break;
1223 case AV_SAMPLE_FMT_DBLP:
1224 for (int m = 0; m < out->nb_samples; m++)
1225 dst_dbl[m] = orig_dbl[m];
1226 break;
1227 }
1228 break;
1229 case OUT_MODE:
1230 switch (s->format) {
1231 case AV_SAMPLE_FMT_FLTP:
1232 for (int m = 0; m < out->nb_samples; m++)
1233 dst_flt[m] = src[m];
1234 break;
1235 case AV_SAMPLE_FMT_DBLP:
1236 for (int m = 0; m < out->nb_samples; m++)
1237 dst_dbl[m] = src[m];
1238 break;
1239 }
1240 break;
1241 case NOISE_MODE:
1242 switch (s->format) {
1243 case AV_SAMPLE_FMT_FLTP:
1244 for (int m = 0; m < out->nb_samples; m++)
1245 dst_flt[m] = orig_flt[m] - src[m];
1246 break;
1247 case AV_SAMPLE_FMT_DBLP:
1248 for (int m = 0; m < out->nb_samples; m++)
1249 dst_dbl[m] = orig_dbl[m] - src[m];
1250 break;
1251 }
1252 break;
1253 default:
1254 if (in != out)
1255 av_frame_free(&in);
1256 av_frame_free(&out);
1257 return AVERROR_BUG;
1258 }
1259
1260 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1261 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1262 }
1263
1264 if (out != in)
1265 av_frame_free(&in);
1266 return ff_filter_frame(outlink, out);
1267 }
1268
1269 static int activate(AVFilterContext *ctx)
1270 {
1271 AVFilterLink *inlink = ctx->inputs[0];
1272 AVFilterLink *outlink = ctx->outputs[0];
1273 AudioFFTDeNoiseContext *s = ctx->priv;
1274 AVFrame *in = NULL;
1275 int ret;
1276
1277 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1278
1279 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1280 if (ret < 0)
1281 return ret;
1282 if (ret > 0)
1283 return output_frame(inlink, in);
1284
1285 if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1286 ff_filter_set_ready(ctx, 10);
1287 return 0;
1288 }
1289
1290 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1291 FF_FILTER_FORWARD_WANTED(outlink, inlink);
1292
1293 return FFERROR_NOT_READY;
1294 }
1295
1296 static av_cold void uninit(AVFilterContext *ctx)
1297 {
1298 AudioFFTDeNoiseContext *s = ctx->priv;
1299
1300 av_freep(&s->window);
1301 av_freep(&s->bin2band);
1302 av_freep(&s->band_alpha);
1303 av_freep(&s->band_beta);
1304 av_frame_free(&s->winframe);
1305
1306 if (s->dnch) {
1307 for (int ch = 0; ch < s->channels; ch++) {
1308 DeNoiseChannel *dnch = &s->dnch[ch];
1309 av_freep(&dnch->amt);
1310 av_freep(&dnch->band_amt);
1311 av_freep(&dnch->band_excit);
1312 av_freep(&dnch->gain);
1313 av_freep(&dnch->smoothed_gain);
1314 av_freep(&dnch->prior);
1315 av_freep(&dnch->prior_band_excit);
1316 av_freep(&dnch->clean_data);
1317 av_freep(&dnch->noisy_data);
1318 av_freep(&dnch->out_samples);
1319 av_freep(&dnch->spread_function);
1320 av_freep(&dnch->abs_var);
1321 av_freep(&dnch->rel_var);
1322 av_freep(&dnch->min_abs_var);
1323 av_freep(&dnch->fft_in);
1324 av_freep(&dnch->fft_out);
1325 av_tx_uninit(&dnch->fft);
1326 av_tx_uninit(&dnch->ifft);
1327 }
1328 av_freep(&s->dnch);
1329 }
1330 }
1331
1332 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1333 char *res, int res_len, int flags)
1334 {
1335 AudioFFTDeNoiseContext *s = ctx->priv;
1336 int ret = 0;
1337
1338 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1339 if (ret < 0)
1340 return ret;
1341
1342 if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1343 return 0;
1344
1345 for (int ch = 0; ch < s->channels; ch++) {
1346 DeNoiseChannel *dnch = &s->dnch[ch];
1347
1348 dnch->noise_reduction = s->noise_reduction;
1349 dnch->noise_floor = s->noise_floor;
1350 dnch->residual_floor = s->residual_floor;
1351
1352 set_parameters(s, dnch, 1, 1);
1353 }
1354
1355 return 0;
1356 }
1357
1358 static const AVFilterPad inputs[] = {
1359 {
1360 .name = "default",
1361 .type = AVMEDIA_TYPE_AUDIO,
1362 .config_props = config_input,
1363 },
1364 };
1365
1366 const AVFilter ff_af_afftdn = {
1367 .name = "afftdn",
1368 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1369 .priv_size = sizeof(AudioFFTDeNoiseContext),
1370 .priv_class = &afftdn_class,
1371 .activate = activate,
1372 .uninit = uninit,
1373 FILTER_INPUTS(inputs),
1374 FILTER_OUTPUTS(ff_audio_default_filterpad),
1375 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
1376 .process_command = process_command,
1377 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1378 AVFILTER_FLAG_SLICE_THREADS,
1379 };
1380