FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_afftdn.c
Date: 2025-08-19 23:55:23
Exec Total Coverage
Lines: 0 776 0.0%
Functions: 0 25 0.0%
Branches: 0 417 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/avassert.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/channel_layout.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/tx.h"
29 #include "avfilter.h"
30 #include "audio.h"
31 #include "filters.h"
32
/* Numeric constants shared by the whole filter. */
33 #define C (M_LN10 * 0.1) /* ln(10)/10, so exp(x * C) equals 10^(x/10) */
34 #define SOLVE_SIZE (5) /* dimension of the square system handled by factor()/solve() */
35 #define NB_PROFILE_BANDS (15) /* number of entries in the per-channel noise profile */
36
/* Values accepted by the "sample_noise"/"sn" option (stored in sample_noise_mode). */
37 enum SampleNoiseModes {
38 SAMPLE_NONE, /* option constant "none" */
39 SAMPLE_START, /* option constants "start" and "begin" */
40 SAMPLE_STOP, /* option constants "stop" and "end" */
41 NB_SAMPLEMODES /* count of modes; the option's maximum is NB_SAMPLEMODES-1 */
42 };
43
/* Values accepted by the "output_mode"/"om" option. */
44 enum OutModes {
45 IN_MODE, /* option constants "input" and "i" */
46 OUT_MODE, /* option constants "output" and "o"; the default */
47 NOISE_MODE, /* option constants "noise" and "n" */
48 NB_MODES /* count of modes; the option's maximum is NB_MODES-1 */
49 };
50
/* Values accepted by the "noise_link"/"nl" option (stored in noise_floor_link). */
51 enum NoiseLinkType {
52 NONE_LINK, /* option constant "none" */
53 MIN_LINK, /* option constant "min"; the default */
54 MAX_LINK, /* option constant "max" */
55 AVERAGE_LINK, /* option constant "average" */
56 NB_LINK /* count of link types; the option's maximum is NB_LINK-1 */
57 };
58
/* Values accepted by the "noise_type"/"nt" option; selects how band_noise[] is filled in config_input(). */
59 enum NoiseType {
60 WHITE_NOISE, /* all profile bands set to 0; the default */
61 VINYL_NOISE, /* profile from get_band_noise(s, i, 50.0, 500.5, 2125.0) */
62 SHELLAC_NOISE, /* profile from get_band_noise(s, i, 1.0, 500.0, 1.0E10) */
63 CUSTOM_NOISE, /* profile parsed from the "band_noise" string by read_custom_noise() */
64 NB_NOISE /* count of noise types; the option's maximum is NB_NOISE-1 */
65 };
66
/* Per-channel denoiser state; config_input() allocates one of these per input channel in s->dnch[]. */
67 typedef struct DeNoiseChannel {
68 double band_noise[NB_PROFILE_BANDS]; /* noise profile per band; filled in config_input(), mean removed by reduce_mean() */
69 double noise_band_auto_var[NB_PROFILE_BANDS]; /* max_var * exp((profile - 2) * C), or the measured variance from finish_sample_noise() */
70 double noise_band_sample[NB_PROFILE_BANDS]; /* not referenced in the visible part of this file */
71 double *amt; /* bin_count entries: band_amt[] looked up through bin2band[] */
72 double *band_amt; /* number_of_bands entries: spread_function applied to band_excit[] */
73 double *band_excit; /* number_of_bands entries: per-band sum of clean_data[] */
74 double *gain; /* bin_count entries: per-bin gain before smoothing */
75 double *smoothed_gain; /* bin_count entries: gain actually applied to the spectrum */
76 double *prior; /* bin_count entries, allocated here; process_frame() takes its prior buffer as a parameter */
77 double *prior_band_excit; /* number_of_bands entries; initialised in config_input() */
78 double *clean_data; /* bin_count entries: power * gain^2 */
79 double *noisy_data; /* bin_count entries: magnitude of each input bin */
80 double *out_samples; /* buffer_length entries, zeroed in config_input() */
81 double *spread_function; /* number_of_bands * number_of_bands matrix, row-major as indexed in process_frame() */
82 double *abs_var; /* bin_count entries: fmax(max_var * rel_var[i], 1.0) */
83 double *rel_var; /* bin_count entries: written by set_band_parameters() */
84 double *min_abs_var; /* bin_count entries: gain_scale * abs_var[i] */
85 void *fft_in; /* fft_length2 real samples (float or double depending on format) */
86 void *fft_out; /* fft_length2 + 1 complex samples (AVComplexFloat or AVComplexDouble) */
87 AVTXContext *fft, *ifft; /* transform contexts created with av_tx_init(), inv = 0 and inv = 1 */
88 av_tx_fn tx_fn, itx_fn; /* transform functions matching fft / ifft */
89
90 double noise_band_norm[NB_PROFILE_BANDS]; /* accumulated bin counts per band (sample_noise_block) */
91 double noise_band_avr[NB_PROFILE_BANDS]; /* accumulated real parts per band */
92 double noise_band_avi[NB_PROFILE_BANDS]; /* accumulated imaginary parts per band */
93 double noise_band_var[NB_PROFILE_BANDS]; /* accumulated (floored) squared magnitudes per band */
94
95 double noise_reduction; /* per-channel copy of the "noise_reduction" option */
96 double last_noise_reduction; /* value max_gain was last derived from */
97 double noise_floor; /* per-channel noise floor; adapted in process_frame() when tracking */
98 double last_noise_floor; /* noise floor used for max_var; raised to residual_floor when track_residual is set */
99 double residual_floor; /* per-channel copy of the "residual_floor" option */
100 double last_residual_floor; /* residual floor in effect after the last set_parameters() update */
101 double max_gain; /* exp(last_noise_reduction * 0.5 * C) */
102 double max_var; /* s->floor * exp((100 + last_noise_floor) * C) */
103 double gain_scale; /* 1 / max_gain^2 */
104 } DeNoiseChannel;
105
/* Filter private context: user options, sizes derived in config_input(), shared tables and per-channel state. */
106 typedef struct AudioFFTDeNoiseContext {
107 const AVClass *class;
108
109 int format; /* inlink->format; the code handles AV_SAMPLE_FMT_FLTP and AV_SAMPLE_FMT_DBLP */
110 size_t sample_size; /* sizeof(float) or sizeof(double), depending on format */
111 size_t complex_sample_size; /* sizeof(AVComplexFloat) or sizeof(AVComplexDouble) */
112
113 float noise_reduction; /* option "noise_reduction" / "nr" */
114 float noise_floor; /* option "noise_floor" / "nf" */
115 int noise_type; /* option "noise_type" / "nt" (enum NoiseType) */
116 char *band_noise_str; /* option "band_noise" / "bn"; parsed by read_custom_noise() */
117 float residual_floor; /* option "residual_floor" / "rf" */
118 int track_noise; /* option "track_noise" / "tn" */
119 int track_residual; /* option "track_residual" / "tr" */
120 int output_mode; /* option "output_mode" / "om" (enum OutModes) */
121 int noise_floor_link; /* option "noise_link" / "nl" (enum NoiseLinkType) */
122 float ratio; /* option "adaptivity" / "ad" */
123 int gain_smooth; /* option "gain_smooth" / "gs": smoothing radius in bins */
124 float band_multiplier; /* option "band_multiplier" / "bm" */
125 float floor_offset; /* option "floor_offset" / "fo" */
126
127 int channels; /* inlink->ch_layout.nb_channels */
128 int sample_noise; /* not referenced in the visible part of this file */
129 int sample_noise_blocks; /* not referenced in the visible part of this file */
130 int sample_noise_mode; /* option "sample_noise" / "sn" (enum SampleNoiseModes) */
131 float sample_rate; /* inlink->sample_rate stored as float */
132 int buffer_length; /* fft_length * 2 */
133 int fft_length; /* same value as fft_length2 */
134 int fft_length2; /* 1 << (32 - ff_clz(window_length)) */
135 int bin_count; /* fft_length2 / 2 + 1 */
136 int window_length; /* 3 * sample_advance */
137 int sample_advance; /* sample_rate / 80 */
138 int number_of_bands; /* bin2band[bin_count - 1] + 1 */
139
140 int band_centre[NB_PROFILE_BANDS]; /* profile band centres, starting at 80; scaled by fft_length / sample_rate to get bins, so presumably Hz */
141
142 int *bin2band; /* bin_count entries: band index of each bin */
143 double *window; /* window_length entries: analysis window computed in config_input() */
144 double *band_alpha; /* number_of_bands entries: weight of the current band excitation */
145 double *band_beta; /* number_of_bands entries: 1 - band_alpha, weight of the previous excitation */
146
147 DeNoiseChannel *dnch; /* array of `channels` per-channel states */
148
149 AVFrame *winframe; /* audio buffer of window_length samples from ff_get_audio_buffer() */
150
151 double window_weight; /* 0.5 * sum of squared window values */
152 double floor; /* (1LL << 48) * exp(-23.025558369790467) * window_weight */
153 double sample_floor; /* floor * exp(4.144600506562284); lower clamp for sampled bin power */
154
155 int noise_band_edge[NB_PROFILE_BANDS + 2]; /* band edges in bins; the last slot temporarily holds the band count */
156 int noise_band_count; /* number of edges more than ~10% above their predecessor */
157 double matrix_a[SOLVE_SIZE * SOLVE_SIZE]; /* sums of powers of band indices, factored in place by factor() */
158 double vector_b[SOLVE_SIZE]; /* right-hand side / solution scratch for solve() */
159 double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS]; /* matrix_b[j * NB_PROFILE_BANDS + k] = pow(k, j) */
160 double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS]; /* matrix_c[j * SOLVE_SIZE + k] = pow(j, k) */
161 } AudioFFTDeNoiseContext;
162
/* Helpers for the option table: field offset in the private context and the two flag sets used by its entries. */
163 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
164 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* audio + filtering flags */
165 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM /* AF plus the runtime-parameter flag */
166
/*
 * User-visible options of the filter. Every option has a long and a short
 * name that share the same field, default and range. AV_OPT_TYPE_CONST
 * entries are named values for the integer option with the same .unit.
 */
167 static const AVOption afftdn_options[] = {
168 { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
169 { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
170 { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
171 { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
172 { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
173 { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
174 { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
175 { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
176 { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
177 { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
178 { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
179 { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
180 { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
181 { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
182 { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
183 { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
184 { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
185 { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
186 { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
187 { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
188 { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
189 { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
190 { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
191 { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
192 { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
193 { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
194 { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
195 { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
196 { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
197 { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
198 { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
199 { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
200 { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
201 { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
202 { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
203 { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
204 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" },
205 { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" },
206 { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" },
207 { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" },
208 { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
209 { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
210 { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
211 { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
212 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" },
213 { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
214 { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
215 { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
216 { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
217 { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
218 { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
219 { NULL }
220 };
221
/* Defines the AVClass for the "afftdn" filter from the option table above. */
222 AVFILTER_DEFINE_CLASS(afftdn);
223
224 static double get_band_noise(AudioFFTDeNoiseContext *s,
225 int band, double a,
226 double b, double c)
227 {
228 double d1, d2, d3;
229
230 d1 = a / s->band_centre[band];
231 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
232 d2 = b / s->band_centre[band];
233 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
234 d3 = s->band_centre[band] / c;
235 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
236
237 return -d1 + d2 - d3;
238 }
239
/**
 * Factor a size x size matrix in place by elimination without pivoting.
 *
 * Element (r, c) is stored at array[r + c * size]. After the call, the
 * entries below the diagonal hold the elimination multipliers and the rest
 * holds the reduced matrix, which is the layout solve() expects.
 *
 * @param array matrix to factor, overwritten
 * @param size  number of rows/columns
 */
static void factor(double *array, int size)
{
    for (int step = 0; step + 1 < size; step++) {
        /* The diagonal entry of this step is never written by the inner loops. */
        const double diag = array[step + step * size];

        for (int row = step + 1; row < size; row++) {
            const double mult = array[row + step * size] / diag;

            array[row + step * size] = mult;
            for (int col = step + 1; col < size; col++)
                array[row + col * size] -= mult * array[step + col * size];
        }
    }
}
253
/**
 * Solve a linear system whose matrix was previously processed by factor().
 *
 * Element (r, c) of the matrix is read from matrix[r + c * size]. The
 * right-hand side in vector is overwritten with the solution.
 *
 * @param matrix factored matrix, read only
 * @param vector right-hand side on entry, solution on return
 * @param size   number of unknowns
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;
    int row;

    /* Forward pass: apply the stored multipliers to the right-hand side. */
    for (int col = 0; col < last; col++) {
        for (row = col + 1; row < size; row++)
            vector[row] -= matrix[row + col * size] * vector[col];
    }

    vector[last] /= matrix[size * size - 1];

    /* Backward pass: substitute the already known unknowns, bottom-up. */
    row = size - 2;
    while (row >= 0) {
        double acc = vector[row];

        for (int col = row + 1; col < size; col++)
            acc -= matrix[row + col * size] * vector[col];
        vector[row] = acc / matrix[row + row * size];
        row--;
    }
}
272
273 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
274 DeNoiseChannel *dnch,
275 int band)
276 {
277 double product, sum, f;
278 int i = 0;
279
280 if (band < NB_PROFILE_BANDS)
281 return dnch->band_noise[band];
282
283 for (int j = 0; j < SOLVE_SIZE; j++) {
284 sum = 0.0;
285 for (int k = 0; k < NB_PROFILE_BANDS; k++)
286 sum += s->matrix_b[i++] * dnch->band_noise[k];
287 s->vector_b[j] = sum;
288 }
289
290 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
291 f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
292 f = 15.0 + log(f / 1.5) / log(1.5);
293 sum = 0.0;
294 product = 1.0;
295 for (int j = 0; j < SOLVE_SIZE; j++) {
296 sum += product * s->vector_b[j];
297 product *= f;
298 }
299
300 return sum;
301 }
302
/**
 * Map gain a through a rational curve controlled by b.
 *
 * @param a gain to limit
 * @param b limit parameter
 * @return (b*a - 1) / (b + a - 2) when a > 1,
 *         (b*a - 2a + 1) / (b - a) when a < 1,
 *         1.0 otherwise (including when a is NaN)
 */
static double limit_gain(double a, double b)
{
    double limited = 1.0;

    if (a > 1.0)
        limited = (b * a - 1.0) / (b + a - 2.0);
    else if (a < 1.0)
        limited = (b * a - 2.0 * a + 1.0) / (b - a);

    return limited;
}
311
312 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
313 double floor, int len, double *rnum, double *rden)
314 {
315 double num = 0., den = 0.;
316 int size = 0;
317
318 for (int n = 0; n < len; n++) {
319 const double v = spectral[n];
320 if (v > floor) {
321 num += log(v);
322 den += v;
323 size++;
324 }
325 }
326
327 size = FFMAX(size, 1);
328
329 num /= size;
330 den /= size;
331
332 num = exp(num);
333
334 *rnum = num;
335 *rden = den;
336 }
337
/* Forward declaration: process_frame() calls set_parameters() before its definition further down in the file. */
338 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
339
340 static double floor_offset(const double *S, int size, double mean)
341 {
342 double offset = 0.0;
343
344 for (int n = 0; n < size; n++) {
345 const double p = S[n] - mean;
346
347 offset = fmax(offset, fabs(p));
348 }
349
350 return offset / mean;
351 }
352
/*
 * Compute a per-bin gain for the spectrum stored in dnch->fft_out and scale
 * that spectrum in place.
 *
 * ctx              filter context; only its first output link is consulted
 * s                shared filter state (sizes, bin2band, band_alpha/beta, options)
 * dnch             per-channel buffers that are read and overwritten here
 * prior            bin_count values carried between calls, updated in place
 * prior_band_excit number_of_bands values carried between calls, updated in place
 * track_noise      when non-zero, the channel noise floor may be adapted
 */
353 static void process_frame(AVFilterContext *ctx,
354 AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
355 double *prior, double *prior_band_excit, int track_noise)
356 {
357 AVFilterLink *outlink = ctx->outputs[0];
358 FilterLink *outl = ff_filter_link(outlink);
359 const double *abs_var = dnch->abs_var;
/* While no frame has been output yet (frame_count_out == 0) the ratio is forced to 1.0. */
360 const double ratio = outl->frame_count_out ? s->ratio : 1.0;
361 const double rratio = 1. - ratio;
362 const int *bin2band = s->bin2band;
363 double *noisy_data = dnch->noisy_data;
364 double *band_excit = dnch->band_excit;
365 double *band_amt = dnch->band_amt;
366 double *smoothed_gain = dnch->smoothed_gain;
/* The same buffer viewed as double or float complex; s->format selects which view is valid. */
367 AVComplexDouble *fft_data_dbl = dnch->fft_out;
368 AVComplexFloat *fft_data_flt = dnch->fft_out;
369 double *gain = dnch->gain;
370
/* Pass 1: per-bin magnitude, blended estimate of (power / variance - 1) and the initial gain x / (1 + x). */
371 for (int i = 0; i < s->bin_count; i++) {
372 double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
373
374 switch (s->format) {
375 case AV_SAMPLE_FMT_FLTP:
376 noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
377 break;
378 case AV_SAMPLE_FMT_DBLP:
379 noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
380 break;
381 default:
382 av_assert0(0);
383 }
384
385 power = mag * mag;
386 mag_abs_var = power / abs_var[i];
387 new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
388 new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
389 sqr_new_gain = new_gain * new_gain;
390 prior[i] = mag_abs_var * sqr_new_gain;
391 dnch->clean_data[i] = power * sqr_new_gain;
392 gain[i] = new_gain;
393 }
394
/* Optional noise-floor tracking: only when the geometric/arithmetic mean ratio of the magnitudes exceeds 0.8. */
395 if (track_noise) {
396 double flatness, num, den;
397
398 spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
399
400 flatness = num / den;
401 if (flatness > 0.8) {
402 const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
403 const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
404
/* Move the floor 10% of the way towards the new estimate, then refresh the derived variances. */
405 dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
406 set_parameters(s, dnch, 1, 1);
407 }
408 }
409
410 for (int i = 0; i < s->number_of_bands; i++) {
411 band_excit[i] = 0.0;
412 band_amt[i] = 0.0;
413 }
414
/* Sum the per-bin clean power into bands. */
415 for (int i = 0; i < s->bin_count; i++)
416 band_excit[bin2band[i]] += dnch->clean_data[i];
417
/* Keep the larger of the current excitation and its alpha/beta blend with the previous call's value. */
418 for (int i = 0; i < s->number_of_bands; i++) {
419 band_excit[i] = fmax(band_excit[i],
420 s->band_alpha[i] * band_excit[i] +
421 s->band_beta[i] * prior_band_excit[i]);
422 prior_band_excit[i] = band_excit[i];
423 }
424
/* band_amt = spread_function (number_of_bands x number_of_bands, rows consecutive) times band_excit. */
425 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
426 for (int k = 0; k < s->number_of_bands; k++) {
427 band_amt[j] += dnch->spread_function[i++] * band_excit[k];
428 }
429 }
430
431 for (int i = 0; i < s->bin_count; i++)
432 dnch->amt[i] = band_amt[bin2band[i]];
433
/* Limit the gain per bin according to how amt compares with abs_var and min_abs_var. */
434 for (int i = 0; i < s->bin_count; i++) {
435 if (dnch->amt[i] > abs_var[i]) {
436 gain[i] = 1.0;
437 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
438 const double limit = sqrt(abs_var[i] / dnch->amt[i]);
439
440 gain[i] = limit_gain(gain[i], limit);
441 } else {
442 gain[i] = limit_gain(gain[i], dnch->max_gain);
443 }
444 }
445
/* Start from the raw gains; bins closer than r to either end keep this unsmoothed value. */
446 memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
447 if (s->gain_smooth > 0) {
448 const int r = s->gain_smooth;
449
450 for (int i = r; i < s->bin_count - r; i++) {
451 const double gc = gain[i];
452 double num = 0., den = 0.;
453
/* Weighted mean over the 2r+1 neighbours; the weight shrinks with the distance of a neighbour's gain from the centre gain. */
454 for (int j = -r; j <= r; j++) {
455 const double g = gain[i + j];
456 const double d = 1. - fabs(g - gc);
457
458 num += g * d;
459 den += d;
460 }
461
462 smoothed_gain[i] = num / den;
463 }
464 }
465
/* Apply the final gain to the real and imaginary part of every bin. */
466 switch (s->format) {
467 case AV_SAMPLE_FMT_FLTP:
468 for (int i = 0; i < s->bin_count; i++) {
469 const float new_gain = smoothed_gain[i];
470
471 fft_data_flt[i].re *= new_gain;
472 fft_data_flt[i].im *= new_gain;
473 }
474 break;
475 case AV_SAMPLE_FMT_DBLP:
476 for (int i = 0; i < s->bin_count; i++) {
477 const double new_gain = smoothed_gain[i];
478
479 fft_data_dbl[i].re *= new_gain;
480 fft_data_dbl[i].im *= new_gain;
481 }
482 break;
483 }
484 }
485
486 static double freq2bark(double x)
487 {
488 double d = x / 7500.0;
489
490 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
491 }
492
493 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
494 {
495 if (band == -1)
496 return lrint(s->band_centre[0] / 1.5);
497
498 return s->band_centre[band];
499 }
500
501 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
502 {
503 int i;
504
505 if (band == NB_PROFILE_BANDS) {
506 i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
507 } else {
508 i = lrint(s->band_centre[band] / 1.224745);
509 }
510
511 return FFMIN(i, s->sample_rate / 2);
512 }
513
514 static void set_band_parameters(AudioFFTDeNoiseContext *s,
515 DeNoiseChannel *dnch)
516 {
517 double band_noise, d2, d3, d4, d5;
518 int i = 0, j = 0, k = 0;
519
520 d5 = 0.0;
521 band_noise = process_get_band_noise(s, dnch, 0);
522 for (int m = j; m < s->bin_count; m++) {
523 if (m == j) {
524 i = j;
525 d5 = band_noise;
526 if (k >= NB_PROFILE_BANDS) {
527 j = s->bin_count;
528 } else {
529 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
530 }
531 d2 = j - i;
532 band_noise = process_get_band_noise(s, dnch, k);
533 k++;
534 }
535 d3 = (j - m) / d2;
536 d4 = (m - i) / d2;
537 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
538 }
539
540 for (i = 0; i < NB_PROFILE_BANDS; i++)
541 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
542 }
543
544 static void read_custom_noise(AVFilterContext *ctx, int ch)
545 {
546 AudioFFTDeNoiseContext *s = ctx->priv;
547 DeNoiseChannel *dnch = &s->dnch[ch];
548 char *custom_noise_str, *p, *arg, *saveptr = NULL;
549 double band_noise[NB_PROFILE_BANDS] = { 0.f };
550 int ret;
551
552 if (!s->band_noise_str)
553 return;
554
555 custom_noise_str = p = av_strdup(s->band_noise_str);
556 if (!p)
557 return;
558
559 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
560 float noise;
561
562 if (!(arg = av_strtok(p, "| ", &saveptr)))
563 break;
564
565 p = NULL;
566
567 ret = av_sscanf(arg, "%f", &noise);
568 if (ret != 1) {
569 av_log(ctx, AV_LOG_ERROR, "Custom band noise must be float.\n");
570 break;
571 }
572
573 band_noise[i] = av_clipd(noise, -24., 24.);
574 }
575
576 av_free(custom_noise_str);
577 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
578 }
579
580 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
581 {
582 if (dnch->last_noise_floor != dnch->noise_floor)
583 dnch->last_noise_floor = dnch->noise_floor;
584
585 if (s->track_residual)
586 dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
587
588 dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
589 if (update_auto_var) {
590 for (int i = 0; i < NB_PROFILE_BANDS; i++)
591 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
592 }
593
594 if (s->track_residual) {
595 if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
596 update_var = 1;
597 dnch->last_residual_floor = dnch->residual_floor;
598 dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
599 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
600 }
601 } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
602 update_var = 1;
603 dnch->last_noise_reduction = dnch->noise_reduction;
604 dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
605 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
606 }
607
608 dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
609
610 if (update_var) {
611 set_band_parameters(s, dnch);
612
613 for (int i = 0; i < s->bin_count; i++) {
614 dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
615 dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
616 }
617 }
618 }
619
620 static void reduce_mean(double *band_noise)
621 {
622 double mean = 0.f;
623
624 for (int i = 0; i < NB_PROFILE_BANDS; i++)
625 mean += band_noise[i];
626 mean /= NB_PROFILE_BANDS;
627
628 for (int i = 0; i < NB_PROFILE_BANDS; i++)
629 band_noise[i] -= mean;
630 }
631
632 static int config_input(AVFilterLink *inlink)
633 {
634 AVFilterContext *ctx = inlink->dst;
635 AudioFFTDeNoiseContext *s = ctx->priv;
636 double wscale, sar, sum, sdiv;
637 int i, j, k, m, n, ret, tx_type;
638 double dscale = 1.;
639 float fscale = 1.f;
640 void *scale;
641
642 s->format = inlink->format;
643
644 switch (s->format) {
645 case AV_SAMPLE_FMT_FLTP:
646 s->sample_size = sizeof(float);
647 s->complex_sample_size = sizeof(AVComplexFloat);
648 tx_type = AV_TX_FLOAT_RDFT;
649 scale = &fscale;
650 break;
651 case AV_SAMPLE_FMT_DBLP:
652 s->sample_size = sizeof(double);
653 s->complex_sample_size = sizeof(AVComplexDouble);
654 tx_type = AV_TX_DOUBLE_RDFT;
655 scale = &dscale;
656 break;
657 }
658
659 s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
660 if (!s->dnch)
661 return AVERROR(ENOMEM);
662
663 s->channels = inlink->ch_layout.nb_channels;
664 s->sample_rate = inlink->sample_rate;
665 s->sample_advance = s->sample_rate / 80;
666 s->window_length = 3 * s->sample_advance;
667 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
668 s->fft_length = s->fft_length2;
669 s->buffer_length = s->fft_length * 2;
670 s->bin_count = s->fft_length2 / 2 + 1;
671
672 s->band_centre[0] = 80;
673 for (i = 1; i < NB_PROFILE_BANDS; i++) {
674 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
675 if (s->band_centre[i] < 1000) {
676 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
677 } else if (s->band_centre[i] < 5000) {
678 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
679 } else if (s->band_centre[i] < 15000) {
680 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
681 } else {
682 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
683 }
684 }
685
686 for (j = 0; j < SOLVE_SIZE; j++) {
687 for (k = 0; k < SOLVE_SIZE; k++) {
688 s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
689 for (m = 0; m < NB_PROFILE_BANDS; m++)
690 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
691 }
692 }
693
694 factor(s->matrix_a, SOLVE_SIZE);
695
696 i = 0;
697 for (j = 0; j < SOLVE_SIZE; j++)
698 for (k = 0; k < NB_PROFILE_BANDS; k++)
699 s->matrix_b[i++] = pow(k, j);
700
701 i = 0;
702 for (j = 0; j < NB_PROFILE_BANDS; j++)
703 for (k = 0; k < SOLVE_SIZE; k++)
704 s->matrix_c[i++] = pow(j, k);
705
706 s->window = av_calloc(s->window_length, sizeof(*s->window));
707 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
708 if (!s->window || !s->bin2band)
709 return AVERROR(ENOMEM);
710
711 sdiv = s->band_multiplier;
712 for (i = 0; i < s->bin_count; i++)
713 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
714
715 s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
716
717 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
718 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
719 if (!s->band_alpha || !s->band_beta)
720 return AVERROR(ENOMEM);
721
722 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
723 DeNoiseChannel *dnch = &s->dnch[ch];
724
725 switch (s->noise_type) {
726 case WHITE_NOISE:
727 for (i = 0; i < NB_PROFILE_BANDS; i++)
728 dnch->band_noise[i] = 0.;
729 break;
730 case VINYL_NOISE:
731 for (i = 0; i < NB_PROFILE_BANDS; i++)
732 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
733 break;
734 case SHELLAC_NOISE:
735 for (i = 0; i < NB_PROFILE_BANDS; i++)
736 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
737 break;
738 case CUSTOM_NOISE:
739 read_custom_noise(ctx, ch);
740 break;
741 default:
742 return AVERROR_BUG;
743 }
744
745 reduce_mean(dnch->band_noise);
746
747 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
748 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
749 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
750 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
751 dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
752 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
753 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
754 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
755 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
756 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
757 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
758 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
759 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
760 dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
761 dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size);
762 ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
763 if (ret < 0)
764 return ret;
765 ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
766 if (ret < 0)
767 return ret;
768 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
769 sizeof(*dnch->spread_function));
770
771 if (!dnch->amt ||
772 !dnch->band_amt ||
773 !dnch->band_excit ||
774 !dnch->gain ||
775 !dnch->smoothed_gain ||
776 !dnch->prior ||
777 !dnch->prior_band_excit ||
778 !dnch->clean_data ||
779 !dnch->noisy_data ||
780 !dnch->out_samples ||
781 !dnch->fft_in ||
782 !dnch->fft_out ||
783 !dnch->abs_var ||
784 !dnch->rel_var ||
785 !dnch->min_abs_var ||
786 !dnch->spread_function ||
787 !dnch->fft ||
788 !dnch->ifft)
789 return AVERROR(ENOMEM);
790 }
791
792 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
793 DeNoiseChannel *dnch = &s->dnch[ch];
794 double *prior_band_excit = dnch->prior_band_excit;
795 double min, max;
796 double p1, p2;
797
798 p1 = pow(0.1, 2.5 / sdiv);
799 p2 = pow(0.1, 1.0 / sdiv);
800 j = 0;
801 for (m = 0; m < s->number_of_bands; m++) {
802 for (n = 0; n < s->number_of_bands; n++) {
803 if (n < m) {
804 dnch->spread_function[j++] = pow(p2, m - n);
805 } else if (n > m) {
806 dnch->spread_function[j++] = pow(p1, n - m);
807 } else {
808 dnch->spread_function[j++] = 1.0;
809 }
810 }
811 }
812
813 for (m = 0; m < s->number_of_bands; m++) {
814 dnch->band_excit[m] = 0.0;
815 prior_band_excit[m] = 0.0;
816 }
817
818 for (m = 0; m < s->bin_count; m++)
819 dnch->band_excit[s->bin2band[m]] += 1.0;
820
821 j = 0;
822 for (m = 0; m < s->number_of_bands; m++) {
823 for (n = 0; n < s->number_of_bands; n++)
824 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
825 }
826
827 min = pow(0.1, 2.5);
828 max = pow(0.1, 1.0);
829 for (int i = 0; i < s->number_of_bands; i++) {
830 if (i < lrint(12.0 * sdiv)) {
831 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
832 } else {
833 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
834 }
835 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
836 }
837
838 for (int i = 0; i < s->buffer_length; i++)
839 dnch->out_samples[i] = 0;
840
841 j = 0;
842 for (int i = 0; i < s->number_of_bands; i++)
843 for (int k = 0; k < s->number_of_bands; k++)
844 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
845 }
846
847 j = 0;
848 sar = s->sample_advance / s->sample_rate;
849 for (int i = 0; i < s->bin_count; i++) {
850 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
851 double d6 = (i - 1) * s->sample_rate / s->fft_length;
852 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
853 s->band_alpha[j] = exp(-sar / d7);
854 s->band_beta[j] = 1.0 - s->band_alpha[j];
855 j = s->bin2band[i];
856 }
857 }
858
859 s->winframe = ff_get_audio_buffer(inlink, s->window_length);
860 if (!s->winframe)
861 return AVERROR(ENOMEM);
862
863 wscale = sqrt(8.0 / (9.0 * s->fft_length));
864 sum = 0.0;
865 for (int i = 0; i < s->window_length; i++) {
866 double d10 = sin(i * M_PI / s->window_length);
867 d10 *= wscale * d10;
868 s->window[i] = d10;
869 sum += d10 * d10;
870 }
871
872 s->window_weight = 0.5 * sum;
873 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
874 s->sample_floor = s->floor * exp(4.144600506562284);
875
876 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
877 DeNoiseChannel *dnch = &s->dnch[ch];
878
879 dnch->noise_reduction = s->noise_reduction;
880 dnch->noise_floor = s->noise_floor;
881 dnch->residual_floor = s->residual_floor;
882
883 set_parameters(s, dnch, 1, 1);
884 }
885
886 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
887 i = 0;
888 for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
889 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
890 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
891 i++;
892 s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
893 }
894 s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
895
896 return 0;
897 }
898
899 static void init_sample_noise(DeNoiseChannel *dnch)
900 {
901 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
902 dnch->noise_band_norm[i] = 0.0;
903 dnch->noise_band_avr[i] = 0.0;
904 dnch->noise_band_avi[i] = 0.0;
905 dnch->noise_band_var[i] = 0.0;
906 }
907 }
908
/*
 * Transform one windowed block of input and add its per-band statistics to
 * the channel's noise_band_norm/avr/avi/var accumulators.
 *
 * s    shared filter state (window, sizes, noise_band_edge[], sample_floor)
 * dnch channel whose fft buffers and accumulators are used
 * in   frame to read from; window_length samples of channel ch are consumed
 * ch   channel index into in->extended_data
 */
909 static void sample_noise_block(AudioFFTDeNoiseContext *s,
910 DeNoiseChannel *dnch,
911 AVFrame *in, int ch)
912 {
/* Both typed views alias the same memory; s->format decides which one is used. */
913 double *src_dbl = (double *)in->extended_data[ch];
914 float *src_flt = (float *)in->extended_data[ch];
915 double mag2, var = 0.0, avr = 0.0, avi = 0.0;
916 AVComplexDouble *fft_out_dbl = dnch->fft_out;
917 AVComplexFloat *fft_out_flt = dnch->fft_out;
918 double *fft_in_dbl = dnch->fft_in;
919 float *fft_in_flt = dnch->fft_in;
920 int edge, j, k, n, edgemax;
921
/* Window the input, scale it by 2^23 and zero-pad up to fft_length2. */
922 switch (s->format) {
923 case AV_SAMPLE_FMT_FLTP:
924 for (int i = 0; i < s->window_length; i++)
925 fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);
926
927 for (int i = s->window_length; i < s->fft_length2; i++)
928 fft_in_flt[i] = 0.f;
929 break;
930 case AV_SAMPLE_FMT_DBLP:
931 for (int i = 0; i < s->window_length; i++)
932 fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);
933
934 for (int i = s->window_length; i < s->fft_length2; i++)
935 fft_in_dbl[i] = 0.;
936 break;
937 }
938
939 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
940
/*
 * Walk the bins from the first band edge to edgemax. edge/j delimit the
 * current band, k is the index of the next edge, n is the bin being read.
 */
941 edge = s->noise_band_edge[0];
942 j = edge;
943 k = 0;
944 n = j;
945 edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
946 for (int i = j; i <= edgemax; i++) {
/* Crossing a band edge: flush the sums of the band just finished (if it was non-empty) and start the next one. */
947 if ((i == j) && (i < edgemax)) {
948 if (j > edge) {
949 dnch->noise_band_norm[k - 1] += j - edge;
950 dnch->noise_band_avr[k - 1] += avr;
951 dnch->noise_band_avi[k - 1] += avi;
952 dnch->noise_band_var[k - 1] += var;
953 }
954 k++;
955 edge = j;
956 j = s->noise_band_edge[k];
/* For the last band the end is moved one past its edge. */
957 if (k == NB_PROFILE_BANDS) {
958 j++;
959 }
960 var = 0.0;
961 avr = 0.0;
962 avi = 0.0;
963 }
964
965 switch (s->format) {
966 case AV_SAMPLE_FMT_FLTP:
967 avr += fft_out_flt[n].re;
968 avi += fft_out_flt[n].im;
969 mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
970 fft_out_flt[n].im * fft_out_flt[n].im;
971 break;
972 case AV_SAMPLE_FMT_DBLP:
973 avr += fft_out_dbl[n].re;
974 avi += fft_out_dbl[n].im;
975 mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
976 fft_out_dbl[n].im * fft_out_dbl[n].im;
977 break;
978 default:
979 av_assert2(0);
980 }
981
/* Bin power is clamped from below to sample_floor before it is accumulated. */
982 mag2 = fmax(mag2, s->sample_floor);
983
984 var += mag2;
985 n++;
986 }
987
/*
 * Flush the sums of the last band.
 * NOTE(review): k is still 0 here if the loop never crosses an edge
 * (noise_band_edge[0] >= edgemax), which would index element -1 — confirm
 * this cannot happen for supported sample rates.
 */
988 dnch->noise_band_norm[k - 1] += j - edge;
989 dnch->noise_band_avr[k - 1] += avr;
990 dnch->noise_band_avi[k - 1] += avi;
991 dnch->noise_band_var[k - 1] += var;
992 }
993
994 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
995 DeNoiseChannel *dnch,
996 double *sample_noise)
997 {
998 for (int i = 0; i < s->noise_band_count; i++) {
999 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
1000 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
1001 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
1002 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
1003 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
1004 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
1005 sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
1006 }
1007 if (s->noise_band_count < NB_PROFILE_BANDS) {
1008 for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
1009 sample_noise[i] = sample_noise[i - 1];
1010 }
1011 }
1012
1013 static void set_noise_profile(AVFilterContext *ctx,
1014 DeNoiseChannel *dnch,
1015 double *sample_noise)
1016 {
1017 AudioFFTDeNoiseContext *s = ctx->priv;
1018 double new_band_noise[NB_PROFILE_BANDS];
1019 double temp[NB_PROFILE_BANDS];
1020 double sum = 0.0;
1021
1022 for (int m = 0; m < NB_PROFILE_BANDS; m++)
1023 temp[m] = sample_noise[m];
1024
1025 for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
1026 sum = 0.0;
1027 for (int n = 0; n < NB_PROFILE_BANDS; n++)
1028 sum += s->matrix_b[i++] * temp[n];
1029 s->vector_b[m] = sum;
1030 }
1031 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
1032 for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
1033 sum = 0.0;
1034 for (int n = 0; n < SOLVE_SIZE; n++)
1035 sum += s->matrix_c[i++] * s->vector_b[n];
1036 temp[m] = sum;
1037 }
1038
1039 reduce_mean(temp);
1040
1041 av_log(ctx, AV_LOG_INFO, "bn=");
1042 for (int m = 0; m < NB_PROFILE_BANDS; m++) {
1043 new_band_noise[m] = temp[m];
1044 new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
1045 av_log(ctx, AV_LOG_INFO, "%f ", new_band_noise[m]);
1046 }
1047 av_log(ctx, AV_LOG_INFO, "\n");
1048 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1049 }
1050
1051 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1052 {
1053 AudioFFTDeNoiseContext *s = ctx->priv;
1054 AVFrame *in = arg;
1055 const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
1056 const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1057 const int window_length = s->window_length;
1058 const double *window = s->window;
1059
1060 for (int ch = start; ch < end; ch++) {
1061 DeNoiseChannel *dnch = &s->dnch[ch];
1062 const double *src_dbl = (const double *)in->extended_data[ch];
1063 const float *src_flt = (const float *)in->extended_data[ch];
1064 double *dst = dnch->out_samples;
1065 double *fft_in_dbl = dnch->fft_in;
1066 float *fft_in_flt = dnch->fft_in;
1067
1068 switch (s->format) {
1069 case AV_SAMPLE_FMT_FLTP:
1070 for (int m = 0; m < window_length; m++)
1071 fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
1072
1073 for (int m = window_length; m < s->fft_length2; m++)
1074 fft_in_flt[m] = 0.f;
1075 break;
1076 case AV_SAMPLE_FMT_DBLP:
1077 for (int m = 0; m < window_length; m++)
1078 fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
1079
1080 for (int m = window_length; m < s->fft_length2; m++)
1081 fft_in_dbl[m] = 0.;
1082 break;
1083 }
1084
1085 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
1086
1087 process_frame(ctx, s, dnch,
1088 dnch->prior,
1089 dnch->prior_band_excit,
1090 s->track_noise);
1091
1092 dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);
1093
1094 switch (s->format) {
1095 case AV_SAMPLE_FMT_FLTP:
1096 for (int m = 0; m < window_length; m++)
1097 dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
1098 break;
1099 case AV_SAMPLE_FMT_DBLP:
1100 for (int m = 0; m < window_length; m++)
1101 dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
1102 break;
1103 }
1104 }
1105
1106 return 0;
1107 }
1108
/*
 * Process one block of input samples and send one output frame downstream.
 *
 * Steps, in order:
 *  1. shift each channel of s->winframe by sample_advance samples and append
 *     the new input, zero-filling when the input is shorter than
 *     sample_advance;
 *  2. if noise tracking is on, link the per-channel noise floors as selected
 *     by noise_floor_link and refresh parameters where the floor changed;
 *  3. handle the noise-sampling state (start / accumulate / stop);
 *  4. run filter_channel() over the window frame via ff_filter_execute();
 *  5. write the first nb_samples of the selected signal to the output frame
 *     and shift each channel's out_samples buffer by sample_advance.
 *
 * The input frame is always consumed: it is either reused as the output frame
 * or freed. Returns the result of ff_filter_frame(), AVERROR(ENOMEM) when no
 * output buffer could be allocated, or AVERROR_BUG for an unknown output mode.
 */
1109 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1110 {
1111 AVFilterContext *ctx = inlink->dst;
1112 AVFilterLink *outlink = ctx->outputs[0];
1113 AudioFFTDeNoiseContext *s = ctx->priv;
/* A disabled filter behaves as if the input output mode had been selected. */
1114 const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1115 const int offset = s->window_length - s->sample_advance;
1116 AVFrame *out;
1117
/* Slide the analysis window: drop the oldest samples, append the new ones, zero any remainder. */
1118 for (int ch = 0; ch < s->channels; ch++) {
1119 uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
1120
1121 memmove(src, src + s->sample_advance * s->sample_size,
1122 offset * s->sample_size);
1123 memcpy(src + offset * s->sample_size, in->extended_data[ch],
1124 in->nb_samples * s->sample_size);
1125 memset(src + s->sample_size * (offset + in->nb_samples), 0,
1126 (s->sample_advance - in->nb_samples) * s->sample_size);
1127 }
1128
/* Noise tracking: gather min / max / average floor over all channels, then apply the link mode. */
1129 if (s->track_noise) {
1130 double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1131
1132 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1133 DeNoiseChannel *dnch = &s->dnch[ch];
1134
1135 average += dnch->noise_floor;
1136 max = fmax(max, dnch->noise_floor);
1137 min = fmin(min, dnch->noise_floor);
1138 }
1139
1140 average /= inlink->ch_layout.nb_channels;
1141
1142 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1143 DeNoiseChannel *dnch = &s->dnch[ch];
1144
1145 switch (s->noise_floor_link) {
1146 case MIN_LINK: dnch->noise_floor = min; break;
1147 case MAX_LINK: dnch->noise_floor = max; break;
1148 case AVERAGE_LINK: dnch->noise_floor = average; break;
1149 case NONE_LINK:
1150 default:
1151 break;
1152 }
1153
/* Only recompute parameters for channels whose floor actually moved. */
1154 if (dnch->noise_floor != dnch->last_noise_floor)
1155 set_parameters(s, dnch, 1, 0);
1156 }
1157 }
1158
/* Start request: clear the accumulators and begin sampling with this block. */
1159 if (s->sample_noise_mode == SAMPLE_START) {
1160 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1161 DeNoiseChannel *dnch = &s->dnch[ch];
1162
1163 init_sample_noise(dnch);
1164 }
1165 s->sample_noise_mode = SAMPLE_NONE;
1166 s->sample_noise = 1;
1167 s->sample_noise_blocks = 0;
1168 }
1169
/* While sampling, every block adds the current window frame to the statistics. */
1170 if (s->sample_noise) {
1171 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1172 DeNoiseChannel *dnch = &s->dnch[ch];
1173
1174 sample_noise_block(s, dnch, s->winframe, ch);
1175 }
1176 s->sample_noise_blocks++;
1177 }
1178
/* Stop request: turn the statistics into a profile, unless no block was sampled. */
1179 if (s->sample_noise_mode == SAMPLE_STOP) {
1180 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1181 DeNoiseChannel *dnch = &s->dnch[ch];
1182 double sample_noise[NB_PROFILE_BANDS];
1183
1184 if (s->sample_noise_blocks <= 0)
1185 break;
1186 finish_sample_noise(s, dnch, sample_noise);
1187 set_noise_profile(ctx, dnch, sample_noise);
1188 set_parameters(s, dnch, 1, 1);
1189 }
1190 s->sample_noise = 0;
1191 s->sample_noise_blocks = 0;
1192 s->sample_noise_mode = SAMPLE_NONE;
1193 }
1194
/* Run the per-channel processing with at most one job per channel. */
1195 ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1196 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1197
/* Reuse the input frame for output when it is writable, otherwise allocate a new one. */
1198 if (av_frame_is_writable(in)) {
1199 out = in;
1200 } else {
1201 out = ff_get_audio_buffer(outlink, in->nb_samples);
1202 if (!out) {
1203 av_frame_free(&in);
1204 return AVERROR(ENOMEM);
1205 }
1206
1207 av_frame_copy_props(out, in);
1208 }
1209
1210 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1211 DeNoiseChannel *dnch = &s->dnch[ch];
/* src is the processed signal (out_samples); orig_* is the unprocessed window frame. */
1212 double *src = dnch->out_samples;
1213 const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
1214 const float *orig_flt = (const float *)s->winframe->extended_data[ch];
1215 double *dst_dbl = (double *)out->extended_data[ch];
1216 float *dst_flt = (float *)out->extended_data[ch];
1217
1218 switch (output_mode) {
/* IN_MODE: output the unprocessed samples from the start of the window frame. */
1219 case IN_MODE:
1220 switch (s->format) {
1221 case AV_SAMPLE_FMT_FLTP:
1222 for (int m = 0; m < out->nb_samples; m++)
1223 dst_flt[m] = orig_flt[m];
1224 break;
1225 case AV_SAMPLE_FMT_DBLP:
1226 for (int m = 0; m < out->nb_samples; m++)
1227 dst_dbl[m] = orig_dbl[m];
1228 break;
1229 }
1230 break;
/* OUT_MODE: output the processed samples. */
1231 case OUT_MODE:
1232 switch (s->format) {
1233 case AV_SAMPLE_FMT_FLTP:
1234 for (int m = 0; m < out->nb_samples; m++)
1235 dst_flt[m] = src[m];
1236 break;
1237 case AV_SAMPLE_FMT_DBLP:
1238 for (int m = 0; m < out->nb_samples; m++)
1239 dst_dbl[m] = src[m];
1240 break;
1241 }
1242 break;
/* NOISE_MODE: output the difference between unprocessed and processed samples. */
1243 case NOISE_MODE:
1244 switch (s->format) {
1245 case AV_SAMPLE_FMT_FLTP:
1246 for (int m = 0; m < out->nb_samples; m++)
1247 dst_flt[m] = orig_flt[m] - src[m];
1248 break;
1249 case AV_SAMPLE_FMT_DBLP:
1250 for (int m = 0; m < out->nb_samples; m++)
1251 dst_dbl[m] = orig_dbl[m] - src[m];
1252 break;
1253 }
1254 break;
/* Unknown mode: release both frames (only once if they are the same) and report a bug. */
1255 default:
1256 if (in != out)
1257 av_frame_free(&in);
1258 av_frame_free(&out);
1259 return AVERROR_BUG;
1260 }
1261
/* Advance the accumulation buffer: drop what was just output and zero the freed tail. */
1262 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1263 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1264 }
1265
1266 if (out != in)
1267 av_frame_free(&in);
1268 return ff_filter_frame(outlink, out);
1269 }
1270
1271 static int activate(AVFilterContext *ctx)
1272 {
1273 AVFilterLink *inlink = ctx->inputs[0];
1274 AVFilterLink *outlink = ctx->outputs[0];
1275 AudioFFTDeNoiseContext *s = ctx->priv;
1276 AVFrame *in = NULL;
1277 int ret;
1278
1279 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1280
1281 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1282 if (ret < 0)
1283 return ret;
1284 if (ret > 0)
1285 return output_frame(inlink, in);
1286
1287 if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1288 ff_filter_set_ready(ctx, 10);
1289 return 0;
1290 }
1291
1292 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1293 FF_FILTER_FORWARD_WANTED(outlink, inlink);
1294
1295 return FFERROR_NOT_READY;
1296 }
1297
1298 static av_cold void uninit(AVFilterContext *ctx)
1299 {
1300 AudioFFTDeNoiseContext *s = ctx->priv;
1301
1302 av_freep(&s->window);
1303 av_freep(&s->bin2band);
1304 av_freep(&s->band_alpha);
1305 av_freep(&s->band_beta);
1306 av_frame_free(&s->winframe);
1307
1308 if (s->dnch) {
1309 for (int ch = 0; ch < s->channels; ch++) {
1310 DeNoiseChannel *dnch = &s->dnch[ch];
1311 av_freep(&dnch->amt);
1312 av_freep(&dnch->band_amt);
1313 av_freep(&dnch->band_excit);
1314 av_freep(&dnch->gain);
1315 av_freep(&dnch->smoothed_gain);
1316 av_freep(&dnch->prior);
1317 av_freep(&dnch->prior_band_excit);
1318 av_freep(&dnch->clean_data);
1319 av_freep(&dnch->noisy_data);
1320 av_freep(&dnch->out_samples);
1321 av_freep(&dnch->spread_function);
1322 av_freep(&dnch->abs_var);
1323 av_freep(&dnch->rel_var);
1324 av_freep(&dnch->min_abs_var);
1325 av_freep(&dnch->fft_in);
1326 av_freep(&dnch->fft_out);
1327 av_tx_uninit(&dnch->fft);
1328 av_tx_uninit(&dnch->ifft);
1329 }
1330 av_freep(&s->dnch);
1331 }
1332 }
1333
1334 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1335 char *res, int res_len, int flags)
1336 {
1337 AudioFFTDeNoiseContext *s = ctx->priv;
1338 int ret = 0;
1339
1340 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1341 if (ret < 0)
1342 return ret;
1343
1344 if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1345 return 0;
1346
1347 for (int ch = 0; ch < s->channels; ch++) {
1348 DeNoiseChannel *dnch = &s->dnch[ch];
1349
1350 dnch->noise_reduction = s->noise_reduction;
1351 dnch->noise_floor = s->noise_floor;
1352 dnch->residual_floor = s->residual_floor;
1353
1354 set_parameters(s, dnch, 1, 1);
1355 }
1356
1357 return 0;
1358 }
1359
/* Input pad table: a single audio pad named "default" whose config_props callback is config_input. */
1360 static const AVFilterPad inputs[] = {
1361 {
1362 .name = "default",
1363 .type = AVMEDIA_TYPE_AUDIO,
1364 .config_props = config_input,
1365 },
1366 };
1367
/*
 * Filter definition for "afftdn".
 *
 * Declares internal timeline support and slice threading, uses the input pad
 * table above together with the default audio output pad, accepts planar
 * float and planar double samples, and wires up the activate, uninit and
 * process_command callbacks defined in this file.
 */
1368 const FFFilter ff_af_afftdn = {
1369 .p.name = "afftdn",
1370 .p.description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1371 .p.priv_class = &afftdn_class,
1372 .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1373 AVFILTER_FLAG_SLICE_THREADS,
1374 .priv_size = sizeof(AudioFFTDeNoiseContext),
1375 .activate = activate,
1376 .uninit = uninit,
1377 FILTER_INPUTS(inputs),
1378 FILTER_OUTPUTS(ff_audio_default_filterpad),
1379 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
1380 .process_command = process_command,
1381 };
1382