Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2018 The FFmpeg Project |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include <float.h> |
22 |
|
|
|
23 |
|
|
#include "libavutil/avassert.h" |
24 |
|
|
#include "libavutil/avstring.h" |
25 |
|
|
#include "libavutil/channel_layout.h" |
26 |
|
|
#include "libavutil/mem.h" |
27 |
|
|
#include "libavutil/opt.h" |
28 |
|
|
#include "libavutil/tx.h" |
29 |
|
|
#include "avfilter.h" |
30 |
|
|
#include "audio.h" |
31 |
|
|
#include "filters.h" |
32 |
|
|
|
33 |
|
|
#define C (M_LN10 * 0.1) |
34 |
|
|
#define SOLVE_SIZE (5) |
35 |
|
|
#define NB_PROFILE_BANDS (15) |
36 |
|
|
|
37 |
|
|
/* Values of the "sample_noise" option (unit "sample"). */
enum SampleNoiseModes {
    SAMPLE_NONE  = 0, /* option constant "none" */
    SAMPLE_START = 1, /* option constants "start" and "begin" */
    SAMPLE_STOP  = 2, /* option constants "stop" and "end" */
    NB_SAMPLEMODES    /* number of modes, used as the option's upper bound + 1 */
};
43 |
|
|
|
44 |
|
|
/* Values of the "output_mode" option (unit "mode"). */
enum OutModes {
    IN_MODE    = 0, /* option constants "input" and "i" */
    OUT_MODE   = 1, /* option constants "output" and "o"; the option default */
    NOISE_MODE = 2, /* option constants "noise" and "n" */
    NB_MODES        /* number of modes, used as the option's upper bound + 1 */
};
50 |
|
|
|
51 |
|
|
/* Values of the "noise_link" option (unit "link"). */
enum NoiseLinkType {
    NONE_LINK    = 0, /* option constant "none" */
    MIN_LINK     = 1, /* option constant "min"; the option default */
    MAX_LINK     = 2, /* option constant "max" */
    AVERAGE_LINK = 3, /* option constant "average" */
    NB_LINK           /* number of link types, used as the option's upper bound + 1 */
};
58 |
|
|
|
59 |
|
|
/* Values of the "noise_type" option (unit "type"). */
enum NoiseType {
    WHITE_NOISE   = 0, /* option constants "white" and "w"; the option default */
    VINYL_NOISE   = 1, /* option constants "vinyl" and "v" */
    SHELLAC_NOISE = 2, /* option constants "shellac" and "s" */
    CUSTOM_NOISE  = 3, /* option constants "custom" and "c" */
    NB_NOISE           /* number of noise types, used as the option's upper bound + 1 */
};
66 |
|
|
|
67 |
|
|
typedef struct DeNoiseChannel { |
68 |
|
|
double band_noise[NB_PROFILE_BANDS]; |
69 |
|
|
double noise_band_auto_var[NB_PROFILE_BANDS]; |
70 |
|
|
double noise_band_sample[NB_PROFILE_BANDS]; |
71 |
|
|
double *amt; |
72 |
|
|
double *band_amt; |
73 |
|
|
double *band_excit; |
74 |
|
|
double *gain; |
75 |
|
|
double *smoothed_gain; |
76 |
|
|
double *prior; |
77 |
|
|
double *prior_band_excit; |
78 |
|
|
double *clean_data; |
79 |
|
|
double *noisy_data; |
80 |
|
|
double *out_samples; |
81 |
|
|
double *spread_function; |
82 |
|
|
double *abs_var; |
83 |
|
|
double *rel_var; |
84 |
|
|
double *min_abs_var; |
85 |
|
|
void *fft_in; |
86 |
|
|
void *fft_out; |
87 |
|
|
AVTXContext *fft, *ifft; |
88 |
|
|
av_tx_fn tx_fn, itx_fn; |
89 |
|
|
|
90 |
|
|
double noise_band_norm[NB_PROFILE_BANDS]; |
91 |
|
|
double noise_band_avr[NB_PROFILE_BANDS]; |
92 |
|
|
double noise_band_avi[NB_PROFILE_BANDS]; |
93 |
|
|
double noise_band_var[NB_PROFILE_BANDS]; |
94 |
|
|
|
95 |
|
|
double noise_reduction; |
96 |
|
|
double last_noise_reduction; |
97 |
|
|
double noise_floor; |
98 |
|
|
double last_noise_floor; |
99 |
|
|
double residual_floor; |
100 |
|
|
double last_residual_floor; |
101 |
|
|
double max_gain; |
102 |
|
|
double max_var; |
103 |
|
|
double gain_scale; |
104 |
|
|
} DeNoiseChannel; |
105 |
|
|
|
106 |
|
|
typedef struct AudioFFTDeNoiseContext { |
107 |
|
|
const AVClass *class; |
108 |
|
|
|
109 |
|
|
int format; |
110 |
|
|
size_t sample_size; |
111 |
|
|
size_t complex_sample_size; |
112 |
|
|
|
113 |
|
|
float noise_reduction; |
114 |
|
|
float noise_floor; |
115 |
|
|
int noise_type; |
116 |
|
|
char *band_noise_str; |
117 |
|
|
float residual_floor; |
118 |
|
|
int track_noise; |
119 |
|
|
int track_residual; |
120 |
|
|
int output_mode; |
121 |
|
|
int noise_floor_link; |
122 |
|
|
float ratio; |
123 |
|
|
int gain_smooth; |
124 |
|
|
float band_multiplier; |
125 |
|
|
float floor_offset; |
126 |
|
|
|
127 |
|
|
int channels; |
128 |
|
|
int sample_noise; |
129 |
|
|
int sample_noise_blocks; |
130 |
|
|
int sample_noise_mode; |
131 |
|
|
float sample_rate; |
132 |
|
|
int buffer_length; |
133 |
|
|
int fft_length; |
134 |
|
|
int fft_length2; |
135 |
|
|
int bin_count; |
136 |
|
|
int window_length; |
137 |
|
|
int sample_advance; |
138 |
|
|
int number_of_bands; |
139 |
|
|
|
140 |
|
|
int band_centre[NB_PROFILE_BANDS]; |
141 |
|
|
|
142 |
|
|
int *bin2band; |
143 |
|
|
double *window; |
144 |
|
|
double *band_alpha; |
145 |
|
|
double *band_beta; |
146 |
|
|
|
147 |
|
|
DeNoiseChannel *dnch; |
148 |
|
|
|
149 |
|
|
AVFrame *winframe; |
150 |
|
|
|
151 |
|
|
double window_weight; |
152 |
|
|
double floor; |
153 |
|
|
double sample_floor; |
154 |
|
|
|
155 |
|
|
int noise_band_edge[NB_PROFILE_BANDS + 2]; |
156 |
|
|
int noise_band_count; |
157 |
|
|
double matrix_a[SOLVE_SIZE * SOLVE_SIZE]; |
158 |
|
|
double vector_b[SOLVE_SIZE]; |
159 |
|
|
double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS]; |
160 |
|
|
double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS]; |
161 |
|
|
} AudioFFTDeNoiseContext; |
162 |
|
|
|
163 |
|
|
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x) |
164 |
|
|
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
165 |
|
|
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM |
166 |
|
|
|
167 |
|
|
static const AVOption afftdn_options[] = { |
168 |
|
|
{ "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR }, |
169 |
|
|
{ "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR }, |
170 |
|
|
{ "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR }, |
171 |
|
|
{ "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR }, |
172 |
|
|
{ "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" }, |
173 |
|
|
{ "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" }, |
174 |
|
|
{ "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" }, |
175 |
|
|
{ "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" }, |
176 |
|
|
{ "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" }, |
177 |
|
|
{ "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" }, |
178 |
|
|
{ "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" }, |
179 |
|
|
{ "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" }, |
180 |
|
|
{ "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" }, |
181 |
|
|
{ "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" }, |
182 |
|
|
{ "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF }, |
183 |
|
|
{ "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF }, |
184 |
|
|
{ "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR }, |
185 |
|
|
{ "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR }, |
186 |
|
|
{ "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
187 |
|
|
{ "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
188 |
|
|
{ "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
189 |
|
|
{ "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
190 |
|
|
{ "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" }, |
191 |
|
|
{ "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" }, |
192 |
|
|
{ "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" }, |
193 |
|
|
{ "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" }, |
194 |
|
|
{ "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" }, |
195 |
|
|
{ "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" }, |
196 |
|
|
{ "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" }, |
197 |
|
|
{ "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" }, |
198 |
|
|
{ "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR }, |
199 |
|
|
{ "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR }, |
200 |
|
|
{ "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR }, |
201 |
|
|
{ "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR }, |
202 |
|
|
{ "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" }, |
203 |
|
|
{ "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" }, |
204 |
|
|
{ "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" }, |
205 |
|
|
{ "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" }, |
206 |
|
|
{ "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" }, |
207 |
|
|
{ "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" }, |
208 |
|
|
{ "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF }, |
209 |
|
|
{ "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF }, |
210 |
|
|
{ "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" }, |
211 |
|
|
{ "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" }, |
212 |
|
|
{ "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" }, |
213 |
|
|
{ "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" }, |
214 |
|
|
{ "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" }, |
215 |
|
|
{ "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" }, |
216 |
|
|
{ "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" }, |
217 |
|
|
{ "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR }, |
218 |
|
|
{ "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR }, |
219 |
|
|
{ NULL } |
220 |
|
|
}; |
221 |
|
|
|
222 |
|
|
AVFILTER_DEFINE_CLASS(afftdn); |
223 |
|
|
|
224 |
|
✗ |
/**
 * Evaluate a preset noise-profile curve at one profile band.
 *
 * Each term has the form 10 * log10(1 + r * r), where r relates the band's
 * centre value to one of the shaping constants.
 *
 * @param s    filter context; only s->band_centre[band] is read
 * @param band index into s->band_centre
 * @param a    constant of the first term, r = a / centre (subtracted)
 * @param b    constant of the second term, r = b / centre (added)
 * @param c    constant of the third term, r = centre / c (subtracted)
 * @return -term(a) + term(b) - term(c)
 */
static double get_band_noise(AudioFFTDeNoiseContext *s,
                             int band, double a,
                             double b, double c)
{
    const int centre = s->band_centre[band];
    const double ratio_a = a / centre;
    const double ratio_b = b / centre;
    const double ratio_c = centre / c;
    const double term_a = 10.0 * log(1.0 + ratio_a * ratio_a) / M_LN10;
    const double term_b = 10.0 * log(1.0 + ratio_b * ratio_b) / M_LN10;
    const double term_c = 10.0 * log(1.0 + ratio_c * ratio_c) / M_LN10;

    return -term_a + term_b - term_c;
}
239 |
|
|
|
240 |
|
✗ |
/**
 * Factor a square matrix in place by elimination without pivoting.
 *
 * The element with first index r and second index c is stored at
 * array[r + c * size]. For every pivot p, each entry array[r + p * size]
 * with r > p is replaced by its multiplier (entry / diagonal), and that
 * multiple of the pivot's entries is subtracted from the remaining entries
 * of r. The result is the form solve() expects.
 *
 * No check is made for a zero diagonal entry.
 *
 * @param array size * size matrix, overwritten with the factored form
 * @param size  dimension of the matrix
 */
static void factor(double *array, int size)
{
    for (int pivot = 0; pivot < size - 1; pivot++) {
        double *pivot_line = array + pivot * size;
        /* The diagonal entry is not touched while this pivot is processed. */
        const double diag = pivot_line[pivot];

        for (int row = pivot + 1; row < size; row++) {
            const double mult = pivot_line[row] / diag;

            pivot_line[row] = mult;
            for (int col = pivot + 1; col < size; col++)
                array[row + col * size] -= mult * array[pivot + col * size];
        }
    }
}
253 |
|
|
|
254 |
|
✗ |
/**
 * Solve a linear system whose matrix was prepared by factor().
 *
 * First the stored multipliers are applied to the right-hand side
 * (forward pass), then the unknowns are resolved from the last one
 * to the first (backward pass).
 *
 * @param matrix factored size * size matrix, indexed as [r + c * size]; not modified
 * @param vector right-hand side on entry, solution on return
 * @param size   dimension of the system
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Forward pass: vector[pivot] is final once its turn comes. */
    for (int pivot = 0; pivot < last; pivot++) {
        const double known = vector[pivot];

        for (int row = pivot + 1; row < size; row++)
            vector[row] -= matrix[row + pivot * size] * known;
    }

    vector[last] /= matrix[size * size - 1];

    /* Backward pass. */
    for (int row = size - 2; row >= 0; row--) {
        double acc = vector[row];

        for (int col = row + 1; col < size; col++)
            acc -= matrix[row + col * size] * vector[col];
        vector[row] = acc / matrix[row + row * size];
    }
}
272 |
|
|
|
273 |
|
✗ |
static double process_get_band_noise(AudioFFTDeNoiseContext *s, |
274 |
|
|
DeNoiseChannel *dnch, |
275 |
|
|
int band) |
276 |
|
|
{ |
277 |
|
|
double product, sum, f; |
278 |
|
✗ |
int i = 0; |
279 |
|
|
|
280 |
|
✗ |
if (band < NB_PROFILE_BANDS) |
281 |
|
✗ |
return dnch->band_noise[band]; |
282 |
|
|
|
283 |
|
✗ |
for (int j = 0; j < SOLVE_SIZE; j++) { |
284 |
|
✗ |
sum = 0.0; |
285 |
|
✗ |
for (int k = 0; k < NB_PROFILE_BANDS; k++) |
286 |
|
✗ |
sum += s->matrix_b[i++] * dnch->band_noise[k]; |
287 |
|
✗ |
s->vector_b[j] = sum; |
288 |
|
|
} |
289 |
|
|
|
290 |
|
✗ |
solve(s->matrix_a, s->vector_b, SOLVE_SIZE); |
291 |
|
✗ |
f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1]; |
292 |
|
✗ |
f = 15.0 + log(f / 1.5) / log(1.5); |
293 |
|
✗ |
sum = 0.0; |
294 |
|
✗ |
product = 1.0; |
295 |
|
✗ |
for (int j = 0; j < SOLVE_SIZE; j++) { |
296 |
|
✗ |
sum += product * s->vector_b[j]; |
297 |
|
✗ |
product *= f; |
298 |
|
|
} |
299 |
|
|
|
300 |
|
✗ |
return sum; |
301 |
|
|
} |
302 |
|
|
|
303 |
|
✗ |
/**
 * Map a gain through a rational limiting curve controlled by b.
 *
 * @param a gain to limit
 * @param b limit parameter
 * @return (b*a - 1) / (b + a - 2) when a > 1,
 *         (b*a - 2*a + 1) / (b - a) when a < 1,
 *         1.0 otherwise (a equal to 1, or a not comparable)
 */
static double limit_gain(double a, double b)
{
    return a > 1.0 ? (b * a - 1.0) / (b + a - 2.0)
         : a < 1.0 ? (b * a - 2.0 * a + 1.0) / (b - a)
         :           1.0;
}
311 |
|
|
|
312 |
|
✗ |
/**
 * Compute the geometric and arithmetic means of the spectrum values that
 * exceed a floor.
 *
 * Values not greater than floor are ignored. When no value qualifies the
 * divisor is forced to 1, which yields *rnum = exp(0) = 1 and *rden = 0.
 *
 * @param s        filter context (not read)
 * @param spectral input values
 * @param floor    threshold a value must exceed to be counted
 * @param len      number of entries in spectral
 * @param rnum     receives exp(mean of log(v)) over the counted values
 * @param rden     receives the mean of the counted values
 */
static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
                              double floor, int len, double *rnum, double *rden)
{
    double log_sum = 0., lin_sum = 0.;
    int counted = 0;

    for (int n = 0; n < len; n++) {
        const double v = spectral[n];

        if (!(v > floor))
            continue;

        log_sum += log(v);
        lin_sum += v;
        counted++;
    }

    if (counted < 1)
        counted = 1;

    *rnum = exp(log_sum / counted);
    *rden = lin_sum / counted;
}
337 |
|
|
|
338 |
|
|
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var); |
339 |
|
|
|
340 |
|
✗ |
static double floor_offset(const double *S, int size, double mean) |
341 |
|
|
{ |
342 |
|
✗ |
double offset = 0.0; |
343 |
|
|
|
344 |
|
✗ |
for (int n = 0; n < size; n++) { |
345 |
|
✗ |
const double p = S[n] - mean; |
346 |
|
|
|
347 |
|
✗ |
offset = fmax(offset, fabs(p)); |
348 |
|
|
} |
349 |
|
|
|
350 |
|
✗ |
return offset / mean; |
351 |
|
|
} |
352 |
|
|
|
353 |
|
✗ |
static void process_frame(AVFilterContext *ctx, |
354 |
|
|
AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, |
355 |
|
|
double *prior, double *prior_band_excit, int track_noise) |
356 |
|
|
{ |
357 |
|
✗ |
AVFilterLink *outlink = ctx->outputs[0]; |
358 |
|
✗ |
FilterLink *outl = ff_filter_link(outlink); |
359 |
|
✗ |
const double *abs_var = dnch->abs_var; |
360 |
|
✗ |
const double ratio = outl->frame_count_out ? s->ratio : 1.0; |
361 |
|
✗ |
const double rratio = 1. - ratio; |
362 |
|
✗ |
const int *bin2band = s->bin2band; |
363 |
|
✗ |
double *noisy_data = dnch->noisy_data; |
364 |
|
✗ |
double *band_excit = dnch->band_excit; |
365 |
|
✗ |
double *band_amt = dnch->band_amt; |
366 |
|
✗ |
double *smoothed_gain = dnch->smoothed_gain; |
367 |
|
✗ |
AVComplexDouble *fft_data_dbl = dnch->fft_out; |
368 |
|
✗ |
AVComplexFloat *fft_data_flt = dnch->fft_out; |
369 |
|
✗ |
double *gain = dnch->gain; |
370 |
|
|
|
371 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
372 |
|
|
double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var; |
373 |
|
|
|
374 |
|
✗ |
switch (s->format) { |
375 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
376 |
|
✗ |
noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im); |
377 |
|
✗ |
break; |
378 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
379 |
|
✗ |
noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im); |
380 |
|
✗ |
break; |
381 |
|
✗ |
default: |
382 |
|
✗ |
av_assert0(0); |
383 |
|
|
} |
384 |
|
|
|
385 |
|
✗ |
power = mag * mag; |
386 |
|
✗ |
mag_abs_var = power / abs_var[i]; |
387 |
|
✗ |
new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0); |
388 |
|
✗ |
new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var); |
389 |
|
✗ |
sqr_new_gain = new_gain * new_gain; |
390 |
|
✗ |
prior[i] = mag_abs_var * sqr_new_gain; |
391 |
|
✗ |
dnch->clean_data[i] = power * sqr_new_gain; |
392 |
|
✗ |
gain[i] = new_gain; |
393 |
|
|
} |
394 |
|
|
|
395 |
|
✗ |
if (track_noise) { |
396 |
|
|
double flatness, num, den; |
397 |
|
|
|
398 |
|
✗ |
spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den); |
399 |
|
|
|
400 |
|
✗ |
flatness = num / den; |
401 |
|
✗ |
if (flatness > 0.8) { |
402 |
|
✗ |
const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den); |
403 |
|
✗ |
const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.); |
404 |
|
|
|
405 |
|
✗ |
dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9; |
406 |
|
✗ |
set_parameters(s, dnch, 1, 1); |
407 |
|
|
} |
408 |
|
|
} |
409 |
|
|
|
410 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
411 |
|
✗ |
band_excit[i] = 0.0; |
412 |
|
✗ |
band_amt[i] = 0.0; |
413 |
|
|
} |
414 |
|
|
|
415 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) |
416 |
|
✗ |
band_excit[bin2band[i]] += dnch->clean_data[i]; |
417 |
|
|
|
418 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
419 |
|
✗ |
band_excit[i] = fmax(band_excit[i], |
420 |
|
✗ |
s->band_alpha[i] * band_excit[i] + |
421 |
|
✗ |
s->band_beta[i] * prior_band_excit[i]); |
422 |
|
✗ |
prior_band_excit[i] = band_excit[i]; |
423 |
|
|
} |
424 |
|
|
|
425 |
|
✗ |
for (int j = 0, i = 0; j < s->number_of_bands; j++) { |
426 |
|
✗ |
for (int k = 0; k < s->number_of_bands; k++) { |
427 |
|
✗ |
band_amt[j] += dnch->spread_function[i++] * band_excit[k]; |
428 |
|
|
} |
429 |
|
|
} |
430 |
|
|
|
431 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) |
432 |
|
✗ |
dnch->amt[i] = band_amt[bin2band[i]]; |
433 |
|
|
|
434 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
435 |
|
✗ |
if (dnch->amt[i] > abs_var[i]) { |
436 |
|
✗ |
gain[i] = 1.0; |
437 |
|
✗ |
} else if (dnch->amt[i] > dnch->min_abs_var[i]) { |
438 |
|
✗ |
const double limit = sqrt(abs_var[i] / dnch->amt[i]); |
439 |
|
|
|
440 |
|
✗ |
gain[i] = limit_gain(gain[i], limit); |
441 |
|
|
} else { |
442 |
|
✗ |
gain[i] = limit_gain(gain[i], dnch->max_gain); |
443 |
|
|
} |
444 |
|
|
} |
445 |
|
|
|
446 |
|
✗ |
memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain)); |
447 |
|
✗ |
if (s->gain_smooth > 0) { |
448 |
|
✗ |
const int r = s->gain_smooth; |
449 |
|
|
|
450 |
|
✗ |
for (int i = r; i < s->bin_count - r; i++) { |
451 |
|
✗ |
const double gc = gain[i]; |
452 |
|
✗ |
double num = 0., den = 0.; |
453 |
|
|
|
454 |
|
✗ |
for (int j = -r; j <= r; j++) { |
455 |
|
✗ |
const double g = gain[i + j]; |
456 |
|
✗ |
const double d = 1. - fabs(g - gc); |
457 |
|
|
|
458 |
|
✗ |
num += g * d; |
459 |
|
✗ |
den += d; |
460 |
|
|
} |
461 |
|
|
|
462 |
|
✗ |
smoothed_gain[i] = num / den; |
463 |
|
|
} |
464 |
|
|
} |
465 |
|
|
|
466 |
|
✗ |
switch (s->format) { |
467 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
468 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
469 |
|
✗ |
const float new_gain = smoothed_gain[i]; |
470 |
|
|
|
471 |
|
✗ |
fft_data_flt[i].re *= new_gain; |
472 |
|
✗ |
fft_data_flt[i].im *= new_gain; |
473 |
|
|
} |
474 |
|
✗ |
break; |
475 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
476 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
477 |
|
✗ |
const double new_gain = smoothed_gain[i]; |
478 |
|
|
|
479 |
|
✗ |
fft_data_dbl[i].re *= new_gain; |
480 |
|
✗ |
fft_data_dbl[i].im *= new_gain; |
481 |
|
|
} |
482 |
|
✗ |
break; |
483 |
|
|
} |
484 |
|
✗ |
} |
485 |
|
|
|
486 |
|
✗ |
static double freq2bark(double x) |
487 |
|
|
{ |
488 |
|
✗ |
double d = x / 7500.0; |
489 |
|
|
|
490 |
|
✗ |
return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d); |
491 |
|
|
} |
492 |
|
|
|
493 |
|
✗ |
/**
 * Look up the centre value of a profile band.
 *
 * @param s    filter context
 * @param band index into s->band_centre, or -1 for a virtual band
 *             below the first one
 * @return s->band_centre[band], or s->band_centre[0] / 1.5 rounded to the
 *         nearest integer when band is -1
 */
static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
{
    return band == -1 ? lrint(s->band_centre[0] / 1.5)
                      : s->band_centre[band];
}
500 |
|
|
|
501 |
|
✗ |
/**
 * Compute the edge value of a profile band, capped at half the sample rate.
 *
 * @param s    filter context
 * @param band band index; NB_PROFILE_BANDS selects the upper edge of the
 *             last band (last centre * 1.224745), any other value the
 *             lower edge of that band (centre / 1.224745)
 * @return the rounded edge, or s->sample_rate / 2 if that is smaller
 */
static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
{
    int edge;

    if (band != NB_PROFILE_BANDS)
        edge = lrint(s->band_centre[band] / 1.224745);
    else
        edge = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);

    return FFMIN(edge, s->sample_rate / 2);
}
513 |
|
|
|
514 |
|
✗ |
static void set_band_parameters(AudioFFTDeNoiseContext *s, |
515 |
|
|
DeNoiseChannel *dnch) |
516 |
|
|
{ |
517 |
|
|
double band_noise, d2, d3, d4, d5; |
518 |
|
✗ |
int i = 0, j = 0, k = 0; |
519 |
|
|
|
520 |
|
✗ |
d5 = 0.0; |
521 |
|
✗ |
band_noise = process_get_band_noise(s, dnch, 0); |
522 |
|
✗ |
for (int m = j; m < s->bin_count; m++) { |
523 |
|
✗ |
if (m == j) { |
524 |
|
✗ |
i = j; |
525 |
|
✗ |
d5 = band_noise; |
526 |
|
✗ |
if (k >= NB_PROFILE_BANDS) { |
527 |
|
✗ |
j = s->bin_count; |
528 |
|
|
} else { |
529 |
|
✗ |
j = s->fft_length * get_band_centre(s, k) / s->sample_rate; |
530 |
|
|
} |
531 |
|
✗ |
d2 = j - i; |
532 |
|
✗ |
band_noise = process_get_band_noise(s, dnch, k); |
533 |
|
✗ |
k++; |
534 |
|
|
} |
535 |
|
✗ |
d3 = (j - m) / d2; |
536 |
|
✗ |
d4 = (m - i) / d2; |
537 |
|
✗ |
dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C); |
538 |
|
|
} |
539 |
|
|
|
540 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
541 |
|
✗ |
dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); |
542 |
|
✗ |
} |
543 |
|
|
|
544 |
|
✗ |
/**
 * Parse the "band_noise" option string into the noise profile of a channel.
 *
 * The string is split on '|' and ' '. Up to NB_PROFILE_BANDS tokens are
 * read as floats and clipped to [-24, 24]. Parsing stops at the first
 * missing or malformed token (the latter is logged); bands not reached
 * are set to 0. Nothing is changed when the option is unset or the string
 * cannot be duplicated.
 *
 * @param s  filter context
 * @param ch index of the channel whose band_noise is written
 */
static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
{
    DeNoiseChannel *dnch = &s->dnch[ch];
    double parsed[NB_PROFILE_BANDS] = { 0.f };
    char *copy, *saveptr = NULL;

    if (!s->band_noise_str)
        return;

    /* av_strtok() modifies its input, so tokenize a private copy. */
    copy = av_strdup(s->band_noise_str);
    if (!copy)
        return;

    for (int i = 0; i < NB_PROFILE_BANDS; i++) {
        float value;
        /* Only the first call receives the string; later calls continue it. */
        char *token = av_strtok(i ? NULL : copy, "| ", &saveptr);

        if (!token)
            break;

        if (av_sscanf(token, "%f", &value) != 1) {
            av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
            break;
        }

        parsed[i] = av_clipd(value, -24., 24.);
    }

    av_free(copy);
    memcpy(dnch->band_noise, parsed, sizeof(parsed));
}
578 |
|
|
|
579 |
|
✗ |
/**
 * Recompute the derived per-channel parameters from the current
 * noise floor / noise reduction / residual floor settings.
 *
 * max_var and gain_scale are always recomputed. max_gain, the per-band
 * parameters and the per-bin abs_var / min_abs_var are recomputed when
 * update_var is set or when the tracked setting changed since the last
 * call (residual_floor when s->track_residual is set, noise_reduction
 * otherwise).
 *
 * @param s               filter context
 * @param dnch            channel to update
 * @param update_var      non-zero to force the per-bin refresh
 * @param update_auto_var non-zero to refresh noise_band_auto_var
 */
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
{
    int refresh;

    if (dnch->last_noise_floor != dnch->noise_floor)
        dnch->last_noise_floor = dnch->noise_floor;

    if (s->track_residual)
        dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);

    dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);

    if (update_auto_var) {
        for (int band = 0; band < NB_PROFILE_BANDS; band++)
            dnch->noise_band_auto_var[band] = dnch->max_var * exp((process_get_band_noise(s, dnch, band) - 2.0) * C);
    }

    if (s->track_residual) {
        refresh = update_var || dnch->last_residual_floor != dnch->residual_floor;
        if (refresh) {
            dnch->last_residual_floor  = dnch->residual_floor;
            dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
        }
    } else {
        refresh = update_var || dnch->noise_reduction != dnch->last_noise_reduction;
        if (refresh) {
            dnch->last_noise_reduction = dnch->noise_reduction;
            dnch->last_residual_floor  = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
        }
    }

    if (refresh)
        dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));

    dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);

    if (!refresh)
        return;

    set_band_parameters(s, dnch);

    for (int bin = 0; bin < s->bin_count; bin++) {
        /* abs_var is kept at 1.0 or above. */
        dnch->abs_var[bin]     = fmax(dnch->max_var * dnch->rel_var[bin], 1.0);
        dnch->min_abs_var[bin] = dnch->gain_scale * dnch->abs_var[bin];
    }
}
618 |
|
|
|
619 |
|
✗ |
static void reduce_mean(double *band_noise) |
620 |
|
|
{ |
621 |
|
✗ |
double mean = 0.f; |
622 |
|
|
|
623 |
|
✗ |
for (int i = 0; i < NB_PROFILE_BANDS; i++) |
624 |
|
✗ |
mean += band_noise[i]; |
625 |
|
✗ |
mean /= NB_PROFILE_BANDS; |
626 |
|
|
|
627 |
|
✗ |
for (int i = 0; i < NB_PROFILE_BANDS; i++) |
628 |
|
✗ |
band_noise[i] -= mean; |
629 |
|
✗ |
} |
630 |
|
|
|
631 |
|
✗ |
static int config_input(AVFilterLink *inlink) |
632 |
|
|
{ |
633 |
|
✗ |
AVFilterContext *ctx = inlink->dst; |
634 |
|
✗ |
AudioFFTDeNoiseContext *s = ctx->priv; |
635 |
|
|
double wscale, sar, sum, sdiv; |
636 |
|
|
int i, j, k, m, n, ret, tx_type; |
637 |
|
✗ |
double dscale = 1.; |
638 |
|
✗ |
float fscale = 1.f; |
639 |
|
|
void *scale; |
640 |
|
|
|
641 |
|
✗ |
s->format = inlink->format; |
642 |
|
|
|
643 |
|
✗ |
switch (s->format) { |
644 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
645 |
|
✗ |
s->sample_size = sizeof(float); |
646 |
|
✗ |
s->complex_sample_size = sizeof(AVComplexFloat); |
647 |
|
✗ |
tx_type = AV_TX_FLOAT_RDFT; |
648 |
|
✗ |
scale = &fscale; |
649 |
|
✗ |
break; |
650 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
651 |
|
✗ |
s->sample_size = sizeof(double); |
652 |
|
✗ |
s->complex_sample_size = sizeof(AVComplexDouble); |
653 |
|
✗ |
tx_type = AV_TX_DOUBLE_RDFT; |
654 |
|
✗ |
scale = &dscale; |
655 |
|
✗ |
break; |
656 |
|
|
} |
657 |
|
|
|
658 |
|
✗ |
s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch)); |
659 |
|
✗ |
if (!s->dnch) |
660 |
|
✗ |
return AVERROR(ENOMEM); |
661 |
|
|
|
662 |
|
✗ |
s->channels = inlink->ch_layout.nb_channels; |
663 |
|
✗ |
s->sample_rate = inlink->sample_rate; |
664 |
|
✗ |
s->sample_advance = s->sample_rate / 80; |
665 |
|
✗ |
s->window_length = 3 * s->sample_advance; |
666 |
|
✗ |
s->fft_length2 = 1 << (32 - ff_clz(s->window_length)); |
667 |
|
✗ |
s->fft_length = s->fft_length2; |
668 |
|
✗ |
s->buffer_length = s->fft_length * 2; |
669 |
|
✗ |
s->bin_count = s->fft_length2 / 2 + 1; |
670 |
|
|
|
671 |
|
✗ |
s->band_centre[0] = 80; |
672 |
|
✗ |
for (i = 1; i < NB_PROFILE_BANDS; i++) { |
673 |
|
✗ |
s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0); |
674 |
|
✗ |
if (s->band_centre[i] < 1000) { |
675 |
|
✗ |
s->band_centre[i] = 10 * (s->band_centre[i] / 10); |
676 |
|
✗ |
} else if (s->band_centre[i] < 5000) { |
677 |
|
✗ |
s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50); |
678 |
|
✗ |
} else if (s->band_centre[i] < 15000) { |
679 |
|
✗ |
s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100); |
680 |
|
|
} else { |
681 |
|
✗ |
s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000); |
682 |
|
|
} |
683 |
|
|
} |
684 |
|
|
|
685 |
|
✗ |
for (j = 0; j < SOLVE_SIZE; j++) { |
686 |
|
✗ |
for (k = 0; k < SOLVE_SIZE; k++) { |
687 |
|
✗ |
s->matrix_a[j + k * SOLVE_SIZE] = 0.0; |
688 |
|
✗ |
for (m = 0; m < NB_PROFILE_BANDS; m++) |
689 |
|
✗ |
s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k); |
690 |
|
|
} |
691 |
|
|
} |
692 |
|
|
|
693 |
|
✗ |
factor(s->matrix_a, SOLVE_SIZE); |
694 |
|
|
|
695 |
|
✗ |
i = 0; |
696 |
|
✗ |
for (j = 0; j < SOLVE_SIZE; j++) |
697 |
|
✗ |
for (k = 0; k < NB_PROFILE_BANDS; k++) |
698 |
|
✗ |
s->matrix_b[i++] = pow(k, j); |
699 |
|
|
|
700 |
|
✗ |
i = 0; |
701 |
|
✗ |
for (j = 0; j < NB_PROFILE_BANDS; j++) |
702 |
|
✗ |
for (k = 0; k < SOLVE_SIZE; k++) |
703 |
|
✗ |
s->matrix_c[i++] = pow(j, k); |
704 |
|
|
|
705 |
|
✗ |
s->window = av_calloc(s->window_length, sizeof(*s->window)); |
706 |
|
✗ |
s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band)); |
707 |
|
✗ |
if (!s->window || !s->bin2band) |
708 |
|
✗ |
return AVERROR(ENOMEM); |
709 |
|
|
|
710 |
|
✗ |
sdiv = s->band_multiplier; |
711 |
|
✗ |
for (i = 0; i < s->bin_count; i++) |
712 |
|
✗ |
s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2)); |
713 |
|
|
|
714 |
|
✗ |
s->number_of_bands = s->bin2band[s->bin_count - 1] + 1; |
715 |
|
|
|
716 |
|
✗ |
s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha)); |
717 |
|
✗ |
s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta)); |
718 |
|
✗ |
if (!s->band_alpha || !s->band_beta) |
719 |
|
✗ |
return AVERROR(ENOMEM); |
720 |
|
|
|
721 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
722 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
723 |
|
|
|
724 |
|
✗ |
switch (s->noise_type) { |
725 |
|
✗ |
case WHITE_NOISE: |
726 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
727 |
|
✗ |
dnch->band_noise[i] = 0.; |
728 |
|
✗ |
break; |
729 |
|
✗ |
case VINYL_NOISE: |
730 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
731 |
|
✗ |
dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0); |
732 |
|
✗ |
break; |
733 |
|
✗ |
case SHELLAC_NOISE: |
734 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
735 |
|
✗ |
dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10); |
736 |
|
✗ |
break; |
737 |
|
✗ |
case CUSTOM_NOISE: |
738 |
|
✗ |
read_custom_noise(s, ch); |
739 |
|
✗ |
break; |
740 |
|
✗ |
default: |
741 |
|
✗ |
return AVERROR_BUG; |
742 |
|
|
} |
743 |
|
|
|
744 |
|
✗ |
reduce_mean(dnch->band_noise); |
745 |
|
|
|
746 |
|
✗ |
dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt)); |
747 |
|
✗ |
dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt)); |
748 |
|
✗ |
dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit)); |
749 |
|
✗ |
dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain)); |
750 |
|
✗ |
dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain)); |
751 |
|
✗ |
dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior)); |
752 |
|
✗ |
dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit)); |
753 |
|
✗ |
dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data)); |
754 |
|
✗ |
dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data)); |
755 |
|
✗ |
dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples)); |
756 |
|
✗ |
dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var)); |
757 |
|
✗ |
dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var)); |
758 |
|
✗ |
dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var)); |
759 |
|
✗ |
dnch->fft_in = av_calloc(s->fft_length2, s->sample_size); |
760 |
|
✗ |
dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size); |
761 |
|
✗ |
ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0); |
762 |
|
✗ |
if (ret < 0) |
763 |
|
✗ |
return ret; |
764 |
|
✗ |
ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0); |
765 |
|
✗ |
if (ret < 0) |
766 |
|
✗ |
return ret; |
767 |
|
✗ |
dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands, |
768 |
|
|
sizeof(*dnch->spread_function)); |
769 |
|
|
|
770 |
|
✗ |
if (!dnch->amt || |
771 |
|
✗ |
!dnch->band_amt || |
772 |
|
✗ |
!dnch->band_excit || |
773 |
|
✗ |
!dnch->gain || |
774 |
|
✗ |
!dnch->smoothed_gain || |
775 |
|
✗ |
!dnch->prior || |
776 |
|
✗ |
!dnch->prior_band_excit || |
777 |
|
✗ |
!dnch->clean_data || |
778 |
|
✗ |
!dnch->noisy_data || |
779 |
|
✗ |
!dnch->out_samples || |
780 |
|
✗ |
!dnch->fft_in || |
781 |
|
✗ |
!dnch->fft_out || |
782 |
|
✗ |
!dnch->abs_var || |
783 |
|
✗ |
!dnch->rel_var || |
784 |
|
✗ |
!dnch->min_abs_var || |
785 |
|
✗ |
!dnch->spread_function || |
786 |
|
✗ |
!dnch->fft || |
787 |
|
✗ |
!dnch->ifft) |
788 |
|
✗ |
return AVERROR(ENOMEM); |
789 |
|
|
} |
790 |
|
|
|
791 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
792 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
793 |
|
✗ |
double *prior_band_excit = dnch->prior_band_excit; |
794 |
|
|
double min, max; |
795 |
|
|
double p1, p2; |
796 |
|
|
|
797 |
|
✗ |
p1 = pow(0.1, 2.5 / sdiv); |
798 |
|
✗ |
p2 = pow(0.1, 1.0 / sdiv); |
799 |
|
✗ |
j = 0; |
800 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
801 |
|
✗ |
for (n = 0; n < s->number_of_bands; n++) { |
802 |
|
✗ |
if (n < m) { |
803 |
|
✗ |
dnch->spread_function[j++] = pow(p2, m - n); |
804 |
|
✗ |
} else if (n > m) { |
805 |
|
✗ |
dnch->spread_function[j++] = pow(p1, n - m); |
806 |
|
|
} else { |
807 |
|
✗ |
dnch->spread_function[j++] = 1.0; |
808 |
|
|
} |
809 |
|
|
} |
810 |
|
|
} |
811 |
|
|
|
812 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
813 |
|
✗ |
dnch->band_excit[m] = 0.0; |
814 |
|
✗ |
prior_band_excit[m] = 0.0; |
815 |
|
|
} |
816 |
|
|
|
817 |
|
✗ |
for (m = 0; m < s->bin_count; m++) |
818 |
|
✗ |
dnch->band_excit[s->bin2band[m]] += 1.0; |
819 |
|
|
|
820 |
|
✗ |
j = 0; |
821 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
822 |
|
✗ |
for (n = 0; n < s->number_of_bands; n++) |
823 |
|
✗ |
prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n]; |
824 |
|
|
} |
825 |
|
|
|
826 |
|
✗ |
min = pow(0.1, 2.5); |
827 |
|
✗ |
max = pow(0.1, 1.0); |
828 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
829 |
|
✗ |
if (i < lrint(12.0 * sdiv)) { |
830 |
|
✗ |
dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv); |
831 |
|
|
} else { |
832 |
|
✗ |
dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0)); |
833 |
|
|
} |
834 |
|
✗ |
dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max); |
835 |
|
|
} |
836 |
|
|
|
837 |
|
✗ |
for (int i = 0; i < s->buffer_length; i++) |
838 |
|
✗ |
dnch->out_samples[i] = 0; |
839 |
|
|
|
840 |
|
✗ |
j = 0; |
841 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) |
842 |
|
✗ |
for (int k = 0; k < s->number_of_bands; k++) |
843 |
|
✗ |
dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i]; |
844 |
|
|
} |
845 |
|
|
|
846 |
|
✗ |
j = 0; |
847 |
|
✗ |
sar = s->sample_advance / s->sample_rate; |
848 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
849 |
|
✗ |
if ((i == s->fft_length2) || (s->bin2band[i] > j)) { |
850 |
|
✗ |
double d6 = (i - 1) * s->sample_rate / s->fft_length; |
851 |
|
✗ |
double d7 = fmin(0.008 + 2.2 / d6, 0.03); |
852 |
|
✗ |
s->band_alpha[j] = exp(-sar / d7); |
853 |
|
✗ |
s->band_beta[j] = 1.0 - s->band_alpha[j]; |
854 |
|
✗ |
j = s->bin2band[i]; |
855 |
|
|
} |
856 |
|
|
} |
857 |
|
|
|
858 |
|
✗ |
s->winframe = ff_get_audio_buffer(inlink, s->window_length); |
859 |
|
✗ |
if (!s->winframe) |
860 |
|
✗ |
return AVERROR(ENOMEM); |
861 |
|
|
|
862 |
|
✗ |
wscale = sqrt(8.0 / (9.0 * s->fft_length)); |
863 |
|
✗ |
sum = 0.0; |
864 |
|
✗ |
for (int i = 0; i < s->window_length; i++) { |
865 |
|
✗ |
double d10 = sin(i * M_PI / s->window_length); |
866 |
|
✗ |
d10 *= wscale * d10; |
867 |
|
✗ |
s->window[i] = d10; |
868 |
|
✗ |
sum += d10 * d10; |
869 |
|
|
} |
870 |
|
|
|
871 |
|
✗ |
s->window_weight = 0.5 * sum; |
872 |
|
✗ |
s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight; |
873 |
|
✗ |
s->sample_floor = s->floor * exp(4.144600506562284); |
874 |
|
|
|
875 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
876 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
877 |
|
|
|
878 |
|
✗ |
dnch->noise_reduction = s->noise_reduction; |
879 |
|
✗ |
dnch->noise_floor = s->noise_floor; |
880 |
|
✗ |
dnch->residual_floor = s->residual_floor; |
881 |
|
|
|
882 |
|
✗ |
set_parameters(s, dnch, 1, 1); |
883 |
|
|
} |
884 |
|
|
|
885 |
|
✗ |
s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate); |
886 |
|
✗ |
i = 0; |
887 |
|
✗ |
for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) { |
888 |
|
✗ |
s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate); |
889 |
|
✗ |
if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1])) |
890 |
|
✗ |
i++; |
891 |
|
✗ |
s->noise_band_edge[NB_PROFILE_BANDS + 1] = i; |
892 |
|
|
} |
893 |
|
✗ |
s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1]; |
894 |
|
|
|
895 |
|
✗ |
return 0; |
896 |
|
|
} |
897 |
|
|
|
898 |
|
✗ |
/**
 * Reset the noise-sampling accumulators of one channel.
 *
 * Zeroes the norm, real-part sum, imaginary-part sum and variance sum of
 * each of the NB_PROFILE_BANDS profile bands.
 */
static void init_sample_noise(DeNoiseChannel *dnch)
{
    int band = NB_PROFILE_BANDS;

    while (band-- > 0) {
        dnch->noise_band_var[band]  = 0.0;
        dnch->noise_band_avi[band]  = 0.0;
        dnch->noise_band_avr[band]  = 0.0;
        dnch->noise_band_norm[band] = 0.0;
    }
}
907 |
|
|
|
908 |
|
✗ |
static void sample_noise_block(AudioFFTDeNoiseContext *s, |
909 |
|
|
DeNoiseChannel *dnch, |
910 |
|
|
AVFrame *in, int ch) |
911 |
|
|
{ |
912 |
|
✗ |
double *src_dbl = (double *)in->extended_data[ch]; |
913 |
|
✗ |
float *src_flt = (float *)in->extended_data[ch]; |
914 |
|
✗ |
double mag2, var = 0.0, avr = 0.0, avi = 0.0; |
915 |
|
✗ |
AVComplexDouble *fft_out_dbl = dnch->fft_out; |
916 |
|
✗ |
AVComplexFloat *fft_out_flt = dnch->fft_out; |
917 |
|
✗ |
double *fft_in_dbl = dnch->fft_in; |
918 |
|
✗ |
float *fft_in_flt = dnch->fft_in; |
919 |
|
|
int edge, j, k, n, edgemax; |
920 |
|
|
|
921 |
|
✗ |
switch (s->format) { |
922 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
923 |
|
✗ |
for (int i = 0; i < s->window_length; i++) |
924 |
|
✗ |
fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23); |
925 |
|
|
|
926 |
|
✗ |
for (int i = s->window_length; i < s->fft_length2; i++) |
927 |
|
✗ |
fft_in_flt[i] = 0.f; |
928 |
|
✗ |
break; |
929 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
930 |
|
✗ |
for (int i = 0; i < s->window_length; i++) |
931 |
|
✗ |
fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23); |
932 |
|
|
|
933 |
|
✗ |
for (int i = s->window_length; i < s->fft_length2; i++) |
934 |
|
✗ |
fft_in_dbl[i] = 0.; |
935 |
|
✗ |
break; |
936 |
|
|
} |
937 |
|
|
|
938 |
|
✗ |
dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size); |
939 |
|
|
|
940 |
|
✗ |
edge = s->noise_band_edge[0]; |
941 |
|
✗ |
j = edge; |
942 |
|
✗ |
k = 0; |
943 |
|
✗ |
n = j; |
944 |
|
✗ |
edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]); |
945 |
|
✗ |
for (int i = j; i <= edgemax; i++) { |
946 |
|
✗ |
if ((i == j) && (i < edgemax)) { |
947 |
|
✗ |
if (j > edge) { |
948 |
|
✗ |
dnch->noise_band_norm[k - 1] += j - edge; |
949 |
|
✗ |
dnch->noise_band_avr[k - 1] += avr; |
950 |
|
✗ |
dnch->noise_band_avi[k - 1] += avi; |
951 |
|
✗ |
dnch->noise_band_var[k - 1] += var; |
952 |
|
|
} |
953 |
|
✗ |
k++; |
954 |
|
✗ |
edge = j; |
955 |
|
✗ |
j = s->noise_band_edge[k]; |
956 |
|
✗ |
if (k == NB_PROFILE_BANDS) { |
957 |
|
✗ |
j++; |
958 |
|
|
} |
959 |
|
✗ |
var = 0.0; |
960 |
|
✗ |
avr = 0.0; |
961 |
|
✗ |
avi = 0.0; |
962 |
|
|
} |
963 |
|
|
|
964 |
|
✗ |
switch (s->format) { |
965 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
966 |
|
✗ |
avr += fft_out_flt[n].re; |
967 |
|
✗ |
avi += fft_out_flt[n].im; |
968 |
|
✗ |
mag2 = fft_out_flt[n].re * fft_out_flt[n].re + |
969 |
|
✗ |
fft_out_flt[n].im * fft_out_flt[n].im; |
970 |
|
✗ |
break; |
971 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
972 |
|
✗ |
avr += fft_out_dbl[n].re; |
973 |
|
✗ |
avi += fft_out_dbl[n].im; |
974 |
|
✗ |
mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re + |
975 |
|
✗ |
fft_out_dbl[n].im * fft_out_dbl[n].im; |
976 |
|
✗ |
break; |
977 |
|
✗ |
default: |
978 |
|
|
av_assert2(0); |
979 |
|
|
} |
980 |
|
|
|
981 |
|
✗ |
mag2 = fmax(mag2, s->sample_floor); |
982 |
|
|
|
983 |
|
✗ |
var += mag2; |
984 |
|
✗ |
n++; |
985 |
|
|
} |
986 |
|
|
|
987 |
|
✗ |
dnch->noise_band_norm[k - 1] += j - edge; |
988 |
|
✗ |
dnch->noise_band_avr[k - 1] += avr; |
989 |
|
✗ |
dnch->noise_band_avi[k - 1] += avi; |
990 |
|
✗ |
dnch->noise_band_var[k - 1] += var; |
991 |
|
✗ |
} |
992 |
|
|
|
993 |
|
✗ |
static void finish_sample_noise(AudioFFTDeNoiseContext *s, |
994 |
|
|
DeNoiseChannel *dnch, |
995 |
|
|
double *sample_noise) |
996 |
|
|
{ |
997 |
|
✗ |
for (int i = 0; i < s->noise_band_count; i++) { |
998 |
|
✗ |
dnch->noise_band_avr[i] /= dnch->noise_band_norm[i]; |
999 |
|
✗ |
dnch->noise_band_avi[i] /= dnch->noise_band_norm[i]; |
1000 |
|
✗ |
dnch->noise_band_var[i] /= dnch->noise_band_norm[i]; |
1001 |
|
✗ |
dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] + |
1002 |
|
✗ |
dnch->noise_band_avi[i] * dnch->noise_band_avi[i]; |
1003 |
|
✗ |
dnch->noise_band_auto_var[i] = dnch->noise_band_var[i]; |
1004 |
|
✗ |
sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0; |
1005 |
|
|
} |
1006 |
|
✗ |
if (s->noise_band_count < NB_PROFILE_BANDS) { |
1007 |
|
✗ |
for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++) |
1008 |
|
✗ |
sample_noise[i] = sample_noise[i - 1]; |
1009 |
|
|
} |
1010 |
|
✗ |
} |
1011 |
|
|
|
1012 |
|
✗ |
static void set_noise_profile(AudioFFTDeNoiseContext *s, |
1013 |
|
|
DeNoiseChannel *dnch, |
1014 |
|
|
double *sample_noise) |
1015 |
|
|
{ |
1016 |
|
|
double new_band_noise[NB_PROFILE_BANDS]; |
1017 |
|
|
double temp[NB_PROFILE_BANDS]; |
1018 |
|
✗ |
double sum = 0.0; |
1019 |
|
|
|
1020 |
|
✗ |
for (int m = 0; m < NB_PROFILE_BANDS; m++) |
1021 |
|
✗ |
temp[m] = sample_noise[m]; |
1022 |
|
|
|
1023 |
|
✗ |
for (int m = 0, i = 0; m < SOLVE_SIZE; m++) { |
1024 |
|
✗ |
sum = 0.0; |
1025 |
|
✗ |
for (int n = 0; n < NB_PROFILE_BANDS; n++) |
1026 |
|
✗ |
sum += s->matrix_b[i++] * temp[n]; |
1027 |
|
✗ |
s->vector_b[m] = sum; |
1028 |
|
|
} |
1029 |
|
✗ |
solve(s->matrix_a, s->vector_b, SOLVE_SIZE); |
1030 |
|
✗ |
for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) { |
1031 |
|
✗ |
sum = 0.0; |
1032 |
|
✗ |
for (int n = 0; n < SOLVE_SIZE; n++) |
1033 |
|
✗ |
sum += s->matrix_c[i++] * s->vector_b[n]; |
1034 |
|
✗ |
temp[m] = sum; |
1035 |
|
|
} |
1036 |
|
|
|
1037 |
|
✗ |
reduce_mean(temp); |
1038 |
|
|
|
1039 |
|
✗ |
av_log(s, AV_LOG_INFO, "bn="); |
1040 |
|
✗ |
for (int m = 0; m < NB_PROFILE_BANDS; m++) { |
1041 |
|
✗ |
new_band_noise[m] = temp[m]; |
1042 |
|
✗ |
new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0); |
1043 |
|
✗ |
av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]); |
1044 |
|
|
} |
1045 |
|
✗ |
av_log(s, AV_LOG_INFO, "\n"); |
1046 |
|
✗ |
memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise)); |
1047 |
|
✗ |
} |
1048 |
|
|
|
1049 |
|
✗ |
/**
 * Slice-threading worker: denoise one window for a range of channels.
 *
 * The channels of the frame passed in @p arg are split evenly over
 * @p nb_jobs; job @p jobnr handles its share. For each channel the window of
 * samples is weighted and scaled by 2^23, zero padded, forward transformed,
 * passed to process_frame(), inverse transformed, and the windowed result
 * (scaled back by 2^23) is added onto the channel's out_samples buffer.
 *
 * @return always 0
 */
static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFrame *frame = arg;
    const int first = (frame->ch_layout.nb_channels * jobnr) / nb_jobs;
    const int last = (frame->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
    const int wlen = s->window_length;
    const double *win = s->window;

    for (int ch = first; ch < last; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        const double *in_dbl = (const double *)frame->extended_data[ch];
        const float *in_flt = (const float *)frame->extended_data[ch];
        double *accum = dnch->out_samples;
        double *buf_dbl = dnch->fft_in;
        float *buf_flt = dnch->fft_in;

        /* Analysis: window, scale and zero pad the transform input. */
        if (s->format == AV_SAMPLE_FMT_FLTP) {
            for (int m = 0; m < wlen; m++)
                buf_flt[m] = win[m] * in_flt[m] * (1LL << 23);

            for (int m = wlen; m < s->fft_length2; m++)
                buf_flt[m] = 0.f;
        } else if (s->format == AV_SAMPLE_FMT_DBLP) {
            for (int m = 0; m < wlen; m++)
                buf_dbl[m] = win[m] * in_dbl[m] * (1LL << 23);

            for (int m = wlen; m < s->fft_length2; m++)
                buf_dbl[m] = 0.;
        }

        dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);

        process_frame(ctx, s, dnch,
                      dnch->prior,
                      dnch->prior_band_excit,
                      s->track_noise);

        dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);

        /* Synthesis: window again and overlap-add into the output buffer. */
        if (s->format == AV_SAMPLE_FMT_FLTP) {
            for (int m = 0; m < wlen; m++)
                accum[m] += s->window[m] * buf_flt[m] / (1LL << 23);
        } else if (s->format == AV_SAMPLE_FMT_DBLP) {
            for (int m = 0; m < wlen; m++)
                accum[m] += s->window[m] * buf_dbl[m] / (1LL << 23);
        }
    }

    return 0;
}
1106 |
|
|
|
1107 |
|
✗ |
/**
 * Process one block of input samples and send the resulting frame downstream.
 *
 * Steps, in order:
 *  - shift the sliding window frame by sample_advance and append the new
 *    samples (zero filling when the input is shorter than sample_advance);
 *  - when noise tracking is on, link the per-channel noise floors according
 *    to noise_floor_link and refresh parameters of channels whose floor
 *    changed;
 *  - handle the start / run / stop states of noise sampling;
 *  - run filter_channel() over all channels;
 *  - write the input, the denoised output or their difference to the output
 *    frame depending on the output mode (IN_MODE when the filter is
 *    disabled), then advance each channel's overlap-add buffer.
 *
 * Takes ownership of @p in: it is either reused as the output frame or freed.
 *
 * @return the result of ff_filter_frame(), or a negative error code when
 *         threaded execution, allocation or property copying fails
 */
static int output_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    AudioFFTDeNoiseContext *s = ctx->priv;
    const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
    const int offset = s->window_length - s->sample_advance;
    AVFrame *out;
    int ret;

    /* Slide the analysis window and append the new samples. */
    for (int ch = 0; ch < s->channels; ch++) {
        uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];

        memmove(src, src + s->sample_advance * s->sample_size,
                offset * s->sample_size);
        memcpy(src + offset * s->sample_size, in->extended_data[ch],
               in->nb_samples * s->sample_size);
        memset(src + s->sample_size * (offset + in->nb_samples), 0,
               (s->sample_advance - in->nb_samples) * s->sample_size);
    }

    if (s->track_noise) {
        double average = 0.0, min = DBL_MAX, max = -DBL_MAX;

        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            average += dnch->noise_floor;
            max = fmax(max, dnch->noise_floor);
            min = fmin(min, dnch->noise_floor);
        }

        average /= inlink->ch_layout.nb_channels;

        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            switch (s->noise_floor_link) {
            case MIN_LINK:     dnch->noise_floor = min;     break;
            case MAX_LINK:     dnch->noise_floor = max;     break;
            case AVERAGE_LINK: dnch->noise_floor = average; break;
            case NONE_LINK:
            default:
                break;
            }

            if (dnch->noise_floor != dnch->last_noise_floor)
                set_parameters(s, dnch, 1, 0);
        }
    }

    /* Start of noise sampling: reset the accumulators. */
    if (s->sample_noise_mode == SAMPLE_START) {
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            init_sample_noise(dnch);
        }
        s->sample_noise_mode = SAMPLE_NONE;
        s->sample_noise = 1;
        s->sample_noise_blocks = 0;
    }

    /* Sampling in progress: accumulate the current window. */
    if (s->sample_noise) {
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            sample_noise_block(s, dnch, s->winframe, ch);
        }
        s->sample_noise_blocks++;
    }

    /* End of noise sampling: build the profile if anything was sampled. */
    if (s->sample_noise_mode == SAMPLE_STOP) {
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];
            double sample_noise[NB_PROFILE_BANDS];

            if (s->sample_noise_blocks <= 0)
                break;
            finish_sample_noise(s, dnch, sample_noise);
            set_noise_profile(s, dnch, sample_noise);
            set_parameters(s, dnch, 1, 1);
        }
        s->sample_noise = 0;
        s->sample_noise_blocks = 0;
        s->sample_noise_mode = SAMPLE_NONE;
    }

    /* Do not emit a frame built from a failed processing pass. */
    ret = ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
                            FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
    if (ret < 0) {
        av_frame_free(&in);
        return ret;
    }

    if (av_frame_is_writable(in)) {
        out = in;
    } else {
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        ret = av_frame_copy_props(out, in);
        if (ret < 0) {
            av_frame_free(&out);
            av_frame_free(&in);
            return ret;
        }
    }

    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *src = dnch->out_samples;
        const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
        const float *orig_flt = (const float *)s->winframe->extended_data[ch];
        double *dst_dbl = (double *)out->extended_data[ch];
        float *dst_flt = (float *)out->extended_data[ch];

        switch (output_mode) {
        case IN_MODE:
            switch (s->format) {
            case AV_SAMPLE_FMT_FLTP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = orig_flt[m];
                break;
            case AV_SAMPLE_FMT_DBLP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = orig_dbl[m];
                break;
            }
            break;
        case OUT_MODE:
            switch (s->format) {
            case AV_SAMPLE_FMT_FLTP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = src[m];
                break;
            case AV_SAMPLE_FMT_DBLP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = src[m];
                break;
            }
            break;
        case NOISE_MODE:
            switch (s->format) {
            case AV_SAMPLE_FMT_FLTP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = orig_flt[m] - src[m];
                break;
            case AV_SAMPLE_FMT_DBLP:
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = orig_dbl[m] - src[m];
                break;
            }
            break;
        default:
            if (in != out)
                av_frame_free(&in);
            av_frame_free(&out);
            return AVERROR_BUG;
        }

        /* Drop the emitted samples and clear the tail of the overlap buffer. */
        memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
        memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
    }

    if (out != in)
        av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
1268 |
|
|
|
1269 |
|
✗ |
/**
 * Filter activation callback.
 *
 * Consumes exactly sample_advance samples from the input when available and
 * hands them to output_frame(). If nothing was consumed but enough samples
 * are queued, the filter is marked ready again; otherwise status and frame
 * requests are forwarded between the links.
 *
 * @return a negative error code from sample consumption, the result of
 *         output_frame(), 0, or FFERROR_NOT_READY
 */
static int activate(AVFilterContext *ctx)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFilterLink *src_link = ctx->inputs[0];
    AVFilterLink *dst_link = ctx->outputs[0];
    AVFrame *frame = NULL;
    int consumed;

    FF_FILTER_FORWARD_STATUS_BACK(dst_link, src_link);

    consumed = ff_inlink_consume_samples(src_link, s->sample_advance,
                                         s->sample_advance, &frame);
    if (consumed < 0)
        return consumed;
    if (consumed > 0)
        return output_frame(src_link, frame);

    if (ff_inlink_queued_samples(src_link) >= s->sample_advance) {
        ff_filter_set_ready(ctx, 10);
        return 0;
    }

    FF_FILTER_FORWARD_STATUS(src_link, dst_link);
    FF_FILTER_FORWARD_WANTED(dst_link, src_link);

    return FFERROR_NOT_READY;
}
1295 |
|
|
|
1296 |
|
✗ |
/**
 * Release everything owned by the filter context: the shared tables, the
 * sliding window frame and, for each channel, its work buffers and both
 * transform contexts.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioFFTDeNoiseContext *s = ctx->priv;

    av_freep(&s->window);
    av_freep(&s->bin2band);
    av_freep(&s->band_alpha);
    av_freep(&s->band_beta);
    av_frame_free(&s->winframe);

    /* Nothing per-channel to release if the channel array was never set. */
    if (!s->dnch)
        return;

    for (int ch = 0; ch < s->channels; ch++) {
        DeNoiseChannel *chan = &s->dnch[ch];

        /* band and bin tables */
        av_freep(&chan->amt);
        av_freep(&chan->band_amt);
        av_freep(&chan->band_excit);
        av_freep(&chan->gain);
        av_freep(&chan->smoothed_gain);
        av_freep(&chan->prior);
        av_freep(&chan->prior_band_excit);
        av_freep(&chan->clean_data);
        av_freep(&chan->noisy_data);
        av_freep(&chan->out_samples);
        av_freep(&chan->spread_function);
        av_freep(&chan->abs_var);
        av_freep(&chan->rel_var);
        av_freep(&chan->min_abs_var);

        /* transform buffers and contexts */
        av_freep(&chan->fft_in);
        av_freep(&chan->fft_out);
        av_tx_uninit(&chan->fft);
        av_tx_uninit(&chan->ifft);
    }

    av_freep(&s->dnch);
}
1331 |
|
|
|
1332 |
|
✗ |
/**
 * Runtime command handler.
 *
 * The command is first applied through ff_filter_process_command(). Unless
 * it was "sample_noise" / "sn", the context's noise_reduction, noise_floor
 * and residual_floor values are then copied to every channel and the
 * channel parameters are recomputed.
 *
 * @return 0 on success, or the negative error from the generic handler
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    const int err = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);

    if (err < 0)
        return err;

    if (strcmp(cmd, "sample_noise") != 0 && strcmp(cmd, "sn") != 0) {
        for (int ch = 0; ch < s->channels; ch++) {
            DeNoiseChannel *chan = &s->dnch[ch];

            chan->noise_reduction = s->noise_reduction;
            chan->noise_floor     = s->noise_floor;
            chan->residual_floor  = s->residual_floor;

            set_parameters(s, chan, 1, 1);
        }
    }

    return 0;
}
1357 |
|
|
|
1358 |
|
|
static const AVFilterPad inputs[] = { |
1359 |
|
|
{ |
1360 |
|
|
.name = "default", |
1361 |
|
|
.type = AVMEDIA_TYPE_AUDIO, |
1362 |
|
|
.config_props = config_input, |
1363 |
|
|
}, |
1364 |
|
|
}; |
1365 |
|
|
|
1366 |
|
|
const AVFilter ff_af_afftdn = { |
1367 |
|
|
.name = "afftdn", |
1368 |
|
|
.description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."), |
1369 |
|
|
.priv_size = sizeof(AudioFFTDeNoiseContext), |
1370 |
|
|
.priv_class = &afftdn_class, |
1371 |
|
|
.activate = activate, |
1372 |
|
|
.uninit = uninit, |
1373 |
|
|
FILTER_INPUTS(inputs), |
1374 |
|
|
FILTER_OUTPUTS(ff_audio_default_filterpad), |
1375 |
|
|
FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP), |
1376 |
|
|
.process_command = process_command, |
1377 |
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | |
1378 |
|
|
AVFILTER_FLAG_SLICE_THREADS, |
1379 |
|
|
}; |
1380 |
|
|
|