Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2018 The FFmpeg Project |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include <float.h> |
22 |
|
|
|
23 |
|
|
#include "libavutil/avstring.h" |
24 |
|
|
#include "libavutil/channel_layout.h" |
25 |
|
|
#include "libavutil/mem.h" |
26 |
|
|
#include "libavutil/opt.h" |
27 |
|
|
#include "libavutil/tx.h" |
28 |
|
|
#include "avfilter.h" |
29 |
|
|
#include "audio.h" |
30 |
|
|
#include "filters.h" |
31 |
|
|
|
32 |
|
|
/* Decibel-to-natural-exponent factor: exp(dB * C) == pow(10, dB / 10). */
#define C (M_LN10 * 0.1)
/* Dimension of the square linear system handled by factor() and solve(). */
#define SOLVE_SIZE (5)
/* Number of bands in a noise profile. */
#define NB_PROFILE_BANDS (15)
35 |
|
|
|
36 |
|
|
/* Values accepted by the "sample_noise" ("sn") option. */
enum SampleNoiseModes {
    SAMPLE_NONE = 0,   /* option constant "none" */
    SAMPLE_START,      /* option constants "start" and "begin" */
    SAMPLE_STOP,       /* option constants "stop" and "end" */
    NB_SAMPLEMODES     /* number of modes; must stay last */
};
42 |
|
|
|
43 |
|
|
/* Values accepted by the "output_mode" ("om") option. */
enum OutModes {
    IN_MODE = 0,   /* option constants "input" and "i" */
    OUT_MODE,      /* option constants "output" and "o" (the default) */
    NOISE_MODE,    /* option constants "noise" and "n" */
    NB_MODES       /* number of modes; must stay last */
};
49 |
|
|
|
50 |
|
|
/* Values accepted by the "noise_link" ("nl") option. */
enum NoiseLinkType {
    NONE_LINK = 0,   /* option constant "none" */
    MIN_LINK,        /* option constant "min" (the default) */
    MAX_LINK,        /* option constant "max" */
    AVERAGE_LINK,    /* option constant "average" */
    NB_LINK          /* number of link types; must stay last */
};
57 |
|
|
|
58 |
|
|
/* Values accepted by the "noise_type" ("nt") option. */
enum NoiseType {
    WHITE_NOISE = 0,   /* option constants "white" and "w" (the default) */
    VINYL_NOISE,       /* option constants "vinyl" and "v" */
    SHELLAC_NOISE,     /* option constants "shellac" and "s" */
    CUSTOM_NOISE,      /* option constants "custom" and "c"; profile read from "band_noise" */
    NB_NOISE           /* number of noise types; must stay last */
};
65 |
|
|
|
66 |
|
|
typedef struct DeNoiseChannel { |
67 |
|
|
double band_noise[NB_PROFILE_BANDS]; |
68 |
|
|
double noise_band_auto_var[NB_PROFILE_BANDS]; |
69 |
|
|
double noise_band_sample[NB_PROFILE_BANDS]; |
70 |
|
|
double *amt; |
71 |
|
|
double *band_amt; |
72 |
|
|
double *band_excit; |
73 |
|
|
double *gain; |
74 |
|
|
double *smoothed_gain; |
75 |
|
|
double *prior; |
76 |
|
|
double *prior_band_excit; |
77 |
|
|
double *clean_data; |
78 |
|
|
double *noisy_data; |
79 |
|
|
double *out_samples; |
80 |
|
|
double *spread_function; |
81 |
|
|
double *abs_var; |
82 |
|
|
double *rel_var; |
83 |
|
|
double *min_abs_var; |
84 |
|
|
void *fft_in; |
85 |
|
|
void *fft_out; |
86 |
|
|
AVTXContext *fft, *ifft; |
87 |
|
|
av_tx_fn tx_fn, itx_fn; |
88 |
|
|
|
89 |
|
|
double noise_band_norm[NB_PROFILE_BANDS]; |
90 |
|
|
double noise_band_avr[NB_PROFILE_BANDS]; |
91 |
|
|
double noise_band_avi[NB_PROFILE_BANDS]; |
92 |
|
|
double noise_band_var[NB_PROFILE_BANDS]; |
93 |
|
|
|
94 |
|
|
double noise_reduction; |
95 |
|
|
double last_noise_reduction; |
96 |
|
|
double noise_floor; |
97 |
|
|
double last_noise_floor; |
98 |
|
|
double residual_floor; |
99 |
|
|
double last_residual_floor; |
100 |
|
|
double max_gain; |
101 |
|
|
double max_var; |
102 |
|
|
double gain_scale; |
103 |
|
|
} DeNoiseChannel; |
104 |
|
|
|
105 |
|
|
typedef struct AudioFFTDeNoiseContext { |
106 |
|
|
const AVClass *class; |
107 |
|
|
|
108 |
|
|
int format; |
109 |
|
|
size_t sample_size; |
110 |
|
|
size_t complex_sample_size; |
111 |
|
|
|
112 |
|
|
float noise_reduction; |
113 |
|
|
float noise_floor; |
114 |
|
|
int noise_type; |
115 |
|
|
char *band_noise_str; |
116 |
|
|
float residual_floor; |
117 |
|
|
int track_noise; |
118 |
|
|
int track_residual; |
119 |
|
|
int output_mode; |
120 |
|
|
int noise_floor_link; |
121 |
|
|
float ratio; |
122 |
|
|
int gain_smooth; |
123 |
|
|
float band_multiplier; |
124 |
|
|
float floor_offset; |
125 |
|
|
|
126 |
|
|
int channels; |
127 |
|
|
int sample_noise; |
128 |
|
|
int sample_noise_blocks; |
129 |
|
|
int sample_noise_mode; |
130 |
|
|
float sample_rate; |
131 |
|
|
int buffer_length; |
132 |
|
|
int fft_length; |
133 |
|
|
int fft_length2; |
134 |
|
|
int bin_count; |
135 |
|
|
int window_length; |
136 |
|
|
int sample_advance; |
137 |
|
|
int number_of_bands; |
138 |
|
|
|
139 |
|
|
int band_centre[NB_PROFILE_BANDS]; |
140 |
|
|
|
141 |
|
|
int *bin2band; |
142 |
|
|
double *window; |
143 |
|
|
double *band_alpha; |
144 |
|
|
double *band_beta; |
145 |
|
|
|
146 |
|
|
DeNoiseChannel *dnch; |
147 |
|
|
|
148 |
|
|
AVFrame *winframe; |
149 |
|
|
|
150 |
|
|
double window_weight; |
151 |
|
|
double floor; |
152 |
|
|
double sample_floor; |
153 |
|
|
|
154 |
|
|
int noise_band_edge[NB_PROFILE_BANDS + 2]; |
155 |
|
|
int noise_band_count; |
156 |
|
|
double matrix_a[SOLVE_SIZE * SOLVE_SIZE]; |
157 |
|
|
double vector_b[SOLVE_SIZE]; |
158 |
|
|
double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS]; |
159 |
|
|
double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS]; |
160 |
|
|
} AudioFFTDeNoiseContext; |
161 |
|
|
|
162 |
|
|
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x) |
163 |
|
|
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
164 |
|
|
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM |
165 |
|
|
|
166 |
|
|
static const AVOption afftdn_options[] = { |
167 |
|
|
{ "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR }, |
168 |
|
|
{ "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR }, |
169 |
|
|
{ "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR }, |
170 |
|
|
{ "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR }, |
171 |
|
|
{ "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" }, |
172 |
|
|
{ "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" }, |
173 |
|
|
{ "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" }, |
174 |
|
|
{ "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" }, |
175 |
|
|
{ "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" }, |
176 |
|
|
{ "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" }, |
177 |
|
|
{ "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" }, |
178 |
|
|
{ "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" }, |
179 |
|
|
{ "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" }, |
180 |
|
|
{ "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" }, |
181 |
|
|
{ "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF }, |
182 |
|
|
{ "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF }, |
183 |
|
|
{ "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR }, |
184 |
|
|
{ "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR }, |
185 |
|
|
{ "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
186 |
|
|
{ "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
187 |
|
|
{ "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
188 |
|
|
{ "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR }, |
189 |
|
|
{ "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" }, |
190 |
|
|
{ "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" }, |
191 |
|
|
{ "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" }, |
192 |
|
|
{ "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" }, |
193 |
|
|
{ "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" }, |
194 |
|
|
{ "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" }, |
195 |
|
|
{ "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" }, |
196 |
|
|
{ "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" }, |
197 |
|
|
{ "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR }, |
198 |
|
|
{ "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR }, |
199 |
|
|
{ "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR }, |
200 |
|
|
{ "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR }, |
201 |
|
|
{ "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" }, |
202 |
|
|
{ "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" }, |
203 |
|
|
{ "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" }, |
204 |
|
|
{ "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" }, |
205 |
|
|
{ "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" }, |
206 |
|
|
{ "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" }, |
207 |
|
|
{ "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF }, |
208 |
|
|
{ "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF }, |
209 |
|
|
{ "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" }, |
210 |
|
|
{ "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" }, |
211 |
|
|
{ "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" }, |
212 |
|
|
{ "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" }, |
213 |
|
|
{ "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" }, |
214 |
|
|
{ "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" }, |
215 |
|
|
{ "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" }, |
216 |
|
|
{ "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR }, |
217 |
|
|
{ "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR }, |
218 |
|
|
{ NULL } |
219 |
|
|
}; |
220 |
|
|
|
221 |
|
|
AVFILTER_DEFINE_CLASS(afftdn); |
222 |
|
|
|
223 |
|
✗ |
/**
 * Evaluate the built-in noise profile curve at one profile band.
 *
 * With f = s->band_centre[band] the result is
 *   -10*log10(1 + (a/f)^2) + 10*log10(1 + (b/f)^2) - 10*log10(1 + (f/c)^2)
 *
 * @param s    filter context providing band_centre[]
 * @param band profile band index
 * @param a    first shaping constant (divided by the band centre)
 * @param b    second shaping constant (divided by the band centre)
 * @param c    third shaping constant (the band centre is divided by it)
 * @return value of the curve for this band
 */
static double get_band_noise(AudioFFTDeNoiseContext *s,
                             int band, double a,
                             double b, double c)
{
    const int centre = s->band_centre[band];
    const double ratio_a = a / centre;
    const double ratio_b = b / centre;
    const double ratio_c = centre / c;
    const double term_a = 10.0 * log(1.0 + ratio_a * ratio_a) / M_LN10;
    const double term_b = 10.0 * log(1.0 + ratio_b * ratio_b) / M_LN10;
    const double term_c = 10.0 * log(1.0 + ratio_c * ratio_c) / M_LN10;

    return term_b - term_a - term_c;
}
238 |
|
|
|
239 |
|
✗ |
/**
 * In-place LU factorisation (no pivoting) of a square matrix.
 *
 * Element (row, col) lives at array[row + col * size]. On return the part
 * below the diagonal holds the elimination multipliers and the rest holds the
 * upper-triangular factor, which is the layout solve() consumes.
 *
 * @param array size * size matrix, overwritten with its factors
 * @param size  matrix dimension
 */
static void factor(double *array, int size)
{
    for (int pivot = 0; pivot < size - 1; pivot++) {
        double *pivot_col = array + pivot * size;
        /* The diagonal entry is not written during this elimination step. */
        const double diag = pivot_col[pivot];

        for (int row = pivot + 1; row < size; row++) {
            const double mult = pivot_col[row] / diag;

            pivot_col[row] = mult;
            for (int col = pivot + 1; col < size; col++)
                array[row + col * size] -= mult * array[pivot + col * size];
        }
    }
}
252 |
|
|
|
253 |
|
✗ |
/**
 * Solve a linear system whose matrix was already processed by factor().
 *
 * @param matrix factored size * size matrix, element (row, col) at
 *               matrix[row + col * size]; not modified
 * @param vector right-hand side on entry, solution on return
 * @param size   system dimension
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Forward substitution using the multipliers stored below the diagonal. */
    for (int row = 1; row < size; row++)
        for (int col = 0; col < row; col++)
            vector[row] -= matrix[row + col * size] * vector[col];

    vector[last] /= matrix[size * size - 1];

    /* Back substitution through the upper-triangular part. */
    for (int row = last - 1; row >= 0; row--) {
        double acc = vector[row];

        for (int col = row + 1; col < size; col++)
            acc -= matrix[row + col * size] * vector[col];
        vector[row] = acc / matrix[row + row * size];
    }
}
271 |
|
|
|
272 |
|
✗ |
static double process_get_band_noise(AudioFFTDeNoiseContext *s, |
273 |
|
|
DeNoiseChannel *dnch, |
274 |
|
|
int band) |
275 |
|
|
{ |
276 |
|
|
double product, sum, f; |
277 |
|
✗ |
int i = 0; |
278 |
|
|
|
279 |
|
✗ |
if (band < NB_PROFILE_BANDS) |
280 |
|
✗ |
return dnch->band_noise[band]; |
281 |
|
|
|
282 |
|
✗ |
for (int j = 0; j < SOLVE_SIZE; j++) { |
283 |
|
✗ |
sum = 0.0; |
284 |
|
✗ |
for (int k = 0; k < NB_PROFILE_BANDS; k++) |
285 |
|
✗ |
sum += s->matrix_b[i++] * dnch->band_noise[k]; |
286 |
|
✗ |
s->vector_b[j] = sum; |
287 |
|
|
} |
288 |
|
|
|
289 |
|
✗ |
solve(s->matrix_a, s->vector_b, SOLVE_SIZE); |
290 |
|
✗ |
f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1]; |
291 |
|
✗ |
f = 15.0 + log(f / 1.5) / log(1.5); |
292 |
|
✗ |
sum = 0.0; |
293 |
|
✗ |
product = 1.0; |
294 |
|
✗ |
for (int j = 0; j < SOLVE_SIZE; j++) { |
295 |
|
✗ |
sum += product * s->vector_b[j]; |
296 |
|
✗ |
product *= f; |
297 |
|
|
} |
298 |
|
|
|
299 |
|
✗ |
return sum; |
300 |
|
|
} |
301 |
|
|
|
302 |
|
✗ |
/**
 * Soft-limit a gain value around unity.
 *
 * For a > 1 the result is (b*a - 1) / (b + a - 2); for a < 1 it is
 * (b*a - 2*a + 1) / (b - a). Any other input (exactly 1.0, or NaN)
 * yields 1.0.
 *
 * @param a gain to limit
 * @param b limit parameter
 * @return limited gain
 */
static double limit_gain(double a, double b)
{
    double numer, denom;

    if (a > 1.0) {
        numer = b * a - 1.0;
        denom = b + a - 2.0;
    } else if (a < 1.0) {
        numer = b * a - 2.0 * a + 1.0;
        denom = b - a;
    } else {
        return 1.0;
    }

    return numer / denom;
}
310 |
|
|
|
311 |
|
✗ |
/**
 * Compute the geometric and arithmetic means of all spectrum values that lie
 * strictly above a floor.
 *
 * If no value exceeds the floor, the divisor is clamped to 1, so *rnum becomes
 * exp(0) == 1 and *rden becomes 0.
 *
 * @param s        filter context (not used)
 * @param spectral input values
 * @param floor    values not greater than this are ignored
 * @param len      number of entries in spectral
 * @param rnum     receives the geometric mean
 * @param rden     receives the arithmetic mean
 */
static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
                              double floor, int len, double *rnum, double *rden)
{
    double log_sum = 0., lin_sum = 0.;
    int count = 0;

    for (int n = 0; n < len; n++) {
        const double value = spectral[n];

        if (!(value > floor))
            continue;

        log_sum += log(value);
        lin_sum += value;
        count++;
    }

    if (count < 1)
        count = 1;

    *rnum = exp(log_sum / count);
    *rden = lin_sum / count;
}
336 |
|
|
|
337 |
|
|
/* Forward declaration: process_frame() calls this before its definition. */
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
                           int update_var, int update_auto_var);
338 |
|
|
|
339 |
|
✗ |
/**
 * Largest absolute deviation of the values in S from mean, relative to mean.
 *
 * @param S    input values
 * @param size number of entries in S
 * @param mean reference mean of the values
 * @return max(|S[n] - mean|) / mean, or 0.0 when mean is zero
 */
static double floor_offset(const double *S, int size, double mean)
{
    double offset = 0.0;

    /*
     * process_frame() passes the arithmetic mean from spectral_flatness(),
     * which is exactly 0 when no bin exceeded the floor. Dividing by it would
     * produce inf/NaN, so report "no deviation" instead.
     */
    if (mean == 0.0)
        return 0.0;

    for (int n = 0; n < size; n++) {
        const double deviation = fabs(S[n] - mean);

        /* Same result as fmax(): a NaN deviation never replaces offset. */
        if (deviation > offset)
            offset = deviation;
    }

    return offset / mean;
}
351 |
|
|
|
352 |
|
✗ |
static void process_frame(AVFilterContext *ctx, |
353 |
|
|
AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, |
354 |
|
|
double *prior, double *prior_band_excit, int track_noise) |
355 |
|
|
{ |
356 |
|
✗ |
AVFilterLink *outlink = ctx->outputs[0]; |
357 |
|
✗ |
const double *abs_var = dnch->abs_var; |
358 |
|
✗ |
const double ratio = outlink->frame_count_out ? s->ratio : 1.0; |
359 |
|
✗ |
const double rratio = 1. - ratio; |
360 |
|
✗ |
const int *bin2band = s->bin2band; |
361 |
|
✗ |
double *noisy_data = dnch->noisy_data; |
362 |
|
✗ |
double *band_excit = dnch->band_excit; |
363 |
|
✗ |
double *band_amt = dnch->band_amt; |
364 |
|
✗ |
double *smoothed_gain = dnch->smoothed_gain; |
365 |
|
✗ |
AVComplexDouble *fft_data_dbl = dnch->fft_out; |
366 |
|
✗ |
AVComplexFloat *fft_data_flt = dnch->fft_out; |
367 |
|
✗ |
double *gain = dnch->gain; |
368 |
|
|
|
369 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
370 |
|
|
double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var; |
371 |
|
|
|
372 |
|
✗ |
switch (s->format) { |
373 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
374 |
|
✗ |
noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im); |
375 |
|
✗ |
break; |
376 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
377 |
|
✗ |
noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im); |
378 |
|
✗ |
break; |
379 |
|
|
} |
380 |
|
|
|
381 |
|
✗ |
power = mag * mag; |
382 |
|
✗ |
mag_abs_var = power / abs_var[i]; |
383 |
|
✗ |
new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0); |
384 |
|
✗ |
new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var); |
385 |
|
✗ |
sqr_new_gain = new_gain * new_gain; |
386 |
|
✗ |
prior[i] = mag_abs_var * sqr_new_gain; |
387 |
|
✗ |
dnch->clean_data[i] = power * sqr_new_gain; |
388 |
|
✗ |
gain[i] = new_gain; |
389 |
|
|
} |
390 |
|
|
|
391 |
|
✗ |
if (track_noise) { |
392 |
|
|
double flatness, num, den; |
393 |
|
|
|
394 |
|
✗ |
spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den); |
395 |
|
|
|
396 |
|
✗ |
flatness = num / den; |
397 |
|
✗ |
if (flatness > 0.8) { |
398 |
|
✗ |
const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den); |
399 |
|
✗ |
const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.); |
400 |
|
|
|
401 |
|
✗ |
dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9; |
402 |
|
✗ |
set_parameters(s, dnch, 1, 1); |
403 |
|
|
} |
404 |
|
|
} |
405 |
|
|
|
406 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
407 |
|
✗ |
band_excit[i] = 0.0; |
408 |
|
✗ |
band_amt[i] = 0.0; |
409 |
|
|
} |
410 |
|
|
|
411 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) |
412 |
|
✗ |
band_excit[bin2band[i]] += dnch->clean_data[i]; |
413 |
|
|
|
414 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
415 |
|
✗ |
band_excit[i] = fmax(band_excit[i], |
416 |
|
✗ |
s->band_alpha[i] * band_excit[i] + |
417 |
|
✗ |
s->band_beta[i] * prior_band_excit[i]); |
418 |
|
✗ |
prior_band_excit[i] = band_excit[i]; |
419 |
|
|
} |
420 |
|
|
|
421 |
|
✗ |
for (int j = 0, i = 0; j < s->number_of_bands; j++) { |
422 |
|
✗ |
for (int k = 0; k < s->number_of_bands; k++) { |
423 |
|
✗ |
band_amt[j] += dnch->spread_function[i++] * band_excit[k]; |
424 |
|
|
} |
425 |
|
|
} |
426 |
|
|
|
427 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) |
428 |
|
✗ |
dnch->amt[i] = band_amt[bin2band[i]]; |
429 |
|
|
|
430 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
431 |
|
✗ |
if (dnch->amt[i] > abs_var[i]) { |
432 |
|
✗ |
gain[i] = 1.0; |
433 |
|
✗ |
} else if (dnch->amt[i] > dnch->min_abs_var[i]) { |
434 |
|
✗ |
const double limit = sqrt(abs_var[i] / dnch->amt[i]); |
435 |
|
|
|
436 |
|
✗ |
gain[i] = limit_gain(gain[i], limit); |
437 |
|
|
} else { |
438 |
|
✗ |
gain[i] = limit_gain(gain[i], dnch->max_gain); |
439 |
|
|
} |
440 |
|
|
} |
441 |
|
|
|
442 |
|
✗ |
memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain)); |
443 |
|
✗ |
if (s->gain_smooth > 0) { |
444 |
|
✗ |
const int r = s->gain_smooth; |
445 |
|
|
|
446 |
|
✗ |
for (int i = r; i < s->bin_count - r; i++) { |
447 |
|
✗ |
const double gc = gain[i]; |
448 |
|
✗ |
double num = 0., den = 0.; |
449 |
|
|
|
450 |
|
✗ |
for (int j = -r; j <= r; j++) { |
451 |
|
✗ |
const double g = gain[i + j]; |
452 |
|
✗ |
const double d = 1. - fabs(g - gc); |
453 |
|
|
|
454 |
|
✗ |
num += g * d; |
455 |
|
✗ |
den += d; |
456 |
|
|
} |
457 |
|
|
|
458 |
|
✗ |
smoothed_gain[i] = num / den; |
459 |
|
|
} |
460 |
|
|
} |
461 |
|
|
|
462 |
|
✗ |
switch (s->format) { |
463 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
464 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
465 |
|
✗ |
const float new_gain = smoothed_gain[i]; |
466 |
|
|
|
467 |
|
✗ |
fft_data_flt[i].re *= new_gain; |
468 |
|
✗ |
fft_data_flt[i].im *= new_gain; |
469 |
|
|
} |
470 |
|
✗ |
break; |
471 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
472 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
473 |
|
✗ |
const double new_gain = smoothed_gain[i]; |
474 |
|
|
|
475 |
|
✗ |
fft_data_dbl[i].re *= new_gain; |
476 |
|
✗ |
fft_data_dbl[i].im *= new_gain; |
477 |
|
|
} |
478 |
|
✗ |
break; |
479 |
|
|
} |
480 |
|
✗ |
} |
481 |
|
|
|
482 |
|
✗ |
static double freq2bark(double x) |
483 |
|
|
{ |
484 |
|
✗ |
double d = x / 7500.0; |
485 |
|
|
|
486 |
|
✗ |
return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d); |
487 |
|
|
} |
488 |
|
|
|
489 |
|
✗ |
/**
 * Return the centre of a profile band.
 *
 * Index -1 stands for a virtual band located below the first one, at the first
 * band's centre divided by 1.5 (rounded).
 *
 * @param s    filter context providing band_centre[]
 * @param band profile band index, or -1
 * @return band centre
 */
static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
{
    return band == -1 ? lrint(s->band_centre[0] / 1.5)
                      : s->band_centre[band];
}
496 |
|
|
|
497 |
|
✗ |
/**
 * Return the lower edge of a profile band, capped at half the sample rate.
 *
 * The edge is the band centre divided by 1.224745 (about sqrt(1.5)). Index
 * NB_PROFILE_BANDS yields the upper edge of the last band instead, i.e. its
 * centre multiplied by the same factor.
 *
 * @param s    filter context
 * @param band profile band index in [0, NB_PROFILE_BANDS]
 * @return edge value, not above s->sample_rate / 2
 */
static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
{
    const int edge = band == NB_PROFILE_BANDS
                   ? lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745)
                   : lrint(s->band_centre[band] / 1.224745);

    return FFMIN(edge, s->sample_rate / 2);
}
509 |
|
|
|
510 |
|
✗ |
static void set_band_parameters(AudioFFTDeNoiseContext *s, |
511 |
|
|
DeNoiseChannel *dnch) |
512 |
|
|
{ |
513 |
|
|
double band_noise, d2, d3, d4, d5; |
514 |
|
✗ |
int i = 0, j = 0, k = 0; |
515 |
|
|
|
516 |
|
✗ |
d5 = 0.0; |
517 |
|
✗ |
band_noise = process_get_band_noise(s, dnch, 0); |
518 |
|
✗ |
for (int m = j; m < s->bin_count; m++) { |
519 |
|
✗ |
if (m == j) { |
520 |
|
✗ |
i = j; |
521 |
|
✗ |
d5 = band_noise; |
522 |
|
✗ |
if (k >= NB_PROFILE_BANDS) { |
523 |
|
✗ |
j = s->bin_count; |
524 |
|
|
} else { |
525 |
|
✗ |
j = s->fft_length * get_band_centre(s, k) / s->sample_rate; |
526 |
|
|
} |
527 |
|
✗ |
d2 = j - i; |
528 |
|
✗ |
band_noise = process_get_band_noise(s, dnch, k); |
529 |
|
✗ |
k++; |
530 |
|
|
} |
531 |
|
✗ |
d3 = (j - m) / d2; |
532 |
|
✗ |
d4 = (m - i) / d2; |
533 |
|
✗ |
dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C); |
534 |
|
|
} |
535 |
|
|
|
536 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
537 |
|
✗ |
dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); |
538 |
|
✗ |
} |
539 |
|
|
|
540 |
|
✗ |
/**
 * Parse the "band_noise" option string into the noise profile of one channel.
 *
 * The string holds up to NB_PROFILE_BANDS float values separated by '|' or
 * spaces. Each value is clipped to [-24, 24]. Bands without a value stay 0.
 * Parsing stops at the first token that is not a float (an error is logged);
 * whatever was parsed up to that point is still stored.
 *
 * Does nothing when no string was given or when duplicating it fails.
 *
 * @param s  filter context
 * @param ch index of the channel whose band_noise[] is written
 */
static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
{
    double parsed[NB_PROFILE_BANDS] = { 0.f };
    char *copy, *cursor, *state = NULL;

    if (!s->band_noise_str)
        return;

    /* av_strtok() modifies its input, so work on a private copy. */
    copy = av_strdup(s->band_noise_str);
    if (!copy)
        return;

    cursor = copy;
    for (int band = 0; band < NB_PROFILE_BANDS; band++) {
        char *token = av_strtok(cursor, "| ", &state);
        float value;

        if (!token)
            break;
        cursor = NULL;  /* subsequent calls continue from the saved state */

        if (av_sscanf(token, "%f", &value) != 1) {
            av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
            break;
        }

        parsed[band] = av_clipd(value, -24., 24.);
    }

    av_free(copy);
    memcpy(s->dnch[ch].band_noise, parsed, sizeof(parsed));
}
574 |
|
|
|
575 |
|
✗ |
/**
 * Recompute the derived per-channel parameters from the current settings.
 *
 * Always refreshes last_noise_floor, max_var and gain_scale. The noise
 * reduction / residual floor pair and max_gain are recomputed when requested
 * or when the controlling setting changed; in that case the per-bin variance
 * tables are rebuilt as well.
 *
 * @param s               filter context
 * @param dnch            channel to update
 * @param update_var      non-zero to force rebuilding the per-bin tables
 * @param update_auto_var non-zero to recompute noise_band_auto_var[]
 */
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
{
    int rebuild = update_var;
    int changed;

    if (dnch->last_noise_floor != dnch->noise_floor)
        dnch->last_noise_floor = dnch->noise_floor;

    /* With residual tracking the noise floor never drops below the residual floor. */
    if (s->track_residual)
        dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);

    dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);

    if (update_auto_var) {
        for (int band = 0; band < NB_PROFILE_BANDS; band++)
            dnch->noise_band_auto_var[band] =
                dnch->max_var * exp((process_get_band_noise(s, dnch, band) - 2.0) * C);
    }

    if (s->track_residual) {
        /* The residual floor drives the noise reduction. */
        changed = rebuild || dnch->last_residual_floor != dnch->residual_floor;
        if (changed) {
            dnch->last_residual_floor  = dnch->residual_floor;
            dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
        }
    } else {
        /* The noise reduction drives the residual floor. */
        changed = rebuild || dnch->noise_reduction != dnch->last_noise_reduction;
        if (changed) {
            dnch->last_noise_reduction = dnch->noise_reduction;
            dnch->last_residual_floor  = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
        }
    }

    if (changed) {
        rebuild = 1;
        dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
    }

    dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);

    if (!rebuild)
        return;

    set_band_parameters(s, dnch);

    for (int bin = 0; bin < s->bin_count; bin++) {
        dnch->abs_var[bin]     = fmax(dnch->max_var * dnch->rel_var[bin], 1.0);
        dnch->min_abs_var[bin] = dnch->gain_scale * dnch->abs_var[bin];
    }
}
614 |
|
|
|
615 |
|
✗ |
static void reduce_mean(double *band_noise) |
616 |
|
|
{ |
617 |
|
✗ |
double mean = 0.f; |
618 |
|
|
|
619 |
|
✗ |
for (int i = 0; i < NB_PROFILE_BANDS; i++) |
620 |
|
✗ |
mean += band_noise[i]; |
621 |
|
✗ |
mean /= NB_PROFILE_BANDS; |
622 |
|
|
|
623 |
|
✗ |
for (int i = 0; i < NB_PROFILE_BANDS; i++) |
624 |
|
✗ |
band_noise[i] -= mean; |
625 |
|
✗ |
} |
626 |
|
|
|
627 |
|
✗ |
static int config_input(AVFilterLink *inlink) |
628 |
|
|
{ |
629 |
|
✗ |
AVFilterContext *ctx = inlink->dst; |
630 |
|
✗ |
AudioFFTDeNoiseContext *s = ctx->priv; |
631 |
|
|
double wscale, sar, sum, sdiv; |
632 |
|
|
int i, j, k, m, n, ret, tx_type; |
633 |
|
✗ |
double dscale = 1.; |
634 |
|
✗ |
float fscale = 1.f; |
635 |
|
|
void *scale; |
636 |
|
|
|
637 |
|
✗ |
s->format = inlink->format; |
638 |
|
|
|
639 |
|
✗ |
switch (s->format) { |
640 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
641 |
|
✗ |
s->sample_size = sizeof(float); |
642 |
|
✗ |
s->complex_sample_size = sizeof(AVComplexFloat); |
643 |
|
✗ |
tx_type = AV_TX_FLOAT_RDFT; |
644 |
|
✗ |
scale = &fscale; |
645 |
|
✗ |
break; |
646 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
647 |
|
✗ |
s->sample_size = sizeof(double); |
648 |
|
✗ |
s->complex_sample_size = sizeof(AVComplexDouble); |
649 |
|
✗ |
tx_type = AV_TX_DOUBLE_RDFT; |
650 |
|
✗ |
scale = &dscale; |
651 |
|
✗ |
break; |
652 |
|
|
} |
653 |
|
|
|
654 |
|
✗ |
s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch)); |
655 |
|
✗ |
if (!s->dnch) |
656 |
|
✗ |
return AVERROR(ENOMEM); |
657 |
|
|
|
658 |
|
✗ |
s->channels = inlink->ch_layout.nb_channels; |
659 |
|
✗ |
s->sample_rate = inlink->sample_rate; |
660 |
|
✗ |
s->sample_advance = s->sample_rate / 80; |
661 |
|
✗ |
s->window_length = 3 * s->sample_advance; |
662 |
|
✗ |
s->fft_length2 = 1 << (32 - ff_clz(s->window_length)); |
663 |
|
✗ |
s->fft_length = s->fft_length2; |
664 |
|
✗ |
s->buffer_length = s->fft_length * 2; |
665 |
|
✗ |
s->bin_count = s->fft_length2 / 2 + 1; |
666 |
|
|
|
667 |
|
✗ |
s->band_centre[0] = 80; |
668 |
|
✗ |
for (i = 1; i < NB_PROFILE_BANDS; i++) { |
669 |
|
✗ |
s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0); |
670 |
|
✗ |
if (s->band_centre[i] < 1000) { |
671 |
|
✗ |
s->band_centre[i] = 10 * (s->band_centre[i] / 10); |
672 |
|
✗ |
} else if (s->band_centre[i] < 5000) { |
673 |
|
✗ |
s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50); |
674 |
|
✗ |
} else if (s->band_centre[i] < 15000) { |
675 |
|
✗ |
s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100); |
676 |
|
|
} else { |
677 |
|
✗ |
s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000); |
678 |
|
|
} |
679 |
|
|
} |
680 |
|
|
|
681 |
|
✗ |
for (j = 0; j < SOLVE_SIZE; j++) { |
682 |
|
✗ |
for (k = 0; k < SOLVE_SIZE; k++) { |
683 |
|
✗ |
s->matrix_a[j + k * SOLVE_SIZE] = 0.0; |
684 |
|
✗ |
for (m = 0; m < NB_PROFILE_BANDS; m++) |
685 |
|
✗ |
s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k); |
686 |
|
|
} |
687 |
|
|
} |
688 |
|
|
|
689 |
|
✗ |
factor(s->matrix_a, SOLVE_SIZE); |
690 |
|
|
|
691 |
|
✗ |
i = 0; |
692 |
|
✗ |
for (j = 0; j < SOLVE_SIZE; j++) |
693 |
|
✗ |
for (k = 0; k < NB_PROFILE_BANDS; k++) |
694 |
|
✗ |
s->matrix_b[i++] = pow(k, j); |
695 |
|
|
|
696 |
|
✗ |
i = 0; |
697 |
|
✗ |
for (j = 0; j < NB_PROFILE_BANDS; j++) |
698 |
|
✗ |
for (k = 0; k < SOLVE_SIZE; k++) |
699 |
|
✗ |
s->matrix_c[i++] = pow(j, k); |
700 |
|
|
|
701 |
|
✗ |
s->window = av_calloc(s->window_length, sizeof(*s->window)); |
702 |
|
✗ |
s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band)); |
703 |
|
✗ |
if (!s->window || !s->bin2band) |
704 |
|
✗ |
return AVERROR(ENOMEM); |
705 |
|
|
|
706 |
|
✗ |
sdiv = s->band_multiplier; |
707 |
|
✗ |
for (i = 0; i < s->bin_count; i++) |
708 |
|
✗ |
s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2)); |
709 |
|
|
|
710 |
|
✗ |
s->number_of_bands = s->bin2band[s->bin_count - 1] + 1; |
711 |
|
|
|
712 |
|
✗ |
s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha)); |
713 |
|
✗ |
s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta)); |
714 |
|
✗ |
if (!s->band_alpha || !s->band_beta) |
715 |
|
✗ |
return AVERROR(ENOMEM); |
716 |
|
|
|
717 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
718 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
719 |
|
|
|
720 |
|
✗ |
switch (s->noise_type) { |
721 |
|
✗ |
case WHITE_NOISE: |
722 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
723 |
|
✗ |
dnch->band_noise[i] = 0.; |
724 |
|
✗ |
break; |
725 |
|
✗ |
case VINYL_NOISE: |
726 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
727 |
|
✗ |
dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0); |
728 |
|
✗ |
break; |
729 |
|
✗ |
case SHELLAC_NOISE: |
730 |
|
✗ |
for (i = 0; i < NB_PROFILE_BANDS; i++) |
731 |
|
✗ |
dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10); |
732 |
|
✗ |
break; |
733 |
|
✗ |
case CUSTOM_NOISE: |
734 |
|
✗ |
read_custom_noise(s, ch); |
735 |
|
✗ |
break; |
736 |
|
✗ |
default: |
737 |
|
✗ |
return AVERROR_BUG; |
738 |
|
|
} |
739 |
|
|
|
740 |
|
✗ |
reduce_mean(dnch->band_noise); |
741 |
|
|
|
742 |
|
✗ |
dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt)); |
743 |
|
✗ |
dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt)); |
744 |
|
✗ |
dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit)); |
745 |
|
✗ |
dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain)); |
746 |
|
✗ |
dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain)); |
747 |
|
✗ |
dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior)); |
748 |
|
✗ |
dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit)); |
749 |
|
✗ |
dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data)); |
750 |
|
✗ |
dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data)); |
751 |
|
✗ |
dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples)); |
752 |
|
✗ |
dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var)); |
753 |
|
✗ |
dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var)); |
754 |
|
✗ |
dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var)); |
755 |
|
✗ |
dnch->fft_in = av_calloc(s->fft_length2, s->sample_size); |
756 |
|
✗ |
dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size); |
757 |
|
✗ |
ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0); |
758 |
|
✗ |
if (ret < 0) |
759 |
|
✗ |
return ret; |
760 |
|
✗ |
ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0); |
761 |
|
✗ |
if (ret < 0) |
762 |
|
✗ |
return ret; |
763 |
|
✗ |
dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands, |
764 |
|
|
sizeof(*dnch->spread_function)); |
765 |
|
|
|
766 |
|
✗ |
if (!dnch->amt || |
767 |
|
✗ |
!dnch->band_amt || |
768 |
|
✗ |
!dnch->band_excit || |
769 |
|
✗ |
!dnch->gain || |
770 |
|
✗ |
!dnch->smoothed_gain || |
771 |
|
✗ |
!dnch->prior || |
772 |
|
✗ |
!dnch->prior_band_excit || |
773 |
|
✗ |
!dnch->clean_data || |
774 |
|
✗ |
!dnch->noisy_data || |
775 |
|
✗ |
!dnch->out_samples || |
776 |
|
✗ |
!dnch->fft_in || |
777 |
|
✗ |
!dnch->fft_out || |
778 |
|
✗ |
!dnch->abs_var || |
779 |
|
✗ |
!dnch->rel_var || |
780 |
|
✗ |
!dnch->min_abs_var || |
781 |
|
✗ |
!dnch->spread_function || |
782 |
|
✗ |
!dnch->fft || |
783 |
|
✗ |
!dnch->ifft) |
784 |
|
✗ |
return AVERROR(ENOMEM); |
785 |
|
|
} |
786 |
|
|
|
787 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
788 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
789 |
|
✗ |
double *prior_band_excit = dnch->prior_band_excit; |
790 |
|
|
double min, max; |
791 |
|
|
double p1, p2; |
792 |
|
|
|
793 |
|
✗ |
p1 = pow(0.1, 2.5 / sdiv); |
794 |
|
✗ |
p2 = pow(0.1, 1.0 / sdiv); |
795 |
|
✗ |
j = 0; |
796 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
797 |
|
✗ |
for (n = 0; n < s->number_of_bands; n++) { |
798 |
|
✗ |
if (n < m) { |
799 |
|
✗ |
dnch->spread_function[j++] = pow(p2, m - n); |
800 |
|
✗ |
} else if (n > m) { |
801 |
|
✗ |
dnch->spread_function[j++] = pow(p1, n - m); |
802 |
|
|
} else { |
803 |
|
✗ |
dnch->spread_function[j++] = 1.0; |
804 |
|
|
} |
805 |
|
|
} |
806 |
|
|
} |
807 |
|
|
|
808 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
809 |
|
✗ |
dnch->band_excit[m] = 0.0; |
810 |
|
✗ |
prior_band_excit[m] = 0.0; |
811 |
|
|
} |
812 |
|
|
|
813 |
|
✗ |
for (m = 0; m < s->bin_count; m++) |
814 |
|
✗ |
dnch->band_excit[s->bin2band[m]] += 1.0; |
815 |
|
|
|
816 |
|
✗ |
j = 0; |
817 |
|
✗ |
for (m = 0; m < s->number_of_bands; m++) { |
818 |
|
✗ |
for (n = 0; n < s->number_of_bands; n++) |
819 |
|
✗ |
prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n]; |
820 |
|
|
} |
821 |
|
|
|
822 |
|
✗ |
min = pow(0.1, 2.5); |
823 |
|
✗ |
max = pow(0.1, 1.0); |
824 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) { |
825 |
|
✗ |
if (i < lrint(12.0 * sdiv)) { |
826 |
|
✗ |
dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv); |
827 |
|
|
} else { |
828 |
|
✗ |
dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0)); |
829 |
|
|
} |
830 |
|
✗ |
dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max); |
831 |
|
|
} |
832 |
|
|
|
833 |
|
✗ |
for (int i = 0; i < s->buffer_length; i++) |
834 |
|
✗ |
dnch->out_samples[i] = 0; |
835 |
|
|
|
836 |
|
✗ |
j = 0; |
837 |
|
✗ |
for (int i = 0; i < s->number_of_bands; i++) |
838 |
|
✗ |
for (int k = 0; k < s->number_of_bands; k++) |
839 |
|
✗ |
dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i]; |
840 |
|
|
} |
841 |
|
|
|
842 |
|
✗ |
j = 0; |
843 |
|
✗ |
sar = s->sample_advance / s->sample_rate; |
844 |
|
✗ |
for (int i = 0; i < s->bin_count; i++) { |
845 |
|
✗ |
if ((i == s->fft_length2) || (s->bin2band[i] > j)) { |
846 |
|
✗ |
double d6 = (i - 1) * s->sample_rate / s->fft_length; |
847 |
|
✗ |
double d7 = fmin(0.008 + 2.2 / d6, 0.03); |
848 |
|
✗ |
s->band_alpha[j] = exp(-sar / d7); |
849 |
|
✗ |
s->band_beta[j] = 1.0 - s->band_alpha[j]; |
850 |
|
✗ |
j = s->bin2band[i]; |
851 |
|
|
} |
852 |
|
|
} |
853 |
|
|
|
854 |
|
✗ |
s->winframe = ff_get_audio_buffer(inlink, s->window_length); |
855 |
|
✗ |
if (!s->winframe) |
856 |
|
✗ |
return AVERROR(ENOMEM); |
857 |
|
|
|
858 |
|
✗ |
wscale = sqrt(8.0 / (9.0 * s->fft_length)); |
859 |
|
✗ |
sum = 0.0; |
860 |
|
✗ |
for (int i = 0; i < s->window_length; i++) { |
861 |
|
✗ |
double d10 = sin(i * M_PI / s->window_length); |
862 |
|
✗ |
d10 *= wscale * d10; |
863 |
|
✗ |
s->window[i] = d10; |
864 |
|
✗ |
sum += d10 * d10; |
865 |
|
|
} |
866 |
|
|
|
867 |
|
✗ |
s->window_weight = 0.5 * sum; |
868 |
|
✗ |
s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight; |
869 |
|
✗ |
s->sample_floor = s->floor * exp(4.144600506562284); |
870 |
|
|
|
871 |
|
✗ |
for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { |
872 |
|
✗ |
DeNoiseChannel *dnch = &s->dnch[ch]; |
873 |
|
|
|
874 |
|
✗ |
dnch->noise_reduction = s->noise_reduction; |
875 |
|
✗ |
dnch->noise_floor = s->noise_floor; |
876 |
|
✗ |
dnch->residual_floor = s->residual_floor; |
877 |
|
|
|
878 |
|
✗ |
set_parameters(s, dnch, 1, 1); |
879 |
|
|
} |
880 |
|
|
|
881 |
|
✗ |
s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate); |
882 |
|
✗ |
i = 0; |
883 |
|
✗ |
for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) { |
884 |
|
✗ |
s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate); |
885 |
|
✗ |
if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1])) |
886 |
|
✗ |
i++; |
887 |
|
✗ |
s->noise_band_edge[NB_PROFILE_BANDS + 1] = i; |
888 |
|
|
} |
889 |
|
✗ |
s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1]; |
890 |
|
|
|
891 |
|
✗ |
return 0; |
892 |
|
|
} |
893 |
|
|
|
894 |
|
✗ |
/**
 * Clear the noise sampling accumulators of one channel.
 *
 * Every entry of noise_band_norm, noise_band_avr, noise_band_avi and
 * noise_band_var for the NB_PROFILE_BANDS profile bands is set to zero.
 *
 * @param dnch per-channel state whose accumulators are reset
 */
static void init_sample_noise(DeNoiseChannel *dnch)
{
    int band = NB_PROFILE_BANDS;

    while (band-- > 0) {
        dnch->noise_band_var[band]  = 0.0;
        dnch->noise_band_avi[band]  = 0.0;
        dnch->noise_band_avr[band]  = 0.0;
        dnch->noise_band_norm[band] = 0.0;
    }
}
903 |
|
|
|
904 |
|
✗ |
static void sample_noise_block(AudioFFTDeNoiseContext *s, |
905 |
|
|
DeNoiseChannel *dnch, |
906 |
|
|
AVFrame *in, int ch) |
907 |
|
|
{ |
908 |
|
✗ |
double *src_dbl = (double *)in->extended_data[ch]; |
909 |
|
✗ |
float *src_flt = (float *)in->extended_data[ch]; |
910 |
|
✗ |
double mag2, var = 0.0, avr = 0.0, avi = 0.0; |
911 |
|
✗ |
AVComplexDouble *fft_out_dbl = dnch->fft_out; |
912 |
|
✗ |
AVComplexFloat *fft_out_flt = dnch->fft_out; |
913 |
|
✗ |
double *fft_in_dbl = dnch->fft_in; |
914 |
|
✗ |
float *fft_in_flt = dnch->fft_in; |
915 |
|
|
int edge, j, k, n, edgemax; |
916 |
|
|
|
917 |
|
✗ |
switch (s->format) { |
918 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
919 |
|
✗ |
for (int i = 0; i < s->window_length; i++) |
920 |
|
✗ |
fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23); |
921 |
|
|
|
922 |
|
✗ |
for (int i = s->window_length; i < s->fft_length2; i++) |
923 |
|
✗ |
fft_in_flt[i] = 0.f; |
924 |
|
✗ |
break; |
925 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
926 |
|
✗ |
for (int i = 0; i < s->window_length; i++) |
927 |
|
✗ |
fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23); |
928 |
|
|
|
929 |
|
✗ |
for (int i = s->window_length; i < s->fft_length2; i++) |
930 |
|
✗ |
fft_in_dbl[i] = 0.; |
931 |
|
✗ |
break; |
932 |
|
|
} |
933 |
|
|
|
934 |
|
✗ |
dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size); |
935 |
|
|
|
936 |
|
✗ |
edge = s->noise_band_edge[0]; |
937 |
|
✗ |
j = edge; |
938 |
|
✗ |
k = 0; |
939 |
|
✗ |
n = j; |
940 |
|
✗ |
edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]); |
941 |
|
✗ |
for (int i = j; i <= edgemax; i++) { |
942 |
|
✗ |
if ((i == j) && (i < edgemax)) { |
943 |
|
✗ |
if (j > edge) { |
944 |
|
✗ |
dnch->noise_band_norm[k - 1] += j - edge; |
945 |
|
✗ |
dnch->noise_band_avr[k - 1] += avr; |
946 |
|
✗ |
dnch->noise_band_avi[k - 1] += avi; |
947 |
|
✗ |
dnch->noise_band_var[k - 1] += var; |
948 |
|
|
} |
949 |
|
✗ |
k++; |
950 |
|
✗ |
edge = j; |
951 |
|
✗ |
j = s->noise_band_edge[k]; |
952 |
|
✗ |
if (k == NB_PROFILE_BANDS) { |
953 |
|
✗ |
j++; |
954 |
|
|
} |
955 |
|
✗ |
var = 0.0; |
956 |
|
✗ |
avr = 0.0; |
957 |
|
✗ |
avi = 0.0; |
958 |
|
|
} |
959 |
|
|
|
960 |
|
✗ |
switch (s->format) { |
961 |
|
✗ |
case AV_SAMPLE_FMT_FLTP: |
962 |
|
✗ |
avr += fft_out_flt[n].re; |
963 |
|
✗ |
avi += fft_out_flt[n].im; |
964 |
|
✗ |
mag2 = fft_out_flt[n].re * fft_out_flt[n].re + |
965 |
|
✗ |
fft_out_flt[n].im * fft_out_flt[n].im; |
966 |
|
✗ |
break; |
967 |
|
✗ |
case AV_SAMPLE_FMT_DBLP: |
968 |
|
✗ |
avr += fft_out_dbl[n].re; |
969 |
|
✗ |
avi += fft_out_dbl[n].im; |
970 |
|
✗ |
mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re + |
971 |
|
✗ |
fft_out_dbl[n].im * fft_out_dbl[n].im; |
972 |
|
✗ |
break; |
973 |
|
|
} |
974 |
|
|
|
975 |
|
✗ |
mag2 = fmax(mag2, s->sample_floor); |
976 |
|
|
|
977 |
|
✗ |
var += mag2; |
978 |
|
✗ |
n++; |
979 |
|
|
} |
980 |
|
|
|
981 |
|
✗ |
dnch->noise_band_norm[k - 1] += j - edge; |
982 |
|
✗ |
dnch->noise_band_avr[k - 1] += avr; |
983 |
|
✗ |
dnch->noise_band_avi[k - 1] += avi; |
984 |
|
✗ |
dnch->noise_band_var[k - 1] += var; |
985 |
|
✗ |
} |
986 |
|
|
|
987 |
|
✗ |
static void finish_sample_noise(AudioFFTDeNoiseContext *s, |
988 |
|
|
DeNoiseChannel *dnch, |
989 |
|
|
double *sample_noise) |
990 |
|
|
{ |
991 |
|
✗ |
for (int i = 0; i < s->noise_band_count; i++) { |
992 |
|
✗ |
dnch->noise_band_avr[i] /= dnch->noise_band_norm[i]; |
993 |
|
✗ |
dnch->noise_band_avi[i] /= dnch->noise_band_norm[i]; |
994 |
|
✗ |
dnch->noise_band_var[i] /= dnch->noise_band_norm[i]; |
995 |
|
✗ |
dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] + |
996 |
|
✗ |
dnch->noise_band_avi[i] * dnch->noise_band_avi[i]; |
997 |
|
✗ |
dnch->noise_band_auto_var[i] = dnch->noise_band_var[i]; |
998 |
|
✗ |
sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0; |
999 |
|
|
} |
1000 |
|
✗ |
if (s->noise_band_count < NB_PROFILE_BANDS) { |
1001 |
|
✗ |
for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++) |
1002 |
|
✗ |
sample_noise[i] = sample_noise[i - 1]; |
1003 |
|
|
} |
1004 |
|
✗ |
} |
1005 |
|
|
|
1006 |
|
✗ |
static void set_noise_profile(AudioFFTDeNoiseContext *s, |
1007 |
|
|
DeNoiseChannel *dnch, |
1008 |
|
|
double *sample_noise) |
1009 |
|
|
{ |
1010 |
|
|
double new_band_noise[NB_PROFILE_BANDS]; |
1011 |
|
|
double temp[NB_PROFILE_BANDS]; |
1012 |
|
✗ |
double sum = 0.0; |
1013 |
|
|
|
1014 |
|
✗ |
for (int m = 0; m < NB_PROFILE_BANDS; m++) |
1015 |
|
✗ |
temp[m] = sample_noise[m]; |
1016 |
|
|
|
1017 |
|
✗ |
for (int m = 0, i = 0; m < SOLVE_SIZE; m++) { |
1018 |
|
✗ |
sum = 0.0; |
1019 |
|
✗ |
for (int n = 0; n < NB_PROFILE_BANDS; n++) |
1020 |
|
✗ |
sum += s->matrix_b[i++] * temp[n]; |
1021 |
|
✗ |
s->vector_b[m] = sum; |
1022 |
|
|
} |
1023 |
|
✗ |
solve(s->matrix_a, s->vector_b, SOLVE_SIZE); |
1024 |
|
✗ |
for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) { |
1025 |
|
✗ |
sum = 0.0; |
1026 |
|
✗ |
for (int n = 0; n < SOLVE_SIZE; n++) |
1027 |
|
✗ |
sum += s->matrix_c[i++] * s->vector_b[n]; |
1028 |
|
✗ |
temp[m] = sum; |
1029 |
|
|
} |
1030 |
|
|
|
1031 |
|
✗ |
reduce_mean(temp); |
1032 |
|
|
|
1033 |
|
✗ |
av_log(s, AV_LOG_INFO, "bn="); |
1034 |
|
✗ |
for (int m = 0; m < NB_PROFILE_BANDS; m++) { |
1035 |
|
✗ |
new_band_noise[m] = temp[m]; |
1036 |
|
✗ |
new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0); |
1037 |
|
✗ |
av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]); |
1038 |
|
|
} |
1039 |
|
✗ |
av_log(s, AV_LOG_INFO, "\n"); |
1040 |
|
✗ |
memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise)); |
1041 |
|
✗ |
} |
1042 |
|
|
|
1043 |
|
✗ |
/**
 * Threaded job: run the denoiser over this job's share of the channels.
 *
 * The channels of the frame passed in arg are split evenly between nb_jobs
 * jobs. For each channel handled here the window is applied (scaled by
 * 2^23 and zero padded to s->fft_length2), the forward transform,
 * process_frame() and the inverse transform are run, and the windowed,
 * rescaled result is added into dnch->out_samples.
 *
 * @param ctx     filter context
 * @param arg     AVFrame holding the analysis window of every channel
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs
 * @return always 0
 */
static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFrame *in = arg;
    const int first = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
    const int last = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
    const int window_length = s->window_length;
    const double *window = s->window;

    for (int ch = first; ch < last; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *accum = dnch->out_samples;

        /* Analysis: window the input and zero pad. */
        if (s->format == AV_SAMPLE_FMT_FLTP) {
            const float *samples = (const float *)in->extended_data[ch];
            float *buf = dnch->fft_in;

            for (int m = 0; m < window_length; m++)
                buf[m] = window[m] * samples[m] * (1LL << 23);

            for (int m = window_length; m < s->fft_length2; m++)
                buf[m] = 0.f;
        } else if (s->format == AV_SAMPLE_FMT_DBLP) {
            const double *samples = (const double *)in->extended_data[ch];
            double *buf = dnch->fft_in;

            for (int m = 0; m < window_length; m++)
                buf[m] = window[m] * samples[m] * (1LL << 23);

            for (int m = window_length; m < s->fft_length2; m++)
                buf[m] = 0.;
        }

        dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);

        process_frame(ctx, s, dnch,
                      dnch->prior,
                      dnch->prior_band_excit,
                      s->track_noise);

        dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);

        /* Synthesis: window again, undo the 2^23 scale and accumulate. */
        if (s->format == AV_SAMPLE_FMT_FLTP) {
            const float *buf = dnch->fft_in;

            for (int m = 0; m < window_length; m++)
                accum[m] += window[m] * buf[m] / (1LL << 23);
        } else if (s->format == AV_SAMPLE_FMT_DBLP) {
            const double *buf = dnch->fft_in;

            for (int m = 0; m < window_length; m++)
                accum[m] += window[m] * buf[m] / (1LL << 23);
        }
    }

    return 0;
}
1100 |
|
|
|
1101 |
|
✗ |
/**
 * Process one block of consumed input and send the matching output frame.
 *
 * The new samples are appended to the sliding window kept in s->winframe
 * (zero filled if the frame is shorter than s->sample_advance). Optional
 * noise floor linking and noise profile sampling are handled, every channel
 * is run through filter_channel(), and the first out->nb_samples values are
 * written according to the output mode: the buffered input, the denoised
 * signal from dnch->out_samples, or their difference. Each channel's
 * out_samples buffer is then shifted by s->sample_advance.
 *
 * @param inlink input link the frame belongs to
 * @param in     consumed frame; reused as output when writable, otherwise
 *               freed here
 * @return the value of ff_filter_frame(), AVERROR(ENOMEM) when no output
 *         buffer can be obtained, AVERROR_BUG on an unknown output mode
 */
static int output_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    AudioFFTDeNoiseContext *s = ctx->priv;
    const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
    const int offset = s->window_length - s->sample_advance;
    const int nb_channels = inlink->ch_layout.nb_channels;
    AVFrame *out;
    int nb_threads;

    /* Slide the analysis window and append the new samples. */
    for (int ch = 0; ch < s->channels; ch++) {
        uint8_t *win = (uint8_t *)s->winframe->extended_data[ch];

        memmove(win, win + s->sample_advance * s->sample_size,
                offset * s->sample_size);
        memcpy(win + offset * s->sample_size, in->extended_data[ch],
               in->nb_samples * s->sample_size);
        memset(win + s->sample_size * (offset + in->nb_samples), 0,
               (s->sample_advance - in->nb_samples) * s->sample_size);
    }

    /* Optionally tie the tracked noise floors of all channels together. */
    if (s->track_noise) {
        double total = 0.0, lowest = DBL_MAX, highest = -DBL_MAX;
        double average;

        for (int ch = 0; ch < nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            total  += dnch->noise_floor;
            highest = fmax(highest, dnch->noise_floor);
            lowest  = fmin(lowest, dnch->noise_floor);
        }

        average = total / nb_channels;

        for (int ch = 0; ch < nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            if (s->noise_floor_link == MIN_LINK)
                dnch->noise_floor = lowest;
            else if (s->noise_floor_link == MAX_LINK)
                dnch->noise_floor = highest;
            else if (s->noise_floor_link == AVERAGE_LINK)
                dnch->noise_floor = average;
            /* NONE_LINK and anything else: keep the channel's own floor. */

            if (dnch->noise_floor != dnch->last_noise_floor)
                set_parameters(s, dnch, 1, 0);
        }
    }

    /* Start of noise sampling: reset the accumulators. */
    if (s->sample_noise_mode == SAMPLE_START) {
        for (int ch = 0; ch < nb_channels; ch++)
            init_sample_noise(&s->dnch[ch]);

        s->sample_noise_mode = SAMPLE_NONE;
        s->sample_noise = 1;
        s->sample_noise_blocks = 0;
    }

    /* While sampling, feed the current window into the accumulators. */
    if (s->sample_noise) {
        for (int ch = 0; ch < nb_channels; ch++)
            sample_noise_block(s, &s->dnch[ch], s->winframe, ch);

        s->sample_noise_blocks++;
    }

    /* End of noise sampling: build the profile if any block was sampled. */
    if (s->sample_noise_mode == SAMPLE_STOP) {
        for (int ch = 0; ch < nb_channels && s->sample_noise_blocks > 0; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];
            double sample_noise[NB_PROFILE_BANDS];

            finish_sample_noise(s, dnch, sample_noise);
            set_noise_profile(s, dnch, sample_noise);
            set_parameters(s, dnch, 1, 1);
        }

        s->sample_noise = 0;
        s->sample_noise_blocks = 0;
        s->sample_noise_mode = SAMPLE_NONE;
    }

    nb_threads = ff_filter_get_nb_threads(ctx);
    ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
                      FFMIN(outlink->ch_layout.nb_channels, nb_threads));

    /* Write in place when possible, otherwise into a fresh buffer. */
    out = in;
    if (!av_frame_is_writable(in)) {
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        av_frame_copy_props(out, in);
    }

    for (int ch = 0; ch < nb_channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *denoised = dnch->out_samples;
        const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
        const float *orig_flt = (const float *)s->winframe->extended_data[ch];
        double *dst_dbl = (double *)out->extended_data[ch];
        float *dst_flt = (float *)out->extended_data[ch];
        const int is_flt = s->format == AV_SAMPLE_FMT_FLTP;
        const int is_dbl = s->format == AV_SAMPLE_FMT_DBLP;

        switch (output_mode) {
        case IN_MODE:
            if (is_flt) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = orig_flt[m];
            } else if (is_dbl) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = orig_dbl[m];
            }
            break;
        case OUT_MODE:
            if (is_flt) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = denoised[m];
            } else if (is_dbl) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = denoised[m];
            }
            break;
        case NOISE_MODE:
            if (is_flt) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_flt[m] = orig_flt[m] - denoised[m];
            } else if (is_dbl) {
                for (int m = 0; m < out->nb_samples; m++)
                    dst_dbl[m] = orig_dbl[m] - denoised[m];
            }
            break;
        default:
            if (in != out)
                av_frame_free(&in);
            av_frame_free(&out);
            return AVERROR_BUG;
        }

        /* Drop the samples just emitted and clear the freed tail. */
        memmove(denoised, denoised + s->sample_advance, offset * sizeof(*denoised));
        memset(denoised + offset, 0, s->sample_advance * sizeof(*denoised));
    }

    if (out != in)
        av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
1262 |
|
|
|
1263 |
|
✗ |
/**
 * Activation callback: consume one block of s->sample_advance samples and
 * filter it, or report what is needed to make progress.
 *
 * @param ctx filter context
 * @return a negative error from sample consumption, the result of
 *         output_frame() when a block was consumed, 0 after rescheduling
 *         because enough samples are already queued, or FFERROR_NOT_READY
 *         (the FF_FILTER_FORWARD_* macros may also return on their own)
 */
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFrame *frame = NULL;
    int ret;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &frame);
    if (ret)
        return ret < 0 ? ret : output_frame(inlink, frame);

    if (ff_inlink_queued_samples(inlink) < s->sample_advance) {
        FF_FILTER_FORWARD_STATUS(inlink, outlink);
        FF_FILTER_FORWARD_WANTED(outlink, inlink);

        return FFERROR_NOT_READY;
    }

    /* A full block is already queued: ask to be run again. */
    ff_filter_set_ready(ctx, 10);
    return 0;
}
1289 |
|
|
|
1290 |
|
✗ |
/**
 * Release everything owned by the filter instance: the shared tables, the
 * window frame and, for each of the s->channels channels, the per-channel
 * buffers and both transform contexts.
 *
 * @param ctx filter context being torn down
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioFFTDeNoiseContext *s = ctx->priv;

    av_frame_free(&s->winframe);
    av_freep(&s->band_beta);
    av_freep(&s->band_alpha);
    av_freep(&s->bin2band);
    av_freep(&s->window);

    /* Nothing per-channel to release if the channel array was never set. */
    if (!s->dnch)
        return;

    for (int ch = 0; ch < s->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];

        av_tx_uninit(&dnch->ifft);
        av_tx_uninit(&dnch->fft);

        av_freep(&dnch->fft_out);
        av_freep(&dnch->fft_in);
        av_freep(&dnch->min_abs_var);
        av_freep(&dnch->rel_var);
        av_freep(&dnch->abs_var);
        av_freep(&dnch->spread_function);
        av_freep(&dnch->out_samples);
        av_freep(&dnch->noisy_data);
        av_freep(&dnch->clean_data);
        av_freep(&dnch->prior_band_excit);
        av_freep(&dnch->prior);
        av_freep(&dnch->smoothed_gain);
        av_freep(&dnch->gain);
        av_freep(&dnch->band_excit);
        av_freep(&dnch->band_amt);
        av_freep(&dnch->amt);
    }

    av_freep(&s->dnch);
}
1325 |
|
|
|
1326 |
|
✗ |
/**
 * Runtime command handler.
 *
 * The command is first handed to ff_filter_process_command(). Unless it is
 * "sample_noise" / "sn", the context's noise_reduction, noise_floor and
 * residual_floor are then copied into every channel and set_parameters()
 * is run for it.
 *
 * @return a negative error code from ff_filter_process_command(), else 0
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    const int ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);

    if (ret < 0)
        return ret;

    /* Every command except the noise sampling trigger refreshes the channels. */
    if (strcmp(cmd, "sample_noise") && strcmp(cmd, "sn")) {
        for (int ch = 0; ch < s->channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            dnch->residual_floor  = s->residual_floor;
            dnch->noise_floor     = s->noise_floor;
            dnch->noise_reduction = s->noise_reduction;

            set_parameters(s, dnch, 1, 1);
        }
    }

    return 0;
}
1351 |
|
|
|
1352 |
|
|
static const AVFilterPad inputs[] = { |
1353 |
|
|
{ |
1354 |
|
|
.name = "default", |
1355 |
|
|
.type = AVMEDIA_TYPE_AUDIO, |
1356 |
|
|
.config_props = config_input, |
1357 |
|
|
}, |
1358 |
|
|
}; |
1359 |
|
|
|
1360 |
|
|
const AVFilter ff_af_afftdn = { |
1361 |
|
|
.name = "afftdn", |
1362 |
|
|
.description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."), |
1363 |
|
|
.priv_size = sizeof(AudioFFTDeNoiseContext), |
1364 |
|
|
.priv_class = &afftdn_class, |
1365 |
|
|
.activate = activate, |
1366 |
|
|
.uninit = uninit, |
1367 |
|
|
FILTER_INPUTS(inputs), |
1368 |
|
|
FILTER_OUTPUTS(ff_audio_default_filterpad), |
1369 |
|
|
FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP), |
1370 |
|
|
.process_command = process_command, |
1371 |
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | |
1372 |
|
|
AVFILTER_FLAG_SLICE_THREADS, |
1373 |
|
|
}; |
1374 |
|
|
|