| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2018 The FFmpeg Project | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <float.h> | ||
| 22 | |||
| 23 | #include "libavutil/avassert.h" | ||
| 24 | #include "libavutil/avstring.h" | ||
| 25 | #include "libavutil/channel_layout.h" | ||
| 26 | #include "libavutil/mem.h" | ||
| 27 | #include "libavutil/opt.h" | ||
| 28 | #include "libavutil/tx.h" | ||
| 29 | #include "avfilter.h" | ||
| 30 | #include "audio.h" | ||
| 31 | #include "filters.h" | ||
| 32 | |||
/* Converts a value in dB to a natural-log exponent: exp(x * C) == pow(10, x / 10). */
#define C (M_LN10 * 0.1)
/* Dimension of the square linear system handled by factor() and solve(). */
#define SOLVE_SIZE (5)
/* Number of entries in the per-channel noise profile arrays. */
#define NB_PROFILE_BANDS (15)
| 36 | |||
/* Values accepted by the "sample_noise"/"sn" option (stored in sample_noise_mode). */
enum SampleNoiseModes {
    SAMPLE_NONE,    /* option constant "none" */
    SAMPLE_START,   /* option constants "start" and "begin" */
    SAMPLE_STOP,    /* option constants "stop" and "end" */
    NB_SAMPLEMODES  /* number of modes; used as the option's upper bound + 1 */
};
| 43 | |||
/* Values accepted by the "output_mode"/"om" option. */
enum OutModes {
    IN_MODE,    /* option constants "input" and "i" */
    OUT_MODE,   /* option constants "output" and "o" (the default) */
    NOISE_MODE, /* option constants "noise" and "n" */
    NB_MODES    /* number of modes; used as the option's upper bound + 1 */
};
| 50 | |||
/* Values accepted by the "noise_link"/"nl" option (stored in noise_floor_link). */
enum NoiseLinkType {
    NONE_LINK,    /* option constant "none" */
    MIN_LINK,     /* option constant "min" (the default) */
    MAX_LINK,     /* option constant "max" */
    AVERAGE_LINK, /* option constant "average" */
    NB_LINK       /* number of link types; used as the option's upper bound + 1 */
};
| 58 | |||
/*
 * Values accepted by the "noise_type"/"nt" option. config_input() uses the
 * selected type to decide how each channel's band_noise profile is initialised.
 */
enum NoiseType {
    WHITE_NOISE,   /* profile set to all zeros */
    VINYL_NOISE,   /* profile computed with get_band_noise() */
    SHELLAC_NOISE, /* profile computed with get_band_noise() */
    CUSTOM_NOISE,  /* profile parsed from the "band_noise" option string */
    NB_NOISE       /* number of noise types; used as the option's upper bound + 1 */
};
| 66 | |||
/*
 * Per-channel denoiser state. One instance is allocated per input channel in
 * config_input(); the pointer members are arrays allocated there as well.
 */
typedef struct DeNoiseChannel {
    double band_noise[NB_PROFILE_BANDS];          /* noise profile; mean-removed by reduce_mean() */
    double noise_band_auto_var[NB_PROFILE_BANDS]; /* max_var * exp((band_noise - 2.0) * C) */
    double noise_band_sample[NB_PROFILE_BANDS];   /* NOTE(review): not used in the visible code */
    double *amt;              /* per bin: band_amt of the band the bin belongs to */
    double *band_amt;         /* per band: spread_function applied to band_excit */
    double *band_excit;       /* per band: accumulated clean_data, smoothed with the previous frame */
    double *gain;             /* per bin: gain before smoothing */
    double *smoothed_gain;    /* per bin: gain actually multiplied into the spectrum */
    double *prior;            /* per bin; presumably passed as "prior" to process_frame() - confirm at caller */
    double *prior_band_excit; /* per band; presumably passed as "prior_band_excit" - confirm at caller */
    double *clean_data;       /* per bin: power * gain^2 */
    double *noisy_data;       /* per bin: magnitude of the input spectrum */
    double *out_samples;      /* buffer_length entries */
    double *spread_function;  /* number_of_bands * number_of_bands weights, read row by row */
    double *abs_var;          /* per bin: fmax(max_var * rel_var, 1.0) */
    double *rel_var;          /* per bin: variance interpolated from band_noise */
    double *min_abs_var;      /* per bin: gain_scale * abs_var */
    void *fft_in;             /* fft_length2 real samples; float or double depending on format */
    void *fft_out;            /* fft_length2 + 1 complex samples; float or double depending on format */
    AVTXContext *fft, *ifft;  /* transforms created with av_tx_init() (inverse flag 0 and 1) */
    av_tx_fn tx_fn, itx_fn;   /* entry points returned by av_tx_init() for fft and ifft */

    /* NOTE(review): the four arrays below are not used in the visible code. */
    double noise_band_norm[NB_PROFILE_BANDS];
    double noise_band_avr[NB_PROFILE_BANDS];
    double noise_band_avi[NB_PROFILE_BANDS];
    double noise_band_var[NB_PROFILE_BANDS];

    /* Requested values and the "last_*" values most recently applied by set_parameters(). */
    double noise_reduction;
    double last_noise_reduction;
    double noise_floor;
    double last_noise_floor;
    double residual_floor;
    double last_residual_floor;
    double max_gain;          /* exp(last_noise_reduction * 0.5 * C) */
    double max_var;           /* floor * exp((100 + last_noise_floor) * C) */
    double gain_scale;        /* 1 / (max_gain * max_gain) */
} DeNoiseChannel;
| 105 | |||
/*
 * Filter private context: user options (see afftdn_options), values derived
 * in config_input(), and state shared by all channels.
 */
typedef struct AudioFFTDeNoiseContext {
    const AVClass *class;

    int format;                 /* input sample format (copied from the input link) */
    size_t sample_size;         /* sizeof(float) or sizeof(double), by format */
    size_t complex_sample_size; /* sizeof(AVComplexFloat) or sizeof(AVComplexDouble), by format */

    /* Fields below up to floor_offset are written through the AVOption table. */
    float noise_reduction;
    float noise_floor;
    int noise_type;             /* enum NoiseType */
    char *band_noise_str;       /* custom profile string, parsed by read_custom_noise() */
    float residual_floor;
    int track_noise;
    int track_residual;
    int output_mode;            /* enum OutModes */
    int noise_floor_link;       /* enum NoiseLinkType */
    float ratio;                /* "adaptivity" option */
    int gain_smooth;            /* gain smoothing radius in bins; 0 disables smoothing */
    float band_multiplier;
    float floor_offset;

    int channels;               /* number of input channels */
    int sample_noise;           /* NOTE(review): not used in the visible code */
    int sample_noise_blocks;    /* NOTE(review): not used in the visible code */
    int sample_noise_mode;      /* enum SampleNoiseModes, set through the "sample_noise" option */
    float sample_rate;          /* input sample rate */
    int buffer_length;          /* 2 * fft_length */
    int fft_length;             /* equal to fft_length2 */
    int fft_length2;            /* 1 << (32 - ff_clz(window_length)) */
    int bin_count;              /* fft_length2 / 2 + 1 */
    int window_length;          /* 3 * sample_advance */
    int sample_advance;         /* sample_rate / 80 */
    int number_of_bands;        /* bin2band[bin_count - 1] + 1 */

    int band_centre[NB_PROFILE_BANDS]; /* profile band centres; starts at 80, grows by about 1.5x per band */

    int *bin2band;              /* per bin: index of the band the bin belongs to */
    double *window;             /* window_length entries */
    double *band_alpha;         /* per band: weight of the current excitation in process_frame() */
    double *band_beta;          /* per band: weight of the previous excitation in process_frame() */

    DeNoiseChannel *dnch;       /* array of `channels` per-channel states */

    AVFrame *winframe;

    double window_weight;
    double floor;               /* threshold for spectral_flatness() and scale factor of max_var */
    double sample_floor;

    int noise_band_edge[NB_PROFILE_BANDS + 2];
    int noise_band_count;
    double matrix_a[SOLVE_SIZE * SOLVE_SIZE];       /* sums of pow(m, j + k), factored in place by factor() */
    double vector_b[SOLVE_SIZE];                    /* right-hand side / solution scratch for solve() */
    double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS]; /* pow(k, j), SOLVE_SIZE rows of NB_PROFILE_BANDS */
    double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS]; /* pow(j, k), NB_PROFILE_BANDS rows of SOLVE_SIZE */
} AudioFFTDeNoiseContext;
| 162 | |||
/* Byte offset of an option field inside the filter's private context. */
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
/* Flags for audio filtering options that are not marked as runtime-changeable. */
#define AF  AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
/* Same as AF, plus the runtime-parameter flag. */
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
| 166 | |||
/*
 * Option table of the afftdn filter. Every option is listed twice: once under
 * its long name and once under a short alias that targets the same field.
 * AV_OPT_TYPE_CONST rows provide the named values of the option sharing
 * their .unit.
 */
static const AVOption afftdn_options[] = {
    { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
    { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
    { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
    { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
    /* noise type and its named values (enum NoiseType) */
    { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
    { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
    { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
    { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, .unit = "type" },
    { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
    { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, .unit = "type" },
    { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
    { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, .unit = "type" },
    { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
    { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, .unit = "type" },
    /* custom profile string; read_custom_noise() splits it on '|' and ' ' */
    { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
    { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
    { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
    { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
    { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    /* output mode and its named values (enum OutModes) */
    { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
    { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, .unit = "mode" },
    { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
    { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, .unit = "mode" },
    { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
    { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, .unit = "mode" },
    { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
    { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, .unit = "mode" },
    { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
    { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
    { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
    { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
    /* noise floor link and its named values (enum NoiseLinkType) */
    { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
    { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, .unit = "link" },
    { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, .unit = "link" },
    { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, .unit = "link" },
    { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, .unit = "link" },
    { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, .unit = "link" },
    { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
    { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
    /* sample noise mode and its named values (enum SampleNoiseModes) */
    { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
    { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, .unit = "sample" },
    { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, .unit = "sample" },
    { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
    { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, .unit = "sample" },
    { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
    { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, .unit = "sample" },
    { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
    { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
    { NULL }
};

AVFILTER_DEFINE_CLASS(afftdn);
| 223 | |||
| 224 | ✗ | static double get_band_noise(AudioFFTDeNoiseContext *s, | |
| 225 | int band, double a, | ||
| 226 | double b, double c) | ||
| 227 | { | ||
| 228 | double d1, d2, d3; | ||
| 229 | |||
| 230 | ✗ | d1 = a / s->band_centre[band]; | |
| 231 | ✗ | d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10; | |
| 232 | ✗ | d2 = b / s->band_centre[band]; | |
| 233 | ✗ | d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10; | |
| 234 | ✗ | d3 = s->band_centre[band] / c; | |
| 235 | ✗ | d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10; | |
| 236 | |||
| 237 | ✗ | return -d1 + d2 - d3; | |
| 238 | } | ||
| 239 | |||
/**
 * Factor a size x size matrix in place by elimination without pivoting.
 *
 * Element (r, c) is stored at array[r + c * size]. On return the entries
 * below the diagonal hold the elimination multipliers and the remaining
 * entries hold the reduced matrix, which is the layout solve() consumes.
 *
 * @param array matrix to factor, overwritten
 * @param size  matrix dimension
 */
static void factor(double *array, int size)
{
    for (int col = 0; col < size - 1; col++) {
        double *pivot_col = array + col * size;

        for (int row = col + 1; row < size; row++) {
            const double mult = pivot_col[row] / pivot_col[col];

            pivot_col[row] = mult;
            for (int k = col + 1; k < size; k++)
                array[row + k * size] -= mult * array[col + k * size];
        }
    }
}
| 253 | |||
/**
 * Solve a linear system whose matrix was previously processed by factor().
 *
 * Element (r, c) of the matrix is stored at matrix[r + c * size]. The
 * right-hand side is overwritten with the solution.
 *
 * @param matrix factored matrix, read only
 * @param vector right-hand side on entry, solution on return
 * @param size   system dimension
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Forward pass: apply the stored multipliers to the right-hand side. */
    for (int i = 0; i < last; i++) {
        const double vi = vector[i];

        for (int j = i + 1; j < size; j++)
            vector[j] -= matrix[j + i * size] * vi;
    }

    /* Backward pass: the last unknown first, then upwards. */
    vector[last] /= matrix[size * size - 1];

    for (int i = last - 1; i >= 0; i--) {
        double acc = vector[i];

        for (int j = i + 1; j < size; j++)
            acc -= matrix[i + j * size] * vector[j];
        vector[i] = acc / matrix[i + i * size];
    }
}
| 272 | |||
| 273 | ✗ | static double process_get_band_noise(AudioFFTDeNoiseContext *s, | |
| 274 | DeNoiseChannel *dnch, | ||
| 275 | int band) | ||
| 276 | { | ||
| 277 | double product, sum, f; | ||
| 278 | ✗ | int i = 0; | |
| 279 | |||
| 280 | ✗ | if (band < NB_PROFILE_BANDS) | |
| 281 | ✗ | return dnch->band_noise[band]; | |
| 282 | |||
| 283 | ✗ | for (int j = 0; j < SOLVE_SIZE; j++) { | |
| 284 | ✗ | sum = 0.0; | |
| 285 | ✗ | for (int k = 0; k < NB_PROFILE_BANDS; k++) | |
| 286 | ✗ | sum += s->matrix_b[i++] * dnch->band_noise[k]; | |
| 287 | ✗ | s->vector_b[j] = sum; | |
| 288 | } | ||
| 289 | |||
| 290 | ✗ | solve(s->matrix_a, s->vector_b, SOLVE_SIZE); | |
| 291 | ✗ | f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1]; | |
| 292 | ✗ | f = 15.0 + log(f / 1.5) / log(1.5); | |
| 293 | ✗ | sum = 0.0; | |
| 294 | ✗ | product = 1.0; | |
| 295 | ✗ | for (int j = 0; j < SOLVE_SIZE; j++) { | |
| 296 | ✗ | sum += product * s->vector_b[j]; | |
| 297 | ✗ | product *= f; | |
| 298 | } | ||
| 299 | |||
| 300 | ✗ | return sum; | |
| 301 | } | ||
| 302 | |||
/**
 * Map gain a through a rational curve controlled by limit b.
 *
 * @param a gain to limit
 * @param b limit parameter
 * @return (b*a - 1) / (b + a - 2) when a > 1,
 *         (b*a - 2*a + 1) / (b - a) when a < 1,
 *         1.0 otherwise
 */
static double limit_gain(double a, double b)
{
    double num, den;

    if (a > 1.0) {
        num = b * a - 1.0;
        den = b + a - 2.0;
    } else if (a < 1.0) {
        num = b * a - 2.0 * a + 1.0;
        den = b - a;
    } else {
        return 1.0;
    }

    return num / den;
}
| 311 | |||
| 312 | ✗ | static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral, | |
| 313 | double floor, int len, double *rnum, double *rden) | ||
| 314 | { | ||
| 315 | ✗ | double num = 0., den = 0.; | |
| 316 | ✗ | int size = 0; | |
| 317 | |||
| 318 | ✗ | for (int n = 0; n < len; n++) { | |
| 319 | ✗ | const double v = spectral[n]; | |
| 320 | ✗ | if (v > floor) { | |
| 321 | ✗ | num += log(v); | |
| 322 | ✗ | den += v; | |
| 323 | ✗ | size++; | |
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | ✗ | size = FFMAX(size, 1); | |
| 328 | |||
| 329 | ✗ | num /= size; | |
| 330 | ✗ | den /= size; | |
| 331 | |||
| 332 | ✗ | num = exp(num); | |
| 333 | |||
| 334 | ✗ | *rnum = num; | |
| 335 | ✗ | *rden = den; | |
| 336 | ✗ | } | |
| 337 | |||
/* Forward declaration: process_frame() calls set_parameters(), which is defined further down. */
static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
| 339 | |||
/**
 * Largest absolute deviation of S[] from mean, relative to mean.
 *
 * @param S    input values
 * @param size number of entries in S
 * @param mean reference value; also the divisor of the result
 * @return max(|S[n] - mean|) / mean, with the maximum starting at 0
 */
static double floor_offset(const double *S, int size, double mean)
{
    double peak = 0.0;

    for (int n = 0; n < size; n++) {
        const double dev = fabs(S[n] - mean);

        /* A NaN deviation compares false and is ignored, as with fmax(). */
        if (dev > peak)
            peak = dev;
    }

    return peak / mean;
}
| 352 | |||
/**
 * Compute per-bin gains for one channel's spectrum and apply them in place.
 *
 * Reads the spectrum from dnch->fft_out (float or double complex, selected by
 * s->format), derives a gain per bin, optionally re-estimates the noise
 * floor, limits and optionally smooths the gains, and finally multiplies the
 * spectrum by the smoothed gains.
 *
 * @param ctx              filter context; used to reach the output link
 * @param s                filter private context
 * @param dnch             state of the channel being processed
 * @param prior            per-bin estimate carried over from the previous call; updated
 * @param prior_band_excit per-band excitation from the previous call; updated
 * @param track_noise      when non-zero, update dnch->noise_floor from this frame
 */
static void process_frame(AVFilterContext *ctx,
                          AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
                          double *prior, double *prior_band_excit, int track_noise)
{
    AVFilterLink *outlink = ctx->outputs[0];
    FilterLink *outl = ff_filter_link(outlink);
    const double *abs_var = dnch->abs_var;
    /* While frame_count_out is still 0 the ratio is forced to 1.0. */
    const double ratio = outl->frame_count_out ? s->ratio : 1.0;
    const double rratio = 1. - ratio;
    const int *bin2band = s->bin2band;
    double *noisy_data = dnch->noisy_data;
    double *band_excit = dnch->band_excit;
    double *band_amt = dnch->band_amt;
    double *smoothed_gain = dnch->smoothed_gain;
    /* Two typed views of the same buffer; only the one matching s->format is used. */
    AVComplexDouble *fft_data_dbl = dnch->fft_out;
    AVComplexFloat *fft_data_flt = dnch->fft_out;
    double *gain = dnch->gain;

    /* Pass 1: per-bin magnitude, blended estimate and raw gain. */
    for (int i = 0; i < s->bin_count; i++) {
        double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;

        switch (s->format) {
        case AV_SAMPLE_FMT_FLTP:
            noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
            break;
        case AV_SAMPLE_FMT_DBLP:
            noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
            break;
        default:
            av_assert0(0);
        }

        power = mag * mag;
        /* Power relative to the noise variance of this bin. */
        mag_abs_var = power / abs_var[i];
        /* Blend the previous estimate with the current excess over the noise. */
        new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
        new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
        sqr_new_gain = new_gain * new_gain;
        prior[i] = mag_abs_var * sqr_new_gain;
        dnch->clean_data[i] = power * sqr_new_gain;
        gain[i] = new_gain;
    }

    if (track_noise) {
        double flatness, num, den;

        /* num: geometric mean, den: arithmetic mean of magnitudes above s->floor. */
        spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);

        flatness = num / den;
        if (flatness > 0.8) {
            const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
            const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);

            /* Move the stored floor 10% of the way towards the new estimate. */
            dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
            set_parameters(s, dnch, 1, 1);
        }
    }

    for (int i = 0; i < s->number_of_bands; i++) {
        band_excit[i] = 0.0;
        band_amt[i] = 0.0;
    }

    /* Accumulate the cleaned power of each bin into its band. */
    for (int i = 0; i < s->bin_count; i++)
        band_excit[bin2band[i]] += dnch->clean_data[i];

    /* Keep the larger of the current excitation and its mix with the previous one. */
    for (int i = 0; i < s->number_of_bands; i++) {
        band_excit[i] = fmax(band_excit[i],
                             s->band_alpha[i] * band_excit[i] +
                             s->band_beta[i] * prior_band_excit[i]);
        prior_band_excit[i] = band_excit[i];
    }

    /* band_amt = spread_function * band_excit; i walks the matrix row by row. */
    for (int j = 0, i = 0; j < s->number_of_bands; j++) {
        for (int k = 0; k < s->number_of_bands; k++) {
            band_amt[j] += dnch->spread_function[i++] * band_excit[k];
        }
    }

    for (int i = 0; i < s->bin_count; i++)
        dnch->amt[i] = band_amt[bin2band[i]];

    /* Limit the gain depending on how amt compares with the variance bounds. */
    for (int i = 0; i < s->bin_count; i++) {
        if (dnch->amt[i] > abs_var[i]) {
            gain[i] = 1.0;
        } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
            const double limit = sqrt(abs_var[i] / dnch->amt[i]);

            gain[i] = limit_gain(gain[i], limit);
        } else {
            gain[i] = limit_gain(gain[i], dnch->max_gain);
        }
    }

    /* Start from the unsmoothed gains; the first and last r bins keep them. */
    memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
    if (s->gain_smooth > 0) {
        const int r = s->gain_smooth;

        for (int i = r; i < s->bin_count - r; i++) {
            const double gc = gain[i];
            double num = 0., den = 0.;

            /* Weighted mean over [i - r, i + r]; weight is 1 - |g - gc|. */
            for (int j = -r; j <= r; j++) {
                const double g = gain[i + j];
                const double d = 1. - fabs(g - gc);

                num += g * d;
                den += d;
            }

            smoothed_gain[i] = num / den;
        }
    }

    /* Scale the spectrum in place. */
    switch (s->format) {
    case AV_SAMPLE_FMT_FLTP:
        for (int i = 0; i < s->bin_count; i++) {
            const float new_gain = smoothed_gain[i];

            fft_data_flt[i].re *= new_gain;
            fft_data_flt[i].im *= new_gain;
        }
        break;
    case AV_SAMPLE_FMT_DBLP:
        for (int i = 0; i < s->bin_count; i++) {
            const double new_gain = smoothed_gain[i];

            fft_data_dbl[i].re *= new_gain;
            fft_data_dbl[i].im *= new_gain;
        }
        break;
    }
}
| 485 | |||
| 486 | ✗ | static double freq2bark(double x) | |
| 487 | { | ||
| 488 | ✗ | double d = x / 7500.0; | |
| 489 | |||
| 490 | ✗ | return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d); | |
| 491 | } | ||
| 492 | |||
| 493 | ✗ | static int get_band_centre(AudioFFTDeNoiseContext *s, int band) | |
| 494 | { | ||
| 495 | ✗ | if (band == -1) | |
| 496 | ✗ | return lrint(s->band_centre[0] / 1.5); | |
| 497 | |||
| 498 | ✗ | return s->band_centre[band]; | |
| 499 | } | ||
| 500 | |||
| 501 | ✗ | static int get_band_edge(AudioFFTDeNoiseContext *s, int band) | |
| 502 | { | ||
| 503 | int i; | ||
| 504 | |||
| 505 | ✗ | if (band == NB_PROFILE_BANDS) { | |
| 506 | ✗ | i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745); | |
| 507 | } else { | ||
| 508 | ✗ | i = lrint(s->band_centre[band] / 1.224745); | |
| 509 | } | ||
| 510 | |||
| 511 | ✗ | return FFMIN(i, s->sample_rate / 2); | |
| 512 | } | ||
| 513 | |||
| 514 | ✗ | static void set_band_parameters(AudioFFTDeNoiseContext *s, | |
| 515 | DeNoiseChannel *dnch) | ||
| 516 | { | ||
| 517 | double band_noise, d2, d3, d4, d5; | ||
| 518 | ✗ | int i = 0, j = 0, k = 0; | |
| 519 | |||
| 520 | ✗ | d5 = 0.0; | |
| 521 | ✗ | band_noise = process_get_band_noise(s, dnch, 0); | |
| 522 | ✗ | for (int m = j; m < s->bin_count; m++) { | |
| 523 | ✗ | if (m == j) { | |
| 524 | ✗ | i = j; | |
| 525 | ✗ | d5 = band_noise; | |
| 526 | ✗ | if (k >= NB_PROFILE_BANDS) { | |
| 527 | ✗ | j = s->bin_count; | |
| 528 | } else { | ||
| 529 | ✗ | j = s->fft_length * get_band_centre(s, k) / s->sample_rate; | |
| 530 | } | ||
| 531 | ✗ | d2 = j - i; | |
| 532 | ✗ | band_noise = process_get_band_noise(s, dnch, k); | |
| 533 | ✗ | k++; | |
| 534 | } | ||
| 535 | ✗ | d3 = (j - m) / d2; | |
| 536 | ✗ | d4 = (m - i) / d2; | |
| 537 | ✗ | dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C); | |
| 538 | } | ||
| 539 | |||
| 540 | ✗ | for (i = 0; i < NB_PROFILE_BANDS; i++) | |
| 541 | ✗ | dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); | |
| 542 | ✗ | } | |
| 543 | |||
| 544 | ✗ | static void read_custom_noise(AVFilterContext *ctx, int ch) | |
| 545 | { | ||
| 546 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 547 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 548 | ✗ | char *custom_noise_str, *p, *arg, *saveptr = NULL; | |
| 549 | ✗ | double band_noise[NB_PROFILE_BANDS] = { 0.f }; | |
| 550 | int ret; | ||
| 551 | |||
| 552 | ✗ | if (!s->band_noise_str) | |
| 553 | ✗ | return; | |
| 554 | |||
| 555 | ✗ | custom_noise_str = p = av_strdup(s->band_noise_str); | |
| 556 | ✗ | if (!p) | |
| 557 | ✗ | return; | |
| 558 | |||
| 559 | ✗ | for (int i = 0; i < NB_PROFILE_BANDS; i++) { | |
| 560 | float noise; | ||
| 561 | |||
| 562 | ✗ | if (!(arg = av_strtok(p, "| ", &saveptr))) | |
| 563 | ✗ | break; | |
| 564 | |||
| 565 | ✗ | p = NULL; | |
| 566 | |||
| 567 | ✗ | ret = av_sscanf(arg, "%f", &noise); | |
| 568 | ✗ | if (ret != 1) { | |
| 569 | ✗ | av_log(ctx, AV_LOG_ERROR, "Custom band noise must be float.\n"); | |
| 570 | ✗ | break; | |
| 571 | } | ||
| 572 | |||
| 573 | ✗ | band_noise[i] = av_clipd(noise, -24., 24.); | |
| 574 | } | ||
| 575 | |||
| 576 | ✗ | av_free(custom_noise_str); | |
| 577 | ✗ | memcpy(dnch->band_noise, band_noise, sizeof(band_noise)); | |
| 578 | } | ||
| 579 | |||
| 580 | ✗ | static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var) | |
| 581 | { | ||
| 582 | ✗ | if (dnch->last_noise_floor != dnch->noise_floor) | |
| 583 | ✗ | dnch->last_noise_floor = dnch->noise_floor; | |
| 584 | |||
| 585 | ✗ | if (s->track_residual) | |
| 586 | ✗ | dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor); | |
| 587 | |||
| 588 | ✗ | dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C); | |
| 589 | ✗ | if (update_auto_var) { | |
| 590 | ✗ | for (int i = 0; i < NB_PROFILE_BANDS; i++) | |
| 591 | ✗ | dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); | |
| 592 | } | ||
| 593 | |||
| 594 | ✗ | if (s->track_residual) { | |
| 595 | ✗ | if (update_var || dnch->last_residual_floor != dnch->residual_floor) { | |
| 596 | ✗ | update_var = 1; | |
| 597 | ✗ | dnch->last_residual_floor = dnch->residual_floor; | |
| 598 | ✗ | dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0); | |
| 599 | ✗ | dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C)); | |
| 600 | } | ||
| 601 | ✗ | } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) { | |
| 602 | ✗ | update_var = 1; | |
| 603 | ✗ | dnch->last_noise_reduction = dnch->noise_reduction; | |
| 604 | ✗ | dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20); | |
| 605 | ✗ | dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C)); | |
| 606 | } | ||
| 607 | |||
| 608 | ✗ | dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain); | |
| 609 | |||
| 610 | ✗ | if (update_var) { | |
| 611 | ✗ | set_band_parameters(s, dnch); | |
| 612 | |||
| 613 | ✗ | for (int i = 0; i < s->bin_count; i++) { | |
| 614 | ✗ | dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0); | |
| 615 | ✗ | dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i]; | |
| 616 | } | ||
| 617 | } | ||
| 618 | ✗ | } | |
| 619 | |||
| 620 | ✗ | static void reduce_mean(double *band_noise) | |
| 621 | { | ||
| 622 | ✗ | double mean = 0.f; | |
| 623 | |||
| 624 | ✗ | for (int i = 0; i < NB_PROFILE_BANDS; i++) | |
| 625 | ✗ | mean += band_noise[i]; | |
| 626 | ✗ | mean /= NB_PROFILE_BANDS; | |
| 627 | |||
| 628 | ✗ | for (int i = 0; i < NB_PROFILE_BANDS; i++) | |
| 629 | ✗ | band_noise[i] -= mean; | |
| 630 | ✗ | } | |
| 631 | |||
| 632 | ✗ | static int config_input(AVFilterLink *inlink) | |
| 633 | { | ||
| 634 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 635 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 636 | double wscale, sar, sum, sdiv; | ||
| 637 | int i, j, k, m, n, ret, tx_type; | ||
| 638 | ✗ | double dscale = 1.; | |
| 639 | ✗ | float fscale = 1.f; | |
| 640 | void *scale; | ||
| 641 | |||
| 642 | ✗ | s->format = inlink->format; | |
| 643 | |||
| 644 | ✗ | switch (s->format) { | |
| 645 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 646 | ✗ | s->sample_size = sizeof(float); | |
| 647 | ✗ | s->complex_sample_size = sizeof(AVComplexFloat); | |
| 648 | ✗ | tx_type = AV_TX_FLOAT_RDFT; | |
| 649 | ✗ | scale = &fscale; | |
| 650 | ✗ | break; | |
| 651 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 652 | ✗ | s->sample_size = sizeof(double); | |
| 653 | ✗ | s->complex_sample_size = sizeof(AVComplexDouble); | |
| 654 | ✗ | tx_type = AV_TX_DOUBLE_RDFT; | |
| 655 | ✗ | scale = &dscale; | |
| 656 | ✗ | break; | |
| 657 | } | ||
| 658 | |||
| 659 | ✗ | s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch)); | |
| 660 | ✗ | if (!s->dnch) | |
| 661 | ✗ | return AVERROR(ENOMEM); | |
| 662 | |||
| 663 | ✗ | s->channels = inlink->ch_layout.nb_channels; | |
| 664 | ✗ | s->sample_rate = inlink->sample_rate; | |
| 665 | ✗ | s->sample_advance = s->sample_rate / 80; | |
| 666 | ✗ | s->window_length = 3 * s->sample_advance; | |
| 667 | ✗ | s->fft_length2 = 1 << (32 - ff_clz(s->window_length)); | |
| 668 | ✗ | s->fft_length = s->fft_length2; | |
| 669 | ✗ | s->buffer_length = s->fft_length * 2; | |
| 670 | ✗ | s->bin_count = s->fft_length2 / 2 + 1; | |
| 671 | |||
| 672 | ✗ | s->band_centre[0] = 80; | |
| 673 | ✗ | for (i = 1; i < NB_PROFILE_BANDS; i++) { | |
| 674 | ✗ | s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0); | |
| 675 | ✗ | if (s->band_centre[i] < 1000) { | |
| 676 | ✗ | s->band_centre[i] = 10 * (s->band_centre[i] / 10); | |
| 677 | ✗ | } else if (s->band_centre[i] < 5000) { | |
| 678 | ✗ | s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50); | |
| 679 | ✗ | } else if (s->band_centre[i] < 15000) { | |
| 680 | ✗ | s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100); | |
| 681 | } else { | ||
| 682 | ✗ | s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000); | |
| 683 | } | ||
| 684 | } | ||
| 685 | |||
| 686 | ✗ | for (j = 0; j < SOLVE_SIZE; j++) { | |
| 687 | ✗ | for (k = 0; k < SOLVE_SIZE; k++) { | |
| 688 | ✗ | s->matrix_a[j + k * SOLVE_SIZE] = 0.0; | |
| 689 | ✗ | for (m = 0; m < NB_PROFILE_BANDS; m++) | |
| 690 | ✗ | s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k); | |
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | ✗ | factor(s->matrix_a, SOLVE_SIZE); | |
| 695 | |||
| 696 | ✗ | i = 0; | |
| 697 | ✗ | for (j = 0; j < SOLVE_SIZE; j++) | |
| 698 | ✗ | for (k = 0; k < NB_PROFILE_BANDS; k++) | |
| 699 | ✗ | s->matrix_b[i++] = pow(k, j); | |
| 700 | |||
| 701 | ✗ | i = 0; | |
| 702 | ✗ | for (j = 0; j < NB_PROFILE_BANDS; j++) | |
| 703 | ✗ | for (k = 0; k < SOLVE_SIZE; k++) | |
| 704 | ✗ | s->matrix_c[i++] = pow(j, k); | |
| 705 | |||
| 706 | ✗ | s->window = av_calloc(s->window_length, sizeof(*s->window)); | |
| 707 | ✗ | s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band)); | |
| 708 | ✗ | if (!s->window || !s->bin2band) | |
| 709 | ✗ | return AVERROR(ENOMEM); | |
| 710 | |||
| 711 | ✗ | sdiv = s->band_multiplier; | |
| 712 | ✗ | for (i = 0; i < s->bin_count; i++) | |
| 713 | ✗ | s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2)); | |
| 714 | |||
| 715 | ✗ | s->number_of_bands = s->bin2band[s->bin_count - 1] + 1; | |
| 716 | |||
| 717 | ✗ | s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha)); | |
| 718 | ✗ | s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta)); | |
| 719 | ✗ | if (!s->band_alpha || !s->band_beta) | |
| 720 | ✗ | return AVERROR(ENOMEM); | |
| 721 | |||
| 722 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 723 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 724 | |||
| 725 | ✗ | switch (s->noise_type) { | |
| 726 | ✗ | case WHITE_NOISE: | |
| 727 | ✗ | for (i = 0; i < NB_PROFILE_BANDS; i++) | |
| 728 | ✗ | dnch->band_noise[i] = 0.; | |
| 729 | ✗ | break; | |
| 730 | ✗ | case VINYL_NOISE: | |
| 731 | ✗ | for (i = 0; i < NB_PROFILE_BANDS; i++) | |
| 732 | ✗ | dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0); | |
| 733 | ✗ | break; | |
| 734 | ✗ | case SHELLAC_NOISE: | |
| 735 | ✗ | for (i = 0; i < NB_PROFILE_BANDS; i++) | |
| 736 | ✗ | dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10); | |
| 737 | ✗ | break; | |
| 738 | ✗ | case CUSTOM_NOISE: | |
| 739 | ✗ | read_custom_noise(ctx, ch); | |
| 740 | ✗ | break; | |
| 741 | ✗ | default: | |
| 742 | ✗ | return AVERROR_BUG; | |
| 743 | } | ||
| 744 | |||
| 745 | ✗ | reduce_mean(dnch->band_noise); | |
| 746 | |||
| 747 | ✗ | dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt)); | |
| 748 | ✗ | dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt)); | |
| 749 | ✗ | dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit)); | |
| 750 | ✗ | dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain)); | |
| 751 | ✗ | dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain)); | |
| 752 | ✗ | dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior)); | |
| 753 | ✗ | dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit)); | |
| 754 | ✗ | dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data)); | |
| 755 | ✗ | dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data)); | |
| 756 | ✗ | dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples)); | |
| 757 | ✗ | dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var)); | |
| 758 | ✗ | dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var)); | |
| 759 | ✗ | dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var)); | |
| 760 | ✗ | dnch->fft_in = av_calloc(s->fft_length2, s->sample_size); | |
| 761 | ✗ | dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size); | |
| 762 | ✗ | ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0); | |
| 763 | ✗ | if (ret < 0) | |
| 764 | ✗ | return ret; | |
| 765 | ✗ | ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0); | |
| 766 | ✗ | if (ret < 0) | |
| 767 | ✗ | return ret; | |
| 768 | ✗ | dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands, | |
| 769 | sizeof(*dnch->spread_function)); | ||
| 770 | |||
| 771 | ✗ | if (!dnch->amt || | |
| 772 | ✗ | !dnch->band_amt || | |
| 773 | ✗ | !dnch->band_excit || | |
| 774 | ✗ | !dnch->gain || | |
| 775 | ✗ | !dnch->smoothed_gain || | |
| 776 | ✗ | !dnch->prior || | |
| 777 | ✗ | !dnch->prior_band_excit || | |
| 778 | ✗ | !dnch->clean_data || | |
| 779 | ✗ | !dnch->noisy_data || | |
| 780 | ✗ | !dnch->out_samples || | |
| 781 | ✗ | !dnch->fft_in || | |
| 782 | ✗ | !dnch->fft_out || | |
| 783 | ✗ | !dnch->abs_var || | |
| 784 | ✗ | !dnch->rel_var || | |
| 785 | ✗ | !dnch->min_abs_var || | |
| 786 | ✗ | !dnch->spread_function || | |
| 787 | ✗ | !dnch->fft || | |
| 788 | ✗ | !dnch->ifft) | |
| 789 | ✗ | return AVERROR(ENOMEM); | |
| 790 | } | ||
| 791 | |||
| 792 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 793 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 794 | ✗ | double *prior_band_excit = dnch->prior_band_excit; | |
| 795 | double min, max; | ||
| 796 | double p1, p2; | ||
| 797 | |||
| 798 | ✗ | p1 = pow(0.1, 2.5 / sdiv); | |
| 799 | ✗ | p2 = pow(0.1, 1.0 / sdiv); | |
| 800 | ✗ | j = 0; | |
| 801 | ✗ | for (m = 0; m < s->number_of_bands; m++) { | |
| 802 | ✗ | for (n = 0; n < s->number_of_bands; n++) { | |
| 803 | ✗ | if (n < m) { | |
| 804 | ✗ | dnch->spread_function[j++] = pow(p2, m - n); | |
| 805 | ✗ | } else if (n > m) { | |
| 806 | ✗ | dnch->spread_function[j++] = pow(p1, n - m); | |
| 807 | } else { | ||
| 808 | ✗ | dnch->spread_function[j++] = 1.0; | |
| 809 | } | ||
| 810 | } | ||
| 811 | } | ||
| 812 | |||
| 813 | ✗ | for (m = 0; m < s->number_of_bands; m++) { | |
| 814 | ✗ | dnch->band_excit[m] = 0.0; | |
| 815 | ✗ | prior_band_excit[m] = 0.0; | |
| 816 | } | ||
| 817 | |||
| 818 | ✗ | for (m = 0; m < s->bin_count; m++) | |
| 819 | ✗ | dnch->band_excit[s->bin2band[m]] += 1.0; | |
| 820 | |||
| 821 | ✗ | j = 0; | |
| 822 | ✗ | for (m = 0; m < s->number_of_bands; m++) { | |
| 823 | ✗ | for (n = 0; n < s->number_of_bands; n++) | |
| 824 | ✗ | prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n]; | |
| 825 | } | ||
| 826 | |||
| 827 | ✗ | min = pow(0.1, 2.5); | |
| 828 | ✗ | max = pow(0.1, 1.0); | |
| 829 | ✗ | for (int i = 0; i < s->number_of_bands; i++) { | |
| 830 | ✗ | if (i < lrint(12.0 * sdiv)) { | |
| 831 | ✗ | dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv); | |
| 832 | } else { | ||
| 833 | ✗ | dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0)); | |
| 834 | } | ||
| 835 | ✗ | dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max); | |
| 836 | } | ||
| 837 | |||
| 838 | ✗ | for (int i = 0; i < s->buffer_length; i++) | |
| 839 | ✗ | dnch->out_samples[i] = 0; | |
| 840 | |||
| 841 | ✗ | j = 0; | |
| 842 | ✗ | for (int i = 0; i < s->number_of_bands; i++) | |
| 843 | ✗ | for (int k = 0; k < s->number_of_bands; k++) | |
| 844 | ✗ | dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i]; | |
| 845 | } | ||
| 846 | |||
| 847 | ✗ | j = 0; | |
| 848 | ✗ | sar = s->sample_advance / s->sample_rate; | |
| 849 | ✗ | for (int i = 0; i < s->bin_count; i++) { | |
| 850 | ✗ | if ((i == s->fft_length2) || (s->bin2band[i] > j)) { | |
| 851 | ✗ | double d6 = (i - 1) * s->sample_rate / s->fft_length; | |
| 852 | ✗ | double d7 = fmin(0.008 + 2.2 / d6, 0.03); | |
| 853 | ✗ | s->band_alpha[j] = exp(-sar / d7); | |
| 854 | ✗ | s->band_beta[j] = 1.0 - s->band_alpha[j]; | |
| 855 | ✗ | j = s->bin2band[i]; | |
| 856 | } | ||
| 857 | } | ||
| 858 | |||
| 859 | ✗ | s->winframe = ff_get_audio_buffer(inlink, s->window_length); | |
| 860 | ✗ | if (!s->winframe) | |
| 861 | ✗ | return AVERROR(ENOMEM); | |
| 862 | |||
| 863 | ✗ | wscale = sqrt(8.0 / (9.0 * s->fft_length)); | |
| 864 | ✗ | sum = 0.0; | |
| 865 | ✗ | for (int i = 0; i < s->window_length; i++) { | |
| 866 | ✗ | double d10 = sin(i * M_PI / s->window_length); | |
| 867 | ✗ | d10 *= wscale * d10; | |
| 868 | ✗ | s->window[i] = d10; | |
| 869 | ✗ | sum += d10 * d10; | |
| 870 | } | ||
| 871 | |||
| 872 | ✗ | s->window_weight = 0.5 * sum; | |
| 873 | ✗ | s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight; | |
| 874 | ✗ | s->sample_floor = s->floor * exp(4.144600506562284); | |
| 875 | |||
| 876 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 877 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 878 | |||
| 879 | ✗ | dnch->noise_reduction = s->noise_reduction; | |
| 880 | ✗ | dnch->noise_floor = s->noise_floor; | |
| 881 | ✗ | dnch->residual_floor = s->residual_floor; | |
| 882 | |||
| 883 | ✗ | set_parameters(s, dnch, 1, 1); | |
| 884 | } | ||
| 885 | |||
| 886 | ✗ | s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate); | |
| 887 | ✗ | i = 0; | |
| 888 | ✗ | for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) { | |
| 889 | ✗ | s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate); | |
| 890 | ✗ | if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1])) | |
| 891 | ✗ | i++; | |
| 892 | ✗ | s->noise_band_edge[NB_PROFILE_BANDS + 1] = i; | |
| 893 | } | ||
| 894 | ✗ | s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1]; | |
| 895 | |||
| 896 | ✗ | return 0; | |
| 897 | } | ||
| 898 | |||
| 899 | ✗ | static void init_sample_noise(DeNoiseChannel *dnch) | |
| 900 | { | ||
| 901 | ✗ | for (int i = 0; i < NB_PROFILE_BANDS; i++) { | |
| 902 | ✗ | dnch->noise_band_norm[i] = 0.0; | |
| 903 | ✗ | dnch->noise_band_avr[i] = 0.0; | |
| 904 | ✗ | dnch->noise_band_avi[i] = 0.0; | |
| 905 | ✗ | dnch->noise_band_var[i] = 0.0; | |
| 906 | } | ||
| 907 | ✗ | } | |
| 908 | |||
| 909 | ✗ | static void sample_noise_block(AudioFFTDeNoiseContext *s, | |
| 910 | DeNoiseChannel *dnch, | ||
| 911 | AVFrame *in, int ch) | ||
| 912 | { | ||
| 913 | ✗ | double *src_dbl = (double *)in->extended_data[ch]; | |
| 914 | ✗ | float *src_flt = (float *)in->extended_data[ch]; | |
| 915 | ✗ | double mag2, var = 0.0, avr = 0.0, avi = 0.0; | |
| 916 | ✗ | AVComplexDouble *fft_out_dbl = dnch->fft_out; | |
| 917 | ✗ | AVComplexFloat *fft_out_flt = dnch->fft_out; | |
| 918 | ✗ | double *fft_in_dbl = dnch->fft_in; | |
| 919 | ✗ | float *fft_in_flt = dnch->fft_in; | |
| 920 | int edge, j, k, n, edgemax; | ||
| 921 | |||
| 922 | ✗ | switch (s->format) { | |
| 923 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 924 | ✗ | for (int i = 0; i < s->window_length; i++) | |
| 925 | ✗ | fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23); | |
| 926 | |||
| 927 | ✗ | for (int i = s->window_length; i < s->fft_length2; i++) | |
| 928 | ✗ | fft_in_flt[i] = 0.f; | |
| 929 | ✗ | break; | |
| 930 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 931 | ✗ | for (int i = 0; i < s->window_length; i++) | |
| 932 | ✗ | fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23); | |
| 933 | |||
| 934 | ✗ | for (int i = s->window_length; i < s->fft_length2; i++) | |
| 935 | ✗ | fft_in_dbl[i] = 0.; | |
| 936 | ✗ | break; | |
| 937 | } | ||
| 938 | |||
| 939 | ✗ | dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size); | |
| 940 | |||
| 941 | ✗ | edge = s->noise_band_edge[0]; | |
| 942 | ✗ | j = edge; | |
| 943 | ✗ | k = 0; | |
| 944 | ✗ | n = j; | |
| 945 | ✗ | edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]); | |
| 946 | ✗ | for (int i = j; i <= edgemax; i++) { | |
| 947 | ✗ | if ((i == j) && (i < edgemax)) { | |
| 948 | ✗ | if (j > edge) { | |
| 949 | ✗ | dnch->noise_band_norm[k - 1] += j - edge; | |
| 950 | ✗ | dnch->noise_band_avr[k - 1] += avr; | |
| 951 | ✗ | dnch->noise_band_avi[k - 1] += avi; | |
| 952 | ✗ | dnch->noise_band_var[k - 1] += var; | |
| 953 | } | ||
| 954 | ✗ | k++; | |
| 955 | ✗ | edge = j; | |
| 956 | ✗ | j = s->noise_band_edge[k]; | |
| 957 | ✗ | if (k == NB_PROFILE_BANDS) { | |
| 958 | ✗ | j++; | |
| 959 | } | ||
| 960 | ✗ | var = 0.0; | |
| 961 | ✗ | avr = 0.0; | |
| 962 | ✗ | avi = 0.0; | |
| 963 | } | ||
| 964 | |||
| 965 | ✗ | switch (s->format) { | |
| 966 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 967 | ✗ | avr += fft_out_flt[n].re; | |
| 968 | ✗ | avi += fft_out_flt[n].im; | |
| 969 | ✗ | mag2 = fft_out_flt[n].re * fft_out_flt[n].re + | |
| 970 | ✗ | fft_out_flt[n].im * fft_out_flt[n].im; | |
| 971 | ✗ | break; | |
| 972 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 973 | ✗ | avr += fft_out_dbl[n].re; | |
| 974 | ✗ | avi += fft_out_dbl[n].im; | |
| 975 | ✗ | mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re + | |
| 976 | ✗ | fft_out_dbl[n].im * fft_out_dbl[n].im; | |
| 977 | ✗ | break; | |
| 978 | ✗ | default: | |
| 979 | av_assert2(0); | ||
| 980 | } | ||
| 981 | |||
| 982 | ✗ | mag2 = fmax(mag2, s->sample_floor); | |
| 983 | |||
| 984 | ✗ | var += mag2; | |
| 985 | ✗ | n++; | |
| 986 | } | ||
| 987 | |||
| 988 | ✗ | dnch->noise_band_norm[k - 1] += j - edge; | |
| 989 | ✗ | dnch->noise_band_avr[k - 1] += avr; | |
| 990 | ✗ | dnch->noise_band_avi[k - 1] += avi; | |
| 991 | ✗ | dnch->noise_band_var[k - 1] += var; | |
| 992 | ✗ | } | |
| 993 | |||
| 994 | ✗ | static void finish_sample_noise(AudioFFTDeNoiseContext *s, | |
| 995 | DeNoiseChannel *dnch, | ||
| 996 | double *sample_noise) | ||
| 997 | { | ||
| 998 | ✗ | for (int i = 0; i < s->noise_band_count; i++) { | |
| 999 | ✗ | dnch->noise_band_avr[i] /= dnch->noise_band_norm[i]; | |
| 1000 | ✗ | dnch->noise_band_avi[i] /= dnch->noise_band_norm[i]; | |
| 1001 | ✗ | dnch->noise_band_var[i] /= dnch->noise_band_norm[i]; | |
| 1002 | ✗ | dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] + | |
| 1003 | ✗ | dnch->noise_band_avi[i] * dnch->noise_band_avi[i]; | |
| 1004 | ✗ | dnch->noise_band_auto_var[i] = dnch->noise_band_var[i]; | |
| 1005 | ✗ | sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0; | |
| 1006 | } | ||
| 1007 | ✗ | if (s->noise_band_count < NB_PROFILE_BANDS) { | |
| 1008 | ✗ | for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++) | |
| 1009 | ✗ | sample_noise[i] = sample_noise[i - 1]; | |
| 1010 | } | ||
| 1011 | ✗ | } | |
| 1012 | |||
| 1013 | ✗ | static void set_noise_profile(AVFilterContext *ctx, | |
| 1014 | DeNoiseChannel *dnch, | ||
| 1015 | double *sample_noise) | ||
| 1016 | { | ||
| 1017 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1018 | double new_band_noise[NB_PROFILE_BANDS]; | ||
| 1019 | double temp[NB_PROFILE_BANDS]; | ||
| 1020 | ✗ | double sum = 0.0; | |
| 1021 | |||
| 1022 | ✗ | for (int m = 0; m < NB_PROFILE_BANDS; m++) | |
| 1023 | ✗ | temp[m] = sample_noise[m]; | |
| 1024 | |||
| 1025 | ✗ | for (int m = 0, i = 0; m < SOLVE_SIZE; m++) { | |
| 1026 | ✗ | sum = 0.0; | |
| 1027 | ✗ | for (int n = 0; n < NB_PROFILE_BANDS; n++) | |
| 1028 | ✗ | sum += s->matrix_b[i++] * temp[n]; | |
| 1029 | ✗ | s->vector_b[m] = sum; | |
| 1030 | } | ||
| 1031 | ✗ | solve(s->matrix_a, s->vector_b, SOLVE_SIZE); | |
| 1032 | ✗ | for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) { | |
| 1033 | ✗ | sum = 0.0; | |
| 1034 | ✗ | for (int n = 0; n < SOLVE_SIZE; n++) | |
| 1035 | ✗ | sum += s->matrix_c[i++] * s->vector_b[n]; | |
| 1036 | ✗ | temp[m] = sum; | |
| 1037 | } | ||
| 1038 | |||
| 1039 | ✗ | reduce_mean(temp); | |
| 1040 | |||
| 1041 | ✗ | av_log(ctx, AV_LOG_INFO, "bn="); | |
| 1042 | ✗ | for (int m = 0; m < NB_PROFILE_BANDS; m++) { | |
| 1043 | ✗ | new_band_noise[m] = temp[m]; | |
| 1044 | ✗ | new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0); | |
| 1045 | ✗ | av_log(ctx, AV_LOG_INFO, "%f ", new_band_noise[m]); | |
| 1046 | } | ||
| 1047 | ✗ | av_log(ctx, AV_LOG_INFO, "\n"); | |
| 1048 | ✗ | memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise)); | |
| 1049 | ✗ | } | |
| 1050 | |||
| 1051 | ✗ | static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) | |
| 1052 | { | ||
| 1053 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1054 | ✗ | AVFrame *in = arg; | |
| 1055 | ✗ | const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs; | |
| 1056 | ✗ | const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs; | |
| 1057 | ✗ | const int window_length = s->window_length; | |
| 1058 | ✗ | const double *window = s->window; | |
| 1059 | |||
| 1060 | ✗ | for (int ch = start; ch < end; ch++) { | |
| 1061 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1062 | ✗ | const double *src_dbl = (const double *)in->extended_data[ch]; | |
| 1063 | ✗ | const float *src_flt = (const float *)in->extended_data[ch]; | |
| 1064 | ✗ | double *dst = dnch->out_samples; | |
| 1065 | ✗ | double *fft_in_dbl = dnch->fft_in; | |
| 1066 | ✗ | float *fft_in_flt = dnch->fft_in; | |
| 1067 | |||
| 1068 | ✗ | switch (s->format) { | |
| 1069 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 1070 | ✗ | for (int m = 0; m < window_length; m++) | |
| 1071 | ✗ | fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23); | |
| 1072 | |||
| 1073 | ✗ | for (int m = window_length; m < s->fft_length2; m++) | |
| 1074 | ✗ | fft_in_flt[m] = 0.f; | |
| 1075 | ✗ | break; | |
| 1076 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 1077 | ✗ | for (int m = 0; m < window_length; m++) | |
| 1078 | ✗ | fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23); | |
| 1079 | |||
| 1080 | ✗ | for (int m = window_length; m < s->fft_length2; m++) | |
| 1081 | ✗ | fft_in_dbl[m] = 0.; | |
| 1082 | ✗ | break; | |
| 1083 | } | ||
| 1084 | |||
| 1085 | ✗ | dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size); | |
| 1086 | |||
| 1087 | ✗ | process_frame(ctx, s, dnch, | |
| 1088 | dnch->prior, | ||
| 1089 | dnch->prior_band_excit, | ||
| 1090 | s->track_noise); | ||
| 1091 | |||
| 1092 | ✗ | dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size); | |
| 1093 | |||
| 1094 | ✗ | switch (s->format) { | |
| 1095 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 1096 | ✗ | for (int m = 0; m < window_length; m++) | |
| 1097 | ✗ | dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23); | |
| 1098 | ✗ | break; | |
| 1099 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 1100 | ✗ | for (int m = 0; m < window_length; m++) | |
| 1101 | ✗ | dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23); | |
| 1102 | ✗ | break; | |
| 1103 | } | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | ✗ | return 0; | |
| 1107 | } | ||
| 1108 | |||
| 1109 | ✗ | static int output_frame(AVFilterLink *inlink, AVFrame *in) | |
| 1110 | { | ||
| 1111 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 1112 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 1113 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1114 | ✗ | const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode; | |
| 1115 | ✗ | const int offset = s->window_length - s->sample_advance; | |
| 1116 | AVFrame *out; | ||
| 1117 | |||
| 1118 | ✗ | for (int ch = 0; ch < s->channels; ch++) { | |
| 1119 | ✗ | uint8_t *src = (uint8_t *)s->winframe->extended_data[ch]; | |
| 1120 | |||
| 1121 | ✗ | memmove(src, src + s->sample_advance * s->sample_size, | |
| 1122 | ✗ | offset * s->sample_size); | |
| 1123 | ✗ | memcpy(src + offset * s->sample_size, in->extended_data[ch], | |
| 1124 | ✗ | in->nb_samples * s->sample_size); | |
| 1125 | ✗ | memset(src + s->sample_size * (offset + in->nb_samples), 0, | |
| 1126 | ✗ | (s->sample_advance - in->nb_samples) * s->sample_size); | |
| 1127 | } | ||
| 1128 | |||
| 1129 | ✗ | if (s->track_noise) { | |
| 1130 | ✗ | double average = 0.0, min = DBL_MAX, max = -DBL_MAX; | |
| 1131 | |||
| 1132 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1133 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1134 | |||
| 1135 | ✗ | average += dnch->noise_floor; | |
| 1136 | ✗ | max = fmax(max, dnch->noise_floor); | |
| 1137 | ✗ | min = fmin(min, dnch->noise_floor); | |
| 1138 | } | ||
| 1139 | |||
| 1140 | ✗ | average /= inlink->ch_layout.nb_channels; | |
| 1141 | |||
| 1142 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1143 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1144 | |||
| 1145 | ✗ | switch (s->noise_floor_link) { | |
| 1146 | ✗ | case MIN_LINK: dnch->noise_floor = min; break; | |
| 1147 | ✗ | case MAX_LINK: dnch->noise_floor = max; break; | |
| 1148 | ✗ | case AVERAGE_LINK: dnch->noise_floor = average; break; | |
| 1149 | ✗ | case NONE_LINK: | |
| 1150 | default: | ||
| 1151 | ✗ | break; | |
| 1152 | } | ||
| 1153 | |||
| 1154 | ✗ | if (dnch->noise_floor != dnch->last_noise_floor) | |
| 1155 | ✗ | set_parameters(s, dnch, 1, 0); | |
| 1156 | } | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | ✗ | if (s->sample_noise_mode == SAMPLE_START) { | |
| 1160 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1161 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1162 | |||
| 1163 | ✗ | init_sample_noise(dnch); | |
| 1164 | } | ||
| 1165 | ✗ | s->sample_noise_mode = SAMPLE_NONE; | |
| 1166 | ✗ | s->sample_noise = 1; | |
| 1167 | ✗ | s->sample_noise_blocks = 0; | |
| 1168 | } | ||
| 1169 | |||
| 1170 | ✗ | if (s->sample_noise) { | |
| 1171 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1172 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1173 | |||
| 1174 | ✗ | sample_noise_block(s, dnch, s->winframe, ch); | |
| 1175 | } | ||
| 1176 | ✗ | s->sample_noise_blocks++; | |
| 1177 | } | ||
| 1178 | |||
| 1179 | ✗ | if (s->sample_noise_mode == SAMPLE_STOP) { | |
| 1180 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1181 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1182 | double sample_noise[NB_PROFILE_BANDS]; | ||
| 1183 | |||
| 1184 | ✗ | if (s->sample_noise_blocks <= 0) | |
| 1185 | ✗ | break; | |
| 1186 | ✗ | finish_sample_noise(s, dnch, sample_noise); | |
| 1187 | ✗ | set_noise_profile(ctx, dnch, sample_noise); | |
| 1188 | ✗ | set_parameters(s, dnch, 1, 1); | |
| 1189 | } | ||
| 1190 | ✗ | s->sample_noise = 0; | |
| 1191 | ✗ | s->sample_noise_blocks = 0; | |
| 1192 | ✗ | s->sample_noise_mode = SAMPLE_NONE; | |
| 1193 | } | ||
| 1194 | |||
| 1195 | ✗ | ff_filter_execute(ctx, filter_channel, s->winframe, NULL, | |
| 1196 | ✗ | FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx))); | |
| 1197 | |||
| 1198 | ✗ | if (av_frame_is_writable(in)) { | |
| 1199 | ✗ | out = in; | |
| 1200 | } else { | ||
| 1201 | ✗ | out = ff_get_audio_buffer(outlink, in->nb_samples); | |
| 1202 | ✗ | if (!out) { | |
| 1203 | ✗ | av_frame_free(&in); | |
| 1204 | ✗ | return AVERROR(ENOMEM); | |
| 1205 | } | ||
| 1206 | |||
| 1207 | ✗ | av_frame_copy_props(out, in); | |
| 1208 | } | ||
| 1209 | |||
| 1210 | ✗ | for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | |
| 1211 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1212 | ✗ | double *src = dnch->out_samples; | |
| 1213 | ✗ | const double *orig_dbl = (const double *)s->winframe->extended_data[ch]; | |
| 1214 | ✗ | const float *orig_flt = (const float *)s->winframe->extended_data[ch]; | |
| 1215 | ✗ | double *dst_dbl = (double *)out->extended_data[ch]; | |
| 1216 | ✗ | float *dst_flt = (float *)out->extended_data[ch]; | |
| 1217 | |||
| 1218 | ✗ | switch (output_mode) { | |
| 1219 | ✗ | case IN_MODE: | |
| 1220 | ✗ | switch (s->format) { | |
| 1221 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 1222 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1223 | ✗ | dst_flt[m] = orig_flt[m]; | |
| 1224 | ✗ | break; | |
| 1225 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 1226 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1227 | ✗ | dst_dbl[m] = orig_dbl[m]; | |
| 1228 | ✗ | break; | |
| 1229 | } | ||
| 1230 | ✗ | break; | |
| 1231 | ✗ | case OUT_MODE: | |
| 1232 | ✗ | switch (s->format) { | |
| 1233 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 1234 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1235 | ✗ | dst_flt[m] = src[m]; | |
| 1236 | ✗ | break; | |
| 1237 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 1238 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1239 | ✗ | dst_dbl[m] = src[m]; | |
| 1240 | ✗ | break; | |
| 1241 | } | ||
| 1242 | ✗ | break; | |
| 1243 | ✗ | case NOISE_MODE: | |
| 1244 | ✗ | switch (s->format) { | |
| 1245 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 1246 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1247 | ✗ | dst_flt[m] = orig_flt[m] - src[m]; | |
| 1248 | ✗ | break; | |
| 1249 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 1250 | ✗ | for (int m = 0; m < out->nb_samples; m++) | |
| 1251 | ✗ | dst_dbl[m] = orig_dbl[m] - src[m]; | |
| 1252 | ✗ | break; | |
| 1253 | } | ||
| 1254 | ✗ | break; | |
| 1255 | ✗ | default: | |
| 1256 | ✗ | if (in != out) | |
| 1257 | ✗ | av_frame_free(&in); | |
| 1258 | ✗ | av_frame_free(&out); | |
| 1259 | ✗ | return AVERROR_BUG; | |
| 1260 | } | ||
| 1261 | |||
| 1262 | ✗ | memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src)); | |
| 1263 | ✗ | memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src)); | |
| 1264 | } | ||
| 1265 | |||
| 1266 | ✗ | if (out != in) | |
| 1267 | ✗ | av_frame_free(&in); | |
| 1268 | ✗ | return ff_filter_frame(outlink, out); | |
| 1269 | } | ||
| 1270 | |||
| 1271 | ✗ | static int activate(AVFilterContext *ctx) | |
| 1272 | { | ||
| 1273 | ✗ | AVFilterLink *inlink = ctx->inputs[0]; | |
| 1274 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 1275 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1276 | ✗ | AVFrame *in = NULL; | |
| 1277 | int ret; | ||
| 1278 | |||
| 1279 | ✗ | FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); | |
| 1280 | |||
| 1281 | ✗ | ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in); | |
| 1282 | ✗ | if (ret < 0) | |
| 1283 | ✗ | return ret; | |
| 1284 | ✗ | if (ret > 0) | |
| 1285 | ✗ | return output_frame(inlink, in); | |
| 1286 | |||
| 1287 | ✗ | if (ff_inlink_queued_samples(inlink) >= s->sample_advance) { | |
| 1288 | ✗ | ff_filter_set_ready(ctx, 10); | |
| 1289 | ✗ | return 0; | |
| 1290 | } | ||
| 1291 | |||
| 1292 | ✗ | FF_FILTER_FORWARD_STATUS(inlink, outlink); | |
| 1293 | ✗ | FF_FILTER_FORWARD_WANTED(outlink, inlink); | |
| 1294 | |||
| 1295 | ✗ | return FFERROR_NOT_READY; | |
| 1296 | } | ||
| 1297 | |||
| 1298 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 1299 | { | ||
| 1300 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1301 | |||
| 1302 | ✗ | av_freep(&s->window); | |
| 1303 | ✗ | av_freep(&s->bin2band); | |
| 1304 | ✗ | av_freep(&s->band_alpha); | |
| 1305 | ✗ | av_freep(&s->band_beta); | |
| 1306 | ✗ | av_frame_free(&s->winframe); | |
| 1307 | |||
| 1308 | ✗ | if (s->dnch) { | |
| 1309 | ✗ | for (int ch = 0; ch < s->channels; ch++) { | |
| 1310 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1311 | ✗ | av_freep(&dnch->amt); | |
| 1312 | ✗ | av_freep(&dnch->band_amt); | |
| 1313 | ✗ | av_freep(&dnch->band_excit); | |
| 1314 | ✗ | av_freep(&dnch->gain); | |
| 1315 | ✗ | av_freep(&dnch->smoothed_gain); | |
| 1316 | ✗ | av_freep(&dnch->prior); | |
| 1317 | ✗ | av_freep(&dnch->prior_band_excit); | |
| 1318 | ✗ | av_freep(&dnch->clean_data); | |
| 1319 | ✗ | av_freep(&dnch->noisy_data); | |
| 1320 | ✗ | av_freep(&dnch->out_samples); | |
| 1321 | ✗ | av_freep(&dnch->spread_function); | |
| 1322 | ✗ | av_freep(&dnch->abs_var); | |
| 1323 | ✗ | av_freep(&dnch->rel_var); | |
| 1324 | ✗ | av_freep(&dnch->min_abs_var); | |
| 1325 | ✗ | av_freep(&dnch->fft_in); | |
| 1326 | ✗ | av_freep(&dnch->fft_out); | |
| 1327 | ✗ | av_tx_uninit(&dnch->fft); | |
| 1328 | ✗ | av_tx_uninit(&dnch->ifft); | |
| 1329 | } | ||
| 1330 | ✗ | av_freep(&s->dnch); | |
| 1331 | } | ||
| 1332 | ✗ | } | |
| 1333 | |||
| 1334 | ✗ | static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, | |
| 1335 | char *res, int res_len, int flags) | ||
| 1336 | { | ||
| 1337 | ✗ | AudioFFTDeNoiseContext *s = ctx->priv; | |
| 1338 | ✗ | int ret = 0; | |
| 1339 | |||
| 1340 | ✗ | ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags); | |
| 1341 | ✗ | if (ret < 0) | |
| 1342 | ✗ | return ret; | |
| 1343 | |||
| 1344 | ✗ | if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn")) | |
| 1345 | ✗ | return 0; | |
| 1346 | |||
| 1347 | ✗ | for (int ch = 0; ch < s->channels; ch++) { | |
| 1348 | ✗ | DeNoiseChannel *dnch = &s->dnch[ch]; | |
| 1349 | |||
| 1350 | ✗ | dnch->noise_reduction = s->noise_reduction; | |
| 1351 | ✗ | dnch->noise_floor = s->noise_floor; | |
| 1352 | ✗ | dnch->residual_floor = s->residual_floor; | |
| 1353 | |||
| 1354 | ✗ | set_parameters(s, dnch, 1, 1); | |
| 1355 | } | ||
| 1356 | |||
| 1357 | ✗ | return 0; | |
| 1358 | } | ||
| 1359 | |||
| 1360 | static const AVFilterPad inputs[] = { | ||
| 1361 | { | ||
| 1362 | .name = "default", | ||
| 1363 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 1364 | .config_props = config_input, | ||
| 1365 | }, | ||
| 1366 | }; | ||
| 1367 | |||
| 1368 | const FFFilter ff_af_afftdn = { | ||
| 1369 | .p.name = "afftdn", | ||
| 1370 | .p.description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."), | ||
| 1371 | .p.priv_class = &afftdn_class, | ||
| 1372 | .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | | ||
| 1373 | AVFILTER_FLAG_SLICE_THREADS, | ||
| 1374 | .priv_size = sizeof(AudioFFTDeNoiseContext), | ||
| 1375 | .activate = activate, | ||
| 1376 | .uninit = uninit, | ||
| 1377 | FILTER_INPUTS(inputs), | ||
| 1378 | FILTER_OUTPUTS(ff_audio_default_filterpad), | ||
| 1379 | FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP), | ||
| 1380 | .process_command = process_command, | ||
| 1381 | }; | ||
| 1382 |