| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2014 - 2021 Jason Jang | ||
| 3 | * Copyright (c) 2021 Paul B Mahol | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public License | ||
| 9 | * as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public License | ||
| 18 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include "libavutil/mem.h" | ||
| 23 | #include "libavutil/opt.h" | ||
| 24 | #include "libavutil/tx.h" | ||
| 25 | #include "audio.h" | ||
| 26 | #include "avfilter.h" | ||
| 27 | #include "filters.h" | ||
| 28 | |||
| 29 | typedef struct AudioPsyClipContext { | ||
| 30 | const AVClass *class; | ||
| 31 | |||
| 32 | double level_in; | ||
| 33 | double level_out; | ||
| 34 | double clip_level; | ||
| 35 | double adaptive; | ||
| 36 | int auto_level; | ||
| 37 | int diff_only; | ||
| 38 | int iterations; | ||
| 39 | char *protections_str; | ||
| 40 | double *protections; | ||
| 41 | |||
| 42 | int num_psy_bins; | ||
| 43 | int fft_size; | ||
| 44 | int overlap; | ||
| 45 | int channels; | ||
| 46 | |||
| 47 | int spread_table_rows; | ||
| 48 | int *spread_table_index; | ||
| 49 | int (*spread_table_range)[2]; | ||
| 50 | float *window, *inv_window, *spread_table, *margin_curve; | ||
| 51 | |||
| 52 | AVFrame *in; | ||
| 53 | AVFrame *in_buffer; | ||
| 54 | AVFrame *in_frame; | ||
| 55 | AVFrame *out_dist_frame; | ||
| 56 | AVFrame *windowed_frame; | ||
| 57 | AVFrame *clipping_delta; | ||
| 58 | AVFrame *spectrum_buf; | ||
| 59 | AVFrame *mask_curve; | ||
| 60 | |||
| 61 | AVTXContext **tx_ctx; | ||
| 62 | av_tx_fn tx_fn; | ||
| 63 | AVTXContext **itx_ctx; | ||
| 64 | av_tx_fn itx_fn; | ||
| 65 | } AudioPsyClipContext; | ||
| 66 | |||
| 67 | #define OFFSET(x) offsetof(AudioPsyClipContext, x) | ||
| 68 | #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM | ||
| 69 | |||
| 70 | static const AVOption apsyclip_options[] = { | ||
| 71 | { "level_in", "set input level", OFFSET(level_in), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS }, | ||
| 72 | { "level_out", "set output level", OFFSET(level_out), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS }, | ||
| 73 | { "clip", "set clip level", OFFSET(clip_level), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 1, FLAGS }, | ||
| 74 | { "diff", "enable difference", OFFSET(diff_only), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, | ||
| 75 | { "adaptive", "set adaptive distortion", OFFSET(adaptive), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 1, FLAGS }, | ||
| 76 | { "iterations", "set iterations", OFFSET(iterations), AV_OPT_TYPE_INT, {.i64=10}, 1, 20, FLAGS }, | ||
| 77 | { "level", "set auto level", OFFSET(auto_level), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, | ||
| 78 | {NULL} | ||
| 79 | }; | ||
| 80 | |||
| 81 | AVFILTER_DEFINE_CLASS(apsyclip); | ||
| 82 | |||
| 83 | ✗ | static void generate_hann_window(float *window, float *inv_window, int size) | |
| 84 | { | ||
| 85 | ✗ | for (int i = 0; i < size; i++) { | |
| 86 | ✗ | float value = 0.5f * (1.f - cosf(2.f * M_PI * i / size)); | |
| 87 | |||
| 88 | ✗ | window[i] = value; | |
| 89 | // 1/window to calculate unwindowed peak. | ||
| 90 | ✗ | inv_window[i] = value > 0.1f ? 1.f / value : 0.f; | |
| 91 | } | ||
| 92 | ✗ | } | |
| 93 | |||
| 94 | ✗ | static void set_margin_curve(AudioPsyClipContext *s, | |
| 95 | const int (*points)[2], int num_points, int sample_rate) | ||
| 96 | { | ||
| 97 | ✗ | int j = 0; | |
| 98 | |||
| 99 | ✗ | s->margin_curve[0] = points[0][1]; | |
| 100 | |||
| 101 | ✗ | for (int i = 0; i < num_points - 1; i++) { | |
| 102 | ✗ | while (j < s->fft_size / 2 + 1 && j * sample_rate / s->fft_size < points[i + 1][0]) { | |
| 103 | // linearly interpolate between points | ||
| 104 | ✗ | int binHz = j * sample_rate / s->fft_size; | |
| 105 | ✗ | s->margin_curve[j] = points[i][1] + (binHz - points[i][0]) * (points[i + 1][1] - points[i][1]) / (points[i + 1][0] - points[i][0]); | |
| 106 | ✗ | j++; | |
| 107 | } | ||
| 108 | } | ||
| 109 | // handle bins after the last point | ||
| 110 | ✗ | while (j < s->fft_size / 2 + 1) { | |
| 111 | ✗ | s->margin_curve[j] = points[num_points - 1][1]; | |
| 112 | ✗ | j++; | |
| 113 | } | ||
| 114 | |||
| 115 | // convert margin curve to linear amplitude scale | ||
| 116 | ✗ | for (j = 0; j < s->fft_size / 2 + 1; j++) | |
| 117 | ✗ | s->margin_curve[j] = powf(10.f, s->margin_curve[j] / 20.f); | |
| 118 | ✗ | } | |
| 119 | |||
| 120 | ✗ | static void generate_spread_table(AudioPsyClipContext *s) | |
| 121 | { | ||
| 122 | // Calculate tent-shape function in log-log scale. | ||
| 123 | |||
| 124 | // As an optimization, only consider bins close to "bin" | ||
| 125 | // This reduces the number of multiplications needed in calculate_mask_curve | ||
| 126 | // The masking contribution at faraway bins is negligeable | ||
| 127 | |||
| 128 | // Another optimization to save memory and speed up the calculation of the | ||
| 129 | // spread table is to calculate and store only 2 spread functions per | ||
| 130 | // octave, and reuse the same spread function for multiple bins. | ||
| 131 | ✗ | int table_index = 0; | |
| 132 | ✗ | int bin = 0; | |
| 133 | ✗ | int increment = 1; | |
| 134 | |||
| 135 | ✗ | while (bin < s->num_psy_bins) { | |
| 136 | ✗ | float sum = 0; | |
| 137 | ✗ | int base_idx = table_index * s->num_psy_bins; | |
| 138 | ✗ | int start_bin = bin * 3 / 4; | |
| 139 | ✗ | int end_bin = FFMIN(s->num_psy_bins, ((bin + 1) * 4 + 2) / 3); | |
| 140 | int next_bin; | ||
| 141 | |||
| 142 | ✗ | for (int j = start_bin; j < end_bin; j++) { | |
| 143 | // add 0.5 so i=0 doesn't get log(0) | ||
| 144 | ✗ | float rel_idx_log = FFABS(logf((j + 0.5f) / (bin + 0.5f))); | |
| 145 | float value; | ||
| 146 | ✗ | if (j >= bin) { | |
| 147 | // mask up | ||
| 148 | ✗ | value = expf(-rel_idx_log * 40.f); | |
| 149 | } else { | ||
| 150 | // mask down | ||
| 151 | ✗ | value = expf(-rel_idx_log * 80.f); | |
| 152 | } | ||
| 153 | // the spreading function is centred in the row | ||
| 154 | ✗ | sum += value; | |
| 155 | ✗ | s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] = value; | |
| 156 | } | ||
| 157 | // now normalize it | ||
| 158 | ✗ | for (int j = start_bin; j < end_bin; j++) { | |
| 159 | ✗ | s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] /= sum; | |
| 160 | } | ||
| 161 | |||
| 162 | ✗ | s->spread_table_range[table_index][0] = start_bin - bin; | |
| 163 | ✗ | s->spread_table_range[table_index][1] = end_bin - bin; | |
| 164 | |||
| 165 | ✗ | if (bin <= 1) { | |
| 166 | ✗ | next_bin = bin + 1; | |
| 167 | } else { | ||
| 168 | ✗ | if ((bin & (bin - 1)) == 0) { | |
| 169 | // power of 2 | ||
| 170 | ✗ | increment = bin / 2; | |
| 171 | } | ||
| 172 | |||
| 173 | ✗ | next_bin = bin + increment; | |
| 174 | } | ||
| 175 | |||
| 176 | // set bins between "bin" and "next_bin" to use this table_index | ||
| 177 | ✗ | for (int i = bin; i < next_bin; i++) | |
| 178 | ✗ | s->spread_table_index[i] = table_index; | |
| 179 | |||
| 180 | ✗ | bin = next_bin; | |
| 181 | ✗ | table_index++; | |
| 182 | } | ||
| 183 | ✗ | } | |
| 184 | |||
| 185 | ✗ | static int config_input(AVFilterLink *inlink) | |
| 186 | { | ||
| 187 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 188 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 189 | static const int points[][2] = { {0,14}, {125,14}, {250,16}, {500,18}, {1000,20}, {2000,20}, {4000,20}, {8000,17}, {16000,14}, {20000,-10} }; | ||
| 190 | static const int num_points = 10; | ||
| 191 | ✗ | float scale = 1.f; | |
| 192 | int ret; | ||
| 193 | |||
| 194 | ✗ | s->fft_size = inlink->sample_rate > 100000 ? 1024 : inlink->sample_rate > 50000 ? 512 : 256; | |
| 195 | ✗ | s->overlap = s->fft_size / 4; | |
| 196 | |||
| 197 | // The psy masking calculation is O(n^2), | ||
| 198 | // so skip it for frequencies not covered by base sampling rantes (i.e. 44k) | ||
| 199 | ✗ | if (inlink->sample_rate <= 50000) { | |
| 200 | ✗ | s->num_psy_bins = s->fft_size / 2; | |
| 201 | ✗ | } else if (inlink->sample_rate <= 100000) { | |
| 202 | ✗ | s->num_psy_bins = s->fft_size / 4; | |
| 203 | } else { | ||
| 204 | ✗ | s->num_psy_bins = s->fft_size / 8; | |
| 205 | } | ||
| 206 | |||
| 207 | ✗ | s->window = av_calloc(s->fft_size, sizeof(*s->window)); | |
| 208 | ✗ | s->inv_window = av_calloc(s->fft_size, sizeof(*s->inv_window)); | |
| 209 | ✗ | if (!s->window || !s->inv_window) | |
| 210 | ✗ | return AVERROR(ENOMEM); | |
| 211 | |||
| 212 | ✗ | s->in_buffer = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 213 | ✗ | s->in_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 214 | ✗ | s->out_dist_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 215 | ✗ | s->windowed_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 216 | ✗ | s->clipping_delta = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 217 | ✗ | s->spectrum_buf = ff_get_audio_buffer(inlink, s->fft_size * 2); | |
| 218 | ✗ | s->mask_curve = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1); | |
| 219 | ✗ | if (!s->in_buffer || !s->in_frame || | |
| 220 | ✗ | !s->out_dist_frame || !s->windowed_frame || | |
| 221 | ✗ | !s->clipping_delta || !s->spectrum_buf || !s->mask_curve) | |
| 222 | ✗ | return AVERROR(ENOMEM); | |
| 223 | |||
| 224 | ✗ | generate_hann_window(s->window, s->inv_window, s->fft_size); | |
| 225 | |||
| 226 | ✗ | s->margin_curve = av_calloc(s->fft_size / 2 + 1, sizeof(*s->margin_curve)); | |
| 227 | ✗ | if (!s->margin_curve) | |
| 228 | ✗ | return AVERROR(ENOMEM); | |
| 229 | |||
| 230 | ✗ | s->spread_table_rows = av_log2(s->num_psy_bins) * 2; | |
| 231 | ✗ | s->spread_table = av_calloc(s->spread_table_rows * s->num_psy_bins, sizeof(*s->spread_table)); | |
| 232 | ✗ | if (!s->spread_table) | |
| 233 | ✗ | return AVERROR(ENOMEM); | |
| 234 | |||
| 235 | ✗ | s->spread_table_range = av_calloc(s->spread_table_rows * 2, sizeof(*s->spread_table_range)); | |
| 236 | ✗ | if (!s->spread_table_range) | |
| 237 | ✗ | return AVERROR(ENOMEM); | |
| 238 | |||
| 239 | ✗ | s->spread_table_index = av_calloc(s->num_psy_bins, sizeof(*s->spread_table_index)); | |
| 240 | ✗ | if (!s->spread_table_index) | |
| 241 | ✗ | return AVERROR(ENOMEM); | |
| 242 | |||
| 243 | ✗ | set_margin_curve(s, points, num_points, inlink->sample_rate); | |
| 244 | |||
| 245 | ✗ | generate_spread_table(s); | |
| 246 | |||
| 247 | ✗ | s->channels = inlink->ch_layout.nb_channels; | |
| 248 | |||
| 249 | ✗ | s->tx_ctx = av_calloc(s->channels, sizeof(*s->tx_ctx)); | |
| 250 | ✗ | s->itx_ctx = av_calloc(s->channels, sizeof(*s->itx_ctx)); | |
| 251 | ✗ | if (!s->tx_ctx || !s->itx_ctx) | |
| 252 | ✗ | return AVERROR(ENOMEM); | |
| 253 | |||
| 254 | ✗ | for (int ch = 0; ch < s->channels; ch++) { | |
| 255 | ✗ | ret = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->fft_size, &scale, 0); | |
| 256 | ✗ | if (ret < 0) | |
| 257 | ✗ | return ret; | |
| 258 | |||
| 259 | ✗ | ret = av_tx_init(&s->itx_ctx[ch], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->fft_size, &scale, 0); | |
| 260 | ✗ | if (ret < 0) | |
| 261 | ✗ | return ret; | |
| 262 | } | ||
| 263 | |||
| 264 | ✗ | return 0; | |
| 265 | } | ||
| 266 | |||
| 267 | ✗ | static void apply_window(AudioPsyClipContext *s, | |
| 268 | const float *in_frame, float *out_frame, const int add_to_out_frame) | ||
| 269 | { | ||
| 270 | ✗ | const float *window = s->window; | |
| 271 | |||
| 272 | ✗ | for (int i = 0; i < s->fft_size; i++) { | |
| 273 | ✗ | if (add_to_out_frame) { | |
| 274 | ✗ | out_frame[i] += in_frame[i] * window[i]; | |
| 275 | } else { | ||
| 276 | ✗ | out_frame[i] = in_frame[i] * window[i]; | |
| 277 | } | ||
| 278 | } | ||
| 279 | ✗ | } | |
| 280 | |||
| 281 | ✗ | static void calculate_mask_curve(AudioPsyClipContext *s, | |
| 282 | const float *spectrum, float *mask_curve) | ||
| 283 | { | ||
| 284 | ✗ | for (int i = 0; i < s->fft_size / 2 + 1; i++) | |
| 285 | ✗ | mask_curve[i] = 0; | |
| 286 | |||
| 287 | ✗ | for (int i = 0; i < s->num_psy_bins; i++) { | |
| 288 | int base_idx, start_bin, end_bin, table_idx; | ||
| 289 | float magnitude; | ||
| 290 | int range[2]; | ||
| 291 | |||
| 292 | ✗ | if (i == 0) { | |
| 293 | ✗ | magnitude = FFABS(spectrum[0]); | |
| 294 | ✗ | } else if (i == s->fft_size / 2) { | |
| 295 | ✗ | magnitude = FFABS(spectrum[s->fft_size]); | |
| 296 | } else { | ||
| 297 | // Because the input signal is real, the + and - frequencies are redundant. | ||
| 298 | // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. | ||
| 299 | ✗ | magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2; | |
| 300 | } | ||
| 301 | |||
| 302 | ✗ | table_idx = s->spread_table_index[i]; | |
| 303 | ✗ | range[0] = s->spread_table_range[table_idx][0]; | |
| 304 | ✗ | range[1] = s->spread_table_range[table_idx][1]; | |
| 305 | ✗ | base_idx = table_idx * s->num_psy_bins; | |
| 306 | ✗ | start_bin = FFMAX(0, i + range[0]); | |
| 307 | ✗ | end_bin = FFMIN(s->num_psy_bins, i + range[1]); | |
| 308 | |||
| 309 | ✗ | for (int j = start_bin; j < end_bin; j++) | |
| 310 | ✗ | mask_curve[j] += s->spread_table[base_idx + s->num_psy_bins / 2 + j - i] * magnitude; | |
| 311 | } | ||
| 312 | |||
| 313 | // for ultrasonic frequencies, skip the O(n^2) spread calculation and just copy the magnitude | ||
| 314 | ✗ | for (int i = s->num_psy_bins; i < s->fft_size / 2 + 1; i++) { | |
| 315 | float magnitude; | ||
| 316 | ✗ | if (i == s->fft_size / 2) { | |
| 317 | ✗ | magnitude = FFABS(spectrum[s->fft_size]); | |
| 318 | } else { | ||
| 319 | // Because the input signal is real, the + and - frequencies are redundant. | ||
| 320 | // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. | ||
| 321 | ✗ | magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2; | |
| 322 | } | ||
| 323 | |||
| 324 | ✗ | mask_curve[i] = magnitude; | |
| 325 | } | ||
| 326 | |||
| 327 | ✗ | for (int i = 0; i < s->fft_size / 2 + 1; i++) | |
| 328 | ✗ | mask_curve[i] = mask_curve[i] / s->margin_curve[i]; | |
| 329 | ✗ | } | |
| 330 | |||
| 331 | ✗ | static void clip_to_window(AudioPsyClipContext *s, | |
| 332 | const float *windowed_frame, float *clipping_delta, float delta_boost) | ||
| 333 | { | ||
| 334 | ✗ | const float *window = s->window; | |
| 335 | |||
| 336 | ✗ | for (int i = 0; i < s->fft_size; i++) { | |
| 337 | ✗ | const float limit = s->clip_level * window[i]; | |
| 338 | ✗ | const float effective_value = windowed_frame[i] + clipping_delta[i]; | |
| 339 | |||
| 340 | ✗ | if (effective_value > limit) { | |
| 341 | ✗ | clipping_delta[i] += (limit - effective_value) * delta_boost; | |
| 342 | ✗ | } else if (effective_value < -limit) { | |
| 343 | ✗ | clipping_delta[i] += (-limit - effective_value) * delta_boost; | |
| 344 | } | ||
| 345 | } | ||
| 346 | ✗ | } | |
| 347 | |||
| 348 | ✗ | static void limit_clip_spectrum(AudioPsyClipContext *s, | |
| 349 | float *clip_spectrum, const float *mask_curve) | ||
| 350 | { | ||
| 351 | // bin 0 | ||
| 352 | ✗ | float relative_distortion_level = FFABS(clip_spectrum[0]) / mask_curve[0]; | |
| 353 | |||
| 354 | ✗ | if (relative_distortion_level > 1.f) | |
| 355 | ✗ | clip_spectrum[0] /= relative_distortion_level; | |
| 356 | |||
| 357 | // bin 1..N/2-1 | ||
| 358 | ✗ | for (int i = 1; i < s->fft_size / 2; i++) { | |
| 359 | ✗ | float real = clip_spectrum[i * 2]; | |
| 360 | ✗ | float imag = clip_spectrum[i * 2 + 1]; | |
| 361 | // Because the input signal is real, the + and - frequencies are redundant. | ||
| 362 | // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. | ||
| 363 | ✗ | relative_distortion_level = hypotf(real, imag) * 2 / mask_curve[i]; | |
| 364 | ✗ | if (relative_distortion_level > 1.0) { | |
| 365 | ✗ | clip_spectrum[i * 2] /= relative_distortion_level; | |
| 366 | ✗ | clip_spectrum[i * 2 + 1] /= relative_distortion_level; | |
| 367 | ✗ | clip_spectrum[s->fft_size * 2 - i * 2] /= relative_distortion_level; | |
| 368 | ✗ | clip_spectrum[s->fft_size * 2 - i * 2 + 1] /= relative_distortion_level; | |
| 369 | } | ||
| 370 | } | ||
| 371 | // bin N/2 | ||
| 372 | ✗ | relative_distortion_level = FFABS(clip_spectrum[s->fft_size]) / mask_curve[s->fft_size / 2]; | |
| 373 | ✗ | if (relative_distortion_level > 1.f) | |
| 374 | ✗ | clip_spectrum[s->fft_size] /= relative_distortion_level; | |
| 375 | ✗ | } | |
| 376 | |||
| 377 | ✗ | static void r2c(float *buffer, int size) | |
| 378 | { | ||
| 379 | ✗ | for (int i = size - 1; i >= 0; i--) | |
| 380 | ✗ | buffer[2 * i] = buffer[i]; | |
| 381 | |||
| 382 | ✗ | for (int i = size - 1; i >= 0; i--) | |
| 383 | ✗ | buffer[2 * i + 1] = 0.f; | |
| 384 | ✗ | } | |
| 385 | |||
| 386 | ✗ | static void c2r(float *buffer, int size) | |
| 387 | { | ||
| 388 | ✗ | for (int i = 0; i < size; i++) | |
| 389 | ✗ | buffer[i] = buffer[2 * i]; | |
| 390 | |||
| 391 | ✗ | for (int i = 0; i < size; i++) | |
| 392 | ✗ | buffer[i + size] = 0.f; | |
| 393 | ✗ | } | |
| 394 | |||
| 395 | ✗ | static void feed(AVFilterContext *ctx, int ch, | |
| 396 | const float *in_samples, float *out_samples, int diff_only, | ||
| 397 | float *in_frame, float *out_dist_frame, | ||
| 398 | float *windowed_frame, float *clipping_delta, | ||
| 399 | float *spectrum_buf, float *mask_curve) | ||
| 400 | { | ||
| 401 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 402 | ✗ | const float clip_level_inv = 1.f / s->clip_level; | |
| 403 | ✗ | const float level_out = s->level_out; | |
| 404 | ✗ | float orig_peak = 0; | |
| 405 | float peak; | ||
| 406 | |||
| 407 | // shift in/out buffers | ||
| 408 | ✗ | for (int i = 0; i < s->fft_size - s->overlap; i++) { | |
| 409 | ✗ | in_frame[i] = in_frame[i + s->overlap]; | |
| 410 | ✗ | out_dist_frame[i] = out_dist_frame[i + s->overlap]; | |
| 411 | } | ||
| 412 | |||
| 413 | ✗ | for (int i = 0; i < s->overlap; i++) { | |
| 414 | ✗ | in_frame[i + s->fft_size - s->overlap] = in_samples[i]; | |
| 415 | ✗ | out_dist_frame[i + s->fft_size - s->overlap] = 0.f; | |
| 416 | } | ||
| 417 | |||
| 418 | ✗ | apply_window(s, in_frame, windowed_frame, 0); | |
| 419 | ✗ | r2c(windowed_frame, s->fft_size); | |
| 420 | ✗ | s->tx_fn(s->tx_ctx[ch], spectrum_buf, windowed_frame, sizeof(AVComplexFloat)); | |
| 421 | ✗ | c2r(windowed_frame, s->fft_size); | |
| 422 | ✗ | calculate_mask_curve(s, spectrum_buf, mask_curve); | |
| 423 | |||
| 424 | // It would be easier to calculate the peak from the unwindowed input. | ||
| 425 | // This is just for consistency with the clipped peak calculateion | ||
| 426 | // because the inv_window zeros out samples on the edge of the window. | ||
| 427 | ✗ | for (int i = 0; i < s->fft_size; i++) | |
| 428 | ✗ | orig_peak = FFMAX(orig_peak, FFABS(windowed_frame[i] * s->inv_window[i])); | |
| 429 | ✗ | orig_peak *= clip_level_inv; | |
| 430 | ✗ | peak = orig_peak; | |
| 431 | |||
| 432 | // clear clipping_delta | ||
| 433 | ✗ | for (int i = 0; i < s->fft_size * 2; i++) | |
| 434 | ✗ | clipping_delta[i] = 0.f; | |
| 435 | |||
| 436 | // repeat clipping-filtering process a few times to control both the peaks and the spectrum | ||
| 437 | ✗ | for (int i = 0; i < s->iterations; i++) { | |
| 438 | ✗ | float mask_curve_shift = 1.122f; // 1.122 is 1dB | |
| 439 | // The last 1/3 of rounds have boosted delta to help reach the peak target faster | ||
| 440 | ✗ | float delta_boost = 1.f; | |
| 441 | ✗ | if (i >= s->iterations - s->iterations / 3) { | |
| 442 | // boosting the delta when largs peaks are still present is dangerous | ||
| 443 | ✗ | if (peak < 2.f) | |
| 444 | ✗ | delta_boost = 2.f; | |
| 445 | } | ||
| 446 | |||
| 447 | ✗ | clip_to_window(s, windowed_frame, clipping_delta, delta_boost); | |
| 448 | |||
| 449 | ✗ | r2c(clipping_delta, s->fft_size); | |
| 450 | ✗ | s->tx_fn(s->tx_ctx[ch], spectrum_buf, clipping_delta, sizeof(AVComplexFloat)); | |
| 451 | |||
| 452 | ✗ | limit_clip_spectrum(s, spectrum_buf, mask_curve); | |
| 453 | |||
| 454 | ✗ | s->itx_fn(s->itx_ctx[ch], clipping_delta, spectrum_buf, sizeof(AVComplexFloat)); | |
| 455 | ✗ | c2r(clipping_delta, s->fft_size); | |
| 456 | |||
| 457 | ✗ | for (int j = 0; j < s->fft_size; j++) | |
| 458 | ✗ | clipping_delta[j] /= s->fft_size; | |
| 459 | |||
| 460 | ✗ | peak = 0; | |
| 461 | ✗ | for (int j = 0; j < s->fft_size; j++) | |
| 462 | ✗ | peak = FFMAX(peak, FFABS((windowed_frame[j] + clipping_delta[j]) * s->inv_window[j])); | |
| 463 | ✗ | peak *= clip_level_inv; | |
| 464 | |||
| 465 | // Automatically adjust mask_curve as necessary to reach peak target | ||
| 466 | ✗ | if (orig_peak > 1.f && peak > 1.f) { | |
| 467 | ✗ | float diff_achieved = orig_peak - peak; | |
| 468 | ✗ | if (i + 1 < s->iterations - s->iterations / 3 && diff_achieved > 0) { | |
| 469 | ✗ | float diff_needed = orig_peak - 1.f; | |
| 470 | ✗ | float diff_ratio = diff_needed / diff_achieved; | |
| 471 | // If a good amount of peak reduction was already achieved, | ||
| 472 | // don't shift the mask_curve by the full peak value | ||
| 473 | // On the other hand, if only a little peak reduction was achieved, | ||
| 474 | // don't shift the mask_curve by the enormous diff_ratio. | ||
| 475 | ✗ | diff_ratio = FFMIN(diff_ratio, peak); | |
| 476 | ✗ | mask_curve_shift = FFMAX(mask_curve_shift, diff_ratio); | |
| 477 | } else { | ||
| 478 | // If the peak got higher than the input or we are in the last 1/3 rounds, | ||
| 479 | // go back to the heavy-handed peak heuristic. | ||
| 480 | ✗ | mask_curve_shift = FFMAX(mask_curve_shift, peak); | |
| 481 | } | ||
| 482 | } | ||
| 483 | |||
| 484 | ✗ | mask_curve_shift = 1.f + (mask_curve_shift - 1.f) * s->adaptive; | |
| 485 | |||
| 486 | // Be less strict in the next iteration. | ||
| 487 | // This helps with peak control. | ||
| 488 | ✗ | for (int j = 0; j < s->fft_size / 2 + 1; j++) | |
| 489 | ✗ | mask_curve[j] *= mask_curve_shift; | |
| 490 | } | ||
| 491 | |||
| 492 | // do overlap & add | ||
| 493 | ✗ | apply_window(s, clipping_delta, out_dist_frame, 1); | |
| 494 | |||
| 495 | ✗ | for (int i = 0; i < s->overlap; i++) { | |
| 496 | // 4 times overlap with squared hanning window results in 1.5 time increase in amplitude | ||
| 497 | ✗ | if (!ctx->is_disabled) { | |
| 498 | ✗ | out_samples[i] = out_dist_frame[i] / 1.5f; | |
| 499 | ✗ | if (!diff_only) | |
| 500 | ✗ | out_samples[i] += in_frame[i]; | |
| 501 | ✗ | if (s->auto_level) | |
| 502 | ✗ | out_samples[i] *= clip_level_inv; | |
| 503 | ✗ | out_samples[i] *= level_out; | |
| 504 | } else { | ||
| 505 | ✗ | out_samples[i] = in_frame[i]; | |
| 506 | } | ||
| 507 | } | ||
| 508 | ✗ | } | |
| 509 | |||
| 510 | ✗ | static int psy_channel(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int ch) | |
| 511 | { | ||
| 512 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 513 | ✗ | const float *src = (const float *)in->extended_data[ch]; | |
| 514 | ✗ | float *in_buffer = (float *)s->in_buffer->extended_data[ch]; | |
| 515 | ✗ | float *dst = (float *)out->extended_data[ch]; | |
| 516 | |||
| 517 | ✗ | for (int n = 0; n < s->overlap; n++) | |
| 518 | ✗ | in_buffer[n] = src[n] * s->level_in; | |
| 519 | |||
| 520 | ✗ | feed(ctx, ch, in_buffer, dst, s->diff_only, | |
| 521 | ✗ | (float *)(s->in_frame->extended_data[ch]), | |
| 522 | ✗ | (float *)(s->out_dist_frame->extended_data[ch]), | |
| 523 | ✗ | (float *)(s->windowed_frame->extended_data[ch]), | |
| 524 | ✗ | (float *)(s->clipping_delta->extended_data[ch]), | |
| 525 | ✗ | (float *)(s->spectrum_buf->extended_data[ch]), | |
| 526 | ✗ | (float *)(s->mask_curve->extended_data[ch])); | |
| 527 | |||
| 528 | ✗ | return 0; | |
| 529 | } | ||
| 530 | |||
| 531 | ✗ | static int psy_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) | |
| 532 | { | ||
| 533 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 534 | ✗ | AVFrame *out = arg; | |
| 535 | ✗ | const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs; | |
| 536 | ✗ | const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs; | |
| 537 | |||
| 538 | ✗ | for (int ch = start; ch < end; ch++) | |
| 539 | ✗ | psy_channel(ctx, s->in, out, ch); | |
| 540 | |||
| 541 | ✗ | return 0; | |
| 542 | } | ||
| 543 | |||
| 544 | ✗ | static int filter_frame(AVFilterLink *inlink, AVFrame *in) | |
| 545 | { | ||
| 546 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 547 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 548 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 549 | AVFrame *out; | ||
| 550 | int ret; | ||
| 551 | |||
| 552 | ✗ | out = ff_get_audio_buffer(outlink, s->overlap); | |
| 553 | ✗ | if (!out) { | |
| 554 | ✗ | ret = AVERROR(ENOMEM); | |
| 555 | ✗ | goto fail; | |
| 556 | } | ||
| 557 | |||
| 558 | ✗ | s->in = in; | |
| 559 | ✗ | av_frame_copy_props(out, in); | |
| 560 | ✗ | ff_filter_execute(ctx, psy_channels, out, NULL, | |
| 561 | ✗ | FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx))); | |
| 562 | |||
| 563 | ✗ | out->pts = in->pts; | |
| 564 | ✗ | out->nb_samples = in->nb_samples; | |
| 565 | ✗ | ret = ff_filter_frame(outlink, out); | |
| 566 | ✗ | fail: | |
| 567 | ✗ | av_frame_free(&in); | |
| 568 | ✗ | s->in = NULL; | |
| 569 | ✗ | return ret < 0 ? ret : 0; | |
| 570 | } | ||
| 571 | |||
| 572 | ✗ | static int activate(AVFilterContext *ctx) | |
| 573 | { | ||
| 574 | ✗ | AVFilterLink *inlink = ctx->inputs[0]; | |
| 575 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 576 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 577 | ✗ | AVFrame *in = NULL; | |
| 578 | ✗ | int ret = 0, status; | |
| 579 | int64_t pts; | ||
| 580 | |||
| 581 | ✗ | FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); | |
| 582 | |||
| 583 | ✗ | ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in); | |
| 584 | ✗ | if (ret < 0) | |
| 585 | ✗ | return ret; | |
| 586 | |||
| 587 | ✗ | if (ret > 0) { | |
| 588 | ✗ | return filter_frame(inlink, in); | |
| 589 | ✗ | } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { | |
| 590 | ✗ | ff_outlink_set_status(outlink, status, pts); | |
| 591 | ✗ | return 0; | |
| 592 | } else { | ||
| 593 | ✗ | if (ff_inlink_queued_samples(inlink) >= s->overlap) { | |
| 594 | ✗ | ff_filter_set_ready(ctx, 10); | |
| 595 | ✗ | } else if (ff_outlink_frame_wanted(outlink)) { | |
| 596 | ✗ | ff_inlink_request_frame(inlink); | |
| 597 | } | ||
| 598 | ✗ | return 0; | |
| 599 | } | ||
| 600 | } | ||
| 601 | |||
| 602 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 603 | { | ||
| 604 | ✗ | AudioPsyClipContext *s = ctx->priv; | |
| 605 | |||
| 606 | ✗ | av_freep(&s->window); | |
| 607 | ✗ | av_freep(&s->inv_window); | |
| 608 | ✗ | av_freep(&s->spread_table); | |
| 609 | ✗ | av_freep(&s->spread_table_range); | |
| 610 | ✗ | av_freep(&s->spread_table_index); | |
| 611 | ✗ | av_freep(&s->margin_curve); | |
| 612 | |||
| 613 | ✗ | av_frame_free(&s->in_buffer); | |
| 614 | ✗ | av_frame_free(&s->in_frame); | |
| 615 | ✗ | av_frame_free(&s->out_dist_frame); | |
| 616 | ✗ | av_frame_free(&s->windowed_frame); | |
| 617 | ✗ | av_frame_free(&s->clipping_delta); | |
| 618 | ✗ | av_frame_free(&s->spectrum_buf); | |
| 619 | ✗ | av_frame_free(&s->mask_curve); | |
| 620 | |||
| 621 | ✗ | for (int ch = 0; ch < s->channels; ch++) { | |
| 622 | ✗ | if (s->tx_ctx) | |
| 623 | ✗ | av_tx_uninit(&s->tx_ctx[ch]); | |
| 624 | ✗ | if (s->itx_ctx) | |
| 625 | ✗ | av_tx_uninit(&s->itx_ctx[ch]); | |
| 626 | } | ||
| 627 | |||
| 628 | ✗ | av_freep(&s->tx_ctx); | |
| 629 | ✗ | av_freep(&s->itx_ctx); | |
| 630 | ✗ | } | |
| 631 | |||
| 632 | static const AVFilterPad inputs[] = { | ||
| 633 | { | ||
| 634 | .name = "default", | ||
| 635 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 636 | .config_props = config_input, | ||
| 637 | }, | ||
| 638 | }; | ||
| 639 | |||
| 640 | const FFFilter ff_af_apsyclip = { | ||
| 641 | .p.name = "apsyclip", | ||
| 642 | .p.description = NULL_IF_CONFIG_SMALL("Audio Psychoacoustic Clipper."), | ||
| 643 | .p.priv_class = &apsyclip_class, | ||
| 644 | .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | | ||
| 645 | AVFILTER_FLAG_SLICE_THREADS, | ||
| 646 | .priv_size = sizeof(AudioPsyClipContext), | ||
| 647 | .uninit = uninit, | ||
| 648 | FILTER_INPUTS(inputs), | ||
| 649 | FILTER_OUTPUTS(ff_audio_default_filterpad), | ||
| 650 | FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP), | ||
| 651 | .activate = activate, | ||
| 652 | .process_command = ff_filter_process_command, | ||
| 653 | }; | ||
| 654 |