FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_adrc.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 0 229 0.0%
Functions: 0 16 0.0%
Branches: 0 98 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2022 Paul B Mahol
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/eval.h"
24 #include "libavutil/ffmath.h"
25 #include "libavutil/mem.h"
26 #include "libavutil/opt.h"
27 #include "libavutil/tx.h"
28 #include "audio.h"
29 #include "avfilter.h"
30 #include "filters.h"
31
/* Names of the variables usable in the "transfer" expression; passed to av_expr_parse(). */
32 static const char * const var_names[] = {
33 "ch", ///< index of the channel currently being processed
34 "sn", ///< number of samples (taken from the output link's sample_count_in)
35 "nb_channels", ///< number of channels of the input link
36 "t", ///< timestamp expressed in seconds (sn divided by the sample rate)
37 "sr", ///< sample rate
38 "p", ///< input power in dB for frequency bin
39 "f", ///< frequency in Hz of the frequency bin
40 NULL
41 };
42
/* Indices into the var_values[] arrays; listed in the same order as var_names[]. */
43 enum var_name {
44 VAR_CH,
45 VAR_SN,
46 VAR_NB_CHANNELS,
47 VAR_T,
48 VAR_SR,
49 VAR_P,
50 VAR_F,
51 VAR_VARS_NB ///< number of variables, used to size var_values[]
52 };
53
/* Private context of the adrc filter. */
54 typedef struct AudioDRCContext {
55 const AVClass *class;
56
57 double attack_ms;  ///< "attack" option
58 double release_ms; ///< "release" option
59 char *expr_str;    ///< "transfer" option: source text of the transfer expression
60
61 double attack;  ///< envelope smoothing coefficient derived from attack_ms in activate()
62 double release; ///< envelope smoothing coefficient derived from release_ms in activate()
63
64 int fft_size; ///< transform length, chosen from the sample rate in config_input()
65 int overlap;  ///< fft_size / 4; number of samples consumed and produced per call
66 int channels; ///< channel count of the input link
67
68 float fx;      ///< multiplier turning a bin index into the "f" expression variable
69 float *window; ///< fft_size Hann window coefficients
70
/* Work buffers allocated in config_input(); one plane per channel. */
71 AVFrame *drc_frame;      ///< output of the inverse transform
72 AVFrame *energy;         ///< per-bin power in dB
73 AVFrame *envelope;       ///< per-bin smoothed gain in dB, carried across calls
74 AVFrame *factors;        ///< per-bin linear factors applied to the spectrum
75 AVFrame *in;             ///< frame currently being filtered; only set inside filter_frame()
76 AVFrame *in_buffer;      ///< copy of the newest input samples
77 AVFrame *in_frame;       ///< sliding window over the most recent input samples
78 AVFrame *out_dist_frame; ///< overlap-add accumulator for the output
79 AVFrame *spectrum_buf;   ///< output of the forward transform, modified in place
80 AVFrame *target_gain;    ///< per-bin result of the transfer expression
81 AVFrame *windowed_frame; ///< in_frame multiplied by the window
82
83 char *channels_to_filter; ///< "channels" option
84 AVChannelLayout ch_layout; ///< channels to process; rebuilt on every activate() call
85
86 AVTXContext **tx_ctx;  ///< forward transform contexts, one per channel
87 av_tx_fn tx_fn;
88 AVTXContext **itx_ctx; ///< inverse transform contexts, one per channel
89 av_tx_fn itx_fn;
90
91 AVExpr *expr; ///< parsed form of expr_str
92 double var_values[VAR_VARS_NB]; ///< shared expression variables; feed() works on a private copy
93 } AudioDRCContext;
94
/* Byte offset of a field inside AudioDRCContext, as needed by the option table. */
95 #define OFFSET(x) offsetof(AudioDRCContext, x)
/* All options are audio filtering parameters and carry the runtime-parameter flag. */
96 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM
97
/* User-visible options; attack and release are converted from milliseconds in activate(). */
98 static const AVOption adrc_options[] = {
99 { "transfer", "set the transfer expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str="p"}, 0, 0, FLAGS },
100 { "attack", "set the attack", OFFSET(attack_ms), AV_OPT_TYPE_DOUBLE, {.dbl=50.}, 1, 1000, FLAGS },
101 { "release", "set the release", OFFSET(release_ms), AV_OPT_TYPE_DOUBLE, {.dbl=100.}, 5, 2000, FLAGS },
102 { "channels", "set channels to filter",OFFSET(channels_to_filter),AV_OPT_TYPE_STRING,{.str="all"},0, 0, FLAGS },
103 {NULL}
104 };
105
/* Defines adrc_class, which the filter definition references as its priv_class. */
106 AVFILTER_DEFINE_CLASS(adrc);
107
108 static void generate_hann_window(float *window, int size)
109 {
110 for (int i = 0; i < size; i++) {
111 float value = 0.5f * (1.f - cosf(2.f * M_PI * i / size));
112
113 window[i] = value;
114 }
115 }
116
117 static int config_input(AVFilterLink *inlink)
118 {
119 AVFilterContext *ctx = inlink->dst;
120 AudioDRCContext *s = ctx->priv;
121 float scale;
122 int ret;
123
124 s->fft_size = inlink->sample_rate > 100000 ? 1024 : inlink->sample_rate > 50000 ? 512 : 256;
125 s->fx = inlink->sample_rate * 0.5f / (s->fft_size / 2 + 1);
126 s->overlap = s->fft_size / 4;
127
128 s->window = av_calloc(s->fft_size, sizeof(*s->window));
129 if (!s->window)
130 return AVERROR(ENOMEM);
131
132 s->drc_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
133 s->energy = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
134 s->envelope = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
135 s->factors = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
136 s->in_buffer = ff_get_audio_buffer(inlink, s->fft_size * 2);
137 s->in_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
138 s->out_dist_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
139 s->spectrum_buf = ff_get_audio_buffer(inlink, s->fft_size * 2);
140 s->target_gain = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
141 s->windowed_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
142 if (!s->in_buffer || !s->in_frame || !s->target_gain ||
143 !s->out_dist_frame || !s->windowed_frame || !s->envelope ||
144 !s->drc_frame || !s->spectrum_buf || !s->energy || !s->factors)
145 return AVERROR(ENOMEM);
146
147 generate_hann_window(s->window, s->fft_size);
148
149 s->channels = inlink->ch_layout.nb_channels;
150
151 s->tx_ctx = av_calloc(s->channels, sizeof(*s->tx_ctx));
152 s->itx_ctx = av_calloc(s->channels, sizeof(*s->itx_ctx));
153 if (!s->tx_ctx || !s->itx_ctx)
154 return AVERROR(ENOMEM);
155
156 for (int ch = 0; ch < s->channels; ch++) {
157 scale = 1.f / s->fft_size;
158 ret = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_size, &scale, 0);
159 if (ret < 0)
160 return ret;
161
162 scale = 1.f;
163 ret = av_tx_init(&s->itx_ctx[ch], &s->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_size, &scale, 0);
164 if (ret < 0)
165 return ret;
166 }
167
168 s->var_values[VAR_SR] = inlink->sample_rate;
169 s->var_values[VAR_NB_CHANNELS] = s->channels;
170
171 return av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
172 NULL, NULL, 0, ctx);
173 }
174
175 static void apply_window(AudioDRCContext *s,
176 const float *in_frame, float *out_frame, const int add_to_out_frame)
177 {
178 const float *window = s->window;
179 const int fft_size = s->fft_size;
180
181 if (add_to_out_frame) {
182 for (int i = 0; i < fft_size; i++)
183 out_frame[i] += in_frame[i] * window[i];
184 } else {
185 for (int i = 0; i < fft_size; i++)
186 out_frame[i] = in_frame[i] * window[i];
187 }
188 }
189
/** Return x squared. */
static float sqrf(float x)
{
    const float squared = x * x;

    return squared;
}
194
195 static void get_energy(AVFilterContext *ctx,
196 int len,
197 float *energy,
198 const float *spectral)
199 {
200 for (int n = 0; n < len; n++) {
201 energy[n] = 10.f * log10f(sqrf(spectral[2 * n]) + sqrf(spectral[2 * n + 1]));
202 if (!isnormal(energy[n]))
203 energy[n] = -351.f;
204 }
205 }
206
207 static void get_target_gain(AVFilterContext *ctx,
208 int len,
209 float *gain,
210 const float *energy,
211 double *var_values,
212 float fx, int bypass)
213 {
214 AudioDRCContext *s = ctx->priv;
215
216 if (bypass) {
217 memcpy(gain, energy, sizeof(*gain) * len);
218 return;
219 }
220
221 for (int n = 0; n < len; n++) {
222 const float Xg = energy[n];
223
224 var_values[VAR_P] = Xg;
225 var_values[VAR_F] = n * fx;
226
227 gain[n] = av_expr_eval(s->expr, var_values, s);
228 }
229 }
230
231 static void get_envelope(AVFilterContext *ctx,
232 int len,
233 float *envelope,
234 const float *energy,
235 const float *gain)
236 {
237 AudioDRCContext *s = ctx->priv;
238 const float release = s->release;
239 const float attack = s->attack;
240
241 for (int n = 0; n < len; n++) {
242 const float Bg = gain[n] - energy[n];
243 const float Vg = envelope[n];
244
245 if (Bg > Vg) {
246 envelope[n] = attack * Vg + (1.f - attack) * Bg;
247 } else if (Bg <= Vg) {
248 envelope[n] = release * Vg + (1.f - release) * Bg;
249 } else {
250 envelope[n] = 0.f;
251 }
252 }
253 }
254
255 static void get_factors(AVFilterContext *ctx,
256 int len,
257 float *factors,
258 const float *envelope)
259 {
260 for (int n = 0; n < len; n++)
261 factors[n] = sqrtf(ff_exp10f(envelope[n] / 10.f));
262 }
263
264 static void apply_factors(AVFilterContext *ctx,
265 int len,
266 float *spectrum,
267 const float *factors)
268 {
269 for (int n = 0; n < len; n++) {
270 spectrum[2*n+0] *= factors[n];
271 spectrum[2*n+1] *= factors[n];
272 }
273 }
274
275 static void feed(AVFilterContext *ctx, int ch,
276 const float *in_samples, float *out_samples,
277 float *in_frame, float *out_dist_frame,
278 float *windowed_frame, float *drc_frame,
279 float *spectrum_buf, float *energy,
280 float *target_gain, float *envelope,
281 float *factors)
282 {
283 AudioDRCContext *s = ctx->priv;
284 double var_values[VAR_VARS_NB];
285 const int fft_size = s->fft_size;
286 const int nb_coeffs = s->fft_size / 2 + 1;
287 const int overlap = s->overlap;
288 enum AVChannel channel = av_channel_layout_channel_from_index(&ctx->inputs[0]->ch_layout, ch);
289 const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0;
290
291 memcpy(var_values, s->var_values, sizeof(var_values));
292
293 var_values[VAR_CH] = ch;
294
295 // shift in/out buffers
296 memmove(in_frame, in_frame + overlap, (fft_size - overlap) * sizeof(*in_frame));
297 memmove(out_dist_frame, out_dist_frame + overlap, (fft_size - overlap) * sizeof(*out_dist_frame));
298
299 memcpy(in_frame + fft_size - overlap, in_samples, sizeof(*in_frame) * overlap);
300 memset(out_dist_frame + fft_size - overlap, 0, sizeof(*out_dist_frame) * overlap);
301
302 apply_window(s, in_frame, windowed_frame, 0);
303 s->tx_fn(s->tx_ctx[ch], spectrum_buf, windowed_frame, sizeof(float));
304
305 get_energy(ctx, nb_coeffs, energy, spectrum_buf);
306 get_target_gain(ctx, nb_coeffs, target_gain, energy, var_values, s->fx, bypass);
307 get_envelope(ctx, nb_coeffs, envelope, energy, target_gain);
308 get_factors(ctx, nb_coeffs, factors, envelope);
309 apply_factors(ctx, nb_coeffs, spectrum_buf, factors);
310
311 s->itx_fn(s->itx_ctx[ch], drc_frame, spectrum_buf, sizeof(AVComplexFloat));
312
313 apply_window(s, drc_frame, out_dist_frame, 1);
314
315 // 4 times overlap with squared hanning window results in 1.5 time increase in amplitude
316 if (!ctx->is_disabled) {
317 for (int i = 0; i < overlap; i++)
318 out_samples[i] = out_dist_frame[i] / 1.5f;
319 } else {
320 memcpy(out_samples, in_frame, sizeof(*out_samples) * overlap);
321 }
322 }
323
324 static int drc_channel(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int ch)
325 {
326 AudioDRCContext *s = ctx->priv;
327 const float *src = (const float *)in->extended_data[ch];
328 float *in_buffer = (float *)s->in_buffer->extended_data[ch];
329 float *dst = (float *)out->extended_data[ch];
330
331 memcpy(in_buffer, src, sizeof(*in_buffer) * s->overlap);
332
333 feed(ctx, ch, in_buffer, dst,
334 (float *)(s->in_frame->extended_data[ch]),
335 (float *)(s->out_dist_frame->extended_data[ch]),
336 (float *)(s->windowed_frame->extended_data[ch]),
337 (float *)(s->drc_frame->extended_data[ch]),
338 (float *)(s->spectrum_buf->extended_data[ch]),
339 (float *)(s->energy->extended_data[ch]),
340 (float *)(s->target_gain->extended_data[ch]),
341 (float *)(s->envelope->extended_data[ch]),
342 (float *)(s->factors->extended_data[ch]));
343
344 return 0;
345 }
346
347 static int drc_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
348 {
349 AudioDRCContext *s = ctx->priv;
350 AVFrame *in = s->in;
351 AVFrame *out = arg;
352 const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs;
353 const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
354
355 for (int ch = start; ch < end; ch++)
356 drc_channel(ctx, in, out, ch);
357
358 return 0;
359 }
360
361 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
362 {
363 AVFilterContext *ctx = inlink->dst;
364 AVFilterLink *outlink = ctx->outputs[0];
365 FilterLink *outl = ff_filter_link(outlink);
366 AudioDRCContext *s = ctx->priv;
367 AVFrame *out;
368 int ret;
369
370 out = ff_get_audio_buffer(outlink, s->overlap);
371 if (!out) {
372 ret = AVERROR(ENOMEM);
373 goto fail;
374 }
375
376 s->var_values[VAR_SN] = outl->sample_count_in;
377 s->var_values[VAR_T] = s->var_values[VAR_SN] * (double)1/outlink->sample_rate;
378
379 s->in = in;
380 av_frame_copy_props(out, in);
381 ff_filter_execute(ctx, drc_channels, out, NULL,
382 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
383
384 out->pts = in->pts;
385 out->nb_samples = in->nb_samples;
386 ret = ff_filter_frame(outlink, out);
387 fail:
388 av_frame_free(&in);
389 s->in = NULL;
390 return ret < 0 ? ret : 0;
391 }
392
393 static int activate(AVFilterContext *ctx)
394 {
395 AVFilterLink *inlink = ctx->inputs[0];
396 AVFilterLink *outlink = ctx->outputs[0];
397 AudioDRCContext *s = ctx->priv;
398 AVFrame *in = NULL;
399 int ret = 0, status;
400 int64_t pts;
401
402 ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
403 if (ret < 0)
404 return ret;
405 if (strcmp(s->channels_to_filter, "all"))
406 av_channel_layout_from_string(&s->ch_layout, s->channels_to_filter);
407
408 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
409
410 ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in);
411 if (ret < 0)
412 return ret;
413
414 if (ret > 0) {
415 s->attack = expf(-1.f / (s->attack_ms * inlink->sample_rate / 1000.f));
416 s->release = expf(-1.f / (s->release_ms * inlink->sample_rate / 1000.f));
417
418 return filter_frame(inlink, in);
419 } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
420 ff_outlink_set_status(outlink, status, pts);
421 return 0;
422 } else {
423 if (ff_inlink_queued_samples(inlink) >= s->overlap) {
424 ff_filter_set_ready(ctx, 10);
425 } else if (ff_outlink_frame_wanted(outlink)) {
426 ff_inlink_request_frame(inlink);
427 }
428 return 0;
429 }
430 }
431
432 static av_cold void uninit(AVFilterContext *ctx)
433 {
434 AudioDRCContext *s = ctx->priv;
435
436 av_channel_layout_uninit(&s->ch_layout);
437
438 av_expr_free(s->expr);
439 s->expr = NULL;
440
441 av_freep(&s->window);
442
443 av_frame_free(&s->drc_frame);
444 av_frame_free(&s->energy);
445 av_frame_free(&s->envelope);
446 av_frame_free(&s->factors);
447 av_frame_free(&s->in_buffer);
448 av_frame_free(&s->in_frame);
449 av_frame_free(&s->out_dist_frame);
450 av_frame_free(&s->spectrum_buf);
451 av_frame_free(&s->target_gain);
452 av_frame_free(&s->windowed_frame);
453
454 for (int ch = 0; ch < s->channels; ch++) {
455 if (s->tx_ctx)
456 av_tx_uninit(&s->tx_ctx[ch]);
457 if (s->itx_ctx)
458 av_tx_uninit(&s->itx_ctx[ch]);
459 }
460
461 av_freep(&s->tx_ctx);
462 av_freep(&s->itx_ctx);
463 }
464
465 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
466 char *res, int res_len, int flags)
467 {
468 AudioDRCContext *s = ctx->priv;
469 char *old_expr_str = av_strdup(s->expr_str);
470 int ret;
471
472 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
473 if (ret >= 0 && strcmp(old_expr_str, s->expr_str)) {
474 ret = av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
475 NULL, NULL, 0, ctx);
476 }
477 av_free(old_expr_str);
478 return ret;
479 }
480
/* Single audio input pad; config_input() runs when the link is configured. */
481 static const AVFilterPad inputs[] = {
482 {
483 .name = "default",
484 .type = AVMEDIA_TYPE_AUDIO,
485 .config_props = config_input,
486 },
487 };
488
/* Definition of the "adrc" filter. */
489 const FFFilter ff_af_adrc = {
490 .p.name = "adrc",
491 .p.description = NULL_IF_CONFIG_SMALL("Audio Spectral Dynamic Range Controller."),
492 .p.priv_class = &adrc_class,
/* timeline support is internal: feed() checks ctx->is_disabled itself;
 * slice threading: drc_channels() splits the channels across jobs */
493 .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
494 AVFILTER_FLAG_SLICE_THREADS,
495 .priv_size = sizeof(AudioDRCContext),
496 .uninit = uninit,
497 FILTER_INPUTS(inputs),
498 FILTER_OUTPUTS(ff_audio_default_filterpad),
/* the processing code casts every plane to float *, so only planar float is accepted */
499 FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
500 .activate = activate,
501 .process_command = process_command,
502 };
503