FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_dynaudnorm.c
Date: 2022-12-05 20:26:17
Exec Total Coverage
Lines: 0 487 0.0%
Functions: 0 38 0.0%
Branches: 0 286 0.0%

Line Branch Exec Source
1 /*
2 * Dynamic Audio Normalizer
3 * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * Dynamic Audio Normalizer
25 */
26
27 #include <float.h>
28
29 #include "libavutil/avassert.h"
30 #include "libavutil/channel_layout.h"
31 #include "libavutil/eval.h"
32 #include "libavutil/opt.h"
33
34 #define MIN_FILTER_SIZE 3
35 #define MAX_FILTER_SIZE 301
36
37 #define FF_BUFQUEUE_SIZE (MAX_FILTER_SIZE + 1)
38 #include "libavfilter/bufferqueue.h"
39
40 #include "audio.h"
41 #include "avfilter.h"
42 #include "filters.h"
43 #include "internal.h"
44
/* Variable names available in the user-supplied "curve" expression. Order
 * must match enum var_name below. */
static const char * const var_names[] = {
    "ch",           ///< the value of the current channel
    "sn",           ///< number of samples
    "nb_channels",  ///< number of channels of the input
    "t",            ///< timestamp expressed in seconds
    "sr",           ///< sample rate
    "p",            ///< peak value
    NULL
};
54
/* Indices into var_names[] / var_values[]; kept in sync with var_names. */
enum var_name {
    VAR_CH,
    VAR_SN,
    VAR_NB_CHANNELS,
    VAR_T,
    VAR_SR,
    VAR_P,
    VAR_VARS_NB  ///< number of expression variables
};
64
/* Result of the per-frame gain analysis for one channel (or all channels
 * when they are coupled). */
typedef struct local_gain {
    double max_gain;   ///< maximum amplification factor allowed for the frame
    double threshold;  ///< 1.0 if the frame's peak exceeds the user threshold, else 0.0
} local_gain;
69
/* Simple FIFO of doubles stored in a flat array; dequeue shifts the
 * remaining elements down (queues here are small: at most MAX_FILTER_SIZE). */
typedef struct cqueue {
    double *elements;   ///< storage, max_size entries
    int size;           ///< logical (target) size of the queue
    int max_size;       ///< allocated capacity
    int nb_elements;    ///< number of elements currently stored
} cqueue;
76
typedef struct DynamicAudioNormalizerContext {
    const AVClass *class;

    struct FFBufQueue queue;              ///< frames delayed while gain history fills

    int frame_len;                        ///< analysis frame length in samples
    int frame_len_msec;                   ///< user option: frame length in msec
    int filter_size;                      ///< user option: Gaussian filter size (forced odd)
    int dc_correction;                    ///< user option: enable DC bias correction
    int channels_coupled;                 ///< user option: one gain for all channels
    int alt_boundary_mode;                ///< user option: alternative start/end handling
    double overlap;                       ///< user option: frame overlap fraction [0,1]
    char *expr_str;                       ///< user option: custom peak mapping curve

    double peak_value;                    ///< user option: target peak
    double max_amplification;             ///< user option: gain cap
    double target_rms;                    ///< user option: target RMS (0 disables)
    double compress_factor;               ///< user option: compression strength (0 disables)
    double threshold;                     ///< user option: minimum peak to act on
    double *prev_amplification_factor;    ///< per channel, for inter-frame fading
    double *dc_correction_value;          ///< per channel running DC estimate
    double *compress_threshold;           ///< per channel running compression threshold
    double *weights;                      ///< Gaussian filter weights (filter_size entries used)

    int channels;                         ///< number of input channels
    int sample_advance;                   ///< samples consumed per frame (frame_len minus overlap)
    int eof;                              ///< input reached EOF; flushing
    char *channels_to_filter;             ///< user option: channel-layout string or "all"
    AVChannelLayout ch_layout;            ///< parsed set of channels to process
    int64_t pts;                          ///< next output pts, for EOF status

    cqueue **gain_history_original;       ///< per channel raw frame gains
    cqueue **gain_history_minimum;        ///< per channel minimum-filtered gains
    cqueue **gain_history_smoothed;       ///< per channel Gaussian-smoothed gains
    cqueue **threshold_history;           ///< per channel threshold flags (filter weights mask)

    cqueue *is_enabled;                   ///< timeline enable state per queued frame

    AVFrame *window;                      ///< sliding analysis window (2 * frame_len samples)

    AVExpr *expr;                         ///< parsed custom curve, or NULL
    double var_values[VAR_VARS_NB];       ///< current values for the curve expression
} DynamicAudioNormalizerContext;
120
/* Arguments passed to the slice-threaded amplification job. */
typedef struct ThreadData {
    AVFrame *in, *out;  ///< source and destination frames (may be the same)
    int enabled;        ///< timeline enable state for this frame
} ThreadData;
125
#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

/* Filter options; each has a long and a single-letter alias. All are
 * runtime-settable (see process_command). */
static const AVOption dynaudnorm_options[] = {
    { "framelen",    "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
    { "f",           "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
    { "gausssize",   "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
    { "g",           "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
    { "peak",        "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
    { "p",           "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
    { "maxgain",     "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
    { "m",           "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
    { "targetrms",   "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "r",           "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "coupling",    "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
    { "n",           "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
    { "correctdc",   "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "c",           "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "altboundary", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "b",           "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "compress",    "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
    { "s",           "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
    { "threshold",   "set the threshold value",          OFFSET(threshold),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "t",           "set the threshold value",          OFFSET(threshold),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "channels",    "set channels to filter",           OFFSET(channels_to_filter),AV_OPT_TYPE_STRING, {.str="all"},    0,     0, FLAGS },
    { "h",           "set channels to filter",           OFFSET(channels_to_filter),AV_OPT_TYPE_STRING, {.str="all"},    0,     0, FLAGS },
    { "overlap",     "set the frame overlap",            OFFSET(overlap),           AV_OPT_TYPE_DOUBLE, {.dbl=.0},     0.0,   1.0, FLAGS },
    { "o",           "set the frame overlap",            OFFSET(overlap),           AV_OPT_TYPE_DOUBLE, {.dbl=.0},     0.0,   1.0, FLAGS },
    { "curve",       "set the custom peak mapping curve",OFFSET(expr_str),          AV_OPT_TYPE_STRING, {.str=NULL},         .flags = FLAGS },
    { "v",           "set the custom peak mapping curve",OFFSET(expr_str),          AV_OPT_TYPE_STRING, {.str=NULL},         .flags = FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(dynaudnorm);
160
161 static av_cold int init(AVFilterContext *ctx)
162 {
163 DynamicAudioNormalizerContext *s = ctx->priv;
164
165 if (!(s->filter_size & 1)) {
166 av_log(ctx, AV_LOG_WARNING, "filter size %d is invalid. Changing to an odd value.\n", s->filter_size);
167 s->filter_size |= 1;
168 }
169
170 return 0;
171 }
172
173 static inline int frame_size(int sample_rate, int frame_len_msec)
174 {
175 const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0));
176 return frame_size + (frame_size % 2);
177 }
178
179 static cqueue *cqueue_create(int size, int max_size)
180 {
181 cqueue *q;
182
183 if (max_size < size)
184 return NULL;
185
186 q = av_malloc(sizeof(cqueue));
187 if (!q)
188 return NULL;
189
190 q->max_size = max_size;
191 q->size = size;
192 q->nb_elements = 0;
193
194 q->elements = av_malloc_array(max_size, sizeof(double));
195 if (!q->elements) {
196 av_free(q);
197 return NULL;
198 }
199
200 return q;
201 }
202
203 static void cqueue_free(cqueue *q)
204 {
205 if (q)
206 av_free(q->elements);
207 av_free(q);
208 }
209
/* Number of elements currently stored. */
static int cqueue_size(cqueue *q)
{
    return q->nb_elements;
}
214
/* Non-zero if the queue holds no elements. */
static int cqueue_empty(cqueue *q)
{
    return q->nb_elements <= 0;
}
219
/* Append one element at the back. Capacity overflow is a programming
 * error (assert-only check); always returns 0. */
static int cqueue_enqueue(cqueue *q, double element)
{
    av_assert2(q->nb_elements < q->max_size);

    q->elements[q->nb_elements] = element;
    q->nb_elements++;

    return 0;
}
229
/* Read the element at `index` (0 = front) without removing it. */
static double cqueue_peek(cqueue *q, int index)
{
    av_assert2(index < q->nb_elements);
    return q->elements[index];
}
235
236 static int cqueue_dequeue(cqueue *q, double *element)
237 {
238 av_assert2(!cqueue_empty(q));
239
240 *element = q->elements[0];
241 memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
242 q->nb_elements--;
243
244 return 0;
245 }
246
247 static int cqueue_pop(cqueue *q)
248 {
249 av_assert2(!cqueue_empty(q));
250
251 memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
252 q->nb_elements--;
253
254 return 0;
255 }
256
/* Change the logical size of the queue (used when the filter size is
 * changed at runtime via process_command). */
static void cqueue_resize(cqueue *q, int new_size)
{
    av_assert2(q->max_size >= new_size);
    av_assert2(MIN_FILTER_SIZE <= new_size);

    if (new_size > q->nb_elements) {
        /* Grow: shift the stored elements towards the middle and replicate
         * the (previously) oldest element into the slots opened in front. */
        const int side = (new_size - q->nb_elements) / 2;

        memmove(q->elements + side, q->elements, sizeof(double) * q->nb_elements);
        for (int i = 0; i < side; i++)
            q->elements[i] = q->elements[side];
        /* NOTE(review): element count becomes new_size - 1 - side, i.e. the
         * back is deliberately left one short of full so the next enqueue
         * completes the window — confirm against upstream intent. */
        q->nb_elements = new_size - 1 - side;
    } else {
        /* Shrink: drop roughly half of the excess from the front; the rest
         * drains naturally as the history advances. */
        int count = (q->size - new_size + 1) / 2;

        while (count-- > 0)
            cqueue_pop(q);
    }

    q->size = new_size;
}
278
279 static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
280 {
281 double total_weight = 0.0;
282 const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
283 double adjust;
284
285 // Pre-compute constants
286 const int offset = s->filter_size / 2;
287 const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
288 const double c2 = 2.0 * sigma * sigma;
289
290 // Compute weights
291 for (int i = 0; i < s->filter_size; i++) {
292 const int x = i - offset;
293
294 s->weights[i] = c1 * exp(-x * x / c2);
295 total_weight += s->weights[i];
296 }
297
298 // Adjust weights
299 adjust = 1.0 / total_weight;
300 for (int i = 0; i < s->filter_size; i++) {
301 s->weights[i] *= adjust;
302 }
303 }
304
/* Release all state allocated by config_input(). Also called from
 * config_input() before (re)allocating, so every free must tolerate
 * NULL / partially initialized state. */
static av_cold void uninit(AVFilterContext *ctx)
{
    DynamicAudioNormalizerContext *s = ctx->priv;

    av_freep(&s->prev_amplification_factor);
    av_freep(&s->dc_correction_value);
    av_freep(&s->compress_threshold);

    /* The queue arrays may be NULL if config_input failed early. */
    for (int c = 0; c < s->channels; c++) {
        if (s->gain_history_original)
            cqueue_free(s->gain_history_original[c]);
        if (s->gain_history_minimum)
            cqueue_free(s->gain_history_minimum[c]);
        if (s->gain_history_smoothed)
            cqueue_free(s->gain_history_smoothed[c]);
        if (s->threshold_history)
            cqueue_free(s->threshold_history[c]);
    }

    av_freep(&s->gain_history_original);
    av_freep(&s->gain_history_minimum);
    av_freep(&s->gain_history_smoothed);
    av_freep(&s->threshold_history);

    cqueue_free(s->is_enabled);
    s->is_enabled = NULL;

    av_freep(&s->weights);

    av_channel_layout_uninit(&s->ch_layout);

    ff_bufqueue_discard_all(&s->queue);

    av_frame_free(&s->window);
    av_expr_free(s->expr);
    s->expr = NULL;
}
342
/* Input link configuration: (re)allocate all per-channel state for the
 * negotiated channel count and sample rate. Returns 0 or AVERROR. */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    DynamicAudioNormalizerContext *s = ctx->priv;
    int ret = 0;

    /* Links can be reconfigured; drop any previous allocation first. */
    uninit(ctx);

    s->channels = inlink->ch_layout.nb_channels;
    s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
    av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);

    s->prev_amplification_factor = av_malloc_array(inlink->ch_layout.nb_channels, sizeof(*s->prev_amplification_factor));
    s->dc_correction_value = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dc_correction_value));
    s->compress_threshold = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->compress_threshold));
    s->gain_history_original = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_original));
    s->gain_history_minimum = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_minimum));
    s->gain_history_smoothed = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_smoothed));
    s->threshold_history = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->threshold_history));
    /* Weights are sized for the maximum so filter_size can grow at runtime. */
    s->weights = av_malloc_array(MAX_FILTER_SIZE, sizeof(*s->weights));
    s->is_enabled = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
    if (!s->prev_amplification_factor || !s->dc_correction_value ||
        !s->compress_threshold ||
        !s->gain_history_original || !s->gain_history_minimum ||
        !s->gain_history_smoothed || !s->threshold_history ||
        !s->is_enabled || !s->weights)
        return AVERROR(ENOMEM);

    for (int c = 0; c < inlink->ch_layout.nb_channels; c++) {
        s->prev_amplification_factor[c] = 1.0;

        s->gain_history_original[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
        s->gain_history_minimum[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
        s->gain_history_smoothed[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
        s->threshold_history[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);

        if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
            !s->gain_history_smoothed[c] || !s->threshold_history[c])
            return AVERROR(ENOMEM);
    }

    init_gaussian_filter(s);

    /* Sliding analysis window: holds the previous and the current frame. */
    s->window = ff_get_audio_buffer(ctx->outputs[0], s->frame_len * 2);
    if (!s->window)
        return AVERROR(ENOMEM);
    s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. - s->overlap)));

    s->var_values[VAR_SR] = inlink->sample_rate;
    s->var_values[VAR_NB_CHANNELS] = s->channels;

    if (s->expr_str)
        ret = av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
                            NULL, NULL, 0, ctx);
    return ret;
}
399
/* Linear cross-fade between prev and next over `length` samples; pos is
 * the current sample index. At pos == length-1 the result equals next. */
static inline double fade(double prev, double next, int pos, int length)
{
    const double step   = 1.0 / length;
    const double w_prev = 1.0 - (step * (pos + 1.0));
    const double w_next = 1.0 - w_prev;

    return w_prev * prev + w_next * next;
}
407
/* Square of value (cheaper and exact compared to pow(value, 2.0)). */
static inline double pow_2(const double value)
{
    return value * value;
}
412
413 static inline double bound(const double threshold, const double val)
414 {
415 const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0
416 return erf(CONST * (val / threshold)) * threshold;
417 }
418
419 static double find_peak_magnitude(AVFrame *frame, int channel)
420 {
421 double max = DBL_EPSILON;
422
423 if (channel == -1) {
424 for (int c = 0; c < frame->ch_layout.nb_channels; c++) {
425 double *data_ptr = (double *)frame->extended_data[c];
426
427 for (int i = 0; i < frame->nb_samples; i++)
428 max = fmax(max, fabs(data_ptr[i]));
429 }
430 } else {
431 double *data_ptr = (double *)frame->extended_data[channel];
432
433 for (int i = 0; i < frame->nb_samples; i++)
434 max = fmax(max, fabs(data_ptr[i]));
435 }
436
437 return max;
438 }
439
440 static double compute_frame_rms(AVFrame *frame, int channel)
441 {
442 double rms_value = 0.0;
443
444 if (channel == -1) {
445 for (int c = 0; c < frame->ch_layout.nb_channels; c++) {
446 const double *data_ptr = (double *)frame->extended_data[c];
447
448 for (int i = 0; i < frame->nb_samples; i++) {
449 rms_value += pow_2(data_ptr[i]);
450 }
451 }
452
453 rms_value /= frame->nb_samples * frame->ch_layout.nb_channels;
454 } else {
455 const double *data_ptr = (double *)frame->extended_data[channel];
456 for (int i = 0; i < frame->nb_samples; i++) {
457 rms_value += pow_2(data_ptr[i]);
458 }
459
460 rms_value /= frame->nb_samples;
461 }
462
463 return fmax(sqrt(rms_value), DBL_EPSILON);
464 }
465
/* Compute the maximum gain the current frame may receive for `channel`
 * (-1 = all channels coupled): the smallest of the peak-based gain, the
 * RMS-based gain (if targetrms is set), and the optional custom curve,
 * soft-limited to the maxgain option. */
static local_gain get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
                                     int channel)
{
    const double peak_magnitude = find_peak_magnitude(frame, channel);
    const double maximum_gain = s->peak_value / peak_magnitude;
    const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
    double target_gain = DBL_MAX;
    local_gain gain;

    if (s->expr_str) {
        /* Evaluate the user curve on a private copy so the worker threads
         * (channels uncoupled) do not race on s->var_values. */
        double var_values[VAR_VARS_NB];

        memcpy(var_values, s->var_values, sizeof(var_values));

        var_values[VAR_CH] = channel;
        var_values[VAR_P] = peak_magnitude;

        target_gain = av_expr_eval(s->expr, var_values, s) / peak_magnitude;
    }

    /* threshold is a 0/1 flag: only frames whose peak exceeds the user
     * threshold contribute weight in the Gaussian smoothing stage. */
    gain.threshold = peak_magnitude > s->threshold;
    gain.max_gain = bound(s->max_amplification, fmin(target_gain, fmin(maximum_gain, rms_gain)));

    return gain;
}
491
492 static double minimum_filter(cqueue *q)
493 {
494 double min = DBL_MAX;
495
496 for (int i = 0; i < cqueue_size(q); i++) {
497 min = fmin(min, cqueue_peek(q, i));
498 }
499
500 return min;
501 }
502
/* Weighted (Gaussian) average of the gain queue q, where each tap is
 * additionally masked by the corresponding threshold flag in tq. Returns
 * the unnormalized weighted sum; if no tap is above threshold (tsum == 0)
 * the gain defaults to 1.0 (unity, i.e. leave the audio untouched). */
static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q, cqueue *tq)
{
    const double *weights = s->weights;
    double result = 0.0, tsum = 0.0;

    for (int i = 0; i < cqueue_size(q); i++) {
        double tq_item = cqueue_peek(tq, i);
        double q_item = cqueue_peek(q, i);

        tsum += tq_item * weights[i];
        result += tq_item * weights[i] * q_item;
    }

    if (tsum == 0.0)
        result = 1.0;

    return result;
}
521
/* Push the latest frame gain for `channel` through the three-stage
 * pipeline: raw gains -> minimum filter -> Gaussian smoothing. On the
 * very first frame the queues are pre-filled (half a filter length) so
 * the filters are centered from the start; "alt boundary mode" seeds
 * them with the actual first gain instead of unity. */
static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
                                local_gain gain)
{
    if (cqueue_empty(s->gain_history_original[channel])) {
        const int pre_fill_size = s->filter_size / 2;
        const double initial_value = s->alt_boundary_mode ? gain.max_gain : fmin(1.0, gain.max_gain);

        s->prev_amplification_factor[channel] = initial_value;

        /* Pre-fill the raw-gain history so the minimum filter has a full
         * left half-window on the first real frame. */
        while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
            cqueue_enqueue(s->gain_history_original[channel], initial_value);
            cqueue_enqueue(s->threshold_history[channel], gain.threshold);
        }
    }

    cqueue_enqueue(s->gain_history_original[channel], gain.max_gain);

    while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
        double minimum;

        if (cqueue_empty(s->gain_history_minimum[channel])) {
            /* First full window: pre-fill the minimum history with running
             * minima over the growing right half-window. */
            const int pre_fill_size = s->filter_size / 2;
            double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0;
            int input = pre_fill_size;

            while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
                input++;
                initial_value = fmin(initial_value, cqueue_peek(s->gain_history_original[channel], input));
                cqueue_enqueue(s->gain_history_minimum[channel], initial_value);
            }
        }

        minimum = minimum_filter(s->gain_history_original[channel]);

        cqueue_enqueue(s->gain_history_minimum[channel], minimum);

        cqueue_enqueue(s->threshold_history[channel], gain.threshold);

        cqueue_pop(s->gain_history_original[channel]);
    }

    while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
        double smoothed, limit;

        smoothed = gaussian_filter(s, s->gain_history_minimum[channel], s->threshold_history[channel]);
        /* Never smooth above the raw gain of the frame being emitted, or
         * the output could clip past the requested peak. */
        limit = cqueue_peek(s->gain_history_original[channel], 0);
        smoothed = fmin(smoothed, limit);

        cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);

        cqueue_pop(s->gain_history_minimum[channel]);
        cqueue_pop(s->threshold_history[channel]);
    }
}
576
577 static int update_gain_histories(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
578 {
579 DynamicAudioNormalizerContext *s = ctx->priv;
580 AVFrame *analyze_frame = arg;
581 const int channels = s->channels;
582 const int start = (channels * jobnr) / nb_jobs;
583 const int end = (channels * (jobnr+1)) / nb_jobs;
584
585 for (int c = start; c < end; c++)
586 update_gain_history(s, c, get_max_local_gain(s, analyze_frame, c));
587
588 return 0;
589 }
590
/* Exponential moving average: blend `new` into `old` with the given
 * aggressiveness in [0, 1] (1 = take the new value outright). */
static inline double update_value(double new, double old, double aggressiveness)
{
    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));
    return aggressiveness * new + (1.0 - aggressiveness) * old;
}
596
/* Non-zero if channel index `ch` of `frame` is not part of the set of
 * channels the user asked to filter (the "channels" option). */
static inline int bypass_channel(DynamicAudioNormalizerContext *s, AVFrame *frame, int ch)
{
    enum AVChannel channel = av_channel_layout_channel_from_index(&frame->ch_layout, ch);

    return av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0;
}
603
/* Optional DC bias removal: estimate each channel's mean with a slow
 * exponential moving average and subtract it, cross-fading from the
 * previous estimate to avoid steps at frame boundaries. */
static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    const double diff = 1.0 / frame->nb_samples;
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);

    for (int c = 0; c < s->channels; c++) {
        const int bypass = bypass_channel(s, frame, c);
        double *dst_ptr = (double *)frame->extended_data[c];
        double current_average_value = 0.0;
        double prev_value;

        for (int i = 0; i < frame->nb_samples; i++)
            current_average_value += dst_ptr[i] * diff;

        /* On the first frame there is no history: use the measured mean
         * directly instead of blending (aggressiveness 0.1 = slow EMA). */
        prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c];
        s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);

        /* Bypassed channels still update the estimate above but keep their
         * samples untouched. */
        for (int i = 0; i < frame->nb_samples && !bypass; i++) {
            dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, frame->nb_samples);
        }
    }
}
626
/* Given a desired output ceiling `threshold`, find the largest input
 * threshold for bound() such that bound(thresh, 1.0) still stays at or
 * below the desired value — i.e. pre-compensate for the soft-knee of the
 * erf() limiter. Performed as a binary refinement; the llrint() test on a
 * Q63 fixed-point representation stops once the step no longer changes
 * the representable value, guaranteeing termination. */
static double setup_compress_thresh(double threshold)
{
    if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
        double current_threshold = threshold;
        double step_size = 1.0;

        while (step_size > DBL_EPSILON) {
            while ((llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
                    llrint(current_threshold * (UINT64_C(1) << 63))) &&
                   (bound(current_threshold + step_size, 1.0) <= threshold)) {
                current_threshold += step_size;
            }

            step_size /= 2.0;
        }

        return current_threshold;
    } else {
        /* Degenerate thresholds (0 or >= 1) need no compensation. */
        return threshold;
    }
}
648
/* Sample standard deviation of the frame, assuming a zero mean (valid
 * after DC correction, cheap otherwise); channel == -1 pools all
 * channels. Never returns less than DBL_EPSILON. */
static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
                                    AVFrame *frame, int channel)
{
    double variance = 0.0;

    if (channel == -1) {
        for (int c = 0; c < s->channels; c++) {
            const double *data_ptr = (double *)frame->extended_data[c];

            for (int i = 0; i < frame->nb_samples; i++) {
                variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
            }
        }
        /* n - 1: Bessel's correction for the sample estimate. */
        variance /= (s->channels * frame->nb_samples) - 1;
    } else {
        const double *data_ptr = (double *)frame->extended_data[channel];

        for (int i = 0; i < frame->nb_samples; i++) {
            variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
        }
        variance /= frame->nb_samples - 1;
    }

    return fmax(sqrt(variance), DBL_EPSILON);
}
674
/* Optional input compression: soft-limit samples to compress_factor
 * standard deviations, smoothing the threshold over time (EMA with
 * aggressiveness 1/3) and cross-fading it across the frame. Coupled mode
 * derives one threshold from all channels; uncoupled mode computes one
 * per channel. */
static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);

    if (s->channels_coupled) {
        const double standard_deviation = compute_frame_std_dev(s, frame, -1);
        const double current_threshold = fmin(1.0, s->compress_factor * standard_deviation);

        const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
        double prev_actual_thresh, curr_actual_thresh;
        s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));

        /* Compensate the erf() soft-knee so the effective ceiling matches
         * the requested threshold. */
        prev_actual_thresh = setup_compress_thresh(prev_value);
        curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);

        for (int c = 0; c < s->channels; c++) {
            double *const dst_ptr = (double *)frame->extended_data[c];
            const int bypass = bypass_channel(s, frame, c);

            if (bypass)
                continue;

            for (int i = 0; i < frame->nb_samples; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    } else {
        for (int c = 0; c < s->channels; c++) {
            const int bypass = bypass_channel(s, frame, c);
            const double standard_deviation = compute_frame_std_dev(s, frame, c);
            const double current_threshold = setup_compress_thresh(fmin(1.0, s->compress_factor * standard_deviation));
            const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
            double prev_actual_thresh, curr_actual_thresh;
            double *dst_ptr;

            s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);

            prev_actual_thresh = setup_compress_thresh(prev_value);
            curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);

            dst_ptr = (double *)frame->extended_data[c];
            /* Bypassed channels keep their samples but still update the
             * running threshold above. */
            for (int i = 0; i < frame->nb_samples && !bypass; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    }
}
724
/* First pass over an incoming frame: apply in-place preprocessing (DC
 * correction / compression — requires a writable frame), maintain the
 * sliding analysis window when frames overlap, and feed the gain
 * histories. On failure *frame is freed and an AVERROR is returned. */
static int analyze_frame(AVFilterContext *ctx, AVFilterLink *outlink, AVFrame **frame)
{
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFrame *analyze_frame;

    if (s->dc_correction || s->compress_factor > DBL_EPSILON) {
        int ret;

        /* Preprocessing modifies samples, so clone read-only frames. */
        if (!av_frame_is_writable(*frame)) {
            AVFrame *out = ff_get_audio_buffer(outlink, (*frame)->nb_samples);

            if (!out) {
                av_frame_free(frame);
                return AVERROR(ENOMEM);
            }
            ret = av_frame_copy_props(out, *frame);
            if (ret < 0) {
                av_frame_free(frame);
                av_frame_free(&out);
                return ret;
            }
            ret = av_frame_copy(out, *frame);
            if (ret < 0) {
                av_frame_free(frame);
                av_frame_free(&out);
                return ret;
            }

            av_frame_free(frame);
            *frame = out;
        }
    }

    if (s->dc_correction)
        perform_dc_correction(s, *frame);

    if (s->compress_factor > DBL_EPSILON)
        perform_compression(s, *frame);

    if (s->frame_len != s->sample_advance) {
        /* Overlapping frames: slide the analysis window by sample_advance
         * and append the new samples (zero-padding a short final frame). */
        const int offset = s->frame_len - s->sample_advance;

        for (int c = 0; c < s->channels; c++) {
            double *src = (double *)s->window->extended_data[c];

            memmove(src, &src[s->sample_advance], offset * sizeof(double));
            memcpy(&src[offset], (*frame)->extended_data[c], (*frame)->nb_samples * sizeof(double));
            memset(&src[offset + (*frame)->nb_samples], 0, (s->sample_advance - (*frame)->nb_samples) * sizeof(double));
        }

        analyze_frame = s->window;
    } else {
        /* No overlap: analyze the frame directly (window kept as a copy). */
        av_samples_copy(s->window->extended_data, (*frame)->extended_data, 0, 0,
                        FFMIN(s->frame_len, (*frame)->nb_samples), (*frame)->ch_layout.nb_channels, (*frame)->format);
        analyze_frame = *frame;
    }

    /* Expose sample count / time to the custom curve expression. */
    s->var_values[VAR_SN] = outlink->sample_count_in;
    s->var_values[VAR_T] = s->var_values[VAR_SN] * (double)1/outlink->sample_rate;

    if (s->channels_coupled) {
        const local_gain gain = get_max_local_gain(s, analyze_frame, -1);
        for (int c = 0; c < s->channels; c++)
            update_gain_history(s, c, gain);
    } else {
        ff_filter_execute(ctx, update_gain_histories, analyze_frame, NULL,
                          FFMIN(s->channels, ff_filter_get_nb_threads(ctx)));
    }

    return 0;
}
796
/* Second pass for one channel: consume the next smoothed gain and apply
 * it, cross-fading from the previous frame's gain. The smoothed gain is
 * always dequeued — even when the channel is bypassed or the filter is
 * timeline-disabled — to keep all histories in lockstep. */
static void amplify_channel(DynamicAudioNormalizerContext *s, AVFrame *in,
                            AVFrame *frame, int enabled, int c)
{
    const int bypass = bypass_channel(s, frame, c);
    const double *src_ptr = (const double *)in->extended_data[c];
    double *dst_ptr = (double *)frame->extended_data[c];
    double current_amplification_factor;

    cqueue_dequeue(s->gain_history_smoothed[c], &current_amplification_factor);

    for (int i = 0; i < frame->nb_samples && enabled && !bypass; i++) {
        const double amplification_factor = fade(s->prev_amplification_factor[c],
                                                 current_amplification_factor, i,
                                                 frame->nb_samples);

        dst_ptr[i] = src_ptr[i] * amplification_factor;
    }

    s->prev_amplification_factor[c] = current_amplification_factor;
}
817
818 static int amplify_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
819 {
820 DynamicAudioNormalizerContext *s = ctx->priv;
821 ThreadData *td = arg;
822 AVFrame *out = td->out;
823 AVFrame *in = td->in;
824 const int enabled = td->enabled;
825 const int channels = s->channels;
826 const int start = (channels * jobnr) / nb_jobs;
827 const int end = (channels * (jobnr+1)) / nb_jobs;
828
829 for (int ch = start; ch < end; ch++)
830 amplify_channel(s, in, out, enabled, ch);
831
832 return 0;
833 }
834
/* Accept one analysis-frame worth of input: first emit every delayed
 * frame whose smoothed gain is now available, then analyze `in` and
 * queue it (or drop it when flushing). Returns 1 on success (frame
 * consumed) or a negative AVERROR. */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    ThreadData td;
    int ret;

    /* Drain: while at EOF any queued frame may go out; otherwise a full
     * filter_size of look-ahead is required before the gain is valid. */
    while (((s->queue.available >= s->filter_size) ||
            (s->eof && s->queue.available)) &&
           !cqueue_empty(s->gain_history_smoothed[0])) {
        AVFrame *in = ff_bufqueue_get(&s->queue);
        AVFrame *out;
        double is_enabled;

        /* Timeline enable state recorded when this frame was queued. */
        cqueue_dequeue(s->is_enabled, &is_enabled);

        if (av_frame_is_writable(in)) {
            out = in;
        } else {
            out = ff_get_audio_buffer(outlink, in->nb_samples);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(out, in);
        }

        td.in = in;
        td.out = out;
        td.enabled = is_enabled > 0.;
        ff_filter_execute(ctx, amplify_channels, &td, NULL,
                          FFMIN(s->channels, ff_filter_get_nb_threads(ctx)));

        /* Remember the pts just past this frame for the EOF status. */
        s->pts = out->pts + av_rescale_q(out->nb_samples, av_make_q(1, outlink->sample_rate),
                                         outlink->time_base);
        if (out != in)
            av_frame_free(&in);
        ret = ff_filter_frame(outlink, out);
        if (ret < 0)
            return ret;
    }

    ret = analyze_frame(ctx, outlink, &in);
    if (ret < 0)
        return ret;
    if (!s->eof) {
        ff_bufqueue_add(ctx, &s->queue, in);
        cqueue_enqueue(s->is_enabled, !ctx->is_disabled);
    } else {
        /* Flushing: synthetic frames are analysis-only, never output. */
        av_frame_free(&in);
    }

    return 1;
}
890
/* Push one synthetic frame through filter_frame() during EOF flushing so
 * the queued real frames keep draining. The synthetic samples encode the
 * boundary-mode target level (alternating sign when DC correction is on,
 * so the padding itself is DC-free around the correction value). */
static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
                        AVFilterLink *outlink)
{
    AVFrame *out = ff_get_audio_buffer(outlink, s->sample_advance);

    if (!out)
        return AVERROR(ENOMEM);

    for (int c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)out->extended_data[c];

        for (int i = 0; i < out->nb_samples; i++) {
            dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? fmin(s->peak_value, s->target_rms) : s->peak_value);
            if (s->dc_correction) {
                dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1;
                dst_ptr[i] += s->dc_correction_value[c];
            }
        }
    }

    return filter_frame(inlink, out);
}
913
/* EOF flushing step: if no smoothed gain is available yet, replay the
 * oldest raw gain through the pipeline until one appears, then emit a
 * synthetic frame to advance the queue. */
static int flush(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    DynamicAudioNormalizerContext *s = ctx->priv;

    while (s->eof && cqueue_empty(s->gain_history_smoothed[0])) {
        for (int c = 0; c < s->channels; c++)
            update_gain_history(s, c, (local_gain){ cqueue_peek(s->gain_history_original[c], 0), 1.0});
    }

    return flush_buffer(s, inlink, outlink);
}
927
/* Filter scheduling callback: consume sample_advance-sized chunks while
 * input is available, detect EOF, flush the queue, and finally forward
 * the EOF status with the last computed pts. */
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFrame *in = NULL;
    int ret = 0, status;
    int64_t pts;

    /* Refresh the filtered-channel set every pass; the "channels" option
     * is runtime-settable. */
    ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
    if (ret < 0)
        return ret;
    if (strcmp(s->channels_to_filter, "all"))
        av_channel_layout_from_string(&s->ch_layout, s->channels_to_filter);

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    if (!s->eof) {
        ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            ret = filter_frame(inlink, in);
            if (ret <= 0)
                return ret;
        }

        /* More full chunks buffered: ask to be scheduled again soon. */
        if (ff_inlink_check_available_samples(inlink, s->sample_advance) > 0) {
            ff_filter_set_ready(ctx, 10);
            return 0;
        }
    }

    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF)
            s->eof = 1;
    }

    if (s->eof && s->queue.available)
        return flush(outlink);

    if (s->eof && !s->queue.available) {
        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
        return 0;
    }

    if (!s->eof)
        FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}
979
980 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
981 char *res, int res_len, int flags)
982 {
983 DynamicAudioNormalizerContext *s = ctx->priv;
984 AVFilterLink *inlink = ctx->inputs[0];
985 int prev_filter_size = s->filter_size;
986 int ret;
987
988 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
989 if (ret < 0)
990 return ret;
991
992 s->filter_size |= 1;
993 if (prev_filter_size != s->filter_size) {
994 init_gaussian_filter(s);
995
996 for (int c = 0; c < s->channels; c++) {
997 cqueue_resize(s->gain_history_original[c], s->filter_size);
998 cqueue_resize(s->gain_history_minimum[c], s->filter_size);
999 cqueue_resize(s->threshold_history[c], s->filter_size);
1000 }
1001 }
1002
1003 s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
1004 s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. - s->overlap)));
1005 if (s->expr_str) {
1006 ret = av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
1007 NULL, NULL, 0, ctx);
1008 if (ret < 0)
1009 return ret;
1010 }
1011 return 0;
1012 }
1013
/* Single audio input; per-link state is (re)built in config_input(). */
static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
    {
        .name           = "default",
        .type           = AVMEDIA_TYPE_AUDIO,
        .config_props   = config_input,
    },
};
1021
/* Single audio output. */
static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
};
1028
/* Filter registration: planar double samples only; supports slice
 * threading and internal timeline handling (enable state is queued
 * alongside each frame). */
const AVFilter ff_af_dynaudnorm = {
    .name          = "dynaudnorm",
    .description   = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
    .priv_size     = sizeof(DynamicAudioNormalizerContext),
    .init          = init,
    .uninit        = uninit,
    .activate      = activate,
    FILTER_INPUTS(avfilter_af_dynaudnorm_inputs),
    FILTER_OUTPUTS(avfilter_af_dynaudnorm_outputs),
    FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_DBLP),
    .priv_class    = &dynaudnorm_class,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
                     AVFILTER_FLAG_SLICE_THREADS,
    .process_command = process_command,
};
1044