FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_dynaudnorm.c
Date: 2024-07-26 21:54:09
Exec Total Coverage
Lines: 0 495 0.0%
Functions: 0 38 0.0%
Branches: 0 286 0.0%

Line Branch Exec Source
1 /*
2 * Dynamic Audio Normalizer
3 * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * Dynamic Audio Normalizer
25 */
26
27 #include <float.h>
28
29 #include "libavutil/avassert.h"
30 #include "libavutil/channel_layout.h"
31 #include "libavutil/eval.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/opt.h"
34
35 #define MIN_FILTER_SIZE 3
36 #define MAX_FILTER_SIZE 301
37
38 #define FF_BUFQUEUE_SIZE (MAX_FILTER_SIZE + 1)
39 #include "libavfilter/bufferqueue.h"
40
41 #include "audio.h"
42 #include "avfilter.h"
43 #include "filters.h"
44 #include "internal.h"
45
/**
 * Variable names accepted by the expression parser for the "curve" option.
 * The entries are listed in the same order as enum var_name, which is used
 * to index the matching array of values.
 */
static const char * const var_names[] = {
    "ch",           ///< the value of the current channel
    "sn",           ///< number of samples
    "nb_channels",  ///< number of channels
    "t",            ///< timestamp expressed in seconds
    "sr",           ///< sample rate
    "p",            ///< peak value
    NULL
};

/** Indices into the variable value array, one per entry of var_names. */
enum var_name {
    VAR_CH = 0,
    VAR_SN,
    VAR_NB_CHANNELS,
    VAR_T,
    VAR_SR,
    VAR_P,
    VAR_VARS_NB     ///< number of variables, not a variable itself
};
65
/**
 * Result of analysing one frame: the gain ceiling computed for it and a
 * flag telling whether its peak exceeded the configured threshold.
 */
typedef struct local_gain {
    double max_gain;    ///< largest gain allowed for the analysed frame
    double threshold;   ///< 1.0 when the peak is above the threshold option, else 0.0
} local_gain;
70
/**
 * Simple FIFO of doubles stored in a flat array; the oldest element is
 * always at index 0.
 */
typedef struct cqueue {
    double *elements;   ///< storage, allocated for max_size entries
    int     size;       ///< nominal size requested at creation / last resize
    int     max_size;   ///< capacity of the elements array
    int     nb_elements;///< number of entries currently stored
} cqueue;
77
78 typedef struct DynamicAudioNormalizerContext {
79 const AVClass *class;
80
81 struct FFBufQueue queue;
82
83 int frame_len;
84 int frame_len_msec;
85 int filter_size;
86 int dc_correction;
87 int channels_coupled;
88 int alt_boundary_mode;
89 double overlap;
90 char *expr_str;
91
92 double peak_value;
93 double max_amplification;
94 double target_rms;
95 double compress_factor;
96 double threshold;
97 double *prev_amplification_factor;
98 double *dc_correction_value;
99 double *compress_threshold;
100 double *weights;
101
102 int channels;
103 int sample_advance;
104 int eof;
105 char *channels_to_filter;
106 AVChannelLayout ch_layout;
107 int64_t pts;
108
109 cqueue **gain_history_original;
110 cqueue **gain_history_minimum;
111 cqueue **gain_history_smoothed;
112 cqueue **threshold_history;
113
114 cqueue *is_enabled;
115
116 AVFrame *window;
117
118 AVExpr *expr;
119 double var_values[VAR_VARS_NB];
120 } DynamicAudioNormalizerContext;
121
122 typedef struct ThreadData {
123 AVFrame *in, *out;
124 int enabled;
125 } ThreadData;
126
127 #define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
128 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
129
130 static const AVOption dynaudnorm_options[] = {
131 { "framelen", "set the frame length in msec", OFFSET(frame_len_msec), AV_OPT_TYPE_INT, {.i64 = 500}, 10, 8000, FLAGS },
132 { "f", "set the frame length in msec", OFFSET(frame_len_msec), AV_OPT_TYPE_INT, {.i64 = 500}, 10, 8000, FLAGS },
133 { "gausssize", "set the filter size", OFFSET(filter_size), AV_OPT_TYPE_INT, {.i64 = 31}, 3, 301, FLAGS },
134 { "g", "set the filter size", OFFSET(filter_size), AV_OPT_TYPE_INT, {.i64 = 31}, 3, 301, FLAGS },
135 { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0, 1.0, FLAGS },
136 { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0, 1.0, FLAGS },
137 { "maxgain", "set the max amplification", OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
138 { "m", "set the max amplification", OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
139 { "targetrms", "set the target RMS", OFFSET(target_rms), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS },
140 { "r", "set the target RMS", OFFSET(target_rms), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS },
141 { "coupling", "set channel coupling", OFFSET(channels_coupled), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
142 { "n", "set channel coupling", OFFSET(channels_coupled), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
143 { "correctdc", "set DC correction", OFFSET(dc_correction), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
144 { "c", "set DC correction", OFFSET(dc_correction), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
145 { "altboundary", "set alternative boundary mode", OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
146 { "b", "set alternative boundary mode", OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
147 { "compress", "set the compress factor", OFFSET(compress_factor), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 30.0, FLAGS },
148 { "s", "set the compress factor", OFFSET(compress_factor), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 30.0, FLAGS },
149 { "threshold", "set the threshold value", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS },
150 { "t", "set the threshold value", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS },
151 { "channels", "set channels to filter", OFFSET(channels_to_filter),AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
152 { "h", "set channels to filter", OFFSET(channels_to_filter),AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
153 { "overlap", "set the frame overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=.0}, 0.0, 1.0, FLAGS },
154 { "o", "set the frame overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=.0}, 0.0, 1.0, FLAGS },
155 { "curve", "set the custom peak mapping curve",OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
156 { "v", "set the custom peak mapping curve",OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
157 { NULL }
158 };
159
160 AVFILTER_DEFINE_CLASS(dynaudnorm);
161
162 static av_cold int init(AVFilterContext *ctx)
163 {
164 DynamicAudioNormalizerContext *s = ctx->priv;
165
166 if (!(s->filter_size & 1)) {
167 av_log(ctx, AV_LOG_WARNING, "filter size %d is invalid. Changing to an odd value.\n", s->filter_size);
168 s->filter_size |= 1;
169 }
170
171 return 0;
172 }
173
174 static inline int frame_size(int sample_rate, int frame_len_msec)
175 {
176 const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0));
177 return frame_size + (frame_size % 2);
178 }
179
180 static cqueue *cqueue_create(int size, int max_size)
181 {
182 cqueue *q;
183
184 if (max_size < size)
185 return NULL;
186
187 q = av_malloc(sizeof(cqueue));
188 if (!q)
189 return NULL;
190
191 q->max_size = max_size;
192 q->size = size;
193 q->nb_elements = 0;
194
195 q->elements = av_malloc_array(max_size, sizeof(double));
196 if (!q->elements) {
197 av_free(q);
198 return NULL;
199 }
200
201 return q;
202 }
203
204 static void cqueue_free(cqueue *q)
205 {
206 if (q)
207 av_free(q->elements);
208 av_free(q);
209 }
210
211 static int cqueue_size(cqueue *q)
212 {
213 return q->nb_elements;
214 }
215
216 static int cqueue_empty(cqueue *q)
217 {
218 return q->nb_elements <= 0;
219 }
220
221 static int cqueue_enqueue(cqueue *q, double element)
222 {
223 av_assert2(q->nb_elements < q->max_size);
224
225 q->elements[q->nb_elements] = element;
226 q->nb_elements++;
227
228 return 0;
229 }
230
231 static double cqueue_peek(cqueue *q, int index)
232 {
233 av_assert2(index < q->nb_elements);
234 return q->elements[index];
235 }
236
237 static int cqueue_dequeue(cqueue *q, double *element)
238 {
239 av_assert2(!cqueue_empty(q));
240
241 *element = q->elements[0];
242 memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
243 q->nb_elements--;
244
245 return 0;
246 }
247
248 static int cqueue_pop(cqueue *q)
249 {
250 av_assert2(!cqueue_empty(q));
251
252 memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
253 q->nb_elements--;
254
255 return 0;
256 }
257
258 static void cqueue_resize(cqueue *q, int new_size)
259 {
260 av_assert2(q->max_size >= new_size);
261 av_assert2(MIN_FILTER_SIZE <= new_size);
262
263 if (new_size > q->nb_elements) {
264 const int side = (new_size - q->nb_elements) / 2;
265
266 memmove(q->elements + side, q->elements, sizeof(double) * q->nb_elements);
267 for (int i = 0; i < side; i++)
268 q->elements[i] = q->elements[side];
269 q->nb_elements = new_size - 1 - side;
270 } else {
271 int count = (q->size - new_size + 1) / 2;
272
273 while (count-- > 0)
274 cqueue_pop(q);
275 }
276
277 q->size = new_size;
278 }
279
280 static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
281 {
282 double total_weight = 0.0;
283 const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
284 double adjust;
285
286 // Pre-compute constants
287 const int offset = s->filter_size / 2;
288 const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
289 const double c2 = 2.0 * sigma * sigma;
290
291 // Compute weights
292 for (int i = 0; i < s->filter_size; i++) {
293 const int x = i - offset;
294
295 s->weights[i] = c1 * exp(-x * x / c2);
296 total_weight += s->weights[i];
297 }
298
299 // Adjust weights
300 adjust = 1.0 / total_weight;
301 for (int i = 0; i < s->filter_size; i++) {
302 s->weights[i] *= adjust;
303 }
304 }
305
306 static av_cold void uninit(AVFilterContext *ctx)
307 {
308 DynamicAudioNormalizerContext *s = ctx->priv;
309
310 av_freep(&s->prev_amplification_factor);
311 av_freep(&s->dc_correction_value);
312 av_freep(&s->compress_threshold);
313
314 for (int c = 0; c < s->channels; c++) {
315 if (s->gain_history_original)
316 cqueue_free(s->gain_history_original[c]);
317 if (s->gain_history_minimum)
318 cqueue_free(s->gain_history_minimum[c]);
319 if (s->gain_history_smoothed)
320 cqueue_free(s->gain_history_smoothed[c]);
321 if (s->threshold_history)
322 cqueue_free(s->threshold_history[c]);
323 }
324
325 av_freep(&s->gain_history_original);
326 av_freep(&s->gain_history_minimum);
327 av_freep(&s->gain_history_smoothed);
328 av_freep(&s->threshold_history);
329
330 cqueue_free(s->is_enabled);
331 s->is_enabled = NULL;
332
333 av_freep(&s->weights);
334
335 av_channel_layout_uninit(&s->ch_layout);
336
337 ff_bufqueue_discard_all(&s->queue);
338
339 av_frame_free(&s->window);
340 av_expr_free(s->expr);
341 s->expr = NULL;
342 }
343
344 static int config_input(AVFilterLink *inlink)
345 {
346 AVFilterContext *ctx = inlink->dst;
347 DynamicAudioNormalizerContext *s = ctx->priv;
348 int ret = 0;
349
350 uninit(ctx);
351
352 s->channels = inlink->ch_layout.nb_channels;
353 s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
354 av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);
355
356 s->prev_amplification_factor = av_malloc_array(inlink->ch_layout.nb_channels, sizeof(*s->prev_amplification_factor));
357 s->dc_correction_value = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dc_correction_value));
358 s->compress_threshold = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->compress_threshold));
359 s->gain_history_original = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_original));
360 s->gain_history_minimum = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_minimum));
361 s->gain_history_smoothed = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->gain_history_smoothed));
362 s->threshold_history = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->threshold_history));
363 s->weights = av_malloc_array(MAX_FILTER_SIZE, sizeof(*s->weights));
364 s->is_enabled = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
365 if (!s->prev_amplification_factor || !s->dc_correction_value ||
366 !s->compress_threshold ||
367 !s->gain_history_original || !s->gain_history_minimum ||
368 !s->gain_history_smoothed || !s->threshold_history ||
369 !s->is_enabled || !s->weights)
370 return AVERROR(ENOMEM);
371
372 for (int c = 0; c < inlink->ch_layout.nb_channels; c++) {
373 s->prev_amplification_factor[c] = 1.0;
374
375 s->gain_history_original[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
376 s->gain_history_minimum[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
377 s->gain_history_smoothed[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
378 s->threshold_history[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
379
380 if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
381 !s->gain_history_smoothed[c] || !s->threshold_history[c])
382 return AVERROR(ENOMEM);
383 }
384
385 init_gaussian_filter(s);
386
387 s->window = ff_get_audio_buffer(ctx->outputs[0], s->frame_len * 2);
388 if (!s->window)
389 return AVERROR(ENOMEM);
390 s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. - s->overlap)));
391
392 s->var_values[VAR_SR] = inlink->sample_rate;
393 s->var_values[VAR_NB_CHANNELS] = s->channels;
394
395 if (s->expr_str)
396 ret = av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
397 NULL, NULL, 0, ctx);
398 return ret;
399 }
400
/**
 * Linear cross-fade between two values.
 *
 * @param prev   value the fade starts from
 * @param next   value reached at the last position
 * @param pos    zero-based position inside the fade
 * @param length total number of positions
 * @return the blend of prev and next; at pos == length - 1 the weight of
 *         prev is zero
 */
static inline double fade(double prev, double next, int pos, int length)
{
    const double step_size = 1.0 / length;
    const double weight_prev = 1.0 - (step_size * (pos + 1.0));
    const double weight_next = 1.0 - weight_prev;

    return weight_prev * prev + weight_next * next;
}
408
/** @return the square of value */
static inline double pow_2(const double value)
{
    const double squared = value * value;

    return squared;
}
413
414 static inline double bound(const double threshold, const double val)
415 {
416 const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0
417 return erf(CONST * (val / threshold)) * threshold;
418 }
419
420 static double find_peak_magnitude(AVFrame *frame, int channel)
421 {
422 double max = DBL_EPSILON;
423
424 if (channel == -1) {
425 for (int c = 0; c < frame->ch_layout.nb_channels; c++) {
426 double *data_ptr = (double *)frame->extended_data[c];
427
428 for (int i = 0; i < frame->nb_samples; i++)
429 max = fmax(max, fabs(data_ptr[i]));
430 }
431 } else {
432 double *data_ptr = (double *)frame->extended_data[channel];
433
434 for (int i = 0; i < frame->nb_samples; i++)
435 max = fmax(max, fabs(data_ptr[i]));
436 }
437
438 return max;
439 }
440
441 static double compute_frame_rms(AVFrame *frame, int channel)
442 {
443 double rms_value = 0.0;
444
445 if (channel == -1) {
446 for (int c = 0; c < frame->ch_layout.nb_channels; c++) {
447 const double *data_ptr = (double *)frame->extended_data[c];
448
449 for (int i = 0; i < frame->nb_samples; i++) {
450 rms_value += pow_2(data_ptr[i]);
451 }
452 }
453
454 rms_value /= frame->nb_samples * frame->ch_layout.nb_channels;
455 } else {
456 const double *data_ptr = (double *)frame->extended_data[channel];
457 for (int i = 0; i < frame->nb_samples; i++) {
458 rms_value += pow_2(data_ptr[i]);
459 }
460
461 rms_value /= frame->nb_samples;
462 }
463
464 return fmax(sqrt(rms_value), DBL_EPSILON);
465 }
466
467 static local_gain get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
468 int channel)
469 {
470 const double peak_magnitude = find_peak_magnitude(frame, channel);
471 const double maximum_gain = s->peak_value / peak_magnitude;
472 const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
473 double target_gain = DBL_MAX;
474 local_gain gain;
475
476 if (s->expr_str) {
477 double var_values[VAR_VARS_NB];
478
479 memcpy(var_values, s->var_values, sizeof(var_values));
480
481 var_values[VAR_CH] = channel;
482 var_values[VAR_P] = peak_magnitude;
483
484 target_gain = av_expr_eval(s->expr, var_values, s) / peak_magnitude;
485 }
486
487 gain.threshold = peak_magnitude > s->threshold;
488 gain.max_gain = bound(s->max_amplification, fmin(target_gain, fmin(maximum_gain, rms_gain)));
489
490 return gain;
491 }
492
493 static double minimum_filter(cqueue *q)
494 {
495 double min = DBL_MAX;
496
497 for (int i = 0; i < cqueue_size(q); i++) {
498 min = fmin(min, cqueue_peek(q, i));
499 }
500
501 return min;
502 }
503
504 static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q, cqueue *tq)
505 {
506 const double *weights = s->weights;
507 double result = 0.0, tsum = 0.0;
508
509 for (int i = 0; i < cqueue_size(q); i++) {
510 double tq_item = cqueue_peek(tq, i);
511 double q_item = cqueue_peek(q, i);
512
513 tsum += tq_item * weights[i];
514 result += tq_item * weights[i] * q_item;
515 }
516
517 if (tsum == 0.0)
518 result = 1.0;
519
520 return result;
521 }
522
523 static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
524 local_gain gain)
525 {
526 if (cqueue_empty(s->gain_history_original[channel])) {
527 const int pre_fill_size = s->filter_size / 2;
528 const double initial_value = s->alt_boundary_mode ? gain.max_gain : fmin(1.0, gain.max_gain);
529
530 s->prev_amplification_factor[channel] = initial_value;
531
532 while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
533 cqueue_enqueue(s->gain_history_original[channel], initial_value);
534 cqueue_enqueue(s->threshold_history[channel], gain.threshold);
535 }
536 }
537
538 cqueue_enqueue(s->gain_history_original[channel], gain.max_gain);
539
540 while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
541 double minimum;
542
543 if (cqueue_empty(s->gain_history_minimum[channel])) {
544 const int pre_fill_size = s->filter_size / 2;
545 double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0;
546 int input = pre_fill_size;
547
548 while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
549 input++;
550 initial_value = fmin(initial_value, cqueue_peek(s->gain_history_original[channel], input));
551 cqueue_enqueue(s->gain_history_minimum[channel], initial_value);
552 }
553 }
554
555 minimum = minimum_filter(s->gain_history_original[channel]);
556
557 cqueue_enqueue(s->gain_history_minimum[channel], minimum);
558
559 cqueue_enqueue(s->threshold_history[channel], gain.threshold);
560
561 cqueue_pop(s->gain_history_original[channel]);
562 }
563
564 while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
565 double smoothed, limit;
566
567 smoothed = gaussian_filter(s, s->gain_history_minimum[channel], s->threshold_history[channel]);
568 limit = cqueue_peek(s->gain_history_original[channel], 0);
569 smoothed = fmin(smoothed, limit);
570
571 cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);
572
573 cqueue_pop(s->gain_history_minimum[channel]);
574 cqueue_pop(s->threshold_history[channel]);
575 }
576 }
577
578 static int update_gain_histories(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
579 {
580 DynamicAudioNormalizerContext *s = ctx->priv;
581 AVFrame *analyze_frame = arg;
582 const int channels = s->channels;
583 const int start = (channels * jobnr) / nb_jobs;
584 const int end = (channels * (jobnr+1)) / nb_jobs;
585
586 for (int c = start; c < end; c++)
587 update_gain_history(s, c, get_max_local_gain(s, analyze_frame, c));
588
589 return 0;
590 }
591
/**
 * Blend a new measurement into a running value.
 *
 * @param aggressiveness weight of the new value; must lie in [0, 1]
 *                       (enforced with av_assert0)
 * @return aggressiveness * new + (1 - aggressiveness) * old
 */
static inline double update_value(double new, double old, double aggressiveness)
{
    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));

    {
        const double blended = aggressiveness * new + (1.0 - aggressiveness) * old;

        return blended;
    }
}
597
598 static inline int bypass_channel(DynamicAudioNormalizerContext *s, AVFrame *frame, int ch)
599 {
600 enum AVChannel channel = av_channel_layout_channel_from_index(&frame->ch_layout, ch);
601
602 return av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0;
603 }
604
605 static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
606 {
607 const double diff = 1.0 / frame->nb_samples;
608 int is_first_frame = cqueue_empty(s->gain_history_original[0]);
609
610 for (int c = 0; c < s->channels; c++) {
611 const int bypass = bypass_channel(s, frame, c);
612 double *dst_ptr = (double *)frame->extended_data[c];
613 double current_average_value = 0.0;
614 double prev_value;
615
616 for (int i = 0; i < frame->nb_samples; i++)
617 current_average_value += dst_ptr[i] * diff;
618
619 prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c];
620 s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);
621
622 for (int i = 0; i < frame->nb_samples && !bypass; i++) {
623 dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, frame->nb_samples);
624 }
625 }
626 }
627
/**
 * Search for the largest limiter threshold t >= threshold such that
 * bound(t, 1.0) does not exceed threshold.
 *
 * The search adds successively halved steps for as long as they are
 * still representable and keep the condition true.
 *
 * @return the adjusted threshold; the input is returned unchanged when it
 *         is not strictly inside (DBL_EPSILON, 1 - DBL_EPSILON)
 */
static double setup_compress_thresh(double threshold)
{
    double current_threshold = threshold;

    if (!((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))))
        return threshold;

    for (double step_size = 1.0; step_size > DBL_EPSILON; step_size /= 2.0) {
        while ((llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
                llrint(current_threshold * (UINT64_C(1) << 63))) &&
               (bound(current_threshold + step_size, 1.0) <= threshold)) {
            current_threshold += step_size;
        }
    }

    return current_threshold;
}
649
650 static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
651 AVFrame *frame, int channel)
652 {
653 double variance = 0.0;
654
655 if (channel == -1) {
656 for (int c = 0; c < s->channels; c++) {
657 const double *data_ptr = (double *)frame->extended_data[c];
658
659 for (int i = 0; i < frame->nb_samples; i++) {
660 variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
661 }
662 }
663 variance /= (s->channels * frame->nb_samples) - 1;
664 } else {
665 const double *data_ptr = (double *)frame->extended_data[channel];
666
667 for (int i = 0; i < frame->nb_samples; i++) {
668 variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
669 }
670 variance /= frame->nb_samples - 1;
671 }
672
673 return fmax(sqrt(variance), DBL_EPSILON);
674 }
675
676 static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame)
677 {
678 int is_first_frame = cqueue_empty(s->gain_history_original[0]);
679
680 if (s->channels_coupled) {
681 const double standard_deviation = compute_frame_std_dev(s, frame, -1);
682 const double current_threshold = fmin(1.0, s->compress_factor * standard_deviation);
683
684 const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
685 double prev_actual_thresh, curr_actual_thresh;
686 s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));
687
688 prev_actual_thresh = setup_compress_thresh(prev_value);
689 curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);
690
691 for (int c = 0; c < s->channels; c++) {
692 double *const dst_ptr = (double *)frame->extended_data[c];
693 const int bypass = bypass_channel(s, frame, c);
694
695 if (bypass)
696 continue;
697
698 for (int i = 0; i < frame->nb_samples; i++) {
699 const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
700 dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
701 }
702 }
703 } else {
704 for (int c = 0; c < s->channels; c++) {
705 const int bypass = bypass_channel(s, frame, c);
706 const double standard_deviation = compute_frame_std_dev(s, frame, c);
707 const double current_threshold = setup_compress_thresh(fmin(1.0, s->compress_factor * standard_deviation));
708 const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
709 double prev_actual_thresh, curr_actual_thresh;
710 double *dst_ptr;
711
712 s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);
713
714 prev_actual_thresh = setup_compress_thresh(prev_value);
715 curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);
716
717 dst_ptr = (double *)frame->extended_data[c];
718 for (int i = 0; i < frame->nb_samples && !bypass; i++) {
719 const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
720 dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
721 }
722 }
723 }
724 }
725
726 static int analyze_frame(AVFilterContext *ctx, AVFilterLink *outlink, AVFrame **frame)
727 {
728 DynamicAudioNormalizerContext *s = ctx->priv;
729 AVFrame *analyze_frame;
730
731 if (s->dc_correction || s->compress_factor > DBL_EPSILON) {
732 int ret;
733
734 if (!av_frame_is_writable(*frame)) {
735 AVFrame *out = ff_get_audio_buffer(outlink, (*frame)->nb_samples);
736
737 if (!out) {
738 av_frame_free(frame);
739 return AVERROR(ENOMEM);
740 }
741 ret = av_frame_copy_props(out, *frame);
742 if (ret < 0) {
743 av_frame_free(frame);
744 av_frame_free(&out);
745 return ret;
746 }
747 ret = av_frame_copy(out, *frame);
748 if (ret < 0) {
749 av_frame_free(frame);
750 av_frame_free(&out);
751 return ret;
752 }
753
754 av_frame_free(frame);
755 *frame = out;
756 }
757 }
758
759 if (s->dc_correction)
760 perform_dc_correction(s, *frame);
761
762 if (s->compress_factor > DBL_EPSILON)
763 perform_compression(s, *frame);
764
765 if (s->frame_len != s->sample_advance) {
766 const int offset = s->frame_len - s->sample_advance;
767
768 for (int c = 0; c < s->channels; c++) {
769 double *src = (double *)s->window->extended_data[c];
770
771 memmove(src, &src[s->sample_advance], offset * sizeof(double));
772 memcpy(&src[offset], (*frame)->extended_data[c], (*frame)->nb_samples * sizeof(double));
773 memset(&src[offset + (*frame)->nb_samples], 0, (s->sample_advance - (*frame)->nb_samples) * sizeof(double));
774 }
775
776 analyze_frame = s->window;
777 } else {
778 av_samples_copy(s->window->extended_data, (*frame)->extended_data, 0, 0,
779 FFMIN(s->frame_len, (*frame)->nb_samples), (*frame)->ch_layout.nb_channels, (*frame)->format);
780 analyze_frame = *frame;
781 }
782
783 s->var_values[VAR_SN] = outlink->sample_count_in;
784 s->var_values[VAR_T] = s->var_values[VAR_SN] * (double)1/outlink->sample_rate;
785
786 if (s->channels_coupled) {
787 const local_gain gain = get_max_local_gain(s, analyze_frame, -1);
788 for (int c = 0; c < s->channels; c++)
789 update_gain_history(s, c, gain);
790 } else {
791 ff_filter_execute(ctx, update_gain_histories, analyze_frame, NULL,
792 FFMIN(s->channels, ff_filter_get_nb_threads(ctx)));
793 }
794
795 return 0;
796 }
797
798 static void amplify_channel(DynamicAudioNormalizerContext *s, AVFrame *in,
799 AVFrame *frame, int enabled, int c)
800 {
801 const int bypass = bypass_channel(s, frame, c);
802 const double *src_ptr = (const double *)in->extended_data[c];
803 double *dst_ptr = (double *)frame->extended_data[c];
804 double current_amplification_factor;
805
806 cqueue_dequeue(s->gain_history_smoothed[c], &current_amplification_factor);
807
808 for (int i = 0; i < frame->nb_samples && enabled && !bypass; i++) {
809 const double amplification_factor = fade(s->prev_amplification_factor[c],
810 current_amplification_factor, i,
811 frame->nb_samples);
812
813 dst_ptr[i] = src_ptr[i] * amplification_factor;
814 }
815
816 s->prev_amplification_factor[c] = current_amplification_factor;
817 }
818
819 static int amplify_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
820 {
821 DynamicAudioNormalizerContext *s = ctx->priv;
822 ThreadData *td = arg;
823 AVFrame *out = td->out;
824 AVFrame *in = td->in;
825 const int enabled = td->enabled;
826 const int channels = s->channels;
827 const int start = (channels * jobnr) / nb_jobs;
828 const int end = (channels * (jobnr+1)) / nb_jobs;
829
830 for (int ch = start; ch < end; ch++)
831 amplify_channel(s, in, out, enabled, ch);
832
833 return 0;
834 }
835
836 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
837 {
838 AVFilterContext *ctx = inlink->dst;
839 DynamicAudioNormalizerContext *s = ctx->priv;
840 AVFilterLink *outlink = ctx->outputs[0];
841 ThreadData td;
842 int ret;
843
844 while (((s->queue.available >= s->filter_size) ||
845 (s->eof && s->queue.available)) &&
846 !cqueue_empty(s->gain_history_smoothed[0])) {
847 AVFrame *in = ff_bufqueue_get(&s->queue);
848 AVFrame *out;
849 double is_enabled;
850
851 cqueue_dequeue(s->is_enabled, &is_enabled);
852
853 if (av_frame_is_writable(in)) {
854 out = in;
855 } else {
856 out = ff_get_audio_buffer(outlink, in->nb_samples);
857 if (!out) {
858 av_frame_free(&in);
859 return AVERROR(ENOMEM);
860 }
861 av_frame_copy_props(out, in);
862 }
863
864 td.in = in;
865 td.out = out;
866 td.enabled = is_enabled > 0.;
867 ff_filter_execute(ctx, amplify_channels, &td, NULL,
868 FFMIN(s->channels, ff_filter_get_nb_threads(ctx)));
869
870 s->pts = out->pts + av_rescale_q(out->nb_samples, av_make_q(1, outlink->sample_rate),
871 outlink->time_base);
872 if (out != in)
873 av_frame_free(&in);
874 ret = ff_filter_frame(outlink, out);
875 if (ret < 0)
876 return ret;
877 }
878
879 ret = analyze_frame(ctx, outlink, &in);
880 if (ret < 0)
881 return ret;
882 if (!s->eof) {
883 ff_bufqueue_add(ctx, &s->queue, in);
884 cqueue_enqueue(s->is_enabled, !ctx->is_disabled);
885 } else {
886 av_frame_free(&in);
887 }
888
889 return 1;
890 }
891
892 static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
893 AVFilterLink *outlink)
894 {
895 AVFrame *out = ff_get_audio_buffer(outlink, s->sample_advance);
896
897 if (!out)
898 return AVERROR(ENOMEM);
899
900 for (int c = 0; c < s->channels; c++) {
901 double *dst_ptr = (double *)out->extended_data[c];
902
903 for (int i = 0; i < out->nb_samples; i++) {
904 dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? fmin(s->peak_value, s->target_rms) : s->peak_value);
905 if (s->dc_correction) {
906 dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1;
907 dst_ptr[i] += s->dc_correction_value[c];
908 }
909 }
910 }
911
912 return filter_frame(inlink, out);
913 }
914
915 static int flush(AVFilterLink *outlink)
916 {
917 AVFilterContext *ctx = outlink->src;
918 AVFilterLink *inlink = ctx->inputs[0];
919 DynamicAudioNormalizerContext *s = ctx->priv;
920
921 while (s->eof && cqueue_empty(s->gain_history_smoothed[0])) {
922 for (int c = 0; c < s->channels; c++)
923 update_gain_history(s, c, (local_gain){ cqueue_peek(s->gain_history_original[c], 0), 1.0});
924 }
925
926 return flush_buffer(s, inlink, outlink);
927 }
928
929 static int activate(AVFilterContext *ctx)
930 {
931 AVFilterLink *inlink = ctx->inputs[0];
932 AVFilterLink *outlink = ctx->outputs[0];
933 DynamicAudioNormalizerContext *s = ctx->priv;
934 AVFrame *in = NULL;
935 int ret = 0, status;
936 int64_t pts;
937
938 ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
939 if (ret < 0)
940 return ret;
941 if (strcmp(s->channels_to_filter, "all"))
942 av_channel_layout_from_string(&s->ch_layout, s->channels_to_filter);
943
944 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
945
946 if (!s->eof) {
947 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
948 if (ret < 0)
949 return ret;
950 if (ret > 0) {
951 ret = filter_frame(inlink, in);
952 if (ret <= 0)
953 return ret;
954 }
955
956 if (ff_inlink_check_available_samples(inlink, s->sample_advance) > 0) {
957 ff_filter_set_ready(ctx, 10);
958 return 0;
959 }
960 }
961
962 if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
963 if (status == AVERROR_EOF)
964 s->eof = 1;
965 }
966
967 if (s->eof && s->queue.available)
968 return flush(outlink);
969
970 if (s->eof && !s->queue.available) {
971 ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
972 return 0;
973 }
974
975 if (!s->eof)
976 FF_FILTER_FORWARD_WANTED(outlink, inlink);
977
978 return FFERROR_NOT_READY;
979 }
980
981 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
982 char *res, int res_len, int flags)
983 {
984 DynamicAudioNormalizerContext *s = ctx->priv;
985 AVFilterLink *inlink = ctx->inputs[0];
986 int prev_filter_size = s->filter_size;
987 int ret;
988
989 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
990 if (ret < 0)
991 return ret;
992
993 s->filter_size |= 1;
994 if (prev_filter_size != s->filter_size) {
995 init_gaussian_filter(s);
996
997 for (int c = 0; c < s->channels; c++) {
998 cqueue_resize(s->gain_history_original[c], s->filter_size);
999 cqueue_resize(s->gain_history_minimum[c], s->filter_size);
1000 cqueue_resize(s->threshold_history[c], s->filter_size);
1001 }
1002 }
1003
1004 s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
1005 s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. - s->overlap)));
1006 if (s->expr_str) {
1007 ret = av_expr_parse(&s->expr, s->expr_str, var_names, NULL, NULL,
1008 NULL, NULL, 0, ctx);
1009 if (ret < 0)
1010 return ret;
1011 }
1012 return 0;
1013 }
1014
1015 static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
1016 {
1017 .name = "default",
1018 .type = AVMEDIA_TYPE_AUDIO,
1019 .config_props = config_input,
1020 },
1021 };
1022
1023 const AVFilter ff_af_dynaudnorm = {
1024 .name = "dynaudnorm",
1025 .description = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
1026 .priv_size = sizeof(DynamicAudioNormalizerContext),
1027 .init = init,
1028 .uninit = uninit,
1029 .activate = activate,
1030 FILTER_INPUTS(avfilter_af_dynaudnorm_inputs),
1031 FILTER_OUTPUTS(ff_audio_default_filterpad),
1032 FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_DBLP),
1033 .priv_class = &dynaudnorm_class,
1034 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1035 AVFILTER_FLAG_SLICE_THREADS,
1036 .process_command = process_command,
1037 };
1038