LCOV - code coverage report
Current view: top level - libavfilter - af_dynaudnorm.c
Test:         coverage.info
Date:         2017-12-17 04:34:43

                      Hit    Total    Coverage
  Lines:                0      340       0.0 %
  Functions:            0       34       0.0 %

          Line data    Source code
       1             : /*
       2             :  * Dynamic Audio Normalizer
       3             :  * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * Dynamic Audio Normalizer
      25             :  */
      26             : 
      27             : #include <float.h>
      28             : 
      29             : #include "libavutil/avassert.h"
      30             : #include "libavutil/opt.h"
      31             : 
      32             : #define FF_BUFQUEUE_SIZE 302
      33             : #include "libavfilter/bufferqueue.h"
      34             : 
      35             : #include "audio.h"
      36             : #include "avfilter.h"
      37             : #include "internal.h"
      38             : 
      39             : typedef struct cqueue {
      40             :     double *elements;
      41             :     int size;
      42             :     int nb_elements;
      43             :     int first;
      44             : } cqueue;
      45             : 
      46             : typedef struct DynamicAudioNormalizerContext {
      47             :     const AVClass *class;
      48             : 
      49             :     struct FFBufQueue queue;
      50             : 
      51             :     int frame_len;
      52             :     int frame_len_msec;
      53             :     int filter_size;
      54             :     int dc_correction;
      55             :     int channels_coupled;
      56             :     int alt_boundary_mode;
      57             : 
      58             :     double peak_value;
      59             :     double max_amplification;
      60             :     double target_rms;
      61             :     double compress_factor;
      62             :     double *prev_amplification_factor;
      63             :     double *dc_correction_value;
      64             :     double *compress_threshold;
      65             :     double *fade_factors[2];
      66             :     double *weights;
      67             : 
      68             :     int channels;
      69             :     int delay;
      70             : 
      71             :     cqueue **gain_history_original;
      72             :     cqueue **gain_history_minimum;
      73             :     cqueue **gain_history_smoothed;
      74             : } DynamicAudioNormalizerContext;
      75             : 
      76             : #define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
      77             : #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
      78             : 
      79             : static const AVOption dynaudnorm_options[] = {
      80             :     { "f", "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
      81             :     { "g", "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
      82             :     { "p", "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
      83             :     { "m", "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
      84             :     { "r", "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
      85             :     { "n", "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
      86             :     { "c", "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
      87             :     { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
      88             :     { "s", "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
      89             :     { NULL }
      90             : };
      91             : 
      92             : AVFILTER_DEFINE_CLASS(dynaudnorm);
      93             : 
      94           0 : static av_cold int init(AVFilterContext *ctx)
      95             : {
      96           0 :     DynamicAudioNormalizerContext *s = ctx->priv;
      97             : 
      98           0 :     if (!(s->filter_size & 1)) {
      99           0 :         av_log(ctx, AV_LOG_ERROR, "filter size %d is invalid. Must be an odd value.\n", s->filter_size);
     100           0 :         return AVERROR(EINVAL);
     101             :     }
     102             : 
     103           0 :     return 0;
     104             : }
     105             : 
     106           0 : static int query_formats(AVFilterContext *ctx)
     107             : {
     108             :     AVFilterFormats *formats;
     109             :     AVFilterChannelLayouts *layouts;
     110             :     static const enum AVSampleFormat sample_fmts[] = {
     111             :         AV_SAMPLE_FMT_DBLP,
     112             :         AV_SAMPLE_FMT_NONE
     113             :     };
     114             :     int ret;
     115             : 
     116           0 :     layouts = ff_all_channel_counts();
     117           0 :     if (!layouts)
     118           0 :         return AVERROR(ENOMEM);
     119           0 :     ret = ff_set_common_channel_layouts(ctx, layouts);
     120           0 :     if (ret < 0)
     121           0 :         return ret;
     122             : 
     123           0 :     formats = ff_make_format_list(sample_fmts);
     124           0 :     if (!formats)
     125           0 :         return AVERROR(ENOMEM);
     126           0 :     ret = ff_set_common_formats(ctx, formats);
     127           0 :     if (ret < 0)
     128           0 :         return ret;
     129             : 
     130           0 :     formats = ff_all_samplerates();
     131           0 :     if (!formats)
     132           0 :         return AVERROR(ENOMEM);
     133           0 :     return ff_set_common_samplerates(ctx, formats);
     134             : }
     135             : 
     136           0 : static inline int frame_size(int sample_rate, int frame_len_msec)
     137             : {
     138           0 :     const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0));
     139           0 :     return frame_size + (frame_size % 2);
     140             : }
     141             : 
     142           0 : static void precalculate_fade_factors(double *fade_factors[2], int frame_len)
     143             : {
     144           0 :     const double step_size = 1.0 / frame_len;
     145             :     int pos;
     146             : 
     147           0 :     for (pos = 0; pos < frame_len; pos++) {
     148           0 :         fade_factors[0][pos] = 1.0 - (step_size * (pos + 1.0));
     149           0 :         fade_factors[1][pos] = 1.0 - fade_factors[0][pos];
     150             :     }
     151           0 : }
     152             : 
     153           0 : static cqueue *cqueue_create(int size)
     154             : {
     155             :     cqueue *q;
     156             : 
     157           0 :     q = av_malloc(sizeof(cqueue));
     158           0 :     if (!q)
     159           0 :         return NULL;
     160             : 
     161           0 :     q->size = size;
     162           0 :     q->nb_elements = 0;
     163           0 :     q->first = 0;
     164             : 
     165           0 :     q->elements = av_malloc_array(size, sizeof(double));
     166           0 :     if (!q->elements) {
     167           0 :         av_free(q);
     168           0 :         return NULL;
     169             :     }
     170             : 
     171           0 :     return q;
     172             : }
     173             : 
     174           0 : static void cqueue_free(cqueue *q)
     175             : {
     176           0 :     if (q)
     177           0 :         av_free(q->elements);
     178           0 :     av_free(q);
     179           0 : }
     180             : 
     181           0 : static int cqueue_size(cqueue *q)
     182             : {
     183           0 :     return q->nb_elements;
     184             : }
     185             : 
     186           0 : static int cqueue_empty(cqueue *q)
     187             : {
     188           0 :     return !q->nb_elements;
     189             : }
     190             : 
     191           0 : static int cqueue_enqueue(cqueue *q, double element)
     192             : {
     193             :     int i;
     194             : 
     195             :     av_assert2(q->nb_elements != q->size);
     196             : 
     197           0 :     i = (q->first + q->nb_elements) % q->size;
     198           0 :     q->elements[i] = element;
     199           0 :     q->nb_elements++;
     200             : 
     201           0 :     return 0;
     202             : }
     203             : 
     204           0 : static double cqueue_peek(cqueue *q, int index)
     205             : {
     206             :     av_assert2(index < q->nb_elements);
     207           0 :     return q->elements[(q->first + index) % q->size];
     208             : }
     209             : 
     210           0 : static int cqueue_dequeue(cqueue *q, double *element)
     211             : {
     212             :     av_assert2(!cqueue_empty(q));
     213             : 
     214           0 :     *element = q->elements[q->first];
     215           0 :     q->first = (q->first + 1) % q->size;
     216           0 :     q->nb_elements--;
     217             : 
     218           0 :     return 0;
     219             : }
     220             : 
     221           0 : static int cqueue_pop(cqueue *q)
     222             : {
     223             :     av_assert2(!cqueue_empty(q));
     224             : 
     225           0 :     q->first = (q->first + 1) % q->size;
     226           0 :     q->nb_elements--;
     227             : 
     228           0 :     return 0;
     229             : }
     230             : 
     231           0 : static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
     232             : {
     233           0 :     double total_weight = 0.0;
     234           0 :     const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
     235             :     double adjust;
     236             :     int i;
     237             : 
     238             :     // Pre-compute constants
     239           0 :     const int offset = s->filter_size / 2;
     240           0 :     const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
     241           0 :     const double c2 = 2.0 * sigma * sigma;
     242             : 
     243             :     // Compute weights
     244           0 :     for (i = 0; i < s->filter_size; i++) {
     245           0 :         const int x = i - offset;
     246             : 
     247           0 :         s->weights[i] = c1 * exp(-x * x / c2);
     248           0 :         total_weight += s->weights[i];
     249             :     }
     250             : 
     251             :     // Adjust weights
     252           0 :     adjust = 1.0 / total_weight;
     253           0 :     for (i = 0; i < s->filter_size; i++) {
     254           0 :         s->weights[i] *= adjust;
     255             :     }
     256           0 : }
     257             : 
     258           0 : static av_cold void uninit(AVFilterContext *ctx)
     259             : {
     260           0 :     DynamicAudioNormalizerContext *s = ctx->priv;
     261             :     int c;
     262             : 
     263           0 :     av_freep(&s->prev_amplification_factor);
     264           0 :     av_freep(&s->dc_correction_value);
     265           0 :     av_freep(&s->compress_threshold);
     266           0 :     av_freep(&s->fade_factors[0]);
     267           0 :     av_freep(&s->fade_factors[1]);
     268             : 
     269           0 :     for (c = 0; c < s->channels; c++) {
     270           0 :         if (s->gain_history_original)
     271           0 :             cqueue_free(s->gain_history_original[c]);
     272           0 :         if (s->gain_history_minimum)
     273           0 :             cqueue_free(s->gain_history_minimum[c]);
     274           0 :         if (s->gain_history_smoothed)
     275           0 :             cqueue_free(s->gain_history_smoothed[c]);
     276             :     }
     277             : 
     278           0 :     av_freep(&s->gain_history_original);
     279           0 :     av_freep(&s->gain_history_minimum);
     280           0 :     av_freep(&s->gain_history_smoothed);
     281             : 
     282           0 :     av_freep(&s->weights);
     283             : 
     284           0 :     ff_bufqueue_discard_all(&s->queue);
     285           0 : }
     286             : 
     287           0 : static int config_input(AVFilterLink *inlink)
     288             : {
     289           0 :     AVFilterContext *ctx = inlink->dst;
     290           0 :     DynamicAudioNormalizerContext *s = ctx->priv;
     291             :     int c;
     292             : 
     293           0 :     uninit(ctx);
     294             : 
     295           0 :     s->frame_len =
     296           0 :     inlink->min_samples =
     297           0 :     inlink->max_samples =
     298           0 :     inlink->partial_buf_size = frame_size(inlink->sample_rate, s->frame_len_msec);
     299           0 :     av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);
     300             : 
     301           0 :     s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0]));
     302           0 :     s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1]));
     303             : 
     304           0 :     s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor));
     305           0 :     s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value));
     306           0 :     s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold));
     307           0 :     s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original));
     308           0 :     s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
     309           0 :     s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
     310           0 :     s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights));
     311           0 :     if (!s->prev_amplification_factor || !s->dc_correction_value ||
     312           0 :         !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] ||
     313           0 :         !s->gain_history_original || !s->gain_history_minimum ||
     314           0 :         !s->gain_history_smoothed || !s->weights)
     315           0 :         return AVERROR(ENOMEM);
     316             : 
     317           0 :     for (c = 0; c < inlink->channels; c++) {
     318           0 :         s->prev_amplification_factor[c] = 1.0;
     319             : 
     320           0 :         s->gain_history_original[c] = cqueue_create(s->filter_size);
     321           0 :         s->gain_history_minimum[c]  = cqueue_create(s->filter_size);
     322           0 :         s->gain_history_smoothed[c] = cqueue_create(s->filter_size);
     323             : 
     324           0 :         if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
     325           0 :             !s->gain_history_smoothed[c])
     326           0 :             return AVERROR(ENOMEM);
     327             :     }
     328             : 
     329           0 :     precalculate_fade_factors(s->fade_factors, s->frame_len);
     330           0 :     init_gaussian_filter(s);
     331             : 
     332           0 :     s->channels = inlink->channels;
     333           0 :     s->delay = s->filter_size;
     334             : 
     335           0 :     return 0;
     336             : }
     337             : 
     338           0 : static inline double fade(double prev, double next, int pos,
     339             :                           double *fade_factors[2])
     340             : {
     341           0 :     return fade_factors[0][pos] * prev + fade_factors[1][pos] * next;
     342             : }
     343             : 
     344           0 : static inline double pow_2(const double value)
     345             : {
     346           0 :     return value * value;
     347             : }
     348             : 
     349           0 : static inline double bound(const double threshold, const double val)
     350             : {
     351           0 :     const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0
     352           0 :     return erf(CONST * (val / threshold)) * threshold;
     353             : }
     354             : 
     355           0 : static double find_peak_magnitude(AVFrame *frame, int channel)
     356             : {
     357           0 :     double max = DBL_EPSILON;
     358             :     int c, i;
     359             : 
     360           0 :     if (channel == -1) {
     361           0 :         for (c = 0; c < frame->channels; c++) {
     362           0 :             double *data_ptr = (double *)frame->extended_data[c];
     363             : 
     364           0 :             for (i = 0; i < frame->nb_samples; i++)
     365           0 :                 max = FFMAX(max, fabs(data_ptr[i]));
     366             :         }
     367             :     } else {
     368           0 :         double *data_ptr = (double *)frame->extended_data[channel];
     369             : 
     370           0 :         for (i = 0; i < frame->nb_samples; i++)
     371           0 :             max = FFMAX(max, fabs(data_ptr[i]));
     372             :     }
     373             : 
     374           0 :     return max;
     375             : }
     376             : 
     377           0 : static double compute_frame_rms(AVFrame *frame, int channel)
     378             : {
     379           0 :     double rms_value = 0.0;
     380             :     int c, i;
     381             : 
     382           0 :     if (channel == -1) {
     383           0 :         for (c = 0; c < frame->channels; c++) {
     384           0 :             const double *data_ptr = (double *)frame->extended_data[c];
     385             : 
     386           0 :             for (i = 0; i < frame->nb_samples; i++) {
     387           0 :                 rms_value += pow_2(data_ptr[i]);
     388             :             }
     389             :         }
     390             : 
     391           0 :         rms_value /= frame->nb_samples * frame->channels;
     392             :     } else {
     393           0 :         const double *data_ptr = (double *)frame->extended_data[channel];
     394           0 :         for (i = 0; i < frame->nb_samples; i++) {
     395           0 :             rms_value += pow_2(data_ptr[i]);
     396             :         }
     397             : 
     398           0 :         rms_value /= frame->nb_samples;
     399             :     }
     400             : 
     401           0 :     return FFMAX(sqrt(rms_value), DBL_EPSILON);
     402             : }
     403             : 
     404           0 : static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
     405             :                                  int channel)
     406             : {
     407           0 :     const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel);
     408           0 :     const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
     409           0 :     return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain));
     410             : }
     411             : 
     412           0 : static double minimum_filter(cqueue *q)
     413             : {
     414           0 :     double min = DBL_MAX;
     415             :     int i;
     416             : 
     417           0 :     for (i = 0; i < cqueue_size(q); i++) {
     418           0 :         min = FFMIN(min, cqueue_peek(q, i));
     419             :     }
     420             : 
     421           0 :     return min;
     422             : }
     423             : 
     424           0 : static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
     425             : {
     426           0 :     double result = 0.0;
     427             :     int i;
     428             : 
     429           0 :     for (i = 0; i < cqueue_size(q); i++) {
     430           0 :         result += cqueue_peek(q, i) * s->weights[i];
     431             :     }
     432             : 
     433           0 :     return result;
     434             : }
     435             : 
     436           0 : static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
     437             :                                 double current_gain_factor)
     438             : {
     439           0 :     if (cqueue_empty(s->gain_history_original[channel]) ||
     440           0 :         cqueue_empty(s->gain_history_minimum[channel])) {
     441           0 :         const int pre_fill_size = s->filter_size / 2;
     442           0 :         const double initial_value = s->alt_boundary_mode ? current_gain_factor : 1.0;
     443             : 
     444           0 :         s->prev_amplification_factor[channel] = initial_value;
     445             : 
     446           0 :         while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
     447           0 :             cqueue_enqueue(s->gain_history_original[channel], initial_value);
     448             :         }
     449             :     }
     450             : 
     451           0 :     cqueue_enqueue(s->gain_history_original[channel], current_gain_factor);
     452             : 
     453           0 :     while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
     454             :         double minimum;
     455           0 :         av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size);
     456             : 
     457           0 :         if (cqueue_empty(s->gain_history_minimum[channel])) {
     458           0 :             const int pre_fill_size = s->filter_size / 2;
     459           0 :             double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0;
     460           0 :             int input = pre_fill_size;
     461             : 
     462           0 :             while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
     463           0 :                 input++;
     464           0 :                 initial_value = FFMIN(initial_value, cqueue_peek(s->gain_history_original[channel], input));
     465           0 :                 cqueue_enqueue(s->gain_history_minimum[channel], initial_value);
     466             :             }
     467             :         }
     468             : 
     469           0 :         minimum = minimum_filter(s->gain_history_original[channel]);
     470             : 
     471           0 :         cqueue_enqueue(s->gain_history_minimum[channel], minimum);
     472             : 
     473           0 :         cqueue_pop(s->gain_history_original[channel]);
     474             :     }
     475             : 
     476           0 :     while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
     477             :         double smoothed;
     478           0 :         av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
     479           0 :         smoothed = gaussian_filter(s, s->gain_history_minimum[channel]);
     480             : 
     481           0 :         cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);
     482             : 
     483           0 :         cqueue_pop(s->gain_history_minimum[channel]);
     484             :     }
     485           0 : }
     486             : 
     487           0 : static inline double update_value(double new, double old, double aggressiveness)
     488             : {
     489           0 :     av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));
     490           0 :     return aggressiveness * new + (1.0 - aggressiveness) * old;
     491             : }
     492             : 
     493           0 : static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
     494             : {
     495           0 :     const double diff = 1.0 / frame->nb_samples;
     496           0 :     int is_first_frame = cqueue_empty(s->gain_history_original[0]);
     497             :     int c, i;
     498             : 
     499           0 :     for (c = 0; c < s->channels; c++) {
     500           0 :         double *dst_ptr = (double *)frame->extended_data[c];
     501           0 :         double current_average_value = 0.0;
     502             :         double prev_value;
     503             : 
     504           0 :         for (i = 0; i < frame->nb_samples; i++)
     505           0 :             current_average_value += dst_ptr[i] * diff;
     506             : 
     507           0 :         prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c];
     508           0 :         s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);
     509             : 
     510           0 :         for (i = 0; i < frame->nb_samples; i++) {
     511           0 :             dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, s->fade_factors);
     512             :         }
     513             :     }
     514           0 : }
     515             : 
     516           0 : static double setup_compress_thresh(double threshold)
     517             : {
     518           0 :     if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
     519           0 :         double current_threshold = threshold;
     520           0 :         double step_size = 1.0;
     521             : 
     522           0 :         while (step_size > DBL_EPSILON) {
     523           0 :             while ((llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
     524           0 :                     llrint(current_threshold * (UINT64_C(1) << 63))) &&
     525           0 :                    (bound(current_threshold + step_size, 1.0) <= threshold)) {
     526           0 :                 current_threshold += step_size;
     527             :             }
     528             : 
     529           0 :             step_size /= 2.0;
     530             :         }
     531             : 
     532           0 :         return current_threshold;
     533             :     } else {
     534           0 :         return threshold;
     535             :     }
     536             : }
     537             : 
     538           0 : static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
     539             :                                     AVFrame *frame, int channel)
     540             : {
     541           0 :     double variance = 0.0;
     542             :     int i, c;
     543             : 
     544           0 :     if (channel == -1) {
     545           0 :         for (c = 0; c < s->channels; c++) {
     546           0 :             const double *data_ptr = (double *)frame->extended_data[c];
     547             : 
     548           0 :             for (i = 0; i < frame->nb_samples; i++) {
     549           0 :                 variance += pow_2(data_ptr[i]);  // Assume that MEAN is *zero*
     550             :             }
     551             :         }
     552           0 :         variance /= (s->channels * frame->nb_samples) - 1;
     553             :     } else {
     554           0 :         const double *data_ptr = (double *)frame->extended_data[channel];
     555             : 
     556           0 :         for (i = 0; i < frame->nb_samples; i++) {
     557           0 :             variance += pow_2(data_ptr[i]);      // Assume that MEAN is *zero*
     558             :         }
     559           0 :         variance /= frame->nb_samples - 1;
     560             :     }
     561             : 
     562           0 :     return FFMAX(sqrt(variance), DBL_EPSILON);
     563             : }
     564             : 
     565           0 : static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame)
     566             : {
     567           0 :     int is_first_frame = cqueue_empty(s->gain_history_original[0]);
     568             :     int c, i;
     569             : 
     570           0 :     if (s->channels_coupled) {
     571           0 :         const double standard_deviation = compute_frame_std_dev(s, frame, -1);
     572           0 :         const double current_threshold  = FFMIN(1.0, s->compress_factor * standard_deviation);
     573             : 
     574           0 :         const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
     575             :         double prev_actual_thresh, curr_actual_thresh;
     576           0 :         s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));
     577             : 
     578           0 :         prev_actual_thresh = setup_compress_thresh(prev_value);
     579           0 :         curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);
     580             : 
     581           0 :         for (c = 0; c < s->channels; c++) {
     582           0 :             double *const dst_ptr = (double *)frame->extended_data[c];
     583           0 :             for (i = 0; i < frame->nb_samples; i++) {
     584           0 :                 const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
     585           0 :                 dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
     586             :             }
     587             :         }
     588             :     } else {
     589           0 :         for (c = 0; c < s->channels; c++) {
     590           0 :             const double standard_deviation = compute_frame_std_dev(s, frame, c);
     591           0 :             const double current_threshold  = setup_compress_thresh(FFMIN(1.0, s->compress_factor * standard_deviation));
     592             : 
     593           0 :             const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
     594             :             double prev_actual_thresh, curr_actual_thresh;
     595             :             double *dst_ptr;
     596           0 :             s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);
     597             : 
     598           0 :             prev_actual_thresh = setup_compress_thresh(prev_value);
     599           0 :             curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);
     600             : 
     601           0 :             dst_ptr = (double *)frame->extended_data[c];
     602           0 :             for (i = 0; i < frame->nb_samples; i++) {
     603           0 :                 const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
     604           0 :                 dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
     605             :             }
     606             :         }
     607             :     }
     608           0 : }
     609             : 
     610           0 : static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
     611             : {
     612           0 :     if (s->dc_correction) {
     613           0 :         perform_dc_correction(s, frame);
     614             :     }
     615             : 
     616           0 :     if (s->compress_factor > DBL_EPSILON) {
     617           0 :         perform_compression(s, frame);
     618             :     }
     619             : 
     620           0 :     if (s->channels_coupled) {
     621           0 :         const double current_gain_factor = get_max_local_gain(s, frame, -1);
     622             :         int c;
     623             : 
     624           0 :         for (c = 0; c < s->channels; c++)
     625           0 :             update_gain_history(s, c, current_gain_factor);
     626             :     } else {
     627             :         int c;
     628             : 
     629           0 :         for (c = 0; c < s->channels; c++)
     630           0 :             update_gain_history(s, c, get_max_local_gain(s, frame, c));
     631             :     }
     632           0 : }
     633             : 
     634           0 : static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
     635             : {
     636             :     int c, i;
     637             : 
     638           0 :     for (c = 0; c < s->channels; c++) {
     639           0 :         double *dst_ptr = (double *)frame->extended_data[c];
     640             :         double current_amplification_factor;
     641             : 
     642           0 :         cqueue_dequeue(s->gain_history_smoothed[c], &current_amplification_factor);
     643             : 
     644           0 :         for (i = 0; i < frame->nb_samples; i++) {
     645           0 :             const double amplification_factor = fade(s->prev_amplification_factor[c],
     646             :                                                      current_amplification_factor, i,
     647           0 :                                                      s->fade_factors);
     648             : 
     649           0 :             dst_ptr[i] *= amplification_factor;
     650             : 
     651           0 :             if (fabs(dst_ptr[i]) > s->peak_value)
     652           0 :                 dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]);
     653             :         }
     654             : 
     655           0 :         s->prev_amplification_factor[c] = current_amplification_factor;
     656             :     }
     657           0 : }
     658             : 
     659           0 : static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     660             : {
     661           0 :     AVFilterContext *ctx = inlink->dst;
     662           0 :     DynamicAudioNormalizerContext *s = ctx->priv;
     663           0 :     AVFilterLink *outlink = inlink->dst->outputs[0];
     664           0 :     int ret = 0;
     665             : 
     666           0 :     if (!cqueue_empty(s->gain_history_smoothed[0])) {
     667           0 :         AVFrame *out = ff_bufqueue_get(&s->queue);
     668             : 
     669           0 :         amplify_frame(s, out);
     670           0 :         ret = ff_filter_frame(outlink, out);
     671             :     }
     672             : 
     673           0 :     analyze_frame(s, in);
     674           0 :     ff_bufqueue_add(ctx, &s->queue, in);
     675             : 
     676           0 :     return ret;
     677             : }
     678             : 
     679           0 : static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
     680             :                         AVFilterLink *outlink)
     681             : {
     682           0 :     AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len);
     683             :     int c, i;
     684             : 
     685           0 :     if (!out)
     686           0 :         return AVERROR(ENOMEM);
     687             : 
     688           0 :     for (c = 0; c < s->channels; c++) {
     689           0 :         double *dst_ptr = (double *)out->extended_data[c];
     690             : 
     691           0 :         for (i = 0; i < out->nb_samples; i++) {
     692           0 :             dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? FFMIN(s->peak_value, s->target_rms) : s->peak_value);
     693           0 :             if (s->dc_correction) {
     694           0 :                 dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1;
     695           0 :                 dst_ptr[i] += s->dc_correction_value[c];
     696             :             }
     697             :         }
     698             :     }
     699             : 
     700           0 :     s->delay--;
     701           0 :     return filter_frame(inlink, out);
     702             : }
     703             : 
     704           0 : static int request_frame(AVFilterLink *outlink)
     705             : {
     706           0 :     AVFilterContext *ctx = outlink->src;
     707           0 :     DynamicAudioNormalizerContext *s = ctx->priv;
     708           0 :     int ret = 0;
     709             : 
     710           0 :     ret = ff_request_frame(ctx->inputs[0]);
     711             : 
     712           0 :     if (ret == AVERROR_EOF && !ctx->is_disabled && s->delay) {
     713           0 :         if (!cqueue_empty(s->gain_history_smoothed[0])) {
     714           0 :             ret = flush_buffer(s, ctx->inputs[0], outlink);
     715           0 :         } else if (s->queue.available) {
     716           0 :             AVFrame *out = ff_bufqueue_get(&s->queue);
     717             : 
     718           0 :             ret = ff_filter_frame(outlink, out);
     719             :         }
     720             :     }
     721             : 
     722           0 :     return ret;
     723             : }
     724             : 
     725             : static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
     726             :     {
     727             :         .name           = "default",
     728             :         .type           = AVMEDIA_TYPE_AUDIO,
     729             :         .filter_frame   = filter_frame,
     730             :         .config_props   = config_input,
     731             :         .needs_writable = 1,
     732             :     },
     733             :     { NULL }
     734             : };
     735             : 
     736             : static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
     737             :     {
     738             :         .name          = "default",
     739             :         .type          = AVMEDIA_TYPE_AUDIO,
     740             :         .request_frame = request_frame,
     741             :     },
     742             :     { NULL }
     743             : };
     744             : 
     745             : AVFilter ff_af_dynaudnorm = {
     746             :     .name          = "dynaudnorm",
     747             :     .description   = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
     748             :     .query_formats = query_formats,
     749             :     .priv_size     = sizeof(DynamicAudioNormalizerContext),
     750             :     .init          = init,
     751             :     .uninit        = uninit,
     752             :     .inputs        = avfilter_af_dynaudnorm_inputs,
     753             :     .outputs       = avfilter_af_dynaudnorm_outputs,
     754             :     .priv_class    = &dynaudnorm_class,
     755             : };

Generated by: LCOV version 1.13
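
Editor's note: every line above reports zero hits. Below is a minimal sketch, assuming the 2017-era public libavfilter API (where avfilter_register_all() still exists), of how the filter could be instantiated and linked so that init(), config_input() and filter_frame() would actually be exercised. The abuffer argument string and the dynaudnorm option string "f=500:g=31:p=0.95" (mirroring the defaults in the option table) are illustrative, not taken from the report.

    #include <libavfilter/avfilter.h>

    /* Build abuffer -> dynaudnorm -> abuffersink. The caller would then push
     * AVFrames into "in" with av_buffersrc_add_frame() and pull processed
     * frames from "out" with av_buffersink_get_frame(). */
    static int build_dynaudnorm_graph(AVFilterGraph **pgraph)
    {
        AVFilterGraph *graph = avfilter_graph_alloc();
        AVFilterContext *src = NULL, *dan = NULL, *sink = NULL;
        int ret;

        if (!graph)
            return AVERROR(ENOMEM);

        avfilter_register_all();  /* needed before avfilter_get_by_name() in this era */

        /* Planar-double stereo input, matching the AV_SAMPLE_FMT_DBLP format
         * the filter negotiates in query_formats(). */
        ret = avfilter_graph_create_filter(&src, avfilter_get_by_name("abuffer"), "in",
                                           "time_base=1/48000:sample_rate=48000:"
                                           "sample_fmt=dblp:channel_layout=stereo",
                                           NULL, graph);
        if (ret < 0)
            goto fail;

        ret = avfilter_graph_create_filter(&dan, avfilter_get_by_name("dynaudnorm"), "dan",
                                           "f=500:g=31:p=0.95", NULL, graph);
        if (ret < 0)
            goto fail;

        ret = avfilter_graph_create_filter(&sink, avfilter_get_by_name("abuffersink"), "out",
                                           NULL, NULL, graph);
        if (ret < 0)
            goto fail;

        if ((ret = avfilter_link(src, 0, dan, 0)) < 0 ||
            (ret = avfilter_link(dan, 0, sink, 0)) < 0 ||
            (ret = avfilter_graph_config(graph, NULL)) < 0)
            goto fail;

        *pgraph = graph;
        return 0;

    fail:
        avfilter_graph_free(&graph);
        return ret;
    }

At 48 kHz the default 500 ms window makes frame_size() return 24000 samples per analysis frame, so pushing a few seconds of audio through such a graph would cover the analysis, gain-history and amplification paths listed above.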