LCOV code coverage report: src/libavfilter/af_dynaudnorm.c (source listing)
Test: coverage.info, date 2017-01-24 04:42:20. Lines hit: 0 of 339 (0.0%). Functions hit: 0 of 34 (0.0%).

/*
 * Dynamic Audio Normalizer
 * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Dynamic Audio Normalizer
 */

#include <float.h>

#include "libavutil/avassert.h"
#include "libavutil/opt.h"

#define FF_BUFQUEUE_SIZE 302
#include "libavfilter/bufferqueue.h"

#include "audio.h"
#include "avfilter.h"
#include "internal.h"

typedef struct cqueue {
    double *elements;
    int size;
    int nb_elements;
    int first;
} cqueue;

typedef struct DynamicAudioNormalizerContext {
    const AVClass *class;

    struct FFBufQueue queue;

    int frame_len;
    int frame_len_msec;
    int filter_size;
    int dc_correction;
    int channels_coupled;
    int alt_boundary_mode;

    double peak_value;
    double max_amplification;
    double target_rms;
    double compress_factor;
    double *prev_amplification_factor;
    double *dc_correction_value;
    double *compress_threshold;
    double *fade_factors[2];
    double *weights;

    int channels;
    int delay;

    cqueue **gain_history_original;
    cqueue **gain_history_minimum;
    cqueue **gain_history_smoothed;
} DynamicAudioNormalizerContext;

#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption dynaudnorm_options[] = {
    { "f", "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
    { "g", "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
    { "p", "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
    { "m", "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
    { "r", "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "n", "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
    { "c", "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "s", "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
    { NULL }
};
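
/*
 * Illustrative note (added, not part of the original source): the options
 * above are exposed through libavfilter's AVOption system, so they can be set
 * with the usual key=value syntax of the filter graph parser. A typical
 * invocation might look like the sketch below; the file names are placeholders
 * and the option values shown are simply the defaults from the table above.
 *
 * @code
 * ffmpeg -i input.wav -af dynaudnorm=f=500:g=31:p=0.95:m=10 output.wav
 * @endcode
 */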

AVFILTER_DEFINE_CLASS(dynaudnorm);

static av_cold int init(AVFilterContext *ctx)
{
    DynamicAudioNormalizerContext *s = ctx->priv;

    if (!(s->filter_size & 1)) {
        av_log(ctx, AV_LOG_ERROR, "filter size %d is invalid. Must be an odd value.\n", s->filter_size);
        return AVERROR(EINVAL);
    }

    return 0;
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats;
    AVFilterChannelLayouts *layouts;
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBLP,
        AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    formats = ff_all_samplerates();
    if (!formats)
        return AVERROR(ENOMEM);
    return ff_set_common_samplerates(ctx, formats);
}

static inline int frame_size(int sample_rate, int frame_len_msec)
{
    const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0));
    return frame_size + (frame_size % 2);
}

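/*
 * Worked example (added for clarity, not in the original source): at a
 * 44100 Hz sample rate with the default frame length of 500 ms, frame_size()
 * computes lrint(44100 * 0.5) = 22050 samples, which is already even. At
 * 22050 Hz with f=501, lrint(22050 * 0.501) = 11047 is odd, so one sample is
 * added and the frame becomes 11048 samples; the frame length is therefore
 * always an even number of samples.
 */
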
static void precalculate_fade_factors(double *fade_factors[2], int frame_len)
{
    const double step_size = 1.0 / frame_len;
    int pos;

    for (pos = 0; pos < frame_len; pos++) {
        fade_factors[0][pos] = 1.0 - (step_size * (pos + 1.0));
        fade_factors[1][pos] = 1.0 - fade_factors[0][pos];
    }
}

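/*
 * Added note: fade_factors[0] is a linear ramp from just below 1.0 down to
 * 0.0 and fade_factors[1] is its complement, so fade() further below returns
 * a plain linear interpolation between a "previous" and a "next" value across
 * one frame. For example, with frame_len = 4 the two tables come out as
 * {0.75, 0.50, 0.25, 0.00} and {0.25, 0.50, 0.75, 1.00}.
 */
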
static cqueue *cqueue_create(int size)
{
    cqueue *q;

    q = av_malloc(sizeof(cqueue));
    if (!q)
        return NULL;

    q->size = size;
    q->nb_elements = 0;
    q->first = 0;

    q->elements = av_malloc_array(size, sizeof(double));
    if (!q->elements) {
        av_free(q);
        return NULL;
    }

    return q;
}

static void cqueue_free(cqueue *q)
{
    if (q)
        av_free(q->elements);
    av_free(q);
}

static int cqueue_size(cqueue *q)
{
    return q->nb_elements;
}

static int cqueue_empty(cqueue *q)
{
    return !q->nb_elements;
}

static int cqueue_enqueue(cqueue *q, double element)
{
    int i;

    av_assert2(q->nb_elements != q->size);

    i = (q->first + q->nb_elements) % q->size;
    q->elements[i] = element;
    q->nb_elements++;

    return 0;
}

static double cqueue_peek(cqueue *q, int index)
{
    av_assert2(index < q->nb_elements);
    return q->elements[(q->first + index) % q->size];
}

static int cqueue_dequeue(cqueue *q, double *element)
{
    av_assert2(!cqueue_empty(q));

    *element = q->elements[q->first];
    q->first = (q->first + 1) % q->size;
    q->nb_elements--;

    return 0;
}

static int cqueue_pop(cqueue *q)
{
    av_assert2(!cqueue_empty(q));

    q->first = (q->first + 1) % q->size;
    q->nb_elements--;

    return 0;
}

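/*
 * Usage sketch (added for illustration, not part of the original source):
 * cqueue is a fixed-capacity circular FIFO of doubles; enqueueing into a full
 * queue or reading from an empty one only trips an assertion, so callers are
 * expected to respect the capacity themselves.
 *
 * @code
 * cqueue *q = cqueue_create(3);      // room for 3 gain factors
 * if (q) {
 *     double oldest;
 *     cqueue_enqueue(q, 1.0);
 *     cqueue_enqueue(q, 0.5);
 *     cqueue_peek(q, 1);             // == 0.5, second-oldest element
 *     cqueue_dequeue(q, &oldest);    // oldest == 1.0, one slot freed
 *     cqueue_pop(q);                 // drop 0.5 without reading it
 *     cqueue_free(q);
 * }
 * @endcode
 */
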
static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
{
    double total_weight = 0.0;
    const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
    double adjust;
    int i;

    // Pre-compute constants
    const int offset = s->filter_size / 2;
    const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
    const double c2 = 2.0 * sigma * sigma;

    // Compute weights
    for (i = 0; i < s->filter_size; i++) {
        const int x = i - offset;

        s->weights[i] = c1 * exp(-x * x / c2);
        total_weight += s->weights[i];
    }

    // Adjust weights
    adjust = 1.0 / total_weight;
    for (i = 0; i < s->filter_size; i++) {
        s->weights[i] *= adjust;
    }
}

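/*
 * Added note: the weights form a sampled Gaussian, w[i] = c1 * exp(-x*x / (2*sigma^2)),
 * centred on the middle tap, with sigma chosen so that the half-width of the
 * window (filter_size / 2) spans roughly three standard deviations. The final
 * pass rescales the weights to sum to exactly 1.0, which keeps
 * gaussian_filter() gain-neutral. As a worked example (values approximate),
 * filter_size = 3 gives sigma = 0.5 and normalized weights of about
 * {0.1065, 0.7870, 0.1065}.
 */
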
static av_cold void uninit(AVFilterContext *ctx)
{
    DynamicAudioNormalizerContext *s = ctx->priv;
    int c;

    av_freep(&s->prev_amplification_factor);
    av_freep(&s->dc_correction_value);
    av_freep(&s->compress_threshold);
    av_freep(&s->fade_factors[0]);
    av_freep(&s->fade_factors[1]);

    for (c = 0; c < s->channels; c++) {
        if (s->gain_history_original)
            cqueue_free(s->gain_history_original[c]);
        if (s->gain_history_minimum)
            cqueue_free(s->gain_history_minimum[c]);
        if (s->gain_history_smoothed)
            cqueue_free(s->gain_history_smoothed[c]);
    }

    av_freep(&s->gain_history_original);
    av_freep(&s->gain_history_minimum);
    av_freep(&s->gain_history_smoothed);

    av_freep(&s->weights);

    ff_bufqueue_discard_all(&s->queue);
}

static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    DynamicAudioNormalizerContext *s = ctx->priv;
    int c;

    uninit(ctx);

    s->frame_len =
    inlink->min_samples =
    inlink->max_samples =
    inlink->partial_buf_size = frame_size(inlink->sample_rate, s->frame_len_msec);
    av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);

    s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0]));
    s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1]));

    s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor));
    s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value));
    s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold));
    s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original));
    s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
    s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
    s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights));
    if (!s->prev_amplification_factor || !s->dc_correction_value ||
        !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] ||
        !s->gain_history_original || !s->gain_history_minimum ||
        !s->gain_history_smoothed || !s->weights)
        return AVERROR(ENOMEM);

    for (c = 0; c < inlink->channels; c++) {
        s->prev_amplification_factor[c] = 1.0;

        s->gain_history_original[c] = cqueue_create(s->filter_size);
        s->gain_history_minimum[c]  = cqueue_create(s->filter_size);
        s->gain_history_smoothed[c] = cqueue_create(s->filter_size);

        if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
            !s->gain_history_smoothed[c])
            return AVERROR(ENOMEM);
    }

    precalculate_fade_factors(s->fade_factors, s->frame_len);
    init_gaussian_filter(s);

    s->channels = inlink->channels;
    s->delay = s->filter_size;

    return 0;
}

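/*
 * Added note: setting min_samples, max_samples and partial_buf_size on the
 * input link to the same value means the framework delivers audio in frames
 * of exactly frame_len samples, which is what the per-frame analysis below
 * relies on. Each channel gets three gain-history queues of capacity
 * filter_size, and s->delay starts at filter_size, i.e. the number of frames
 * the filter may still be holding when the input ends.
 */
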
static inline double fade(double prev, double next, int pos,
                          double *fade_factors[2])
{
    return fade_factors[0][pos] * prev + fade_factors[1][pos] * next;
}

static inline double pow2(const double value)
{
    return value * value;
}

static inline double bound(const double threshold, const double val)
{
    const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0
    return erf(CONST * (val / threshold)) * threshold;
}

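/*
 * Added note: bound() is a smooth limiter built from the error function. The
 * sqrt(pi)/2 prefactor makes the slope at the origin exactly 1 (since
 * d/dx erf(c*x) at x = 0 is 2*c/sqrt(pi)), so small values pass through almost
 * unchanged while large values saturate towards +/- threshold. Roughly,
 * bound(1.0, 0.1) is about 0.0997, bound(1.0, 1.0) about 0.79, and
 * bound(1.0, 3.0) is already within a fraction of a percent of 1.0.
 */
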
static double find_peak_magnitude(AVFrame *frame, int channel)
{
    double max = DBL_EPSILON;
    int c, i;

    if (channel == -1) {
        for (c = 0; c < av_frame_get_channels(frame); c++) {
            double *data_ptr = (double *)frame->extended_data[c];

            for (i = 0; i < frame->nb_samples; i++)
                max = FFMAX(max, fabs(data_ptr[i]));
        }
    } else {
        double *data_ptr = (double *)frame->extended_data[channel];

        for (i = 0; i < frame->nb_samples; i++)
            max = FFMAX(max, fabs(data_ptr[i]));
    }

    return max;
}

static double compute_frame_rms(AVFrame *frame, int channel)
{
    double rms_value = 0.0;
    int c, i;

    if (channel == -1) {
        for (c = 0; c < av_frame_get_channels(frame); c++) {
            const double *data_ptr = (double *)frame->extended_data[c];

            for (i = 0; i < frame->nb_samples; i++) {
                rms_value += pow2(data_ptr[i]);
            }
        }

        rms_value /= frame->nb_samples * av_frame_get_channels(frame);
    } else {
        const double *data_ptr = (double *)frame->extended_data[channel];
        for (i = 0; i < frame->nb_samples; i++) {
            rms_value += pow2(data_ptr[i]);
        }

        rms_value /= frame->nb_samples;
    }

    return FFMAX(sqrt(rms_value), DBL_EPSILON);
}

static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
                                 int channel)
{
    const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel);
    const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
    return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain));
}

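/*
 * Added note: a channel argument of -1 means "measure across all channels".
 * The local gain for a frame is the smaller of the peak-based candidate
 * (peak_value / peak magnitude) and, when the r option is non-zero, the
 * RMS-based candidate (target_rms / frame RMS); that candidate is then pushed
 * through bound() so it can never exceed max_amplification. For example, with
 * the default p=0.95, a frame peaking at 0.25 yields a peak-based candidate of
 * 3.8, which the soft limiter then reduces slightly (bound() is not a hard
 * clip).
 */
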
static double minimum_filter(cqueue *q)
{
    double min = DBL_MAX;
    int i;

    for (i = 0; i < cqueue_size(q); i++) {
        min = FFMIN(min, cqueue_peek(q, i));
    }

    return min;
}

static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
{
    double result = 0.0;
    int i;

    for (i = 0; i < cqueue_size(q); i++) {
        result += cqueue_peek(q, i) * s->weights[i];
    }

    return result;
}

static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
                                double current_gain_factor)
{
    if (cqueue_empty(s->gain_history_original[channel]) ||
        cqueue_empty(s->gain_history_minimum[channel])) {
        const int pre_fill_size = s->filter_size / 2;
        const double initial_value = s->alt_boundary_mode ? current_gain_factor : 1.0;

        s->prev_amplification_factor[channel] = initial_value;

        while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
            cqueue_enqueue(s->gain_history_original[channel], initial_value);
        }
    }

    cqueue_enqueue(s->gain_history_original[channel], current_gain_factor);

    while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
        double minimum;
        av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size);

        if (cqueue_empty(s->gain_history_minimum[channel])) {
            const int pre_fill_size = s->filter_size / 2;
            double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0;
            int input = pre_fill_size;

            while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
                initial_value = FFMIN(initial_value, cqueue_peek(s->gain_history_original[channel], ++input));
                cqueue_enqueue(s->gain_history_minimum[channel], initial_value);
            }
        }

        minimum = minimum_filter(s->gain_history_original[channel]);

        cqueue_enqueue(s->gain_history_minimum[channel], minimum);

        cqueue_pop(s->gain_history_original[channel]);
    }

    while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
        double smoothed;
        av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
        smoothed = gaussian_filter(s, s->gain_history_minimum[channel]);

        cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);

        cqueue_pop(s->gain_history_minimum[channel]);
    }
}

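/*
 * Added note: the per-channel gain history is a three-stage pipeline. Raw
 * per-frame gains go into gain_history_original; once that queue holds
 * filter_size entries, its minimum (a sliding minimum filter) is appended to
 * gain_history_minimum; once that queue in turn holds filter_size entries,
 * its Gaussian-weighted average becomes the next smoothed gain that
 * amplify_frame() will actually apply. Both queues are pre-filled with
 * filter_size / 2 boundary values (1.0, or a measured gain in
 * alt_boundary_mode), so the smoothing window is centred on the frame being
 * amplified and the pipeline as a whole introduces a look-ahead of roughly
 * filter_size frames.
 */
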
static inline double update_value(double new, double old, double aggressiveness)
{
    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));
    return aggressiveness * new + (1.0 - aggressiveness) * old;
}

static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    const double diff = 1.0 / frame->nb_samples;
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);
    int c, i;

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)frame->extended_data[c];
        double current_average_value = 0.0;
        double prev_value;

        for (i = 0; i < frame->nb_samples; i++)
            current_average_value += dst_ptr[i] * diff;

        prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c];
        s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);

        for (i = 0; i < frame->nb_samples; i++) {
            dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, s->fade_factors);
        }
    }
}

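/*
 * Added note: update_value() is a simple exponential moving average, so the
 * per-channel DC estimate follows the frame mean with an aggressiveness of
 * 0.1 (new_estimate = 0.1 * frame_mean + 0.9 * old_estimate). The correction
 * subtracted from the samples is cross-faded from the previous estimate to
 * the new one over the frame, which avoids a step at the frame boundary.
 */
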
static double setup_compress_thresh(double threshold)
{
    if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
        double current_threshold = threshold;
        double step_size = 1.0;

        while (step_size > DBL_EPSILON) {
            while ((llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
                    llrint(current_threshold * (UINT64_C(1) << 63))) &&
                   (bound(current_threshold + step_size, 1.0) <= threshold)) {
                current_threshold += step_size;
            }

            step_size /= 2.0;
        }

        return current_threshold;
    } else {
        return threshold;
    }
}

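/*
 * Added note: bound(t, 1.0) always comes out below t (and below 1.0), so
 * using the requested ceiling directly as the bound() threshold would limit
 * harder than asked for. This routine therefore searches, halving the step
 * size each round, for the largest threshold t such that bound(t, 1.0) still
 * stays at or below the requested value, i.e. it approximately inverts
 * bound() at full scale so that a full-scale sample maps to the requested
 * ceiling. The llrint comparison appears to be a guard that stops adding a
 * step once it is too small to change the value at that precision.
 */
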
static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
                                    AVFrame *frame, int channel)
{
    double variance = 0.0;
    int i, c;

    if (channel == -1) {
        for (c = 0; c < s->channels; c++) {
            const double *data_ptr = (double *)frame->extended_data[c];

            for (i = 0; i < frame->nb_samples; i++) {
                variance += pow2(data_ptr[i]);  // Assume that MEAN is *zero*
            }
        }
        variance /= (s->channels * frame->nb_samples) - 1;
    } else {
        const double *data_ptr = (double *)frame->extended_data[channel];

        for (i = 0; i < frame->nb_samples; i++) {
            variance += pow2(data_ptr[i]);      // Assume that MEAN is *zero*
        }
        variance /= frame->nb_samples - 1;
    }

    return FFMAX(sqrt(variance), DBL_EPSILON);
}

static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);
    int c, i;

    if (s->channels_coupled) {
        const double standard_deviation = compute_frame_std_dev(s, frame, -1);
        const double current_threshold  = FFMIN(1.0, s->compress_factor * standard_deviation);

        const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
        double prev_actual_thresh, curr_actual_thresh;
        s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));

        prev_actual_thresh = setup_compress_thresh(prev_value);
        curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);

        for (c = 0; c < s->channels; c++) {
            double *const dst_ptr = (double *)frame->extended_data[c];
            for (i = 0; i < frame->nb_samples; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    } else {
        for (c = 0; c < s->channels; c++) {
            const double standard_deviation = compute_frame_std_dev(s, frame, c);
            const double current_threshold  = setup_compress_thresh(FFMIN(1.0, s->compress_factor * standard_deviation));

            const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
            double prev_actual_thresh, curr_actual_thresh;
            double *dst_ptr;
            s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);

            prev_actual_thresh = setup_compress_thresh(prev_value);
            curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);

            dst_ptr = (double *)frame->extended_data[c];
            for (i = 0; i < frame->nb_samples; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    }
}

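/*
 * Added note: the optional compressor (s option) runs before gain analysis.
 * The soft-clipping ceiling for a frame is compress_factor times the frame's
 * standard deviation, capped at 1.0, smoothed across frames with an
 * aggressiveness of 1/3 and converted by setup_compress_thresh(); each sample
 * is then pushed through bound() with a per-sample threshold cross-faded from
 * the previous frame's value. With channel coupling enabled one threshold is
 * shared by all channels, otherwise each channel gets its own.
 */
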
static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    if (s->dc_correction) {
        perform_dc_correction(s, frame);
    }

    if (s->compress_factor > DBL_EPSILON) {
        perform_compression(s, frame);
    }

    if (s->channels_coupled) {
        const double current_gain_factor = get_max_local_gain(s, frame, -1);
        int c;

        for (c = 0; c < s->channels; c++)
            update_gain_history(s, c, current_gain_factor);
    } else {
        int c;

        for (c = 0; c < s->channels; c++)
            update_gain_history(s, c, get_max_local_gain(s, frame, c));
    }
}

static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    int c, i;

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)frame->extended_data[c];
        double current_amplification_factor;

        cqueue_dequeue(s->gain_history_smoothed[c], &current_amplification_factor);

        for (i = 0; i < frame->nb_samples; i++) {
            const double amplification_factor = fade(s->prev_amplification_factor[c],
                                                     current_amplification_factor, i,
                                                     s->fade_factors);

            dst_ptr[i] *= amplification_factor;

            if (fabs(dst_ptr[i]) > s->peak_value)
                dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]);
        }

        s->prev_amplification_factor[c] = current_amplification_factor;
    }
}

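/*
 * Added note: amplify_frame() consumes one smoothed gain per channel (the one
 * computed for the frame now leaving the delay buffer), ramps linearly from
 * the previous frame's gain to it across the frame via fade(), and finally
 * hard-clips any sample that still exceeds peak_value. The minimum filter and
 * Gaussian smoothing upstream are what keep these per-frame gain changes
 * gradual.
 */
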
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFilterLink *outlink = inlink->dst->outputs[0];
    int ret = 0;

    if (!cqueue_empty(s->gain_history_smoothed[0])) {
        AVFrame *out = ff_bufqueue_get(&s->queue);

        amplify_frame(s, out);
        ret = ff_filter_frame(outlink, out);
    }

    analyze_frame(s, in);
    ff_bufqueue_add(ctx, &s->queue, in);

    return ret;
}

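/*
 * Added note: every incoming frame is analysed and parked in the FIFO; a
 * buffered frame is only released once a smoothed gain exists for it, so the
 * output lags the input by roughly filter_size frames (on the order of 15
 * seconds with the defaults of g=31 and f=500).
 */
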
static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
                        AVFilterLink *outlink)
{
    AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len);
    int c, i;

    if (!out)
        return AVERROR(ENOMEM);

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)out->extended_data[c];

        for (i = 0; i < out->nb_samples; i++) {
            dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? FFMIN(s->peak_value, s->target_rms) : s->peak_value);
            if (s->dc_correction) {
                dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1;
                dst_ptr[i] += s->dc_correction_value[c];
            }
        }
    }

    s->delay--;
    return filter_frame(inlink, out);
}

static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    DynamicAudioNormalizerContext *s = ctx->priv;
    int ret = 0;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF && !ctx->is_disabled && s->delay) {
        if (!cqueue_empty(s->gain_history_smoothed[0])) {
            ret = flush_buffer(s, ctx->inputs[0], outlink);
        } else if (s->queue.available) {
            AVFrame *out = ff_bufqueue_get(&s->queue);

            ret = ff_filter_frame(outlink, out);
        }
    }

    return ret;
}

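/*
 * Added note: at end of stream the smoothing pipeline still holds real frames
 * that never received a smoothed gain. request_frame() therefore keeps
 * feeding synthetic filler frames (near-silence in alt_boundary_mode,
 * otherwise samples at the peak/RMS level) through filter_frame() to drain
 * the buffered audio, decrementing s->delay each time; any frames left over
 * once no smoothed gains remain are passed through unmodified.
 */
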
static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
    {
        .name           = "default",
        .type           = AVMEDIA_TYPE_AUDIO,
        .filter_frame   = filter_frame,
        .config_props   = config_input,
        .needs_writable = 1,
    },
    { NULL }
};

static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_af_dynaudnorm = {
    .name          = "dynaudnorm",
    .description   = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
    .query_formats = query_formats,
    .priv_size     = sizeof(DynamicAudioNormalizerContext),
    .init          = init,
    .uninit        = uninit,
    .inputs        = avfilter_af_dynaudnorm_inputs,
    .outputs       = avfilter_af_dynaudnorm_outputs,
    .priv_class    = &dynaudnorm_class,
};
