LCOV - code coverage report
Current view: top level - libavfilter - af_loudnorm.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 465 0.0 %
Date: 2017-12-14 19:11:59 Functions: 0 11 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : /* http://k.ylo.ph/2016/04/04/loudnorm.html */
      22             : 
      23             : #include "libavutil/opt.h"
      24             : #include "avfilter.h"
      25             : #include "internal.h"
      26             : #include "audio.h"
      27             : #include "ebur128.h"
      28             : 
      29             : enum FrameType {
      30             :     FIRST_FRAME,
      31             :     INNER_FRAME,
      32             :     FINAL_FRAME,
      33             :     LINEAR_MODE,
      34             :     FRAME_NB
      35             : };
      36             : 
      37             : enum LimiterState {
      38             :     OUT,
      39             :     ATTACK,
      40             :     SUSTAIN,
      41             :     RELEASE,
      42             :     STATE_NB
      43             : };
      44             : 
      45             : enum PrintFormat {
      46             :     NONE,
      47             :     JSON,
      48             :     SUMMARY,
      49             :     PF_NB
      50             : };
      51             : 
      52             : typedef struct LoudNormContext {
      53             :     const AVClass *class;
      54             :     double target_i;
      55             :     double target_lra;
      56             :     double target_tp;
      57             :     double measured_i;
      58             :     double measured_lra;
      59             :     double measured_tp;
      60             :     double measured_thresh;
      61             :     double offset;
      62             :     int linear;
      63             :     int dual_mono;
      64             :     enum PrintFormat print_format;
      65             : 
      66             :     double *buf;
      67             :     int buf_size;
      68             :     int buf_index;
      69             :     int prev_buf_index;
      70             : 
      71             :     double delta[30];
      72             :     double weights[21];
      73             :     double prev_delta;
      74             :     int index;
      75             : 
      76             :     double gain_reduction[2];
      77             :     double *limiter_buf;
      78             :     double *prev_smp;
      79             :     int limiter_buf_index;
      80             :     int limiter_buf_size;
      81             :     enum LimiterState limiter_state;
      82             :     int peak_index;
      83             :     int env_index;
      84             :     int env_cnt;
      85             :     int attack_length;
      86             :     int release_length;
      87             : 
      88             :     int64_t pts;
      89             :     enum FrameType frame_type;
      90             :     int above_threshold;
      91             :     int prev_nb_samples;
      92             :     int channels;
      93             : 
      94             :     FFEBUR128State *r128_in;
      95             :     FFEBUR128State *r128_out;
      96             : } LoudNormContext;
      97             : 
      98             : #define OFFSET(x) offsetof(LoudNormContext, x)
      99             : #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
     100             : 
     101             : static const AVOption loudnorm_options[] = {
     102             :     { "I",                "set integrated loudness target",    OFFSET(target_i),         AV_OPT_TYPE_DOUBLE,  {.dbl = -24.},   -70.,       -5.,  FLAGS },
     103             :     { "i",                "set integrated loudness target",    OFFSET(target_i),         AV_OPT_TYPE_DOUBLE,  {.dbl = -24.},   -70.,       -5.,  FLAGS },
     104             :     { "LRA",              "set loudness range target",         OFFSET(target_lra),       AV_OPT_TYPE_DOUBLE,  {.dbl =  7.},     1.,        20.,  FLAGS },
     105             :     { "lra",              "set loudness range target",         OFFSET(target_lra),       AV_OPT_TYPE_DOUBLE,  {.dbl =  7.},     1.,        20.,  FLAGS },
     106             :     { "TP",               "set maximum true peak",             OFFSET(target_tp),        AV_OPT_TYPE_DOUBLE,  {.dbl = -2.},    -9.,         0.,  FLAGS },
     107             :     { "tp",               "set maximum true peak",             OFFSET(target_tp),        AV_OPT_TYPE_DOUBLE,  {.dbl = -2.},    -9.,         0.,  FLAGS },
     108             :     { "measured_I",       "measured IL of input file",         OFFSET(measured_i),       AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,        0.,  FLAGS },
     109             :     { "measured_i",       "measured IL of input file",         OFFSET(measured_i),       AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,        0.,  FLAGS },
     110             :     { "measured_LRA",     "measured LRA of input file",        OFFSET(measured_lra),     AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},     0.,        99.,  FLAGS },
     111             :     { "measured_lra",     "measured LRA of input file",        OFFSET(measured_lra),     AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},     0.,        99.,  FLAGS },
     112             :     { "measured_TP",      "measured true peak of input file",  OFFSET(measured_tp),      AV_OPT_TYPE_DOUBLE,  {.dbl =  99.},   -99.,       99.,  FLAGS },
     113             :     { "measured_tp",      "measured true peak of input file",  OFFSET(measured_tp),      AV_OPT_TYPE_DOUBLE,  {.dbl =  99.},   -99.,       99.,  FLAGS },
     114             :     { "measured_thresh",  "measured threshold of input file",  OFFSET(measured_thresh),  AV_OPT_TYPE_DOUBLE,  {.dbl = -70.},   -99.,        0.,  FLAGS },
     115             :     { "offset",           "set offset gain",                   OFFSET(offset),           AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,       99.,  FLAGS },
     116             :     { "linear",           "normalize linearly if possible",    OFFSET(linear),           AV_OPT_TYPE_BOOL,    {.i64 =  1},        0,         1,  FLAGS },
     117             :     { "dual_mono",        "treat mono input as dual-mono",     OFFSET(dual_mono),        AV_OPT_TYPE_BOOL,    {.i64 =  0},        0,         1,  FLAGS },
     118             :     { "print_format",     "set print format for stats",        OFFSET(print_format),     AV_OPT_TYPE_INT,     {.i64 =  NONE},  NONE,  PF_NB -1,  FLAGS, "print_format" },
     119             :     {     "none",         0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  NONE},     0,         0,  FLAGS, "print_format" },
     120             :     {     "json",         0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  JSON},     0,         0,  FLAGS, "print_format" },
     121             :     {     "summary",      0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  SUMMARY},  0,         0,  FLAGS, "print_format" },
     122             :     { NULL }
     123             : };
     124             : 
     125             : AVFILTER_DEFINE_CLASS(loudnorm);
     126             : 
     127           0 : static inline int frame_size(int sample_rate, int frame_len_msec)
     128             : {
     129           0 :     const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
     130           0 :     return frame_size + (frame_size % 2);
     131             : }
     132             : 
     133           0 : static void init_gaussian_filter(LoudNormContext *s)
     134             : {
     135           0 :     double total_weight = 0.0;
     136           0 :     const double sigma = 3.5;
     137             :     double adjust;
     138             :     int i;
     139             : 
     140           0 :     const int offset = 21 / 2;
     141           0 :     const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
     142           0 :     const double c2 = 2.0 * pow(sigma, 2.0);
     143             : 
     144           0 :     for (i = 0; i < 21; i++) {
     145           0 :         const int x = i - offset;
     146           0 :         s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
     147           0 :         total_weight += s->weights[i];
     148             :     }
     149             : 
     150           0 :     adjust = 1.0 / total_weight;
     151           0 :     for (i = 0; i < 21; i++)
     152           0 :         s->weights[i] *= adjust;
     153           0 : }
     154             : 
     155           0 : static double gaussian_filter(LoudNormContext *s, int index)
     156             : {
     157           0 :     double result = 0.;
     158             :     int i;
     159             : 
     160           0 :     index = index - 10 > 0 ? index - 10 : index + 20;
     161           0 :     for (i = 0; i < 21; i++)
     162           0 :         result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
     163             : 
     164           0 :     return result;
     165             : }
     166             : 
     167           0 : static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
     168             : {
     169             :     int n, c, i, index;
     170             :     double ceiling;
     171             :     double *buf;
     172             : 
     173           0 :     *peak_delta = -1;
     174           0 :     buf = s->limiter_buf;
     175           0 :     ceiling = s->target_tp;
     176             : 
     177           0 :     index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
     178           0 :     if (index >= s->limiter_buf_size)
     179           0 :         index -= s->limiter_buf_size;
     180             : 
     181           0 :     if (s->frame_type == FIRST_FRAME) {
     182           0 :         for (c = 0; c < channels; c++)
     183           0 :             s->prev_smp[c] = fabs(buf[index + c - channels]);
     184             :     }
     185             : 
     186           0 :     for (n = 0; n < nb_samples; n++) {
     187           0 :         for (c = 0; c < channels; c++) {
     188             :             double this, next, max_peak;
     189             : 
     190           0 :             this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
     191           0 :             next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
     192             : 
     193           0 :             if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
     194             :                 int detected;
     195             : 
     196           0 :                 detected = 1;
     197           0 :                 for (i = 2; i < 12; i++) {
     198           0 :                     next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
     199           0 :                     if (next > this) {
     200           0 :                         detected = 0;
     201           0 :                         break;
     202             :                     }
     203             :                 }
     204             : 
     205           0 :                 if (!detected)
     206           0 :                     continue;
     207             : 
     208           0 :                 for (c = 0; c < channels; c++) {
     209           0 :                     if (c == 0 || fabs(buf[index + c]) > max_peak)
     210           0 :                         max_peak = fabs(buf[index + c]);
     211             : 
     212           0 :                     s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
     213             :                 }
     214             : 
     215           0 :                 *peak_delta = n;
     216           0 :                 s->peak_index = index;
     217           0 :                 *peak_value = max_peak;
     218           0 :                 return;
     219             :             }
     220             : 
     221           0 :             s->prev_smp[c] = this;
     222             :         }
     223             : 
     224           0 :         index += channels;
     225           0 :         if (index >= s->limiter_buf_size)
     226           0 :             index -= s->limiter_buf_size;
     227             :     }
     228             : }
     229             : 
     230           0 : static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
     231             : {
     232             :     int n, c, index, peak_delta, smp_cnt;
     233             :     double ceiling, peak_value;
     234             :     double *buf;
     235             : 
     236           0 :     buf = s->limiter_buf;
     237           0 :     ceiling = s->target_tp;
     238           0 :     index = s->limiter_buf_index;
     239           0 :     smp_cnt = 0;
     240             : 
     241           0 :     if (s->frame_type == FIRST_FRAME) {
     242             :         double max;
     243             : 
     244           0 :         max = 0.;
     245           0 :         for (n = 0; n < 1920; n++) {
     246           0 :             for (c = 0; c < channels; c++) {
     247           0 :               max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
     248             :             }
     249           0 :             buf += channels;
     250             :         }
     251             : 
     252           0 :         if (max > ceiling) {
     253           0 :             s->gain_reduction[1] = ceiling / max;
     254           0 :             s->limiter_state = SUSTAIN;
     255           0 :             buf = s->limiter_buf;
     256             : 
     257           0 :             for (n = 0; n < 1920; n++) {
     258           0 :                 for (c = 0; c < channels; c++) {
     259             :                     double env;
     260           0 :                     env = s->gain_reduction[1];
     261           0 :                     buf[c] *= env;
     262             :                 }
     263           0 :                 buf += channels;
     264             :             }
     265             :         }
     266             : 
     267           0 :         buf = s->limiter_buf;
     268             :     }
     269             : 
     270             :     do {
     271             : 
     272           0 :         switch(s->limiter_state) {
     273           0 :         case OUT:
     274           0 :             detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
     275           0 :             if (peak_delta != -1) {
     276           0 :                 s->env_cnt = 0;
     277           0 :                 smp_cnt += (peak_delta - s->attack_length);
     278           0 :                 s->gain_reduction[0] = 1.;
     279           0 :                 s->gain_reduction[1] = ceiling / peak_value;
     280           0 :                 s->limiter_state = ATTACK;
     281             : 
     282           0 :                 s->env_index = s->peak_index - (s->attack_length * channels);
     283           0 :                 if (s->env_index < 0)
     284           0 :                     s->env_index += s->limiter_buf_size;
     285             : 
     286           0 :                 s->env_index += (s->env_cnt * channels);
     287           0 :                 if (s->env_index > s->limiter_buf_size)
     288           0 :                     s->env_index -= s->limiter_buf_size;
     289             : 
     290             :             } else {
     291           0 :                 smp_cnt = nb_samples;
     292             :             }
     293           0 :             break;
     294             : 
     295           0 :         case ATTACK:
     296           0 :             for (; s->env_cnt < s->attack_length; s->env_cnt++) {
     297           0 :                 for (c = 0; c < channels; c++) {
     298             :                     double env;
     299           0 :                     env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
     300           0 :                     buf[s->env_index + c] *= env;
     301             :                 }
     302             : 
     303           0 :                 s->env_index += channels;
     304           0 :                 if (s->env_index >= s->limiter_buf_size)
     305           0 :                     s->env_index -= s->limiter_buf_size;
     306             : 
     307           0 :                 smp_cnt++;
     308           0 :                 if (smp_cnt >= nb_samples) {
     309           0 :                     s->env_cnt++;
     310           0 :                     break;
     311             :                 }
     312             :             }
     313             : 
     314           0 :             if (smp_cnt < nb_samples) {
     315           0 :                 s->env_cnt = 0;
     316           0 :                 s->attack_length = 1920;
     317           0 :                 s->limiter_state = SUSTAIN;
     318             :             }
     319           0 :             break;
     320             : 
     321           0 :         case SUSTAIN:
     322           0 :             detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
     323           0 :             if (peak_delta == -1) {
     324           0 :                 s->limiter_state = RELEASE;
     325           0 :                 s->gain_reduction[0] = s->gain_reduction[1];
     326           0 :                 s->gain_reduction[1] = 1.;
     327           0 :                 s->env_cnt = 0;
     328           0 :                 break;
     329             :             } else {
     330             :                 double gain_reduction;
     331           0 :                 gain_reduction = ceiling / peak_value;
     332             : 
     333           0 :                 if (gain_reduction < s->gain_reduction[1]) {
     334           0 :                     s->limiter_state = ATTACK;
     335             : 
     336           0 :                     s->attack_length = peak_delta;
     337           0 :                     if (s->attack_length <= 1)
     338           0 :                         s->attack_length =  2;
     339             : 
     340           0 :                     s->gain_reduction[0] = s->gain_reduction[1];
     341           0 :                     s->gain_reduction[1] = gain_reduction;
     342           0 :                     s->env_cnt = 0;
     343           0 :                     break;
     344             :                 }
     345             : 
     346           0 :                 for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
     347           0 :                     for (c = 0; c < channels; c++) {
     348             :                         double env;
     349           0 :                         env = s->gain_reduction[1];
     350           0 :                         buf[s->env_index + c] *= env;
     351             :                     }
     352             : 
     353           0 :                     s->env_index += channels;
     354           0 :                     if (s->env_index >= s->limiter_buf_size)
     355           0 :                         s->env_index -= s->limiter_buf_size;
     356             : 
     357           0 :                     smp_cnt++;
     358           0 :                     if (smp_cnt >= nb_samples) {
     359           0 :                         s->env_cnt++;
     360           0 :                         break;
     361             :                     }
     362             :                 }
     363             :             }
     364           0 :             break;
     365             : 
     366           0 :         case RELEASE:
     367           0 :             for (; s->env_cnt < s->release_length; s->env_cnt++) {
     368           0 :                 for (c = 0; c < channels; c++) {
     369             :                     double env;
     370           0 :                     env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
     371           0 :                     buf[s->env_index + c] *= env;
     372             :                 }
     373             : 
     374           0 :                 s->env_index += channels;
     375           0 :                 if (s->env_index >= s->limiter_buf_size)
     376           0 :                     s->env_index -= s->limiter_buf_size;
     377             : 
     378           0 :                 smp_cnt++;
     379           0 :                 if (smp_cnt >= nb_samples) {
     380           0 :                     s->env_cnt++;
     381           0 :                     break;
     382             :                 }
     383             :             }
     384             : 
     385           0 :             if (smp_cnt < nb_samples) {
     386           0 :                 s->env_cnt = 0;
     387           0 :                 s->limiter_state = OUT;
     388             :             }
     389             : 
     390           0 :             break;
     391             :         }
     392             : 
     393           0 :     } while (smp_cnt < nb_samples);
     394             : 
     395           0 :     for (n = 0; n < nb_samples; n++) {
     396           0 :         for (c = 0; c < channels; c++) {
     397           0 :             out[c] = buf[index + c];
     398           0 :             if (fabs(out[c]) > ceiling) {
     399           0 :                 out[c] = ceiling * (out[c] < 0 ? -1 : 1);
     400             :             }
     401             :         }
     402           0 :         out += channels;
     403           0 :         index += channels;
     404           0 :         if (index >= s->limiter_buf_size)
     405           0 :             index -= s->limiter_buf_size;
     406             :     }
     407           0 : }
     408             : 
     409           0 : static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     410             : {
     411           0 :     AVFilterContext *ctx = inlink->dst;
     412           0 :     LoudNormContext *s = ctx->priv;
     413           0 :     AVFilterLink *outlink = ctx->outputs[0];
     414             :     AVFrame *out;
     415             :     const double *src;
     416             :     double *dst;
     417             :     double *buf;
     418             :     double *limiter_buf;
     419             :     int i, n, c, subframe_length, src_index;
     420             :     double gain, gain_next, env_global, env_shortterm,
     421             :     global, shortterm, lra, relative_threshold;
     422             : 
     423           0 :     if (av_frame_is_writable(in)) {
     424           0 :         out = in;
     425             :     } else {
     426           0 :         out = ff_get_audio_buffer(inlink, in->nb_samples);
     427           0 :         if (!out) {
     428           0 :             av_frame_free(&in);
     429           0 :             return AVERROR(ENOMEM);
     430             :         }
     431           0 :         av_frame_copy_props(out, in);
     432             :     }
     433             : 
     434           0 :     out->pts = s->pts;
     435           0 :     src = (const double *)in->data[0];
     436           0 :     dst = (double *)out->data[0];
     437           0 :     buf = s->buf;
     438           0 :     limiter_buf = s->limiter_buf;
     439             : 
     440           0 :     ff_ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
     441             : 
     442           0 :     if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
     443             :         double offset, offset_tp, true_peak;
     444             : 
     445           0 :         ff_ebur128_loudness_global(s->r128_in, &global);
     446           0 :         for (c = 0; c < inlink->channels; c++) {
     447             :             double tmp;
     448           0 :             ff_ebur128_sample_peak(s->r128_in, c, &tmp);
     449           0 :             if (c == 0 || tmp > true_peak)
     450           0 :                 true_peak = tmp;
     451             :         }
     452             : 
     453           0 :         offset    = s->target_i - global;
     454           0 :         offset_tp = true_peak + offset;
     455           0 :         s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
     456           0 :         s->offset = pow(10., s->offset / 20.);
     457           0 :         s->frame_type = LINEAR_MODE;
     458             :     }
     459             : 
     460           0 :     switch (s->frame_type) {
     461           0 :     case FIRST_FRAME:
     462           0 :         for (n = 0; n < in->nb_samples; n++) {
     463           0 :             for (c = 0; c < inlink->channels; c++) {
     464           0 :                 buf[s->buf_index + c] = src[c];
     465             :             }
     466           0 :             src += inlink->channels;
     467           0 :             s->buf_index += inlink->channels;
     468             :         }
     469             : 
     470           0 :         ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
     471             : 
     472           0 :         if (shortterm < s->measured_thresh) {
     473           0 :             s->above_threshold = 0;
     474           0 :             env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
     475             :         } else {
     476           0 :             s->above_threshold = 1;
     477           0 :             env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
     478             :         }
     479             : 
     480           0 :         for (n = 0; n < 30; n++)
     481           0 :             s->delta[n] = pow(10., env_shortterm / 20.);
     482           0 :         s->prev_delta = s->delta[s->index];
     483             : 
     484           0 :         s->buf_index =
     485           0 :         s->limiter_buf_index = 0;
     486             : 
     487           0 :         for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
     488           0 :             for (c = 0; c < inlink->channels; c++) {
     489           0 :                 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
     490             :             }
     491           0 :             s->limiter_buf_index += inlink->channels;
     492           0 :             if (s->limiter_buf_index >= s->limiter_buf_size)
     493           0 :                 s->limiter_buf_index -= s->limiter_buf_size;
     494             : 
     495           0 :             s->buf_index += inlink->channels;
     496             :         }
     497             : 
     498           0 :         subframe_length = frame_size(inlink->sample_rate, 100);
     499           0 :         true_peak_limiter(s, dst, subframe_length, inlink->channels);
     500           0 :         ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);
     501             : 
     502           0 :         s->pts +=
     503           0 :         out->nb_samples =
     504           0 :         inlink->min_samples =
     505           0 :         inlink->max_samples =
     506           0 :         inlink->partial_buf_size = subframe_length;
     507             : 
     508           0 :         s->frame_type = INNER_FRAME;
     509           0 :         break;
     510             : 
     511           0 :     case INNER_FRAME:
     512           0 :         gain      = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
     513           0 :         gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
     514             : 
     515           0 :         for (n = 0; n < in->nb_samples; n++) {
     516           0 :             for (c = 0; c < inlink->channels; c++) {
     517           0 :                 buf[s->prev_buf_index + c] = src[c];
     518           0 :                 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
     519             :             }
     520           0 :             src += inlink->channels;
     521             : 
     522           0 :             s->limiter_buf_index += inlink->channels;
     523           0 :             if (s->limiter_buf_index >= s->limiter_buf_size)
     524           0 :                 s->limiter_buf_index -= s->limiter_buf_size;
     525             : 
     526           0 :             s->prev_buf_index += inlink->channels;
     527           0 :             if (s->prev_buf_index >= s->buf_size)
     528           0 :                 s->prev_buf_index -= s->buf_size;
     529             : 
     530           0 :             s->buf_index += inlink->channels;
     531           0 :             if (s->buf_index >= s->buf_size)
     532           0 :                 s->buf_index -= s->buf_size;
     533             :         }
     534             : 
     535           0 :         subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
     536           0 :         s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
     537             : 
     538           0 :         true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
     539           0 :         ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
     540             : 
     541           0 :         ff_ebur128_loudness_range(s->r128_in, &lra);
     542           0 :         ff_ebur128_loudness_global(s->r128_in, &global);
     543           0 :         ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
     544           0 :         ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);
     545             : 
     546           0 :         if (s->above_threshold == 0) {
     547             :             double shortterm_out;
     548             : 
     549           0 :             if (shortterm > s->measured_thresh)
     550           0 :                 s->prev_delta *= 1.0058;
     551             : 
     552           0 :             ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
     553           0 :             if (shortterm_out >= s->target_i)
     554           0 :                 s->above_threshold = 1;
     555             :         }
     556             : 
     557           0 :         if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
     558           0 :             s->delta[s->index] = s->prev_delta;
     559             :         } else {
     560           0 :             env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
     561           0 :             env_shortterm = s->target_i - shortterm;
     562           0 :             s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
     563             :         }
     564             : 
     565           0 :         s->prev_delta = s->delta[s->index];
     566           0 :         s->index++;
     567           0 :         if (s->index >= 30)
     568           0 :             s->index -= 30;
     569           0 :         s->prev_nb_samples = in->nb_samples;
     570           0 :         s->pts += in->nb_samples;
     571           0 :         break;
     572             : 
     573           0 :     case FINAL_FRAME:
     574           0 :         gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
     575           0 :         s->limiter_buf_index = 0;
     576           0 :         src_index = 0;
     577             : 
     578           0 :         for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
     579           0 :             for (c = 0; c < inlink->channels; c++) {
     580           0 :                 s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
     581             :             }
     582           0 :             src_index += inlink->channels;
     583             : 
     584           0 :             s->limiter_buf_index += inlink->channels;
     585           0 :             if (s->limiter_buf_index >= s->limiter_buf_size)
     586           0 :                 s->limiter_buf_index -= s->limiter_buf_size;
     587             :         }
     588             : 
     589           0 :         subframe_length = frame_size(inlink->sample_rate, 100);
     590           0 :         for (i = 0; i < in->nb_samples / subframe_length; i++) {
     591           0 :             true_peak_limiter(s, dst, subframe_length, inlink->channels);
     592             : 
     593           0 :             for (n = 0; n < subframe_length; n++) {
     594           0 :                 for (c = 0; c < inlink->channels; c++) {
     595           0 :                     if (src_index < (in->nb_samples * inlink->channels)) {
     596           0 :                         limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
     597             :                     } else {
     598           0 :                         limiter_buf[s->limiter_buf_index + c] = 0.;
     599             :                     }
     600             :                 }
     601             : 
     602           0 :                 if (src_index < (in->nb_samples * inlink->channels))
     603           0 :                     src_index += inlink->channels;
     604             : 
     605           0 :                 s->limiter_buf_index += inlink->channels;
     606           0 :                 if (s->limiter_buf_index >= s->limiter_buf_size)
     607           0 :                     s->limiter_buf_index -= s->limiter_buf_size;
     608             :             }
     609             : 
     610           0 :             dst += (subframe_length * inlink->channels);
     611             :         }
     612             : 
     613           0 :         dst = (double *)out->data[0];
     614           0 :         ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
     615           0 :         break;
     616             : 
     617           0 :     case LINEAR_MODE:
     618           0 :         for (n = 0; n < in->nb_samples; n++) {
     619           0 :             for (c = 0; c < inlink->channels; c++) {
     620           0 :                 dst[c] = src[c] * s->offset;
     621             :             }
     622           0 :             src += inlink->channels;
     623           0 :             dst += inlink->channels;
     624             :         }
     625             : 
     626           0 :         dst = (double *)out->data[0];
     627           0 :         ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
     628           0 :         s->pts += in->nb_samples;
     629           0 :         break;
     630             :     }
     631             : 
     632           0 :     if (in != out)
     633           0 :         av_frame_free(&in);
     634             : 
     635           0 :     return ff_filter_frame(outlink, out);
     636             : }
     637             : 
     638           0 : static int request_frame(AVFilterLink *outlink) /* Output-pad callback: request a frame from the input link; on EOF while in INNER_FRAME state, emit one last frame built from s->buf. Returns the ff_request_frame()/filter_frame() result or AVERROR(ENOMEM). */
     639             : {
     640             :     int ret;
     641           0 :     AVFilterContext *ctx = outlink->src;
     642           0 :     AVFilterLink *inlink = ctx->inputs[0];
     643           0 :     LoudNormContext *s = ctx->priv;
     644             : 
     645           0 :     ret = ff_request_frame(inlink);
     646           0 :     if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) { /* frame_type is set to FINAL_FRAME below, so this branch is not re-entered on later calls */
     647             :         double *src; /* despite the name, this is the write cursor into the newly allocated frame */
     648             :         double *buf; /* read side: the ring buffer s->buf */
     649             :         int nb_samples, n, c, offset;
     650             :         AVFrame *frame;
     651             :         /* The prev_nb_samples terms cancel: nb_samples == (buf_size / channels) - frame_size(sample_rate, 100). */
     652           0 :         nb_samples  = (s->buf_size / inlink->channels) - s->prev_nb_samples;
     653           0 :         nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
     654             : 
     655           0 :         frame = ff_get_audio_buffer(outlink, nb_samples);
     656           0 :         if (!frame)
     657           0 :             return AVERROR(ENOMEM);
     658           0 :         frame->nb_samples = nb_samples;
     659             : 
     660           0 :         buf = s->buf;
     661           0 :         src = (double *)frame->data[0];
     662             :         /* Step buf_index back by (limiter_buf_size / channels - frame_size(sample_rate, 100)) * channels, wrapping within buf_size. */
     663           0 :         offset  = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
     664           0 :         offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
     665           0 :         s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
     666             :         /* Copy nb_samples interleaved samples from the ring buffer into the frame, wrapping buf_index at buf_size. */
     667           0 :         for (n = 0; n < nb_samples; n++) {
     668           0 :             for (c = 0; c < inlink->channels; c++) {
     669           0 :                 src[c] = buf[s->buf_index + c];
     670             :             }
     671           0 :             src += inlink->channels;
     672           0 :             s->buf_index += inlink->channels;
     673           0 :             if (s->buf_index >= s->buf_size)
     674           0 :                 s->buf_index -= s->buf_size;
     675             :         }
     676             :         /* Run the synthesized frame through filter_frame() as the FINAL_FRAME; its return value replaces the EOF code. */
     677           0 :         s->frame_type = FINAL_FRAME;
     678           0 :         ret = filter_frame(inlink, frame);
     679             :     }
     680           0 :     return ret;
     681             : }
     682             : 
     683           0 : static int query_formats(AVFilterContext *ctx) /* Format negotiation: channel layouts from ff_all_channel_counts(), sample format AV_SAMPLE_FMT_DBL only, and (unless LINEAR_MODE) sample rate 192000 on both links. Returns 0 or a negative error code. */
     684             : {
     685           0 :     LoudNormContext *s = ctx->priv;
     686             :     AVFilterFormats *formats;
     687             :     AVFilterChannelLayouts *layouts;
     688           0 :     AVFilterLink *inlink = ctx->inputs[0];
     689           0 :     AVFilterLink *outlink = ctx->outputs[0];
     690             :     static const int input_srate[] = {192000, -1}; /* presumably a -1-terminated list, as ff_make_format_list() is given no length — confirm */
     691             :     static const enum AVSampleFormat sample_fmts[] = {
     692             :         AV_SAMPLE_FMT_DBL,
     693             :         AV_SAMPLE_FMT_NONE
     694             :     };
     695             :     int ret;
     696             : 
     697           0 :     layouts = ff_all_channel_counts();
     698           0 :     if (!layouts)
     699           0 :         return AVERROR(ENOMEM);
     700           0 :     ret = ff_set_common_channel_layouts(ctx, layouts);
     701           0 :     if (ret < 0)
     702           0 :         return ret;
     703             : 
     704           0 :     formats = ff_make_format_list(sample_fmts);
     705           0 :     if (!formats)
     706           0 :         return AVERROR(ENOMEM);
     707           0 :     ret = ff_set_common_formats(ctx, formats);
     708           0 :     if (ret < 0)
     709           0 :         return ret;
     710             :     /* Outside LINEAR_MODE, the same sample-rate list is referenced from the input link's out_samplerates and the output link's in_samplerates. */
     711           0 :     if (s->frame_type != LINEAR_MODE) {
     712           0 :         formats = ff_make_format_list(input_srate);
     713           0 :         if (!formats)
     714           0 :             return AVERROR(ENOMEM);
     715           0 :         ret = ff_formats_ref(formats, &inlink->out_samplerates);
     716           0 :         if (ret < 0)
     717           0 :             return ret;
     718           0 :         ret = ff_formats_ref(formats, &outlink->in_samplerates);
     719           0 :         if (ret < 0)
     720           0 :             return ret;
     721             :     }
     722             : 
     723           0 :     return 0;
     724             : }
     725             : 
     726           0 : static int config_input(AVFilterLink *inlink) /* Input-pad config callback: creates the two EBU R128 states, allocates the working buffers and resets the per-stream state in LoudNormContext. Returns 0, or AVERROR(ENOMEM) if any allocation fails. */
     727             : {
     728           0 :     AVFilterContext *ctx = inlink->dst;
     729           0 :     LoudNormContext *s = ctx->priv;
     730             :     /* r128_in and r128_out are created with identical channel count, sample rate and mode flags. */
     731           0 :     s->r128_in = ff_ebur128_init(inlink->channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
     732           0 :     if (!s->r128_in)
     733           0 :         return AVERROR(ENOMEM);
     734             : 
     735           0 :     s->r128_out = ff_ebur128_init(inlink->channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
     736           0 :     if (!s->r128_out)
     737           0 :         return AVERROR(ENOMEM);
     738             :     /* With a single channel and the dual_mono option set, channel 0 of both states is flagged FF_EBUR128_DUAL_MONO. */
     739           0 :     if (inlink->channels == 1 && s->dual_mono) {
     740           0 :         ff_ebur128_set_channel(s->r128_in,  0, FF_EBUR128_DUAL_MONO);
     741           0 :         ff_ebur128_set_channel(s->r128_out, 0, FF_EBUR128_DUAL_MONO);
     742             :     }
     743             :     /* Buffer sizes are in doubles: frame_size(sample_rate, N) * channels; N is presumably a duration in milliseconds — confirm against frame_size(). */
     744           0 :     s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
     745           0 :     s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
     746           0 :     if (!s->buf)
     747           0 :         return AVERROR(ENOMEM);
     748             : 
     749           0 :     s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
     750           0 :     s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf)); /* NOTE(review): allocated with s->buf_size, not s->limiter_buf_size — larger than the index wrap limit; confirm whether intentional */
     751           0 :     if (!s->limiter_buf)
     752           0 :         return AVERROR(ENOMEM);
     753             : 
     754           0 :     s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp)); /* one entry per channel */
     755           0 :     if (!s->prev_smp)
     756           0 :         return AVERROR(ENOMEM);
     757             : 
     758           0 :     init_gaussian_filter(s);
     759             :     /* Outside LINEAR_MODE, pin min_samples, max_samples and partial_buf_size on the input link to frame_size(sample_rate, 3000). */
     760           0 :     if (s->frame_type != LINEAR_MODE) {
     761           0 :         inlink->min_samples =
     762           0 :         inlink->max_samples =
     763           0 :         inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
     764             :     }
     765             :     /* Reset the running pts and all ring-buffer indices to 0. */
     766           0 :     s->pts =
     767           0 :     s->buf_index =
     768           0 :     s->prev_buf_index =
     769           0 :     s->limiter_buf_index = 0;
     770           0 :     s->channels = inlink->channels;
     771           0 :     s->index = 1;
     772           0 :     s->limiter_state = OUT;
     773           0 :     s->offset = pow(10., s->offset / 20.); /* overwritten in place with 10^(x/20), presumably dB -> linear gain; NOTE(review): a second call would convert the already-converted value */
     774           0 :     s->target_tp = pow(10., s->target_tp / 20.); /* same in-place 10^(x/20) conversion */
     775           0 :     s->attack_length = frame_size(inlink->sample_rate, 10);
     776           0 :     s->release_length = frame_size(inlink->sample_rate, 100);
     777             : 
     778           0 :     return 0;
     779             : }
     780             : 
     781           0 : static av_cold int init(AVFilterContext *ctx) /* Filter init: start in FIRST_FRAME state, or switch to LINEAR_MODE when the linear option is set and the measured values allow it. Always returns 0. */
     782             : {
     783           0 :     LoudNormContext *s = ctx->priv;
     784           0 :     s->frame_type = FIRST_FRAME;
     785             : 
     786           0 :     if (s->linear) {
     787             :         double offset, offset_tp;
     788           0 :         offset    = s->target_i - s->measured_i; /* shift needed to move measured_i onto target_i */
     789           0 :         offset_tp = s->measured_tp + offset; /* measured_tp after applying that shift */
     790             :         /* 99, -70, 0 and 0 are presumably the option defaults meaning "no measurement supplied" — confirm against the option table. */
     791           0 :         if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
     792           0 :             if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) { /* linear only if the shifted peak stays within target_tp and measured_lra within target_lra; target_tp is compared here as given, before config_input() replaces it with 10^(x/20) */
     793           0 :                 s->frame_type = LINEAR_MODE;
     794           0 :                 s->offset = offset;
     795             :             }
     796             :         }
     797             :     }
     798             : 
     799           0 :     return 0;
     800             : }
     801             : 
     802           0 : static av_cold void uninit(AVFilterContext *ctx) /* Filter teardown: if both EBU R128 states exist, log input/output statistics in the selected print_format, then destroy the states and free the buffers. */
     803             : {
     804           0 :     LoudNormContext *s = ctx->priv;
     805             :     double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
     806             :     int c;
     807             :     /* Skip the report entirely when either measurement state is missing; cleanup still runs. */
     808           0 :     if (!s->r128_in || !s->r128_out)
     809             :         goto end;
     810             :     /* Input side: loudness range, global loudness, relative threshold, and the maximum per-channel sample peak. */
     811           0 :     ff_ebur128_loudness_range(s->r128_in, &lra_in);
     812           0 :     ff_ebur128_loudness_global(s->r128_in, &i_in);
     813           0 :     ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
     814           0 :     for (c = 0; c < s->channels; c++) { /* NOTE(review): tp_in is only assigned inside this loop; if s->channels were 0 (e.g. config_input() returned early) it would be read uninitialized below — confirm */
     815             :         double tmp;
     816           0 :         ff_ebur128_sample_peak(s->r128_in, c, &tmp);
     817           0 :         if ((c == 0) || (tmp > tp_in))
     818           0 :             tp_in = tmp;
     819             :     }
     820             :     /* Output side: the same four statistics from r128_out. */
     821           0 :     ff_ebur128_loudness_range(s->r128_out, &lra_out);
     822           0 :     ff_ebur128_loudness_global(s->r128_out, &i_out);
     823           0 :     ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
     824           0 :     for (c = 0; c < s->channels; c++) { /* NOTE(review): same uninitialized-read concern for tp_out when s->channels is 0 */
     825             :         double tmp;
     826           0 :         ff_ebur128_sample_peak(s->r128_out, c, &tmp);
     827           0 :         if ((c == 0) || (tmp > tp_out))
     828           0 :             tp_out = tmp;
     829             :     }
     830             :     /* Peaks are printed as 20 * log10(peak); the values labelled "tp"/"True Peak" come from ff_ebur128_sample_peak(). target_offset is target_i - i_out. */
     831           0 :     switch(s->print_format) {
     832           0 :     case NONE:
     833           0 :         break;
     834             : 
     835           0 :     case JSON:
     836           0 :         av_log(ctx, AV_LOG_INFO,
     837             :             "\n{\n"
     838             :             "\t\"input_i\" : \"%.2f\",\n"
     839             :             "\t\"input_tp\" : \"%.2f\",\n"
     840             :             "\t\"input_lra\" : \"%.2f\",\n"
     841             :             "\t\"input_thresh\" : \"%.2f\",\n"
     842             :             "\t\"output_i\" : \"%.2f\",\n"
     843             :             "\t\"output_tp\" : \"%+.2f\",\n"
     844             :             "\t\"output_lra\" : \"%.2f\",\n"
     845             :             "\t\"output_thresh\" : \"%.2f\",\n"
     846             :             "\t\"normalization_type\" : \"%s\",\n"
     847             :             "\t\"target_offset\" : \"%.2f\"\n"
     848             :             "}\n",
     849             :             i_in,
     850           0 :             20. * log10(tp_in),
     851             :             lra_in,
     852             :             thresh_in,
     853             :             i_out,
     854           0 :             20. * log10(tp_out),
     855             :             lra_out,
     856             :             thresh_out,
     857           0 :             s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
     858           0 :             s->target_i - i_out
     859             :         );
     860           0 :         break;
     861             : 
     862           0 :     case SUMMARY:
     863           0 :         av_log(ctx, AV_LOG_INFO,
     864             :             "\n"
     865             :             "Input Integrated:   %+6.1f LUFS\n"
     866             :             "Input True Peak:    %+6.1f dBTP\n"
     867             :             "Input LRA:          %6.1f LU\n"
     868             :             "Input Threshold:    %+6.1f LUFS\n"
     869             :             "\n"
     870             :             "Output Integrated:  %+6.1f LUFS\n"
     871             :             "Output True Peak:   %+6.1f dBTP\n"
     872             :             "Output LRA:         %6.1f LU\n"
     873             :             "Output Threshold:   %+6.1f LUFS\n"
     874             :             "\n"
     875             :             "Normalization Type:   %s\n"
     876             :             "Target Offset:      %+6.1f LU\n",
     877             :             i_in,
     878           0 :             20. * log10(tp_in),
     879             :             lra_in,
     880             :             thresh_in,
     881             :             i_out,
     882           0 :             20. * log10(tp_out),
     883             :             lra_out,
     884             :             thresh_out,
     885           0 :             s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
     886           0 :             s->target_i - i_out
     887             :         );
     888           0 :         break;
     889             :     }
     890             :     /* Cleanup path, also reached directly when the report is skipped. */
     891           0 : end:
     892           0 :     if (s->r128_in)
     893           0 :         ff_ebur128_destroy(&s->r128_in);
     894           0 :     if (s->r128_out)
     895           0 :         ff_ebur128_destroy(&s->r128_out);
     896           0 :     av_freep(&s->limiter_buf);
     897           0 :     av_freep(&s->prev_smp);
     898           0 :     av_freep(&s->buf);
     899           0 : }
     900             : 
     901             : static const AVFilterPad avfilter_af_loudnorm_inputs[] = { /* Input pads: a single audio pad wired to config_input() and filter_frame(), followed by a { NULL } entry. */
     902             :     {
     903             :         .name         = "default",
     904             :         .type         = AVMEDIA_TYPE_AUDIO,
     905             :         .config_props = config_input,
     906             :         .filter_frame = filter_frame,
     907             :     },
     908             :     { NULL }
     909             : };
     910             : 
     911             : static const AVFilterPad avfilter_af_loudnorm_outputs[] = { /* Output pads: a single audio pad whose request_frame callback is request_frame(), followed by a { NULL } entry. */
     912             :     {
     913             :         .name          = "default",
     914             :         .request_frame = request_frame,
     915             :         .type          = AVMEDIA_TYPE_AUDIO,
     916             :     },
     917             :     { NULL }
     918             : };
     919             : 
     920             : AVFilter ff_af_loudnorm = { /* Filter definition for "loudnorm": ties together the private context, option class, callbacks and pad tables defined in this file. */
     921             :     .name          = "loudnorm",
     922             :     .description   = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
     923             :     .priv_size     = sizeof(LoudNormContext),
     924             :     .priv_class    = &loudnorm_class,
     925             :     .query_formats = query_formats,
     926             :     .init          = init,
     927             :     .uninit        = uninit,
     928             :     .inputs        = avfilter_af_loudnorm_inputs,
     929             :     .outputs       = avfilter_af_loudnorm_outputs,
     930             : };

Generated by: LCOV version 1.13