FFmpeg coverage

Directory:	../../../ffmpeg/
File:	src/libavfilter/af_speechnorm.c
Date:	2025-07-18 13:13:50

	Total	Coverage
Lines:	181	0.0%
Functions:	19	0.0%
Branches:	227	0.0%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
       * Copyright (c) 2020 Paul B Mahol
    
       *
    
       * Speech Normalizer
    
       *
    
       * This file is part of FFmpeg.
    
       *
    
       * FFmpeg is free software; you can redistribute it and/or
    
       * modify it under the terms of the GNU Lesser General Public
    
       * License as published by the Free Software Foundation; either
    
       * version 2.1 of the License, or (at your option) any later version.
    
       *
    
       * FFmpeg is distributed in the hope that it will be useful,
    
       * but WITHOUT ANY WARRANTY; without even the implied warranty of
    
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    
       * Lesser General Public License for more details.
    
       *
    
       * You should have received a copy of the GNU Lesser General Public
    
       * License along with FFmpeg; if not, write to the Free Software
    
       * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    
       */
    
      /**
    
       * @file
    
       * Speech Normalizer
    
       */
    
      #include <float.h>
    
      #include "libavutil/avassert.h"
    
      #include "libavutil/channel_layout.h"
    
      #include "libavutil/mem.h"
    
      #include "libavutil/opt.h"
    
      #define FF_BUFQUEUE_SIZE (1024)
    
      #include "bufferqueue.h"
    
      #include "audio.h"
    
      #include "avfilter.h"
    
      #include "filters.h"
    
      #define MAX_ITEMS  882000
    
      #define MIN_PEAK (1. / 32768.)
    
      /* One analyzed period: a run of consecutive samples accumulated by the
       * analyze_channel_* functions. */
      typedef struct PeriodItem {
    
          int size;         /* number of samples accumulated in this period */
    
          int type;         /* 1 once the analyzer has closed the period, 0 while it is still being filled */
    
          double max_peak;  /* largest absolute sample value accumulated so far */
    
          double rms_sum;   /* running sum of squared sample values */
    
      } PeriodItem;
    
      /* Per-channel analysis and output state. */
      typedef struct ChannelContext {
    
          int state;            /* -1 until the first sample is analyzed, then 1 while the current run is >= 0 and 0 while it is negative */
    
          int bypass;           /* written by the linked filtering path: nonzero when the channel is not part of the selected layout */
    
          PeriodItem pi[MAX_ITEMS]; /* ring of period items; indices wrap back to 0 at MAX_ITEMS */
    
          double gain_state;    /* gain of the period currently being output, updated by next_pi() */
    
          double pi_max_peak;   /* max_peak copied from the item most recently loaded by next_pi() */
    
          double pi_rms_sum;    /* rms_sum copied from the item most recently loaded by next_pi() */
    
          int pi_start;         /* index of the next item next_pi() will load */
    
          int pi_end;           /* index of the item the analyzer is currently filling */
    
          int pi_size;          /* samples still to be output from the loaded item */
    
      } ChannelContext;
    
      /* Private filter context: user options followed by runtime state. */
      typedef struct SpeechNormalizerContext {
    
          const AVClass *class;
    
          double rms_value;         /* "rms" option; only used by next_gain() when > DBL_EPSILON */
    
          double peak_value;        /* "peak" option */
    
          double max_expansion;     /* "expansion" option: upper bound of the gain */
    
          double max_compression;   /* "compression" option: its reciprocal is the lower gain bound when falling */
    
          double threshold_value;   /* "threshold" option compared against each period's peak */
    
          double raise_amount;      /* "raise" option: gain increment per period */
    
          double fall_amount;       /* "fall" option: gain decrement per period */
    
          char *ch_layout_str;      /* "channels" option; the string "all" selects every channel */
    
          AVChannelLayout ch_layout; /* channels to filter, rebuilt on every activate() call */
    
          int invert;               /* "invert" option: flips the threshold comparison */
    
          int link;                 /* "link" option: index into filter_channels[] */
    
          ChannelContext *cc;       /* one entry per input channel, allocated in config_input() */
    
          double prev_gain;         /* gain reached at the end of the previous linked segment */
    
          int max_period;           /* sample_rate / 10; a period longer than this is closed by the analyzer */
    
          int eof;                  /* set once the input link reported AVERROR_EOF */
    
          int64_t pts;              /* pts just past the last output frame, used for the EOF status */
    
          struct FFBufQueue queue;  /* frames that were analyzed but not yet output */
    
          /* Format-specific analyzer, selected in config_input(). */
          void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
    
                                  const uint8_t *srcp, int nb_samples);
    
          /* Format-specific output routines: [0] independent channels, [1] linked channels. */
          void (*filter_channels[2])(AVFilterContext *ctx,
    
                                     AVFrame *in, AVFrame *out, int nb_samples);
    
      } SpeechNormalizerContext;
    
      /* Byte offset of an option field inside the private context. */
      #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
      /* Flags shared by every option. The expansion is parenthesized so that it
       * stays a single operand next to operators binding tighter than '|'. */
      #define FLAGS (AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM)
    
      /* Option table. Every option is listed twice: once under its long name and
       * once under a one-letter alias that shares the same field, default and
       * range. All entries carry FLAGS. */
      static const AVOption speechnorm_options[] = {
    
          { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    
          { "p",    "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    
          { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    
          { "e",         "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    
          { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    
          { "c",           "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    
          { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    
          { "t",         "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    
          { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    
          { "r",     "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    
          { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    
          { "f",    "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    
          { "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    
          { "h",        "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    
          { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    
          { "i",      "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    
          { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    
          { "l",    "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    
          { "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    
          { "m",   "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    
          /* table terminator */
          { NULL }
    
      };
    
      AVFILTER_DEFINE_CLASS(speechnorm);
    
      ✗
      /**
       * Count the samples of one channel that lie in closed periods.
       *
       * @param pi     ring of period items
       * @param start  index of the first item to inspect
       * @param end    index at which the walk stops
       * @param remain samples still pending from the currently loaded item
       * @return remain alone when pi[start] is still open (type 0); otherwise
       *         remain plus the sizes of the closed items that follow start,
       *         stopping at end or at the first open item
       */
      static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
      {
          int total = remain;

          if (pi[start].type == 0)
              return remain;

          for (int idx = start; idx != end; ) {
              /* advance with wrap-around before looking at the item */
              idx = idx + 1 >= MAX_ITEMS ? 0 : idx + 1;
              if (pi[idx].type == 0)
                  break;
              av_assert1(pi[idx].size > 0);
              total += pi[idx].size;
          }

          return total;
      }
    
      ✗
      /**
       * Smallest get_pi_samples() result over all input channels.
       *
       * The scan over the remaining channels stops early once the running
       * minimum is no longer positive.
       */
      static int available_samples(AVFilterContext *ctx)
      {
          SpeechNormalizerContext *s = ctx->priv;
          AVFilterLink *inlink = ctx->inputs[0];
          ChannelContext *cc = &s->cc[0];
          int lowest = get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size);

          for (int ch = 1; ch < inlink->ch_layout.nb_channels && lowest > 0; ch++) {
              int avail;

              cc = &s->cc[ch];
              avail = get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size);
              lowest = FFMIN(lowest, avail);
          }

          return lowest;
      }
    
      ✗
      /**
       * Mark nb_samples of the currently loaded period as output.
       *
       * Asking for more than cc->pi_size is a caller error: it trips
       * av_assert1() and leaves the counter untouched.
       */
      static void consume_pi(ChannelContext *cc, int nb_samples)
      {
          if (cc->pi_size < nb_samples) {
              av_assert1(0);
              return;
          }

          cc->pi_size -= nb_samples;
      }
    
      ✗
      /**
       * Compute the gain for the next period.
       *
       * @param pi_max_peak peak of the period
       * @param bypass      nonzero to force unity gain
       * @param state       gain of the previous period
       * @param pi_rms_sum  sum of squares of the period
       * @param pi_size     number of samples in the period
       * @return 1.0 when bypassed; otherwise state raised by raise_amount or
       *         lowered by fall_amount (not below 1 / max_compression), and in
       *         both cases capped by the allowed expansion
       */
      static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
                              double pi_rms_sum, int pi_size)
      {
          SpeechNormalizerContext *s = ctx->priv;
          const double floor_gain = 1. / s->max_compression;
          double limit;
          int raise;

          if (bypass)
              return 1.;

          /* expansion ceiling: option limit, peak target and optionally RMS target */
          limit = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
          if (s->rms_value > DBL_EPSILON)
              limit = FFMIN(limit, s->rms_value / sqrt(pi_rms_sum / pi_size));

          if (s->invert)
              raise = pi_max_peak <= s->threshold_value;
          else
              raise = pi_max_peak >= s->threshold_value;

          if (raise)
              return FFMIN(limit, state + s->raise_amount);

          return FFMIN(limit, FFMAX(floor_gain, state - s->fall_amount));
      }
    
      ✗
      /**
       * Load the next period item of a channel once the current one is used up.
       *
       * Does nothing while cc->pi_size is nonzero. Otherwise it copies size,
       * rms_sum and max_peak from the item at pi_start, advances pi_start with
       * wrap-around and derives the new gain_state via next_gain().
       */
      static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
      {
          SpeechNormalizerContext *s = ctx->priv;
          const PeriodItem *item;
          int following;

          av_assert1(cc->pi_size >= 0);
          if (cc->pi_size != 0)
              return;

          item = &cc->pi[cc->pi_start];
          av_assert1(item->size > 0);
          /* an open item may only be consumed after end of stream */
          av_assert0(item->type > 0 || s->eof);

          cc->pi_size     = item->size;
          cc->pi_rms_sum  = item->rms_sum;
          cc->pi_max_peak = item->max_peak;

          av_assert1(cc->pi_start != cc->pi_end || s->eof);
          following = cc->pi_start + 1;
          cc->pi_start = following >= MAX_ITEMS ? 0 : following;

          cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
                                     cc->pi_rms_sum, cc->pi_size);
      }
    
      ✗
      /**
       * Lowest gain the channel would reach over the upcoming periods.
       *
       * Starts from the smaller of max_expansion and the current gain_state,
       * then simulates next_gain() (without bypass) over the queued items from
       * pi_start until the accumulated size exceeds max_size or pi_end is
       * reached. The channel state itself is not modified.
       */
      static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
      {
          SpeechNormalizerContext *s = ctx->priv;
          double simulated = cc->gain_state;
          double lowest = FFMIN(s->max_expansion, simulated);
          int covered = cc->pi_size;
          int idx = cc->pi_start;

          while (covered <= max_size && idx != cc->pi_end) {
              const PeriodItem *item = &cc->pi[idx];

              simulated = next_gain(ctx, item->max_peak, 0, simulated,
                                    item->rms_sum, item->size);
              lowest = FFMIN(lowest, simulated);
              covered += item->size;
              idx = idx + 1 >= MAX_ITEMS ? 0 : idx + 1;
          }

          return lowest;
      }
    
      #define ANALYZE_CHANNEL(name, ptype, zero, min_peak)                            \
    
      static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,  \
    
                                           const uint8_t *srcp, int nb_samples)       \
    
      {                                                                               \
    
          SpeechNormalizerContext *s = ctx->priv;                                     \
    
          const ptype *src = (const ptype *)srcp;                                     \
    
          const int max_period = s->max_period;                                       \
    
          PeriodItem *pi = (PeriodItem *)&cc->pi;                                     \
    
          int pi_end = cc->pi_end;                                                    \
    
          int n = 0;                                                                  \
    
                                                                                      \
    
          if (cc->state < 0)                                                          \
    
              cc->state = src[0] >= zero;                                             \
    
                                                                                      \
    
          while (n < nb_samples) {                                                    \
    
              ptype new_max_peak;                                                     \
    
              ptype new_rms_sum;                                                      \
    
              int new_size;                                                           \
    
                                                                                      \
    
              if ((cc->state != (src[n] >= zero)) ||                                  \
    
                  (pi[pi_end].size > max_period)) {                                   \
    
                  ptype max_peak = pi[pi_end].max_peak;                               \
    
                  ptype rms_sum = pi[pi_end].rms_sum;                                 \
    
                  int state = cc->state;                                              \
    
                                                                                      \
    
                  cc->state = src[n] >= zero;                                         \
    
                  av_assert1(pi[pi_end].size > 0);                                    \
    
                  if (max_peak >= min_peak ||                                         \
    
                      pi[pi_end].size > max_period) {                                 \
    
                      pi[pi_end].type = 1;                                            \
    
                      pi_end++;                                                       \
    
                      if (pi_end >= MAX_ITEMS)                                        \
    
                          pi_end = 0;                                                 \
    
                      if (cc->state != state) {                                       \
    
                          pi[pi_end].max_peak = DBL_MIN;                              \
    
                          pi[pi_end].rms_sum = 0.0;                                   \
    
                      } else {                                                        \
    
                          pi[pi_end].max_peak = max_peak;                             \
    
                          pi[pi_end].rms_sum = rms_sum;                               \
    
                      }                                                               \
    
                      pi[pi_end].type = 0;                                            \
    
                      pi[pi_end].size = 0;                                            \
    
                      av_assert1(pi_end != cc->pi_start);                             \
    
                  }                                                                   \
    
              }                                                                       \
    
                                                                                      \
    
              new_max_peak = pi[pi_end].max_peak;                                     \
    
              new_rms_sum = pi[pi_end].rms_sum;                                       \
    
              new_size = pi[pi_end].size;                                             \
    
              if (cc->state) {                                                        \
    
                  while (src[n] >= zero) {                                            \
    
                      new_max_peak = FFMAX(new_max_peak,  src[n]);                    \
    
                      new_rms_sum += src[n] * src[n];                                 \
    
                      new_size++;                                                     \
    
                      n++;                                                            \
    
                      if (n >= nb_samples)                                            \
    
                          break;                                                      \
    
                  }                                                                   \
    
              } else {                                                                \
    
                  while (src[n] < zero) {                                             \
    
                      new_max_peak = FFMAX(new_max_peak, -src[n]);                    \
    
                      new_rms_sum += src[n] * src[n];                                 \
    
                      new_size++;                                                     \
    
                      n++;                                                            \
    
                      if (n >= nb_samples)                                            \
    
                          break;                                                      \
    
                  }                                                                   \
    
              }                                                                       \
    
                                                                                      \
    
              pi[pi_end].max_peak = new_max_peak;                                     \
    
              pi[pi_end].rms_sum = new_rms_sum;                                       \
    
              pi[pi_end].size = new_size;                                             \
    
          }                                                                           \
    
          cc->pi_end = pi_end;                                                        \
    
      }
    
      ✗
      ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
    
      ✗
      ANALYZE_CHANNEL(flt, float,  0.f, (float)MIN_PEAK)
    
      #define FILTER_CHANNELS(name, ptype)                                            \
    
      static void filter_channels_## name (AVFilterContext *ctx,                      \
    
                                           AVFrame *in, AVFrame *out, int nb_samples) \
    
      {                                                                               \
    
          SpeechNormalizerContext *s = ctx->priv;                                     \
    
          AVFilterLink *inlink = ctx->inputs[0];                                      \
    
                                                                                      \
    
          for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {                \
    
              ChannelContext *cc = &s->cc[ch];                                        \
    
              const ptype *src = (const ptype *)in->extended_data[ch];                \
    
              ptype *dst = (ptype *)out->extended_data[ch];                           \
    
              enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
    
              const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
    
              int n = 0;                                                              \
    
                                                                                      \
    
              while (n < nb_samples) {                                                \
    
                  ptype gain;                                                         \
    
                  int size;                                                           \
    
                                                                                      \
    
                  next_pi(ctx, cc, bypass);                                           \
    
                  size = FFMIN(nb_samples - n, cc->pi_size);                          \
    
                  av_assert1(size > 0);                                               \
    
                  gain = cc->gain_state;                                              \
    
                  consume_pi(cc, size);                                               \
    
                  for (int i = n; !ctx->is_disabled && i < n + size; i++)             \
    
                      dst[i] = src[i] * gain;                                         \
    
                  n += size;                                                          \
    
              }                                                                       \
    
          }                                                                           \
    
      }
    
      ✗
      FILTER_CHANNELS(dbl, double)
    
      ✗
      FILTER_CHANNELS(flt, float)
    
      ✗
      /**
       * Linear interpolation in double precision.
       *
       * @return min for mix == 0, max for mix == 1, values in between otherwise
       */
      static double dlerp(double min, double max, double mix)
      {
          return (max - min) * mix + min;
      }
    
      ✗
      /**
       * Linear interpolation in single precision.
       *
       * @return min for mix == 0, max for mix == 1, values in between otherwise
       */
      static float flerp(float min, float max, float mix)
      {
          return (max - min) * mix + min;
      }
    
      #define FILTER_LINK_CHANNELS(name, ptype, tlerp)                                \
    
      static void filter_link_channels_## name (AVFilterContext *ctx,                 \
    
                                                AVFrame *in, AVFrame *out,            \
    
                                                int nb_samples)                       \
    
      {                                                                               \
    
          SpeechNormalizerContext *s = ctx->priv;                                     \
    
          AVFilterLink *inlink = ctx->inputs[0];                                      \
    
          int n = 0;                                                                  \
    
                                                                                      \
    
          while (n < nb_samples) {                                                    \
    
              int min_size = nb_samples - n;                                          \
    
              ptype gain = s->max_expansion;                                          \
    
                                                                                      \
    
              for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
    
                  ChannelContext *cc = &s->cc[ch];                                    \
    
                                                                                      \
    
                  enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
    
                  cc->bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
    
                                                                                      \
    
                  next_pi(ctx, cc, cc->bypass);                                       \
    
                  min_size = FFMIN(min_size, cc->pi_size);                            \
    
              }                                                                       \
    
                                                                                      \
    
              av_assert1(min_size > 0);                                               \
    
              for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
    
                  ChannelContext *cc = &s->cc[ch];                                    \
    
                                                                                      \
    
                  if (cc->bypass)                                                     \
    
                      continue;                                                       \
    
                  gain = FFMIN(gain, min_gain(ctx, cc, min_size));                    \
    
              }                                                                       \
    
                                                                                      \
    
              for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
    
                  ChannelContext *cc = &s->cc[ch];                                    \
    
                  const ptype *src = (const ptype *)in->extended_data[ch];            \
    
                  ptype *dst = (ptype *)out->extended_data[ch];                       \
    
                                                                                      \
    
                  consume_pi(cc, min_size);                                           \
    
                  if (cc->bypass)                                                     \
    
                      continue;                                                       \
    
                                                                                      \
    
                  for (int i = n; !ctx->is_disabled && i < n + min_size; i++) {       \
    
                      ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
    
                      dst[i] = src[i] * g;                                            \
    
                  }                                                                   \
    
              }                                                                       \
    
                                                                                      \
    
              s->prev_gain = gain;                                                    \
    
              n += min_size;                                                          \
    
          }                                                                           \
    
      }
    
      ✗
      FILTER_LINK_CHANNELS(dbl, double, dlerp)
    
      ✗
      FILTER_LINK_CHANNELS(flt, float, flerp)
    
      ✗
      /**
       * Output one queued frame if enough analyzed samples exist, otherwise
       * pull pending input frames into the queue and analyze them.
       *
       * @return the result of ff_filter_frame() when a frame was output, a
       *         negative error code on failure, or 1 when input was (possibly)
       *         consumed but nothing was output
       */
      static int filter_frame(AVFilterContext *ctx)
      {
          SpeechNormalizerContext *s = ctx->priv;
          AVFilterLink *inlink = ctx->inputs[0];
          AVFilterLink *outlink = ctx->outputs[0];
          AVFrame *head = NULL;

          if (s->queue.available > 0)
              head = ff_bufqueue_peek(&s->queue, 0);

          if (head) {
              const int ready = available_samples(ctx);

              /* after EOF the remaining frames are flushed unconditionally */
              if (ready >= head->nb_samples || s->eof) {
                  AVFrame *in = ff_bufqueue_get(&s->queue);
                  AVFrame *out = in;

                  if (!av_frame_is_writable(in)) {
                      out = ff_get_audio_buffer(outlink, in->nb_samples);
                      if (!out) {
                          av_frame_free(&in);
                          return AVERROR(ENOMEM);
                      }
                      av_frame_copy_props(out, in);
                  }

                  s->filter_channels[s->link](ctx, in, out, in->nb_samples);
                  /* remember where the stream ends, for the EOF status */
                  s->pts = in->pts + av_rescale_q(in->nb_samples,
                                                  av_make_q(1, outlink->sample_rate),
                                                  outlink->time_base);

                  if (out != in)
                      av_frame_free(&in);
                  return ff_filter_frame(outlink, out);
              }
          }

          for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
              AVFrame *in;
              const int ret = ff_inlink_consume_frame(inlink, &in);

              if (ret < 0)
                  return ret;
              if (ret == 0)
                  break;

              ff_bufqueue_add(ctx, &s->queue, in);

              for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++)
                  s->analyze_channel(ctx, &s->cc[ch], in->extended_data[ch], in->nb_samples);
          }

          return 1;
      }
    
      ✗
      /**
       * Scheduler entry point of the filter.
       *
       * Rebuilds the set of channels to filter from the "channels" option,
       * runs filter_frame(), tracks end of stream on the input, and either
       * signals EOF downstream, reschedules itself when another queued frame
       * can be output, or forwards the frame request upstream.
       */
      static int activate(AVFilterContext *ctx)
      {
          SpeechNormalizerContext *s = ctx->priv;
          AVFilterLink *outlink = ctx->outputs[0];
          AVFilterLink *inlink = ctx->inputs[0];
          int64_t pts;
          int status;
          int ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);

          if (ret < 0)
              return ret;

          /* anything other than "all" replaces the copied input layout */
          if (strcmp(s->ch_layout_str, "all") != 0)
              av_channel_layout_from_string(&s->ch_layout,
                                            s->ch_layout_str);

          FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

          ret = filter_frame(ctx);
          if (ret <= 0)
              return ret;

          if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts) &&
              status == AVERROR_EOF)
              s->eof = 1;

          /* everything was flushed: close the output */
          if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
              s->queue.available == 0) {
              ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
              return 0;
          }

          if (s->queue.available > 0) {
              AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
              const int nb_samples = available_samples(ctx);

              if (nb_samples >= in->nb_samples || s->eof) {
                  ff_filter_set_ready(ctx, 10);
                  return 0;
              }
          }

          FF_FILTER_FORWARD_WANTED(outlink, inlink);

          return FFERROR_NOT_READY;
      }
    
      ✗
      /**
       * Configure the filter for the negotiated input: derive max_period from
       * the sample rate, allocate and initialize the per-channel contexts and
       * select the sample-format specific routines.
       *
       * @return 0 on success, AVERROR(ENOMEM) if the channel contexts cannot
       *         be allocated
       */
      static int config_input(AVFilterLink *inlink)
      {
          AVFilterContext *ctx = inlink->dst;
          SpeechNormalizerContext *s = ctx->priv;
          const int nb_channels = inlink->ch_layout.nb_channels;

          s->max_period = inlink->sample_rate / 10;
          s->prev_gain = 1.;

          s->cc = av_calloc(nb_channels, sizeof(*s->cc));
          if (!s->cc)
              return AVERROR(ENOMEM);

          for (int ch = 0; ch < nb_channels; ch++) {
              s->cc[ch].state = -1;                      /* sign not known yet */
              s->cc[ch].gain_state = s->max_expansion;
          }

          if (inlink->format == AV_SAMPLE_FMT_FLTP) {
              s->analyze_channel = analyze_channel_flt;
              s->filter_channels[0] = filter_channels_flt;
              s->filter_channels[1] = filter_link_channels_flt;
          } else if (inlink->format == AV_SAMPLE_FMT_DBLP) {
              s->analyze_channel = analyze_channel_dbl;
              s->filter_channels[0] = filter_channels_dbl;
              s->filter_channels[1] = filter_link_channels_dbl;
          } else {
              av_assert1(0);
          }

          return 0;
      }
    
      ✗
      static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
    
                                 char *res, int res_len, int flags)
    
      {
    
      ✗
          SpeechNormalizerContext *s = ctx->priv;
    
      ✗
          int link = s->link;
    
          int ret;
    
      ✗
          ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    
      ✗
          if (ret < 0)
    
      ✗
              return ret;
    
      ✗
          if (link != s->link)
    
      ✗
              s->prev_gain = 1.;
    
      ✗
          return 0;
    
      }
    
      ✗
      static av_cold void uninit(AVFilterContext *ctx)
    
      {
    
      ✗
          SpeechNormalizerContext *s = ctx->priv;
    
      ✗
          ff_bufqueue_discard_all(&s->queue);
    
      ✗
          av_channel_layout_uninit(&s->ch_layout);
    
      ✗
          av_freep(&s->cc);
    
      ✗
      }
    
      static const AVFilterPad inputs[] = {
    
          {
    
              .name         = "default",
    
              .type         = AVMEDIA_TYPE_AUDIO,
    
              .config_props = config_input,
    
          },
    
      };
    
      const FFFilter ff_af_speechnorm = {
    
          .p.name          = "speechnorm",
    
          .p.description   = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
    
          .p.priv_class    = &speechnorm_class,
    
          .p.flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
    
          .priv_size       = sizeof(SpeechNormalizerContext),
    
          .activate        = activate,
    
          .uninit          = uninit,
    
          FILTER_INPUTS(inputs),
    
          FILTER_OUTPUTS(ff_audio_default_filterpad),
    
          FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
    
          .process_command = process_command,
    
      };

Line	Exec	Source
1		/*
2		* Copyright (c) 2020 Paul B Mahol
3		*
4		* Speech Normalizer
5		*
6		* This file is part of FFmpeg.
7		*
8		* FFmpeg is free software; you can redistribute it and/or
9		* modify it under the terms of the GNU Lesser General Public
10		* License as published by the Free Software Foundation; either
11		* version 2.1 of the License, or (at your option) any later version.
12		*
13		* FFmpeg is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16		* Lesser General Public License for more details.
17		*
18		* You should have received a copy of the GNU Lesser General Public
19		* License along with FFmpeg; if not, write to the Free Software
20		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21		*/
22
23		/**
24		* @file
25		* Speech Normalizer
26		*/
27
28		#include <float.h>
29
30		#include "libavutil/avassert.h"
31		#include "libavutil/channel_layout.h"
32		#include "libavutil/mem.h"
33		#include "libavutil/opt.h"
34
35		#define FF_BUFQUEUE_SIZE (1024)
36		#include "bufferqueue.h"
37
38		#include "audio.h"
39		#include "avfilter.h"
40		#include "filters.h"
41
42		#define MAX_ITEMS 882000
43		#define MIN_PEAK (1. / 32768.)
44
45		typedef struct PeriodItem {
46		int size;
47		int type;
48		double max_peak;
49		double rms_sum;
50		} PeriodItem;
51
52		typedef struct ChannelContext {
53		int state;
54		int bypass;
55		PeriodItem pi[MAX_ITEMS];
56		double gain_state;
57		double pi_max_peak;
58		double pi_rms_sum;
59		int pi_start;
60		int pi_end;
61		int pi_size;
62		} ChannelContext;
63
64		typedef struct SpeechNormalizerContext {
65		const AVClass *class;
66
67		double rms_value;
68		double peak_value;
69		double max_expansion;
70		double max_compression;
71		double threshold_value;
72		double raise_amount;
73		double fall_amount;
74		char *ch_layout_str;
75		AVChannelLayout ch_layout;
76		int invert;
77		int link;
78
79		ChannelContext *cc;
80		double prev_gain;
81
82		int max_period;
83		int eof;
84		int64_t pts;
85
86		struct FFBufQueue queue;
87
88		void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
89		const uint8_t *srcp, int nb_samples);
90		void (*filter_channels[2])(AVFilterContext *ctx,
91		AVFrame *in, AVFrame *out, int nb_samples);
92		} SpeechNormalizerContext;
93
94		#define OFFSET(x) offsetof(SpeechNormalizerContext, x)
95		#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
96
97		static const AVOption speechnorm_options[] = {
98		{ "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
99		{ "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
100		{ "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
101		{ "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
102		{ "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
103		{ "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
104		{ "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
105		{ "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
106		{ "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
107		{ "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
108		{ "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
109		{ "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
110		{ "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
111		{ "h", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
112		{ "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
113		{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
114		{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
115		{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
116		{ "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
117		{ "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
118		{ NULL }
119		};
120
121		AVFILTER_DEFINE_CLASS(speechnorm);
122
123	✗	static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
124		{
125		int sum;
126
127	✗	if (pi[start].type == 0)
128	✗	return remain;
129
130	✗	sum = remain;
131	✗	while (start != end) {
132	✗	start++;
133	✗	if (start >= MAX_ITEMS)
134	✗	start = 0;
135	✗	if (pi[start].type == 0)
136	✗	break;
137		av_assert1(pi[start].size > 0);
138	✗	sum += pi[start].size;
139		}
140
141	✗	return sum;
142		}
143
144	✗	static int available_samples(AVFilterContext *ctx)
145		{
146	✗	SpeechNormalizerContext *s = ctx->priv;
147	✗	AVFilterLink *inlink = ctx->inputs[0];
148		int min_pi_nb_samples;
149
150	✗	min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
151	✗	for (int ch = 1; ch < inlink->ch_layout.nb_channels && min_pi_nb_samples > 0; ch++) {
152	✗	ChannelContext *cc = &s->cc[ch];
153
154	✗	min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
155		}
156
157	✗	return min_pi_nb_samples;
158		}
159
160	✗	static void consume_pi(ChannelContext *cc, int nb_samples)
161		{
162	✗	if (cc->pi_size >= nb_samples) {
163	✗	cc->pi_size -= nb_samples;
164		} else {
165		av_assert1(0);
166		}
167	✗	}
168
169	✗	static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
170		double pi_rms_sum, int pi_size)
171		{
172	✗	SpeechNormalizerContext *s = ctx->priv;
173	✗	const double compression = 1. / s->max_compression;
174	✗	const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
175	✗	double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
176
177	✗	if (s->rms_value > DBL_EPSILON)
178	✗	expansion = FFMIN(expansion, s->rms_value / sqrt(pi_rms_sum / pi_size));
179
180	✗	if (bypass) {
181	✗	return 1.;
182	✗	} else if (type) {
183	✗	return FFMIN(expansion, state + s->raise_amount);
184		} else {
185	✗	return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
186		}
187		}
188
189	✗	static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
190		{
191		av_assert1(cc->pi_size >= 0);
192	✗	if (cc->pi_size == 0) {
193	✗	SpeechNormalizerContext *s = ctx->priv;
194	✗	int start = cc->pi_start;
195
196		av_assert1(cc->pi[start].size > 0);
197	✗	av_assert0(cc->pi[start].type > 0 || s->eof);
198	✗	cc->pi_size = cc->pi[start].size;
199	✗	cc->pi_rms_sum = cc->pi[start].rms_sum;
200	✗	cc->pi_max_peak = cc->pi[start].max_peak;
201		av_assert1(cc->pi_start != cc->pi_end || s->eof);
202	✗	start++;
203	✗	if (start >= MAX_ITEMS)
204	✗	start = 0;
205	✗	cc->pi_start = start;
206	✗	cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
207		cc->pi_rms_sum, cc->pi_size);
208		}
209	✗	}
210
211	✗	static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
212		{
213	✗	SpeechNormalizerContext *s = ctx->priv;
214	✗	double min_gain = s->max_expansion;
215	✗	double gain_state = cc->gain_state;
216	✗	int size = cc->pi_size;
217	✗	int idx = cc->pi_start;
218
219	✗	min_gain = FFMIN(min_gain, gain_state);
220	✗	while (size <= max_size) {
221	✗	if (idx == cc->pi_end)
222	✗	break;
223	✗	gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state,
224		cc->pi[idx].rms_sum, cc->pi[idx].size);
225	✗	min_gain = FFMIN(min_gain, gain_state);
226	✗	size += cc->pi[idx].size;
227	✗	idx++;
228	✗	if (idx >= MAX_ITEMS)
229	✗	idx = 0;
230		}
231
232	✗	return min_gain;
233		}
234
235		#define ANALYZE_CHANNEL(name, ptype, zero, min_peak) \
236		static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
237		const uint8_t *srcp, int nb_samples) \
238		{ \
239		SpeechNormalizerContext *s = ctx->priv; \
240		const ptype *src = (const ptype *)srcp; \
241		const int max_period = s->max_period; \
242		PeriodItem *pi = (PeriodItem *)&cc->pi; \
243		int pi_end = cc->pi_end; \
244		int n = 0; \
245		\
246		if (cc->state < 0) \
247		cc->state = src[0] >= zero; \
248		\
249		while (n < nb_samples) { \
250		ptype new_max_peak; \
251		ptype new_rms_sum; \
252		int new_size; \
253		\
254		if ((cc->state != (src[n] >= zero)) || \
255		(pi[pi_end].size > max_period)) { \
256		ptype max_peak = pi[pi_end].max_peak; \
257		ptype rms_sum = pi[pi_end].rms_sum; \
258		int state = cc->state; \
259		\
260		cc->state = src[n] >= zero; \
261		av_assert1(pi[pi_end].size > 0); \
262		if (max_peak >= min_peak || \
263		pi[pi_end].size > max_period) { \
264		pi[pi_end].type = 1; \
265		pi_end++; \
266		if (pi_end >= MAX_ITEMS) \
267		pi_end = 0; \
268		if (cc->state != state) { \
269		pi[pi_end].max_peak = DBL_MIN; \
270		pi[pi_end].rms_sum = 0.0; \
271		} else { \
272		pi[pi_end].max_peak = max_peak; \
273		pi[pi_end].rms_sum = rms_sum; \
274		} \
275		pi[pi_end].type = 0; \
276		pi[pi_end].size = 0; \
277		av_assert1(pi_end != cc->pi_start); \
278		} \
279		} \
280		\
281		new_max_peak = pi[pi_end].max_peak; \
282		new_rms_sum = pi[pi_end].rms_sum; \
283		new_size = pi[pi_end].size; \
284		if (cc->state) { \
285		while (src[n] >= zero) { \
286		new_max_peak = FFMAX(new_max_peak, src[n]); \
287		new_rms_sum += src[n] * src[n]; \
288		new_size++; \
289		n++; \
290		if (n >= nb_samples) \
291		break; \
292		} \
293		} else { \
294		while (src[n] < zero) { \
295		new_max_peak = FFMAX(new_max_peak, -src[n]); \
296		new_rms_sum += src[n] * src[n]; \
297		new_size++; \
298		n++; \
299		if (n >= nb_samples) \
300		break; \
301		} \
302		} \
303		\
304		pi[pi_end].max_peak = new_max_peak; \
305		pi[pi_end].rms_sum = new_rms_sum; \
306		pi[pi_end].size = new_size; \
307		} \
308		cc->pi_end = pi_end; \
309		}
310
311	✗	ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
312	✗	ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
313
314		#define FILTER_CHANNELS(name, ptype) \
315		static void filter_channels_## name (AVFilterContext *ctx, \
316		AVFrame *in, AVFrame *out, int nb_samples) \
317		{ \
318		SpeechNormalizerContext *s = ctx->priv; \
319		AVFilterLink *inlink = ctx->inputs[0]; \
320		\
321		for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
322		ChannelContext *cc = &s->cc[ch]; \
323		const ptype *src = (const ptype *)in->extended_data[ch]; \
324		ptype *dst = (ptype *)out->extended_data[ch]; \
325		enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
326		const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
327		int n = 0; \
328		\
329		while (n < nb_samples) { \
330		ptype gain; \
331		int size; \
332		\
333		next_pi(ctx, cc, bypass); \
334		size = FFMIN(nb_samples - n, cc->pi_size); \
335		av_assert1(size > 0); \
336		gain = cc->gain_state; \
337		consume_pi(cc, size); \
338		for (int i = n; !ctx->is_disabled && i < n + size; i++) \
339		dst[i] = src[i] * gain; \
340		n += size; \
341		} \
342		} \
343		}
344
345	✗	FILTER_CHANNELS(dbl, double)
346	✗	FILTER_CHANNELS(flt, float)
347
348	✗	static double dlerp(double min, double max, double mix)
349		{
350	✗	return min + (max - min) * mix;
351		}
352
353	✗	static float flerp(float min, float max, float mix)
354		{
355	✗	return min + (max - min) * mix;
356		}
357
358		#define FILTER_LINK_CHANNELS(name, ptype, tlerp) \
359		static void filter_link_channels_## name (AVFilterContext *ctx, \
360		AVFrame *in, AVFrame *out, \
361		int nb_samples) \
362		{ \
363		SpeechNormalizerContext *s = ctx->priv; \
364		AVFilterLink *inlink = ctx->inputs[0]; \
365		int n = 0; \
366		\
367		while (n < nb_samples) { \
368		int min_size = nb_samples - n; \
369		ptype gain = s->max_expansion; \
370		\
371		for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
372		ChannelContext *cc = &s->cc[ch]; \
373		\
374		enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
375		cc->bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
376		\
377		next_pi(ctx, cc, cc->bypass); \
378		min_size = FFMIN(min_size, cc->pi_size); \
379		} \
380		\
381		av_assert1(min_size > 0); \
382		for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
383		ChannelContext *cc = &s->cc[ch]; \
384		\
385		if (cc->bypass) \
386		continue; \
387		gain = FFMIN(gain, min_gain(ctx, cc, min_size)); \
388		} \
389		\
390		for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
391		ChannelContext *cc = &s->cc[ch]; \
392		const ptype *src = (const ptype *)in->extended_data[ch]; \
393		ptype *dst = (ptype *)out->extended_data[ch]; \
394		\
395		consume_pi(cc, min_size); \
396		if (cc->bypass) \
397		continue; \
398		\
399		for (int i = n; !ctx->is_disabled && i < n + min_size; i++) { \
400		ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
401		dst[i] = src[i] * g; \
402		} \
403		} \
404		\
405		s->prev_gain = gain; \
406		n += min_size; \
407		} \
408		}
409
410	✗	FILTER_LINK_CHANNELS(dbl, double, dlerp)
411	✗	FILTER_LINK_CHANNELS(flt, float, flerp)
412
413	✗	static int filter_frame(AVFilterContext *ctx)
414		{
415	✗	SpeechNormalizerContext *s = ctx->priv;
416	✗	AVFilterLink *outlink = ctx->outputs[0];
417	✗	AVFilterLink *inlink = ctx->inputs[0];
418		int ret;
419
420	✗	while (s->queue.available > 0) {
421		int min_pi_nb_samples;
422		AVFrame *in, *out;
423
424	✗	in = ff_bufqueue_peek(&s->queue, 0);
425	✗	if (!in)
426	✗	break;
427
428	✗	min_pi_nb_samples = available_samples(ctx);
429	✗	if (min_pi_nb_samples < in->nb_samples && !s->eof)
430	✗	break;
431
432	✗	in = ff_bufqueue_get(&s->queue);
433
434	✗	if (av_frame_is_writable(in)) {
435	✗	out = in;
436		} else {
437	✗	out = ff_get_audio_buffer(outlink, in->nb_samples);
438	✗	if (!out) {
439	✗	av_frame_free(&in);
440	✗	return AVERROR(ENOMEM);
441		}
442	✗	av_frame_copy_props(out, in);
443		}
444
445	✗	s->filter_channels[s->link](ctx, in, out, in->nb_samples);
446
447	✗	s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
448		outlink->time_base);
449
450	✗	if (out != in)
451	✗	av_frame_free(&in);
452	✗	return ff_filter_frame(outlink, out);
453		}
454
455	✗	for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
456		AVFrame *in;
457
458	✗	ret = ff_inlink_consume_frame(inlink, &in);
459	✗	if (ret < 0)
460	✗	return ret;
461	✗	if (ret == 0)
462	✗	break;
463
464	✗	ff_bufqueue_add(ctx, &s->queue, in);
465
466	✗	for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
467	✗	ChannelContext *cc = &s->cc[ch];
468
469	✗	s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
470		}
471		}
472
473	✗	return 1;
474		}
475
476	✗	static int activate(AVFilterContext *ctx)
477		{
478	✗	AVFilterLink *inlink = ctx->inputs[0];
479	✗	AVFilterLink *outlink = ctx->outputs[0];
480	✗	SpeechNormalizerContext *s = ctx->priv;
481		int ret, status;
482		int64_t pts;
483
484	✗	ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
485	✗	if (ret < 0)
486	✗	return ret;
487	✗	if (strcmp(s->ch_layout_str, "all"))
488	✗	av_channel_layout_from_string(&s->ch_layout,
489	✗	s->ch_layout_str);
490
491	✗	FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
492
493	✗	ret = filter_frame(ctx);
494	✗	if (ret <= 0)
495	✗	return ret;
496
497	✗	if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
498	✗	if (status == AVERROR_EOF)
499	✗	s->eof = 1;
500		}
501
502	✗	if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
503	✗	s->queue.available == 0) {
504	✗	ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
505	✗	return 0;
506		}
507
508	✗	if (s->queue.available > 0) {
509	✗	AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
510	✗	const int nb_samples = available_samples(ctx);
511
512	✗	if (nb_samples >= in->nb_samples || s->eof) {
513	✗	ff_filter_set_ready(ctx, 10);
514	✗	return 0;
515		}
516		}
517
518	✗	FF_FILTER_FORWARD_WANTED(outlink, inlink);
519
520	✗	return FFERROR_NOT_READY;
521		}
522
523	✗	static int config_input(AVFilterLink *inlink)
524		{
525	✗	AVFilterContext *ctx = inlink->dst;
526	✗	SpeechNormalizerContext *s = ctx->priv;
527
528	✗	s->max_period = inlink->sample_rate / 10;
529
530	✗	s->prev_gain = 1.;
531	✗	s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
532	✗	if (!s->cc)
533	✗	return AVERROR(ENOMEM);
534
535	✗	for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
536	✗	ChannelContext *cc = &s->cc[ch];
537
538	✗	cc->state = -1;
539	✗	cc->gain_state = s->max_expansion;
540		}
541
542	✗	switch (inlink->format) {
543	✗	case AV_SAMPLE_FMT_FLTP:
544	✗	s->analyze_channel = analyze_channel_flt;
545	✗	s->filter_channels[0] = filter_channels_flt;
546	✗	s->filter_channels[1] = filter_link_channels_flt;
547	✗	break;
548	✗	case AV_SAMPLE_FMT_DBLP:
549	✗	s->analyze_channel = analyze_channel_dbl;
550	✗	s->filter_channels[0] = filter_channels_dbl;
551	✗	s->filter_channels[1] = filter_link_channels_dbl;
552	✗	break;
553	✗	default:
554		av_assert1(0);
555		}
556
557	✗	return 0;
558		}
559
560	✗	static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
561		char *res, int res_len, int flags)
562		{
563	✗	SpeechNormalizerContext *s = ctx->priv;
564	✗	int link = s->link;
565		int ret;
566
567	✗	ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
568	✗	if (ret < 0)
569	✗	return ret;
570	✗	if (link != s->link)
571	✗	s->prev_gain = 1.;
572
573	✗	return 0;
574		}
575
576	✗	static av_cold void uninit(AVFilterContext *ctx)
577		{
578	✗	SpeechNormalizerContext *s = ctx->priv;
579
580	✗	ff_bufqueue_discard_all(&s->queue);
581	✗	av_channel_layout_uninit(&s->ch_layout);
582	✗	av_freep(&s->cc);
583	✗	}
584
585		static const AVFilterPad inputs[] = {
586		{
587		.name = "default",
588		.type = AVMEDIA_TYPE_AUDIO,
589		.config_props = config_input,
590		},
591		};
592
593		const FFFilter ff_af_speechnorm = {
594		.p.name = "speechnorm",
595		.p.description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
596		.p.priv_class = &speechnorm_class,
597		.p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
598		.priv_size = sizeof(SpeechNormalizerContext),
599		.activate = activate,
600		.uninit = uninit,
601		FILTER_INPUTS(inputs),
602		FILTER_OUTPUTS(ff_audio_default_filterpad),
603		FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
604		.process_command = process_command,
605		};
606

Function (Line)	Call count	Block coverage
activate (line 476)	not called	0.0%
analyze_channel_dbl (line 311)	not called	0.0%
analyze_channel_flt (line 312)	not called	0.0%
available_samples (line 144)	not called	0.0%
config_input (line 523)	not called	0.0%
consume_pi (line 160)	not called	0.0%
dlerp (line 348)	not called	0.0%
filter_channels_dbl (line 345)	not called	0.0%
filter_channels_flt (line 346)	not called	0.0%
filter_frame (line 413)	not called	0.0%
filter_link_channels_dbl (line 410)	not called	0.0%
filter_link_channels_flt (line 411)	not called	0.0%
flerp (line 353)	not called	0.0%
get_pi_samples (line 123)	not called	0.0%
min_gain (line 211)	not called	0.0%
next_gain (line 169)	not called	0.0%
next_pi (line 189)	not called	0.0%
process_command (line 560)	not called	0.0%
uninit (line 576)	not called	0.0%