LCOV - code coverage report
Current view: top level - src/libavfilter - af_silenceremove.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 152 243 62.6 %
Date: 2017-01-24 04:42:20 Functions: 10 12 83.3 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2001 Heikki Leinonen
       3             :  * Copyright (c) 2001 Chris Bagwell
       4             :  * Copyright (c) 2003 Donnie Smith
       5             :  * Copyright (c) 2014 Paul B Mahol
       6             :  *
       7             :  * This file is part of FFmpeg.
       8             :  *
       9             :  * FFmpeg is free software; you can redistribute it and/or
      10             :  * modify it under the terms of the GNU Lesser General Public
      11             :  * License as published by the Free Software Foundation; either
      12             :  * version 2.1 of the License, or (at your option) any later version.
      13             :  *
      14             :  * FFmpeg is distributed in the hope that it will be useful,
      15             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      17             :  * Lesser General Public License for more details.
      18             :  *
      19             :  * You should have received a copy of the GNU Lesser General Public
      20             :  * License along with FFmpeg; if not, write to the Free Software
      21             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      22             :  */
      23             : 
      24             : #include <float.h> /* DBL_MAX */
      25             : 
      26             : #include "libavutil/opt.h"
      27             : #include "libavutil/timestamp.h"
      28             : #include "audio.h"
      29             : #include "formats.h"
      30             : #include "avfilter.h"
      31             : #include "internal.h"
      32             : 
      33             : enum SilenceMode {
                           /* States of the filter_frame() state machine. */
                           /* Scanning input for audio above start_threshold; such samples are
                            * collected in start_holdoff, everything else is not output. */
      34             :     SILENCE_TRIM,
                           /* Sending the collected start_holdoff samples downstream. */
      35             :     SILENCE_TRIM_FLUSH,
                           /* Copying input to output while watching for the stop condition. */
      36             :     SILENCE_COPY,
                           /* Sending the collected stop_holdoff samples downstream. */
      37             :     SILENCE_COPY_FLUSH,
                           /* Terminal state: filter_frame() only frees its input. */
      38             :     SILENCE_STOP
      39             : };
      40             : 
      41             : typedef struct SilenceRemoveContext {
                           /* Private state of the silenceremove filter. */
      42             :     const AVClass *class;
      43             : 
                           /* Current state of the filter_frame() state machine. */
      44             :     enum SilenceMode mode;
      45             : 
                           /* Start-side options. start_duration is rescaled from AV_TIME_BASE
                            * units to a sample count in config_input(). */
      46             :     int start_periods;
      47             :     int64_t start_duration;
      48             :     double start_threshold;
      49             : 
                           /* Stop-side options. A negative stop_periods is made positive in
                            * init(), which also sets restart. stop_duration is rescaled like
                            * start_duration. */
      50             :     int stop_periods;
      51             :     int64_t stop_duration;
      52             :     double stop_threshold;
      53             : 
                           /* Buffer of interleaved samples held back in SILENCE_TRIM.
                            * _offset is the next element to flush, _end the number stored. */
      54             :     double *start_holdoff;
      55             :     size_t start_holdoff_offset;
      56             :     size_t start_holdoff_end;
                           /* Number of times start_holdoff filled up to start_duration. */
      57             :     int    start_found_periods;
      58             : 
                           /* Same layout as the start_holdoff fields, used in SILENCE_COPY. */
      59             :     double *stop_holdoff;
      60             :     size_t stop_holdoff_offset;
      61             :     size_t stop_holdoff_end;
      62             :     int    stop_found_periods;
      63             : 
                           /* "window" option; config_input() multiplies it by the sample rate
                            * and the channel count to obtain window_size. */
      64             :     double window_ratio;
                           /* Circular buffer of window_size per-sample values (absolute value
                            * or square, depending on the detection callbacks). */
      65             :     double *window;
                           /* Slot that the next update() call overwrites. */
      66             :     double *window_current;
                           /* One past the last slot: window + window_size. */
      67             :     double *window_end;
      68             :     int window_size;
                           /* Running total of the values stored in window. */
      69             :     double sum;
      70             : 
                           /* When set, samples placed in stop_holdoff are also written to the
                            * output frame in SILENCE_COPY. */
      71             :     int leave_silence;
                           /* Set by init() when stop_periods was negative; makes SILENCE_COPY
                            * return to SILENCE_TRIM instead of entering SILENCE_STOP. */
      72             :     int restart;
                           /* NOTE(review): not read or written anywhere in this file. */
      73             :     int64_t next_pts;
      74             : 
                           /* 0 selects the peak callbacks, 1 the rms callbacks (see init()). */
      75             :     int detection;
                           /* Stores a sample in the window and advances window_current. */
      76             :     void (*update)(struct SilenceRemoveContext *s, double sample);
                           /* Returns the window level as if the sample were stored, without
                            * modifying the context. */
      77             :     double(*compute)(struct SilenceRemoveContext *s, double sample);
      78             : } SilenceRemoveContext;
      79             : 
      80             : #define OFFSET(x) offsetof(SilenceRemoveContext, x)
      81             : #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
                       /* User-visible options; each entry maps to a SilenceRemoveContext field. */
      82             : static const AVOption silenceremove_options[] = {
      83             :     { "start_periods",   NULL, OFFSET(start_periods),   AV_OPT_TYPE_INT,      {.i64=0},     0,    9000, FLAGS },
                           /* NOTE(review): the 9000 maximum applies to the stored duration value,
                            * which config_input() rescales from AV_TIME_BASE units to samples;
                            * confirm that such a small limit is intended (same for stop_duration). */
      84             :     { "start_duration",  NULL, OFFSET(start_duration),  AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
      85             :     { "start_threshold", NULL, OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
                           /* Negative values are accepted; init() turns them into a positive
                            * count and enables restart. */
      86             :     { "stop_periods",    NULL, OFFSET(stop_periods),    AV_OPT_TYPE_INT,      {.i64=0}, -9000,    9000, FLAGS },
      87             :     { "stop_duration",   NULL, OFFSET(stop_duration),   AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
      88             :     { "stop_threshold",  NULL, OFFSET(stop_threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
      89             :     { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_BOOL,     {.i64=0},     0,       1, FLAGS },
                           /* Named constants "peak" (0) and "rms" (1); the default is rms. */
      90             :     { "detection",       NULL, OFFSET(detection),       AV_OPT_TYPE_INT,      {.i64=1},     0,       1, FLAGS, "detection" },
      91             :     {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
      92             :     {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
                           /* Multiplied by the sample rate in config_input() to size the window. */
      93             :     { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
      94             :     { NULL }
      95             : };
      96             : 
      97             : AVFILTER_DEFINE_CLASS(silenceremove);
      98             : 
      99           0 : static double compute_peak(SilenceRemoveContext *s, double sample)
     100             : {
                           /* Returns the mean of the window contents as it would be if the slot
                            * at window_current were replaced by fabs(sample). The context is
                            * not modified. */
     101             :     double new_sum;
     102             : 
     103           0 :     new_sum  = s->sum;
     104           0 :     new_sum -= *s->window_current;
     105           0 :     new_sum += fabs(sample);
     106             : 
     107           0 :     return new_sum / s->window_size;
     108             : }
     109             : 
     110           0 : static void update_peak(SilenceRemoveContext *s, double sample)
     111             : {
                           /* Replaces the slot at window_current with fabs(sample), keeping the
                            * running sum in step, then advances window_current. */
     112           0 :     s->sum -= *s->window_current;
     113           0 :     *s->window_current = fabs(sample);
     114           0 :     s->sum += *s->window_current;
     115             : 
     116           0 :     s->window_current++;
                           /* Wrap around: the window is used as a circular buffer. */
     117           0 :     if (s->window_current >= s->window_end)
     118           0 :         s->window_current = s->window;
     119           0 : }
     120             : 
     121        1364 : static double compute_rms(SilenceRemoveContext *s, double sample)
     122             : {
                           /* Returns the square root of the mean of the window contents as it
                            * would be if the slot at window_current were replaced by
                            * sample * sample. The context is not modified. */
     123             :     double new_sum;
     124             : 
     125        1364 :     new_sum  = s->sum;
     126        1364 :     new_sum -= *s->window_current;
     127        1364 :     new_sum += sample * sample;
     128             : 
     129        1364 :     return sqrt(new_sum / s->window_size);
     130             : }
     131             : 
     132        1364 : static void update_rms(SilenceRemoveContext *s, double sample)
     133             : {
                           /* Replaces the slot at window_current with sample * sample, keeping
                            * the running sum in step, then advances window_current. */
     134        1364 :     s->sum -= *s->window_current;
     135        1364 :     *s->window_current = sample * sample;
     136        1364 :     s->sum += *s->window_current;
     137             : 
     138        1364 :     s->window_current++;
                           /* Wrap around: the window is used as a circular buffer. */
     139        1364 :     if (s->window_current >= s->window_end)
     140           0 :         s->window_current = s->window;
     141        1364 : }
     142             : 
     143           1 : static av_cold int init(AVFilterContext *ctx)
     144             : {
                           /* Filter init callback: normalizes stop_periods and selects the
                            * detection callbacks. Always returns 0. */
     145           1 :     SilenceRemoveContext *s = ctx->priv;
     146             : 
                           /* A negative stop_periods requests restart mode; keep the magnitude
                            * as the period count. */
     147           1 :     if (s->stop_periods < 0) {
     148           1 :         s->stop_periods = -s->stop_periods;
     149           1 :         s->restart = 1;
     150             :     }
     151             : 
                           /* 0 = peak, 1 = rms (values of the "detection" option). */
     152           1 :     switch (s->detection) {
     153             :     case 0:
     154           0 :         s->update = update_peak;
     155           0 :         s->compute = compute_peak;
     156           0 :         break;
     157             :     case 1:
     158           1 :         s->update = update_rms;
     159           1 :         s->compute = compute_rms;
     160           1 :         break;
     161             :     };
     162             : 
     163           1 :     return 0;
     164             : }
     165             : 
     166         342 : static void clear_window(SilenceRemoveContext *s)
     167             : {
                           /* Zeroes the detection window and resets its cursor, end pointer and
                            * running sum. s->window must already hold window_size elements. */
     168         342 :     memset(s->window, 0, s->window_size * sizeof(*s->window));
     169             : 
     170         342 :     s->window_current = s->window;
     171         342 :     s->window_end = s->window + s->window_size;
     172         342 :     s->sum = 0;
     173         342 : }
     174             : 
     175           1 : static int config_input(AVFilterLink *inlink)
     176             : {
                           /* Input pad config_props callback: sizes and allocates the detection
                            * window and both holdoff buffers from the link's sample rate and
                            * channel count, and picks the initial mode.
                            * Returns 0 on success or AVERROR(ENOMEM) if an allocation fails. */
     177           1 :     AVFilterContext *ctx = inlink->dst;
     178           1 :     SilenceRemoveContext *s = ctx->priv;
     179             : 
                           /* At least one slot per channel, even for a tiny window_ratio. */
     180           1 :     s->window_size = FFMAX((inlink->sample_rate * s->window_ratio), 1) * inlink->channels;
     181           1 :     s->window = av_malloc_array(s->window_size, sizeof(*s->window));
     182           1 :     if (!s->window)
     183           0 :         return AVERROR(ENOMEM);
     184             : 
     185           1 :     clear_window(s);
     186             : 
                           /* Convert both durations from AV_TIME_BASE units to sample counts.
                            * The fields are overwritten in place. */
     187           1 :     s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
     188             :                                    AV_TIME_BASE);
     189           1 :     s->stop_duration  = av_rescale(s->stop_duration, inlink->sample_rate,
     190             :                                    AV_TIME_BASE);
     191             : 
                           /* Room for max(start_duration, 1) samples per channel. */
     192           1 :     s->start_holdoff = av_malloc_array(FFMAX(s->start_duration, 1),
     193             :                                        sizeof(*s->start_holdoff) *
     194           1 :                                        inlink->channels);
     195           1 :     if (!s->start_holdoff)
     196           0 :         return AVERROR(ENOMEM);
     197             : 
     198           1 :     s->start_holdoff_offset = 0;
     199           1 :     s->start_holdoff_end    = 0;
     200           1 :     s->start_found_periods  = 0;
     201             : 
                           /* Room for max(stop_duration, 1) samples per channel. */
     202           1 :     s->stop_holdoff = av_malloc_array(FFMAX(s->stop_duration, 1),
     203             :                                       sizeof(*s->stop_holdoff) *
     204           1 :                                       inlink->channels);
     205           1 :     if (!s->stop_holdoff)
     206           0 :         return AVERROR(ENOMEM);
     207             : 
     208           1 :     s->stop_holdoff_offset = 0;
     209           1 :     s->stop_holdoff_end    = 0;
     210           1 :     s->stop_found_periods  = 0;
     211             : 
                           /* Without start_periods there is nothing to trim: copy right away. */
     212           1 :     if (s->start_periods)
     213           0 :         s->mode = SILENCE_TRIM;
     214             :     else
     215           1 :         s->mode = SILENCE_COPY;
     216             : 
     217           1 :     return 0;
     218             : }
     219             : 
     220         341 : static void flush(AVFrame *out, AVFilterLink *outlink,
     221             :                   int *nb_samples_written, int *ret)
     222             : {
                           /* Finishes an output frame that filter_frame() was filling.
                            * *nb_samples_written counts interleaved values, so it is divided by
                            * the channel count to get out->nb_samples. If it is non-zero the
                            * frame is passed to ff_filter_frame(), the result is stored in *ret
                            * and the counter is reset. Otherwise the frame is freed and *ret is
                            * left untouched. */
     223         341 :     if (*nb_samples_written) {
     224           0 :         out->nb_samples = *nb_samples_written / outlink->channels;
     225           0 :         *ret = ff_filter_frame(outlink, out);
     226           0 :         *nb_samples_written = 0;
     227             :     } else {
     228         341 :         av_frame_free(&out);
     229             :     }
     230         341 : }
     231             : 
     232           1 : static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     233             : {
                           /* Input pad filter_frame callback. Consumes one input frame and
                            * drives the SilenceMode state machine over it; the labels below let
                            * one call move through several states via goto. The input frame is
                            * always freed before returning.
                            * Returns the result of the last ff_filter_frame() call that stored
                            * one (0 if none did), or AVERROR(ENOMEM) if an output buffer could
                            * not be allocated.
                            * nb_samples_read and nb_samples_written count interleaved values
                            * (frames * channels), not frames. */
     234           1 :     AVFilterContext *ctx = inlink->dst;
     235           1 :     AVFilterLink *outlink = ctx->outputs[0];
     236           1 :     SilenceRemoveContext *s = ctx->priv;
     237           1 :     int i, j, threshold, ret = 0;
     238             :     int nbs, nb_samples_read, nb_samples_written;
                           /* data[0] is read as doubles; query_formats() only lists
                            * AV_SAMPLE_FMT_DBL. */
     239           1 :     double *obuf, *ibuf = (double *)in->data[0];
     240             :     AVFrame *out;
     241             : 
     242           1 :     nb_samples_read = nb_samples_written = 0;
     243             : 
     244           1 :     switch (s->mode) {
     245             :     case SILENCE_TRIM:
     246             : silence_trim:
                               /* Frames of the input not consumed yet. */
     247         341 :         nbs = in->nb_samples - nb_samples_read / inlink->channels;
     248         341 :         if (!nbs)
     249           0 :             break;
     250             : 
     251         341 :         for (i = 0; i < nbs; i++) {
                                   /* Set if any channel's level exceeds start_threshold. */
     252         341 :             threshold = 0;
     253        1023 :             for (j = 0; j < inlink->channels; j++) {
     254         682 :                 threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
     255             :             }
     256             : 
     257         341 :             if (threshold) {
                                       /* Above threshold: record in the window and hold the
                                        * frame back in start_holdoff. */
     258        1023 :                 for (j = 0; j < inlink->channels; j++) {
     259         682 :                     s->update(s, *ibuf);
     260         682 :                     s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
     261             :                 }
     262         341 :                 nb_samples_read += inlink->channels;
     263             : 
                                       /* Held start_duration samples per channel: one period found. */
     264         341 :                 if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
     265         341 :                     if (++s->start_found_periods >= s->start_periods) {
     266         341 :                         s->mode = SILENCE_TRIM_FLUSH;
     267         341 :                         goto silence_trim_flush;
     268             :                     }
     269             : 
                                           /* More periods required: discard what was held. */
     270           0 :                     s->start_holdoff_offset = 0;
     271           0 :                     s->start_holdoff_end = 0;
     272             :                 }
     273             :             } else {
                                       /* Not above threshold: drop anything held so far and skip
                                        * the frame, still feeding it to the window. */
     274           0 :                 s->start_holdoff_end = 0;
     275             : 
     276           0 :                 for (j = 0; j < inlink->channels; j++)
     277           0 :                     s->update(s, ibuf[j]);
     278             : 
     279           0 :                 ibuf += inlink->channels;
     280           0 :                 nb_samples_read += inlink->channels;
     281             :             }
     282             :         }
     283           0 :         break;
     284             : 
     285             :     case SILENCE_TRIM_FLUSH:
     286             : silence_trim_flush:
                               /* Pending start_holdoff values, rounded down to whole frames. */
     287         341 :         nbs  = s->start_holdoff_end - s->start_holdoff_offset;
     288         341 :         nbs -= nbs % inlink->channels;
     289         341 :         if (!nbs)
     290           0 :             break;
     291             : 
     292         341 :         out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
     293         341 :         if (!out) {
     294           0 :             av_frame_free(&in);
     295           0 :             return AVERROR(ENOMEM);
     296             :         }
     297             : 
     298         341 :         memcpy(out->data[0], &s->start_holdoff[s->start_holdoff_offset],
     299             :                nbs * sizeof(double));
     300         341 :         s->start_holdoff_offset += nbs;
     301             : 
     302         341 :         ret = ff_filter_frame(outlink, out);
     303             : 
                               /* Buffer drained: reset it and continue with the rest of the
                                * input in copy mode. */
     304         341 :         if (s->start_holdoff_offset == s->start_holdoff_end) {
     305         341 :             s->start_holdoff_offset = 0;
     306         341 :             s->start_holdoff_end = 0;
     307         341 :             s->mode = SILENCE_COPY;
     308         341 :             goto silence_copy;
     309             :         }
     310           0 :         break;
     311             : 
     312             :     case SILENCE_COPY:
     313             : silence_copy:
                               /* Frames of the input not consumed yet. */
     314         342 :         nbs = in->nb_samples - nb_samples_read / inlink->channels;
     315         342 :         if (!nbs)
     316           1 :             break;
     317             : 
                               /* Output frame sized for all remaining input; flush() trims
                                * nb_samples to what was actually written. */
     318         341 :         out = ff_get_audio_buffer(inlink, nbs);
     319         341 :         if (!out) {
     320           0 :             av_frame_free(&in);
     321           0 :             return AVERROR(ENOMEM);
     322             :         }
     323         341 :         obuf = (double *)out->data[0];
     324             : 
     325         341 :         if (s->stop_periods) {
     326         341 :             for (i = 0; i < nbs; i++) {
                                       /* Stays set only if every channel exceeds stop_threshold. */
     327         341 :                 threshold = 1;
     328        1023 :                 for (j = 0; j < inlink->channels; j++)
     329         682 :                     threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
     330             : 
     331         341 :                 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
                                           /* Above threshold again while samples are held: send
                                            * the output so far, then the held samples. */
     332           0 :                     s->mode = SILENCE_COPY_FLUSH;
     333           0 :                     flush(out, outlink, &nb_samples_written, &ret);
     334           0 :                     goto silence_copy_flush;
     335         341 :                 } else if (threshold) {
                                           /* Above threshold: copy the frame through. */
     336           0 :                     for (j = 0; j < inlink->channels; j++) {
     337           0 :                         s->update(s, *ibuf);
     338           0 :                         *obuf++ = *ibuf++;
     339             :                     }
     340           0 :                     nb_samples_read    += inlink->channels;
     341           0 :                     nb_samples_written += inlink->channels;
     342         341 :                 } else if (!threshold) {
                                           /* Not above threshold: hold the frame in stop_holdoff
                                            * and, with leave_silence, also write it out. */
     343        1023 :                     for (j = 0; j < inlink->channels; j++) {
     344         682 :                         s->update(s, *ibuf);
     345         682 :                         if (s->leave_silence) {
     346           0 :                             *obuf++ = *ibuf;
     347           0 :                             nb_samples_written++;
     348             :                         }
     349             : 
     350         682 :                         s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
     351             :                     }
     352         341 :                     nb_samples_read += inlink->channels;
     353             : 
                                           /* Held stop_duration samples per channel: one period found. */
     354         341 :                     if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) {
     355         341 :                         if (++s->stop_found_periods >= s->stop_periods) {
                                                   /* Enough periods: the held samples are dropped. */
     356         341 :                             s->stop_holdoff_offset = 0;
     357         341 :                             s->stop_holdoff_end = 0;
     358             : 
     359         341 :                             if (!s->restart) {
     360           0 :                                 s->mode = SILENCE_STOP;
     361           0 :                                 flush(out, outlink, &nb_samples_written, &ret);
     362           0 :                                 goto silence_stop;
     363             :                             } else {
                                                       /* Restart mode: reset all detection state and
                                                        * go back to trimming the rest of the input. */
     364         341 :                                 s->stop_found_periods = 0;
     365         341 :                                 s->start_found_periods = 0;
     366         341 :                                 s->start_holdoff_offset = 0;
     367         341 :                                 s->start_holdoff_end = 0;
     368         341 :                                 clear_window(s);
     369         341 :                                 s->mode = SILENCE_TRIM;
     370         341 :                                 flush(out, outlink, &nb_samples_written, &ret);
     371         341 :                                 goto silence_trim;
     372             :                             }
     373             :                         }
                                               /* More periods required: send the held samples. */
     374           0 :                         s->mode = SILENCE_COPY_FLUSH;
     375           0 :                         flush(out, outlink, &nb_samples_written, &ret);
     376           0 :                         goto silence_copy_flush;
     377             :                     }
     378             :                 }
     379             :             }
     380           0 :             flush(out, outlink, &nb_samples_written, &ret);
     381             :         } else {
                                   /* No stop detection configured: pass the rest through as is. */
     382           0 :             memcpy(obuf, ibuf, sizeof(double) * nbs * inlink->channels);
     383           0 :             ret = ff_filter_frame(outlink, out);
     384             :         }
     385           0 :         break;
     386             : 
     387             :     case SILENCE_COPY_FLUSH:
     388             : silence_copy_flush:
                               /* Pending stop_holdoff values, rounded down to whole frames. */
     389           0 :         nbs  = s->stop_holdoff_end - s->stop_holdoff_offset;
     390           0 :         nbs -= nbs % inlink->channels;
     391           0 :         if (!nbs)
     392           0 :             break;
     393             : 
     394           0 :         out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
     395           0 :         if (!out) {
     396           0 :             av_frame_free(&in);
     397           0 :             return AVERROR(ENOMEM);
     398             :         }
     399             : 
     400           0 :         memcpy(out->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
     401             :                nbs * sizeof(double));
     402           0 :         s->stop_holdoff_offset += nbs;
     403             : 
     404           0 :         ret = ff_filter_frame(outlink, out);
     405             : 
                               /* Buffer drained: reset it and resume copying the input. */
     406           0 :         if (s->stop_holdoff_offset == s->stop_holdoff_end) {
     407           0 :             s->stop_holdoff_offset = 0;
     408           0 :             s->stop_holdoff_end = 0;
     409           0 :             s->mode = SILENCE_COPY;
     410           0 :             goto silence_copy;
     411             :         }
     412           0 :         break;
     413             :     case SILENCE_STOP:
     414             : silence_stop:
                               /* Nothing more is output; the input is just freed below. */
     415           0 :         break;
     416             :     }
     417             : 
     418           1 :     av_frame_free(&in);
     419             : 
     420           1 :     return ret;
     421             : }
     422             : 
     423           1 : static int request_frame(AVFilterLink *outlink)
     424             : {
                           /* Output pad request_frame callback. Forwards the request to the
                            * input; when the input reports AVERROR_EOF while the filter is in
                            * SILENCE_COPY or SILENCE_COPY_FLUSH, any samples still held in
                            * stop_holdoff are sent downstream and the filter enters
                            * SILENCE_STOP.
                            * Returns the result of ff_request_frame(), replaced by the result of
                            * ff_filter_frame() if a final frame was sent, or AVERROR(ENOMEM) if
                            * that frame could not be allocated. */
     425           1 :     AVFilterContext *ctx = outlink->src;
     426           1 :     SilenceRemoveContext *s = ctx->priv;
     427             :     int ret;
     428             : 
     429           1 :     ret = ff_request_frame(ctx->inputs[0]);
     430           1 :     if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
     431           0 :                                s->mode == SILENCE_COPY)) {
                               /* Interleaved values still pending in stop_holdoff. */
     432           0 :         int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
     433           0 :         if (nbs) {
     434             :             AVFrame *frame;
     435             : 
     436           0 :             frame = ff_get_audio_buffer(outlink, nbs / outlink->channels);
     437           0 :             if (!frame)
     438           0 :                 return AVERROR(ENOMEM);
     439             : 
     440           0 :             memcpy(frame->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
     441             :                    nbs * sizeof(double));
                                   /* Send on the output link. Using ctx->inputs[0] here would
                                    * hand the frame back to this filter's own input pad instead
                                    * of the next filter. */
     442           0 :             ret = ff_filter_frame(outlink, frame);
     443             :         }
     444           0 :         s->mode = SILENCE_STOP;
     445             :     }
     446           1 :     return ret;
     447             : }
     448             : 
     449           1 : static int query_formats(AVFilterContext *ctx)
     450             : {
                           /* Format negotiation callback. Registers, in order: the list from
                            * ff_all_channel_counts(), the single sample format
                            * AV_SAMPLE_FMT_DBL, and the list from ff_all_samplerates().
                            * Returns AVERROR(ENOMEM) if a list could not be created, otherwise
                            * the first negative result of an ff_set_common_*() call or the
                            * result of the last one. */
     451           1 :     AVFilterFormats *formats = NULL;
     452           1 :     AVFilterChannelLayouts *layouts = NULL;
     453             :     static const enum AVSampleFormat sample_fmts[] = {
     454             :         AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE
     455             :     };
     456             :     int ret;
     457             : 
     458           1 :     layouts = ff_all_channel_counts();
     459           1 :     if (!layouts)
     460           0 :         return AVERROR(ENOMEM);
     461           1 :     ret = ff_set_common_channel_layouts(ctx, layouts);
     462           1 :     if (ret < 0)
     463           0 :         return ret;
     464             : 
     465           1 :     formats = ff_make_format_list(sample_fmts);
     466           1 :     if (!formats)
     467           0 :         return AVERROR(ENOMEM);
     468           1 :     ret = ff_set_common_formats(ctx, formats);
     469           1 :     if (ret < 0)
     470           0 :         return ret;
     471             : 
     472           1 :     formats = ff_all_samplerates();
     473           1 :     if (!formats)
     474           0 :         return AVERROR(ENOMEM);
     475           1 :     return ff_set_common_samplerates(ctx, formats);
     476             : }
     477             : 
     478           1 : static av_cold void uninit(AVFilterContext *ctx)
     479             : {
                           /* Filter uninit callback: releases the three buffers allocated in
                            * config_input(). */
     480           1 :     SilenceRemoveContext *s = ctx->priv;
     481             : 
     482           1 :     av_freep(&s->start_holdoff);
     483           1 :     av_freep(&s->stop_holdoff);
     484           1 :     av_freep(&s->window);
     485           1 : }
     486             : 
     487             : static const AVFilterPad silenceremove_inputs[] = {
                           /* Single audio input pad: config_input() runs as config_props and
                            * filter_frame() receives each frame. */
     488             :     {
     489             :         .name         = "default",
     490             :         .type         = AVMEDIA_TYPE_AUDIO,
     491             :         .config_props = config_input,
     492             :         .filter_frame = filter_frame,
     493             :     },
     494             :     { NULL }
     495             : };
     496             : 
     497             : static const AVFilterPad silenceremove_outputs[] = {
                           /* Single audio output pad; request_frame() handles the flush of
                            * held samples at end of input. */
     498             :     {
     499             :         .name          = "default",
     500             :         .type          = AVMEDIA_TYPE_AUDIO,
     501             :         .request_frame = request_frame,
     502             :     },
     503             :     { NULL }
     504             : };
     505             : 
     506             : AVFilter ff_af_silenceremove = {
                           /* Filter definition: ties the "silenceremove" name to the private
                            * context, option class, callbacks and pads defined in this file. */
     507             :     .name          = "silenceremove",
     508             :     .description   = NULL_IF_CONFIG_SMALL("Remove silence."),
     509             :     .priv_size     = sizeof(SilenceRemoveContext),
     510             :     .priv_class    = &silenceremove_class,
     511             :     .init          = init,
     512             :     .uninit        = uninit,
     513             :     .query_formats = query_formats,
     514             :     .inputs        = silenceremove_inputs,
     515             :     .outputs       = silenceremove_outputs,
     516             : };

Generated by: LCOV version 1.12