| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2012 Clément Bœsch <u pkh me> | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | /** | ||
| 22 | * @file | ||
| 23 | * Audio silence detector | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <float.h> /* DBL_MAX */ | ||
| 27 | |||
| 28 | #include "libavutil/mem.h" | ||
| 29 | #include "libavutil/opt.h" | ||
| 30 | #include "libavutil/timestamp.h" | ||
| 31 | #include "audio.h" | ||
| 32 | #include "avfilter.h" | ||
| 33 | #include "filters.h" | ||
| 34 | |||
| 35 | typedef struct SilenceDetectContext { | ||
| 36 | const AVClass *class; | ||
| 37 | double noise; ///< noise amplitude ratio | ||
| 38 | int64_t duration; ///< minimum duration of silence until notification | ||
| 39 | int mono; ///< mono mode : check each channel separately (default = check when ALL channels are silent) | ||
| 40 | int channels; ///< number of channels | ||
| 41 | int independent_channels; ///< number of entries in following arrays (always 1 in mono mode) | ||
| 42 | int64_t *nb_null_samples; ///< (array) current number of continuous zero samples | ||
| 43 | int64_t *start; ///< (array) if silence is detected, this value contains the time of the first zero sample (default/unset = INT64_MIN) | ||
| 44 | int64_t frame_end; ///< pts of the end of the current frame (used to compute duration of silence at EOS) | ||
| 45 | int last_sample_rate; ///< last sample rate to check for sample rate changes | ||
| 46 | AVRational time_base; ///< time_base | ||
| 47 | |||
| 48 | void (*silencedetect)(AVFilterContext *ctx, AVFrame *insamples, | ||
| 49 | int nb_samples, int64_t nb_samples_notify, | ||
| 50 | AVRational time_base); | ||
| 51 | } SilenceDetectContext; | ||
| 52 | |||
| 53 | #define MAX_DURATION (24*3600*1000000LL) | ||
| 54 | #define OFFSET(x) offsetof(SilenceDetectContext, x) | ||
| 55 | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM | ||
| 56 | static const AVOption silencedetect_options[] = { | ||
| 57 | { "n", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS }, | ||
| 58 | { "noise", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS }, | ||
| 59 | { "d", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION,FLAGS }, | ||
| 60 | { "duration", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION,FLAGS }, | ||
| 61 | { "mono", "check each channel separately", OFFSET(mono), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, | ||
| 62 | { "m", "check each channel separately", OFFSET(mono), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, | ||
| 63 | { NULL } | ||
| 64 | }; | ||
| 65 | |||
| 66 | AVFILTER_DEFINE_CLASS(silencedetect); | ||
| 67 | |||
| 68 | 9 | static void set_meta(AVFrame *insamples, int channel, const char *key, char *value) | |
| 69 | { | ||
| 70 | char key2[128]; | ||
| 71 | |||
| 72 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
|
9 | if (channel) |
| 73 | ✗ | snprintf(key2, sizeof(key2), "lavfi.%s.%d", key, channel); | |
| 74 | else | ||
| 75 | 9 | snprintf(key2, sizeof(key2), "lavfi.%s", key); | |
| 76 | 9 | av_dict_set(&insamples->metadata, key2, value, 0); | |
| 77 | 9 | } | |
| 78 | 1048554 | static av_always_inline void update(AVFilterContext *ctx, AVFrame *insamples, | |
| 79 | int is_silence, int current_sample, int64_t nb_samples_notify, | ||
| 80 | AVRational time_base) | ||
| 81 | { | ||
| 82 | 1048554 | SilenceDetectContext *s = ctx->priv; | |
| 83 | 1048554 | int channel = current_sample % s->independent_channels; | |
| 84 |
2/2✓ Branch 0 taken 304774 times.
✓ Branch 1 taken 743780 times.
|
1048554 | if (is_silence) { |
| 85 |
2/2✓ Branch 0 taken 221400 times.
✓ Branch 1 taken 83374 times.
|
304774 | if (s->start[channel] == INT64_MIN) { |
| 86 | 221400 | s->nb_null_samples[channel]++; | |
| 87 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 221397 times.
|
221400 | if (s->nb_null_samples[channel] >= nb_samples_notify) { |
| 88 | 3 | s->start[channel] = insamples->pts + av_rescale_q(current_sample / s->channels + 1 - nb_samples_notify * s->independent_channels / s->channels, | |
| 89 | 3 | (AVRational){ 1, s->last_sample_rate }, time_base); | |
| 90 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_start", |
| 91 | 3 | av_ts2timestr(s->start[channel], &time_base)); | |
| 92 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (s->mono) |
| 93 | ✗ | av_log(ctx, AV_LOG_INFO, "channel: %d | ", channel); | |
| 94 | 3 | av_log(ctx, AV_LOG_INFO, "silence_start: %s\n", | |
| 95 | 3 | av_ts2timestr(s->start[channel], &time_base)); | |
| 96 | } | ||
| 97 | } | ||
| 98 | } else { | ||
| 99 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 743777 times.
|
743780 | if (s->start[channel] > INT64_MIN) { |
| 100 | 6 | int64_t end_pts = insamples ? insamples->pts + av_rescale_q(current_sample / s->channels, | |
| 101 | 3 | (AVRational){ 1, s->last_sample_rate }, time_base) | |
| 102 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | : s->frame_end; |
| 103 | 3 | int64_t duration_ts = end_pts - s->start[channel]; | |
| 104 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (insamples) { |
| 105 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_end", |
| 106 | 3 | av_ts2timestr(end_pts, &time_base)); | |
| 107 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_duration", |
| 108 | 3 | av_ts2timestr(duration_ts, &time_base)); | |
| 109 | } | ||
| 110 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (s->mono) |
| 111 | ✗ | av_log(ctx, AV_LOG_INFO, "channel: %d | ", channel); | |
| 112 | 3 | av_log(ctx, AV_LOG_INFO, "silence_end: %s | silence_duration: %s\n", | |
| 113 | 3 | av_ts2timestr(end_pts, &time_base), | |
| 114 | 3 | av_ts2timestr(duration_ts, &time_base)); | |
| 115 | } | ||
| 116 | 743780 | s->nb_null_samples[channel] = 0; | |
| 117 | 743780 | s->start[channel] = INT64_MIN; | |
| 118 | } | ||
| 119 | 1048554 | } | |
| 120 | |||
/**
 * Define silencedetect_<name>(), the scanner for interleaved samples of the
 * given type. It reads data[0] and classifies nb_samples consecutive values
 * (filter_frame() passes frame samples * channels); a value is silent when
 * it lies strictly between -noise and +noise.
 */
#define SILENCE_DETECT(name, type)                                               \
static void silencedetect_##name(AVFilterContext *ctx, AVFrame *insamples,       \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    SilenceDetectContext *s = ctx->priv;                                         \
    const type *samples = (const type *)insamples->data[0];                      \
    const type noise    = s->noise;                                              \
                                                                                 \
    for (int i = 0; i < nb_samples; i++) {                                       \
        const int silent = samples[i] > -noise && samples[i] < noise;            \
                                                                                 \
        update(ctx, insamples, silent, i, nb_samples_notify, time_base);         \
    }                                                                            \
}
| 135 | |||
/**
 * Define silencedetect_<name>(), the scanner for planar samples of the given
 * type. Each channel is read from its own extended_data[] plane, and samples
 * are handed to update() in the order an interleaved layout would give
 * (index = channels * position + channel).
 */
#define SILENCE_DETECT_PLANAR(name, type)                                        \
static void silencedetect_##name(AVFilterContext *ctx, AVFrame *insamples,       \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    SilenceDetectContext *s = ctx->priv;                                         \
    const int channels    = insamples->ch_layout.nb_channels;                    \
    const int per_channel = nb_samples / channels;                               \
    const type noise      = s->noise;                                            \
                                                                                 \
    for (int i = 0; i < per_channel; i++) {                                      \
        for (int ch = 0; ch < channels; ch++) {                                  \
            const type *plane = (const type *)insamples->extended_data[ch];      \
            const int silent  = plane[i] > -noise && plane[i] < noise;           \
                                                                                 \
            update(ctx, insamples, silent, channels * i + ch,                    \
                   nb_samples_notify, time_base);                                \
        }                                                                        \
    }                                                                            \
}
| 155 | |||
| 156 | ✗ | SILENCE_DETECT(dbl, double) | |
| 157 | ✗ | SILENCE_DETECT(flt, float) | |
| 158 | ✗ | SILENCE_DETECT(s32, int32_t) | |
| 159 |
6/6✓ Branch 0 taken 678862 times.
✓ Branch 1 taken 369692 times.
✓ Branch 2 taken 304774 times.
✓ Branch 3 taken 374088 times.
✓ Branch 5 taken 1048554 times.
✓ Branch 6 taken 12 times.
|
1048566 | SILENCE_DETECT(s16, int16_t) |
| 160 | |||
| 161 | ✗ | SILENCE_DETECT_PLANAR(dblp, double) | |
| 162 | ✗ | SILENCE_DETECT_PLANAR(fltp, float) | |
| 163 | ✗ | SILENCE_DETECT_PLANAR(s32p, int32_t) | |
| 164 | ✗ | SILENCE_DETECT_PLANAR(s16p, int16_t) | |
| 165 | |||
| 166 | 1 | static int config_input(AVFilterLink *inlink) | |
| 167 | { | ||
| 168 | 1 | AVFilterContext *ctx = inlink->dst; | |
| 169 | 1 | SilenceDetectContext *s = ctx->priv; | |
| 170 | int c; | ||
| 171 | |||
| 172 | 1 | s->channels = inlink->ch_layout.nb_channels; | |
| 173 | 1 | s->duration = av_rescale(s->duration, inlink->sample_rate, AV_TIME_BASE); | |
| 174 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | s->independent_channels = s->mono ? s->channels : 1; |
| 175 | 1 | s->nb_null_samples = av_calloc(s->independent_channels, | |
| 176 | sizeof(*s->nb_null_samples)); | ||
| 177 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!s->nb_null_samples) |
| 178 | ✗ | return AVERROR(ENOMEM); | |
| 179 | 1 | s->start = av_malloc_array(s->independent_channels, sizeof(*s->start)); | |
| 180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!s->start) |
| 181 | ✗ | return AVERROR(ENOMEM); | |
| 182 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (c = 0; c < s->independent_channels; c++) |
| 183 | 1 | s->start[c] = INT64_MIN; | |
| 184 | |||
| 185 |
1/9✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
|
1 | switch (inlink->format) { |
| 186 | ✗ | case AV_SAMPLE_FMT_DBL: s->silencedetect = silencedetect_dbl; break; | |
| 187 | ✗ | case AV_SAMPLE_FMT_FLT: s->silencedetect = silencedetect_flt; break; | |
| 188 | ✗ | case AV_SAMPLE_FMT_S32: | |
| 189 | ✗ | s->noise *= INT32_MAX; | |
| 190 | ✗ | s->silencedetect = silencedetect_s32; | |
| 191 | ✗ | break; | |
| 192 | 1 | case AV_SAMPLE_FMT_S16: | |
| 193 | 1 | s->noise *= INT16_MAX; | |
| 194 | 1 | s->silencedetect = silencedetect_s16; | |
| 195 | 1 | break; | |
| 196 | ✗ | case AV_SAMPLE_FMT_DBLP: s->silencedetect = silencedetect_dblp; break; | |
| 197 | ✗ | case AV_SAMPLE_FMT_FLTP: s->silencedetect = silencedetect_fltp; break; | |
| 198 | ✗ | case AV_SAMPLE_FMT_S32P: | |
| 199 | ✗ | s->noise *= INT32_MAX; | |
| 200 | ✗ | s->silencedetect = silencedetect_s32p; | |
| 201 | ✗ | break; | |
| 202 | ✗ | case AV_SAMPLE_FMT_S16P: | |
| 203 | ✗ | s->noise *= INT16_MAX; | |
| 204 | ✗ | s->silencedetect = silencedetect_s16p; | |
| 205 | ✗ | break; | |
| 206 | ✗ | default: | |
| 207 | ✗ | return AVERROR_BUG; | |
| 208 | } | ||
| 209 | |||
| 210 | 1 | return 0; | |
| 211 | } | ||
| 212 | |||
| 213 | 12 | static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) | |
| 214 | { | ||
| 215 | 12 | AVFilterContext *ctx = inlink->dst; | |
| 216 | 12 | SilenceDetectContext *s = ctx->priv; | |
| 217 | 12 | const int nb_channels = inlink->ch_layout.nb_channels; | |
| 218 | 12 | const int srate = inlink->sample_rate; | |
| 219 | 12 | const int nb_samples = insamples->nb_samples * nb_channels; | |
| 220 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | const int64_t nb_samples_notify = s->duration * (s->mono ? 1 : nb_channels); |
| 221 | int c; | ||
| 222 | |||
| 223 | // scale number of null samples to the new sample rate | ||
| 224 |
3/4✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11 times.
|
12 | if (s->last_sample_rate && s->last_sample_rate != srate) |
| 225 | ✗ | for (c = 0; c < s->independent_channels; c++) { | |
| 226 | ✗ | s->nb_null_samples[c] = srate * s->nb_null_samples[c] / s->last_sample_rate; | |
| 227 | } | ||
| 228 | 12 | s->last_sample_rate = srate; | |
| 229 | 12 | s->time_base = inlink->time_base; | |
| 230 | 12 | s->frame_end = insamples->pts + av_rescale_q(insamples->nb_samples, | |
| 231 | 12 | (AVRational){ 1, s->last_sample_rate }, inlink->time_base); | |
| 232 | |||
| 233 | 12 | s->silencedetect(ctx, insamples, nb_samples, nb_samples_notify, | |
| 234 | inlink->time_base); | ||
| 235 | |||
| 236 | 12 | return ff_filter_frame(inlink->dst->outputs[0], insamples); | |
| 237 | } | ||
| 238 | |||
| 239 | 1 | static av_cold void uninit(AVFilterContext *ctx) | |
| 240 | { | ||
| 241 | 1 | SilenceDetectContext *s = ctx->priv; | |
| 242 | int c; | ||
| 243 | |||
| 244 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (c = 0; c < s->independent_channels; c++) |
| 245 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (s->start[c] > INT64_MIN) |
| 246 | ✗ | update(ctx, NULL, 0, c, 0, s->time_base); | |
| 247 | 1 | av_freep(&s->nb_null_samples); | |
| 248 | 1 | av_freep(&s->start); | |
| 249 | 1 | } | |
| 250 | |||
| 251 | static const AVFilterPad silencedetect_inputs[] = { | ||
| 252 | { | ||
| 253 | .name = "default", | ||
| 254 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 255 | .config_props = config_input, | ||
| 256 | .filter_frame = filter_frame, | ||
| 257 | }, | ||
| 258 | }; | ||
| 259 | |||
| 260 | const FFFilter ff_af_silencedetect = { | ||
| 261 | .p.name = "silencedetect", | ||
| 262 | .p.description = NULL_IF_CONFIG_SMALL("Detect silence."), | ||
| 263 | .p.priv_class = &silencedetect_class, | ||
| 264 | .p.flags = AVFILTER_FLAG_METADATA_ONLY, | ||
| 265 | .priv_size = sizeof(SilenceDetectContext), | ||
| 266 | .uninit = uninit, | ||
| 267 | FILTER_INPUTS(silencedetect_inputs), | ||
| 268 | FILTER_OUTPUTS(ff_audio_default_filterpad), | ||
| 269 | FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP, | ||
| 270 | AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ||
| 271 | AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P, | ||
| 272 | AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), | ||
| 273 | }; | ||
| 274 |