Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2012 Clément Bœsch <u pkh me> | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file | ||
23 | * Audio silence detector | ||
24 | */ | ||
25 | |||
26 | #include <float.h> /* DBL_MAX */ | ||
27 | |||
28 | #include "libavutil/mem.h" | ||
29 | #include "libavutil/opt.h" | ||
30 | #include "libavutil/timestamp.h" | ||
31 | #include "audio.h" | ||
32 | #include "avfilter.h" | ||
33 | #include "filters.h" | ||
34 | |||
/**
 * Private state of the silencedetect filter.
 *
 * The option-backed fields (noise, duration, mono) are filled in through the
 * AVOption table; the remaining fields are set up in config_input() and
 * updated for every frame in filter_frame() / update().
 */
typedef struct SilenceDetectContext {
    const AVClass *class;
    double noise;               ///< noise tolerance; config_input() multiplies it by INT16_MAX / INT32_MAX for integer sample formats
    int64_t duration;           ///< minimum duration of silence until notification; set by the option, then converted to a number of samples in config_input()
    int mono;                   ///< mono mode: check each channel separately (default = check when ALL channels are silent)
    int channels;               ///< number of channels of the input link
    int independent_channels;   ///< number of entries in the following arrays (number of channels in mono mode, 1 otherwise)
    int64_t *nb_null_samples;   ///< (array) current number of continuous samples below the noise tolerance
    int64_t *start;             ///< (array) if silence is detected, the timestamp of its first sample (default/unset = INT64_MIN)
    int64_t frame_end;          ///< pts of the end of the current frame (used to compute duration of silence at EOS)
    int last_sample_rate;       ///< sample rate of the last filtered frame, used to detect sample rate changes (0 until the first frame)
    AVRational time_base;       ///< time base of the input link, saved for use in uninit()

    /** Per-sample-format scanning routine, selected in config_input(). */
    void (*silencedetect)(struct SilenceDetectContext *s, AVFrame *insamples,
                          int nb_samples, int64_t nb_samples_notify,
                          AVRational time_base);
} SilenceDetectContext;
52 | |||
/* Upper bound accepted for the "duration" option: 24 * 3600 * 1000000,
 * i.e. 24 hours if the option value is in microseconds (it is rescaled with
 * AV_TIME_BASE in config_input()). */
#define MAX_DURATION (24*3600*1000000LL)
#define OFFSET(x) offsetof(SilenceDetectContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
/* User options. Every option is listed twice, once under its short and once
 * under its long name; both entries write to the same context field. */
static const AVOption silencedetect_options[] = {
    { "n",         "set noise tolerance",              OFFSET(noise),     AV_OPT_TYPE_DOUBLE, {.dbl=0.001},     0, DBL_MAX,  FLAGS },
    { "noise",     "set noise tolerance",              OFFSET(noise),     AV_OPT_TYPE_DOUBLE, {.dbl=0.001},     0, DBL_MAX,  FLAGS },
    { "d",         "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION,FLAGS },
    { "duration",  "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION,FLAGS },
    { "mono",      "check each channel separately",    OFFSET(mono),      AV_OPT_TYPE_BOOL,   {.i64=0},         0, 1,        FLAGS },
    { "m",         "check each channel separately",    OFFSET(mono),      AV_OPT_TYPE_BOOL,   {.i64=0},         0, 1,        FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(silencedetect);
67 | |||
68 | 9 | static void set_meta(AVFrame *insamples, int channel, const char *key, char *value) | |
69 | { | ||
70 | char key2[128]; | ||
71 | |||
72 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
|
9 | if (channel) |
73 | ✗ | snprintf(key2, sizeof(key2), "lavfi.%s.%d", key, channel); | |
74 | else | ||
75 | 9 | snprintf(key2, sizeof(key2), "lavfi.%s", key); | |
76 | 9 | av_dict_set(&insamples->metadata, key2, value, 0); | |
77 | 9 | } | |
78 | 1048554 | static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples, | |
79 | int is_silence, int current_sample, int64_t nb_samples_notify, | ||
80 | AVRational time_base) | ||
81 | { | ||
82 | 1048554 | int channel = current_sample % s->independent_channels; | |
83 |
2/2✓ Branch 0 taken 304774 times.
✓ Branch 1 taken 743780 times.
|
1048554 | if (is_silence) { |
84 |
2/2✓ Branch 0 taken 221400 times.
✓ Branch 1 taken 83374 times.
|
304774 | if (s->start[channel] == INT64_MIN) { |
85 | 221400 | s->nb_null_samples[channel]++; | |
86 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 221397 times.
|
221400 | if (s->nb_null_samples[channel] >= nb_samples_notify) { |
87 | 3 | s->start[channel] = insamples->pts + av_rescale_q(current_sample / s->channels + 1 - nb_samples_notify * s->independent_channels / s->channels, | |
88 | 3 | (AVRational){ 1, s->last_sample_rate }, time_base); | |
89 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_start", |
90 | 3 | av_ts2timestr(s->start[channel], &time_base)); | |
91 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (s->mono) |
92 | ✗ | av_log(s, AV_LOG_INFO, "channel: %d | ", channel); | |
93 | 3 | av_log(s, AV_LOG_INFO, "silence_start: %s\n", | |
94 | 3 | av_ts2timestr(s->start[channel], &time_base)); | |
95 | } | ||
96 | } | ||
97 | } else { | ||
98 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 743777 times.
|
743780 | if (s->start[channel] > INT64_MIN) { |
99 | 6 | int64_t end_pts = insamples ? insamples->pts + av_rescale_q(current_sample / s->channels, | |
100 | 3 | (AVRational){ 1, s->last_sample_rate }, time_base) | |
101 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | : s->frame_end; |
102 | 3 | int64_t duration_ts = end_pts - s->start[channel]; | |
103 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (insamples) { |
104 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_end", |
105 | 3 | av_ts2timestr(end_pts, &time_base)); | |
106 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | set_meta(insamples, s->mono ? channel + 1 : 0, "silence_duration", |
107 | 3 | av_ts2timestr(duration_ts, &time_base)); | |
108 | } | ||
109 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (s->mono) |
110 | ✗ | av_log(s, AV_LOG_INFO, "channel: %d | ", channel); | |
111 | 3 | av_log(s, AV_LOG_INFO, "silence_end: %s | silence_duration: %s\n", | |
112 | 3 | av_ts2timestr(end_pts, &time_base), | |
113 | 3 | av_ts2timestr(duration_ts, &time_base)); | |
114 | } | ||
115 | 743780 | s->nb_null_samples[channel] = 0; | |
116 | 743780 | s->start[channel] = INT64_MIN; | |
117 | } | ||
118 | 1048554 | } | |
119 | |||
/*
 * Generate silencedetect_<name>() for an interleaved sample format whose
 * samples have the C type `type`. All nb_samples values are read in order
 * from insamples->data[0]; a sample counts as silent when it lies strictly
 * between -noise and +noise, with s->noise converted to `type`.
 */
#define SILENCE_DETECT(name, type)                                               \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,    \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    const type *samples   = (const type *)insamples->data[0];                    \
    const type  threshold = s->noise;                                            \
                                                                                 \
    for (int idx = 0; idx < nb_samples; idx++) {                                 \
        const int silent = samples[idx] <  threshold &&                          \
                           samples[idx] > -threshold;                            \
                                                                                 \
        update(s, insamples, silent, idx, nb_samples_notify, time_base);         \
    }                                                                            \
}
133 | |||
/*
 * Generate silencedetect_<name>() for a planar sample format whose samples
 * have the C type `type`. nb_samples is the total over all channels; the
 * planes in insamples->extended_data[] are walked sample by sample, channel
 * by channel, so update() receives the same indices (channels * i + ch) it
 * would get for interleaved data.
 */
#define SILENCE_DETECT_PLANAR(name, type)                                        \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,    \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    const int  nb_planes = insamples->ch_layout.nb_channels;                     \
    const int  per_plane = nb_samples / nb_planes;                               \
    const type threshold = s->noise;                                             \
    int flat_idx = 0; /* equals nb_planes * n + ch inside the loops */           \
                                                                                 \
    for (int n = 0; n < per_plane; n++) {                                        \
        for (int ch = 0; ch < nb_planes; ch++, flat_idx++) {                     \
            const type value = ((const type *)insamples->extended_data[ch])[n];  \
                                                                                 \
            update(s, insamples, value < threshold && value > -threshold,        \
                   flat_idx, nb_samples_notify, time_base);                      \
        }                                                                        \
    }                                                                            \
}
152 | |||
/* Scanners for interleaved formats: every sample is read from data[0]. */
SILENCE_DETECT(dbl, double)
SILENCE_DETECT(flt, float)
SILENCE_DETECT(s32, int32_t)
SILENCE_DETECT(s16, int16_t)

/* Scanners for planar formats: one plane per channel in extended_data[]. */
SILENCE_DETECT_PLANAR(dblp, double)
SILENCE_DETECT_PLANAR(fltp, float)
SILENCE_DETECT_PLANAR(s32p, int32_t)
SILENCE_DETECT_PLANAR(s16p, int16_t)
162 | |||
163 | 1 | static int config_input(AVFilterLink *inlink) | |
164 | { | ||
165 | 1 | AVFilterContext *ctx = inlink->dst; | |
166 | 1 | SilenceDetectContext *s = ctx->priv; | |
167 | int c; | ||
168 | |||
169 | 1 | s->channels = inlink->ch_layout.nb_channels; | |
170 | 1 | s->duration = av_rescale(s->duration, inlink->sample_rate, AV_TIME_BASE); | |
171 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | s->independent_channels = s->mono ? s->channels : 1; |
172 | 1 | s->nb_null_samples = av_calloc(s->independent_channels, | |
173 | sizeof(*s->nb_null_samples)); | ||
174 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!s->nb_null_samples) |
175 | ✗ | return AVERROR(ENOMEM); | |
176 | 1 | s->start = av_malloc_array(sizeof(*s->start), s->independent_channels); | |
177 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!s->start) |
178 | ✗ | return AVERROR(ENOMEM); | |
179 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (c = 0; c < s->independent_channels; c++) |
180 | 1 | s->start[c] = INT64_MIN; | |
181 | |||
182 |
1/9✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
|
1 | switch (inlink->format) { |
183 | ✗ | case AV_SAMPLE_FMT_DBL: s->silencedetect = silencedetect_dbl; break; | |
184 | ✗ | case AV_SAMPLE_FMT_FLT: s->silencedetect = silencedetect_flt; break; | |
185 | ✗ | case AV_SAMPLE_FMT_S32: | |
186 | ✗ | s->noise *= INT32_MAX; | |
187 | ✗ | s->silencedetect = silencedetect_s32; | |
188 | ✗ | break; | |
189 | 1 | case AV_SAMPLE_FMT_S16: | |
190 | 1 | s->noise *= INT16_MAX; | |
191 | 1 | s->silencedetect = silencedetect_s16; | |
192 | 1 | break; | |
193 | ✗ | case AV_SAMPLE_FMT_DBLP: s->silencedetect = silencedetect_dblp; break; | |
194 | ✗ | case AV_SAMPLE_FMT_FLTP: s->silencedetect = silencedetect_fltp; break; | |
195 | ✗ | case AV_SAMPLE_FMT_S32P: | |
196 | ✗ | s->noise *= INT32_MAX; | |
197 | ✗ | s->silencedetect = silencedetect_s32p; | |
198 | ✗ | break; | |
199 | ✗ | case AV_SAMPLE_FMT_S16P: | |
200 | ✗ | s->noise *= INT16_MAX; | |
201 | ✗ | s->silencedetect = silencedetect_s16p; | |
202 | ✗ | break; | |
203 | ✗ | default: | |
204 | ✗ | return AVERROR_BUG; | |
205 | } | ||
206 | |||
207 | 1 | return 0; | |
208 | } | ||
209 | |||
210 | 12 | static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) | |
211 | { | ||
212 | 12 | SilenceDetectContext *s = inlink->dst->priv; | |
213 | 12 | const int nb_channels = inlink->ch_layout.nb_channels; | |
214 | 12 | const int srate = inlink->sample_rate; | |
215 | 12 | const int nb_samples = insamples->nb_samples * nb_channels; | |
216 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | const int64_t nb_samples_notify = s->duration * (s->mono ? 1 : nb_channels); |
217 | int c; | ||
218 | |||
219 | // scale number of null samples to the new sample rate | ||
220 |
3/4✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11 times.
|
12 | if (s->last_sample_rate && s->last_sample_rate != srate) |
221 | ✗ | for (c = 0; c < s->independent_channels; c++) { | |
222 | ✗ | s->nb_null_samples[c] = srate * s->nb_null_samples[c] / s->last_sample_rate; | |
223 | } | ||
224 | 12 | s->last_sample_rate = srate; | |
225 | 12 | s->time_base = inlink->time_base; | |
226 | 12 | s->frame_end = insamples->pts + av_rescale_q(insamples->nb_samples, | |
227 | 12 | (AVRational){ 1, s->last_sample_rate }, inlink->time_base); | |
228 | |||
229 | 12 | s->silencedetect(s, insamples, nb_samples, nb_samples_notify, | |
230 | inlink->time_base); | ||
231 | |||
232 | 12 | return ff_filter_frame(inlink->dst->outputs[0], insamples); | |
233 | } | ||
234 | |||
235 | 1 | static av_cold void uninit(AVFilterContext *ctx) | |
236 | { | ||
237 | 1 | SilenceDetectContext *s = ctx->priv; | |
238 | int c; | ||
239 | |||
240 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (c = 0; c < s->independent_channels; c++) |
241 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (s->start[c] > INT64_MIN) |
242 | ✗ | update(s, NULL, 0, c, 0, s->time_base); | |
243 | 1 | av_freep(&s->nb_null_samples); | |
244 | 1 | av_freep(&s->start); | |
245 | 1 | } | |
246 | |||
/* Single audio input pad: config_input() runs when the link is configured,
 * filter_frame() for every incoming frame. */
static const AVFilterPad silencedetect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
};
255 | |||
/* Filter definition. The sample formats listed here are exactly those that
 * config_input() has a scanning routine for. */
const FFFilter ff_af_silencedetect = {
    .p.name        = "silencedetect",
    .p.description = NULL_IF_CONFIG_SMALL("Detect silence."),
    .p.priv_class  = &silencedetect_class,
    .p.flags       = AVFILTER_FLAG_METADATA_ONLY,
    .priv_size     = sizeof(SilenceDetectContext),
    .uninit        = uninit,
    FILTER_INPUTS(silencedetect_inputs),
    FILTER_OUTPUTS(ff_audio_default_filterpad),
    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP,
                      AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                      AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P,
                      AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
};
270 |