FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_silenceremove.c
Date: 2022-12-05 03:11:11
Coverage summary (Exec / Total / Coverage):
Lines: 257 / 541 (47.5%)
Functions: 10 / 27 (37.0%)
Branches: 76 / 159 (47.8%)

Line Branch Exec Source
1 /*
2 * Copyright (c) 2001 Heikki Leinonen
3 * Copyright (c) 2001 Chris Bagwell
4 * Copyright (c) 2003 Donnie Smith
5 * Copyright (c) 2014 Paul B Mahol
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <float.h> /* DBL_MAX */
25
26 #include "libavutil/audio_fifo.h"
27 #include "libavutil/avassert.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/timestamp.h"
30 #include "audio.h"
31 #include "formats.h"
32 #include "avfilter.h"
33 #include "internal.h"
34
/* Silence detection method, selected through the "detection" option. */
35 enum SilenceDetect {
/* "peak": the detection window accumulates absolute sample values. */
36 D_PEAK,
/* "rms": the detection window accumulates squared sample values. */
37 D_RMS,
38 };
39
/* How per-channel threshold results are combined (start_mode / stop_mode). */
40 enum ThresholdMode {
/* Per-channel comparisons are OR-ed: one channel above threshold is enough. */
41 T_ANY,
/* Per-channel comparisons are AND-ed: every channel must be above threshold. */
42 T_ALL,
43 };
44
/* States of the state machine driven by filter_frame(). */
45 enum SilenceMode {
/* Scanning input at the start (or after a restart) for non-silence. */
46 SILENCE_TRIM,
/* Writing kept start silence and the start holdoff buffer to the FIFO. */
47 SILENCE_TRIM_FLUSH,
/* Copying input to the output while watching for stop silence. */
48 SILENCE_COPY,
/* Writing the stop holdoff buffer to the FIFO. */
49 SILENCE_COPY_FLUSH,
/* No further input is processed; filter_frame() only drains the FIFO. */
50 SILENCE_STOP
51 };
52
/* Private context of the silenceremove filter. */
53 typedef struct SilenceRemoveContext {
54 const AVClass *class;
55
/* Current state of the filter_frame() state machine. */
56 enum SilenceMode mode;
57
/*
 * Start-side settings. The *_opt fields hold the raw duration options;
 * config_input() rescales them by sample_rate / AV_TIME_BASE into the
 * matching non-_opt fields, which are then used as sample counts.
 */
58 int start_periods;
59 int64_t start_duration;
60 int64_t start_duration_opt;
61 double start_threshold;
62 int64_t start_silence;
63 int64_t start_silence_opt;
/* enum ThresholdMode value used while trimming. */
64 int start_mode;
65
/*
 * Stop-side settings, same layout as the start side. A negative
 * stop_periods option is made positive in init(), which also sets restart.
 */
66 int stop_periods;
67 int64_t stop_duration;
68 int64_t stop_duration_opt;
69 double stop_threshold;
70 int64_t stop_silence;
71 int64_t stop_silence_opt;
/* enum ThresholdMode value used while copying. */
72 int stop_mode;
73
/* Raw "window" duration option; rescaled into window_duration. */
74 int64_t window_duration_opt;
75
/*
 * Start-side buffers: start_holdoff collects samples found above the
 * threshold, start_silence_hold is a ring of the most recent samples
 * found below it. The *_offset / *_end fields are positions in them.
 */
76 AVFrame *start_holdoff;
77 AVFrame *start_silence_hold;
78 size_t start_holdoff_offset;
79 size_t start_holdoff_end;
80 size_t start_silence_offset;
81 size_t start_silence_end;
/* Number of start periods counted so far. */
82 int start_found_periods;
83
/*
 * Stop-side buffers: stop_holdoff collects samples found below the
 * threshold, stop_silence_hold is a ring of the most recent of them.
 */
84 AVFrame *stop_holdoff;
85 AVFrame *stop_silence_hold;
86 size_t stop_holdoff_offset;
87 size_t stop_holdoff_end;
88 size_t stop_silence_offset;
89 size_t stop_silence_end;
/* Number of stop periods counted so far. */
90 int stop_found_periods;
91
/*
 * Detection window: per-sample contributions, the slot that will be
 * replaced next, the window length (at least 1) and the running sum of
 * the stored contributions.
 */
92 AVFrame *window;
93 int window_offset;
94 int64_t window_duration;
95 double sum;
96
/* Non-zero when the current run may be counted as a new period. */
97 int one_period;
/* Set by init() for a negative stop_periods: go back to trimming after a stop. */
98 int restart;
/* Timestamp given to the next output frame. */
99 int64_t next_pts;
100
/* enum SilenceDetect value from the "detection" option. */
101 int detection;
/* Per-format helpers chosen in config_input(). */
102 void (*update)(struct SilenceRemoveContext *s, AVFrame *frame, int ch, int offset);
103 double (*compute)(struct SilenceRemoveContext *s, AVFrame *frame, int ch, int offset);
104 void (*copy)(struct SilenceRemoveContext *s, AVFrame *out, AVFrame *in,
105 int ch, int out_offset, int in_offset);
106
/* Queue of output samples; filter_frame() drains it into output frames. */
107 AVAudioFifo *fifo;
108 } SilenceRemoveContext;
109
/* OFFSET() locates an option's field in the private context; AF is the flag set shared by all options. */
110 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
111 #define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
112
/*
 * User-visible options. Duration options are stored in the *_opt fields and
 * rescaled in config_input(). start_mode and stop_mode share the "mode"
 * unit, so both accept the "any" and "all" constants. stop_periods may be
 * negative; init() turns that into a positive count plus the restart flag.
 */
113 static const AVOption silenceremove_options[] = {
114 { "start_periods", "set periods of silence parts to skip from start", OFFSET(start_periods), AV_OPT_TYPE_INT, {.i64=0}, 0, 9000, AF },
115 { "start_duration", "set start duration of non-silence part", OFFSET(start_duration_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
116 { "start_threshold", "set threshold for start silence detection", OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF },
117 { "start_silence", "set start duration of silence part to keep", OFFSET(start_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
118 { "start_mode", "set which channel will trigger trimming from start", OFFSET(start_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
119 { "any", 0, 0, AV_OPT_TYPE_CONST, {.i64=T_ANY}, 0, 0, AF, "mode" },
120 { "all", 0, 0, AV_OPT_TYPE_CONST, {.i64=T_ALL}, 0, 0, AF, "mode" },
121 { "stop_periods", "set periods of silence parts to skip from end", OFFSET(stop_periods), AV_OPT_TYPE_INT, {.i64=0}, -9000, 9000, AF },
122 { "stop_duration", "set stop duration of non-silence part", OFFSET(stop_duration_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
123 { "stop_threshold", "set threshold for stop silence detection", OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF },
124 { "stop_silence", "set stop duration of silence part to keep", OFFSET(stop_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
125 { "stop_mode", "set which channel will trigger trimming from end", OFFSET(stop_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
126 { "detection", "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, D_PEAK,D_RMS, AF, "detection" },
127 { "peak", "use absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK},0, 0, AF, "detection" },
128 { "rms", "use squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" },
129 { "window", "set duration of window for silence detection", OFFSET(window_duration_opt), AV_OPT_TYPE_DURATION, {.i64=20000}, 0, 100000000, AF },
130 { NULL }
131 };
132
133 AVFILTER_DEFINE_CLASS(silenceremove);
134
135 static void copy_double(SilenceRemoveContext *s, AVFrame *out, AVFrame *in,
136 int ch, int out_offset, int in_offset)
137 {
138 const double *srcp = (const double *)in->data[0];
139 const double src = srcp[in->ch_layout.nb_channels * in_offset + ch];
140 double *dstp = (double *)out->data[0];
141
142 dstp[out->ch_layout.nb_channels * out_offset + ch] = src;
143 }
144
145 220506 static void copy_doublep(SilenceRemoveContext *s, AVFrame *out, AVFrame *in,
146 int ch, int out_offset, int in_offset)
147 {
148 220506 const double *srcp = (const double *)in->extended_data[ch];
149 220506 const double src = srcp[in_offset];
150 220506 double *dstp = (double *)out->extended_data[ch];
151
152 220506 dstp[out_offset] = src;
153 220506 }
154
155 static void copy_float(SilenceRemoveContext *s, AVFrame *out, AVFrame *in,
156 int ch, int out_offset, int in_offset)
157 {
158 const float *srcp = (const float *)in->data[0];
159 const float src = srcp[in->ch_layout.nb_channels * in_offset + ch];
160 float *dstp = (float *)out->data[0];
161
162 dstp[out->ch_layout.nb_channels * out_offset + ch] = src;
163 }
164
165 static void copy_floatp(SilenceRemoveContext *s, AVFrame *out, AVFrame *in,
166 int ch, int out_offset, int in_offset)
167 {
168 const float *srcp = (const float *)in->extended_data[ch];
169 const float src = srcp[in_offset];
170 float *dstp = (float *)out->extended_data[ch];
171
172 dstp[out_offset] = src;
173 }
174
175 static double compute_peak_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
176 {
177 const double *samples = (const double *)frame->data[0];
178 const double *wsamples = (const double *)s->window->data[0];
179 double sample = samples[frame->ch_layout.nb_channels * offset + ch];
180 double wsample = wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
181 double new_sum;
182
183 new_sum = s->sum;
184 new_sum -= wsample;
185 new_sum = fmax(new_sum, 0.);
186 new_sum += fabs(sample);
187
188 return new_sum / s->window_duration;
189 }
190
191 static void update_peak_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
192 {
193 const double *samples = (const double *)frame->data[0];
194 double *wsamples = (double *)s->window->data[0];
195 double sample = samples[frame->ch_layout.nb_channels * offset + ch];
196 double *wsample = &wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
197
198 s->sum -= *wsample;
199 s->sum = fmax(s->sum, 0.);
200 *wsample = fabs(sample);
201 s->sum += *wsample;
202 }
203
204 static double compute_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
205 {
206 const float *samples = (const float *)frame->data[0];
207 const float *wsamples = (const float *)s->window->data[0];
208 float sample = samples[frame->ch_layout.nb_channels * offset + ch];
209 float wsample = wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
210 float new_sum;
211
212 new_sum = s->sum;
213 new_sum -= wsample;
214 new_sum = fmaxf(new_sum, 0.f);
215 new_sum += fabsf(sample);
216
217 return new_sum / s->window_duration;
218 }
219
220 static void update_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
221 {
222 const float *samples = (const float *)frame->data[0];
223 float *wsamples = (float *)s->window->data[0];
224 float sample = samples[frame->ch_layout.nb_channels * offset + ch];
225 float *wsample = &wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
226
227 s->sum -= *wsample;
228 s->sum = fmaxf(s->sum, 0.f);
229 *wsample = fabsf(sample);
230 s->sum += *wsample;
231 }
232
233 static double compute_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
234 {
235 const double *samples = (const double *)frame->data[0];
236 const double *wsamples = (const double *)s->window->data[0];
237 double sample = samples[frame->ch_layout.nb_channels * offset + ch];
238 double wsample = wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
239 double new_sum;
240
241 new_sum = s->sum;
242 new_sum -= wsample;
243 new_sum = fmax(new_sum, 0.);
244 new_sum += sample * sample;
245
246 av_assert2(new_sum >= 0.);
247 return sqrt(new_sum / s->window_duration);
248 }
249
250 static void update_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
251 {
252 const double *samples = (const double *)frame->data[0];
253 double *wsamples = (double *)s->window->data[0];
254 double sample = samples[frame->ch_layout.nb_channels * offset + ch];
255 double *wsample = &wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
256
257 s->sum -= *wsample;
258 s->sum = fmax(s->sum, 0.);
259 *wsample = sample * sample;
260 s->sum += *wsample;
261 }
262
263 static double compute_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
264 {
265 const float *samples = (const float *)frame->data[0];
266 const float *wsamples = (const float *)s->window->data[0];
267 float sample = samples[frame->ch_layout.nb_channels * offset + ch];
268 float wsample = wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
269 float new_sum;
270
271 new_sum = s->sum;
272 new_sum -= wsample;
273 new_sum = fmaxf(new_sum, 0.f);
274 new_sum += sample * sample;
275
276 av_assert2(new_sum >= 0.f);
277 return sqrtf(new_sum / s->window_duration);
278 }
279
280 static void update_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
281 {
282 const float *samples = (const float *)frame->data[0];
283 float sample = samples[frame->ch_layout.nb_channels * offset + ch];
284 float *wsamples = (float *)s->window->data[0];
285 float *wsample = &wsamples[frame->ch_layout.nb_channels * s->window_offset + ch];
286
287 s->sum -= *wsample;
288 s->sum = fmaxf(s->sum, 0.f);
289 *wsample = sample * sample;
290 s->sum += *wsample;
291 }
292
293 441000 static double compute_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
294 {
295 441000 const double *samples = (const double *)frame->extended_data[ch];
296 441000 const double *wsamples = (const double *)s->window->extended_data[ch];
297 441000 double sample = samples[offset];
298 441000 double wsample = wsamples[s->window_offset];
299 double new_sum;
300
301 441000 new_sum = s->sum;
302 441000 new_sum -= wsample;
303 441000 new_sum = fmax(new_sum, 0.);
304 441000 new_sum += fabs(sample);
305
306 441000 return new_sum / s->window_duration;
307 }
308
309 441000 static void update_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
310 {
311 441000 const double *samples = (const double *)frame->extended_data[ch];
312 441000 double *wsamples = (double *)s->window->extended_data[ch];
313 441000 double sample = samples[offset];
314 441000 double *wsample = &wsamples[s->window_offset];
315
316 441000 s->sum -= *wsample;
317 441000 s->sum = fmax(s->sum, 0.);
318 441000 *wsample = fabs(sample);
319 441000 s->sum += *wsample;
320 441000 }
321
322 static double compute_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
323 {
324 const float *samples = (const float *)frame->extended_data[ch];
325 const float *wsamples = (const float *)s->window->extended_data[ch];
326 float sample = samples[offset];
327 float wsample = wsamples[s->window_offset];
328 float new_sum;
329
330 new_sum = s->sum;
331 new_sum -= wsample;
332 new_sum = fmaxf(new_sum, 0.f);
333 new_sum += fabsf(sample);
334
335 return new_sum / s->window_duration;
336 }
337
338 static void update_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
339 {
340 const float *samples = (const float *)frame->extended_data[ch];
341 float *wsamples = (float *)s->window->extended_data[ch];
342 float sample = samples[offset];
343 float *wsample = &wsamples[s->window_offset];
344
345 s->sum -= *wsample;
346 s->sum = fmaxf(s->sum, 0.f);
347 *wsample = fabsf(sample);
348 s->sum += *wsample;
349 }
350
351 static double compute_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
352 {
353 const double *samples = (const double *)frame->extended_data[ch];
354 const double *wsamples = (const double *)s->window->extended_data[ch];
355 double sample = samples[offset];
356 double wsample = wsamples[s->window_offset];
357 double new_sum;
358
359 new_sum = s->sum;
360 new_sum -= wsample;
361 new_sum = fmax(new_sum, 0.);
362 new_sum += sample * sample;
363
364 av_assert2(new_sum >= 0.);
365 return sqrt(new_sum / s->window_duration);
366 }
367
368 static void update_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
369 {
370 const double *samples = (const double *)frame->extended_data[ch];
371 double *wsamples = (double *)s->window->extended_data[ch];
372 double sample = samples[offset];
373 double *wsample = &wsamples[s->window_offset];
374
375 s->sum -= *wsample;
376 s->sum = fmax(s->sum, 0.);
377 *wsample = sample * sample;
378 s->sum += *wsample;
379 }
380
381 static double compute_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
382 {
383 const float *samples = (const float *)frame->extended_data[ch];
384 const float *wsamples = (const float *)s->window->extended_data[ch];
385 float sample = samples[offset];
386 float wsample = wsamples[s->window_offset];
387 float new_sum;
388
389 new_sum = s->sum;
390 new_sum -= wsample;
391 new_sum = fmaxf(new_sum, 0.f);
392 new_sum += sample * sample;
393
394 av_assert2(new_sum >= 0.f);
395 return sqrtf(new_sum / s->window_duration);
396 }
397
398 static void update_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
399 {
400 const float *samples = (const float *)frame->extended_data[ch];
401 float *wsamples = (float *)s->window->extended_data[ch];
402 float sample = samples[offset];
403 float *wsample = &wsamples[s->window_offset];
404
405 s->sum -= *wsample;
406 s->sum = fmaxf(s->sum, 0.f);
407 *wsample = sample * sample;
408 s->sum += *wsample;
409 }
410
411 1 static av_cold int init(AVFilterContext *ctx)
412 {
413 1 SilenceRemoveContext *s = ctx->priv;
414
415
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (s->stop_periods < 0) {
416 1 s->stop_periods = -s->stop_periods;
417 1 s->restart = 1;
418 }
419
420 1 return 0;
421 }
422
423 4 static void clear_window(SilenceRemoveContext *s)
424 {
425 4 av_samples_set_silence(s->window->extended_data, 0, s->window_duration,
426 4 s->window->ch_layout.nb_channels, s->window->format);
427
428 4 s->window_offset = 0;
429 4 s->sum = 0;
430 4 }
431
432 1 static int config_input(AVFilterLink *inlink)
433 {
434 1 AVFilterContext *ctx = inlink->dst;
435 1 SilenceRemoveContext *s = ctx->priv;
436
437 1 s->next_pts = AV_NOPTS_VALUE;
438 1 s->window_duration = av_rescale(s->window_duration_opt, inlink->sample_rate,
439 AV_TIME_BASE);
440 1 s->window_duration = FFMAX(1, s->window_duration);
441 1 s->window = ff_get_audio_buffer(ctx->outputs[0], s->window_duration);
442
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->window)
443 return AVERROR(ENOMEM);
444
445 1 clear_window(s);
446
447 1 s->start_duration = av_rescale(s->start_duration_opt, inlink->sample_rate,
448 AV_TIME_BASE);
449 1 s->start_silence = av_rescale(s->start_silence_opt, inlink->sample_rate,
450 AV_TIME_BASE);
451 1 s->stop_duration = av_rescale(s->stop_duration_opt, inlink->sample_rate,
452 AV_TIME_BASE);
453 1 s->stop_silence = av_rescale(s->stop_silence_opt, inlink->sample_rate,
454 AV_TIME_BASE);
455
456 2 s->start_holdoff = ff_get_audio_buffer(ctx->outputs[0],
457 1 FFMAX(s->start_duration, 1));
458
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->start_holdoff)
459 return AVERROR(ENOMEM);
460
461 2 s->start_silence_hold = ff_get_audio_buffer(ctx->outputs[0],
462 1 FFMAX(s->start_silence, 1));
463
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->start_silence_hold)
464 return AVERROR(ENOMEM);
465
466 1 s->start_holdoff_offset = 0;
467 1 s->start_holdoff_end = 0;
468 1 s->start_found_periods = 0;
469
470 2 s->stop_holdoff = ff_get_audio_buffer(ctx->outputs[0],
471 1 FFMAX(s->stop_duration, 1));
472
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->stop_holdoff)
473 return AVERROR(ENOMEM);
474
475 2 s->stop_silence_hold = ff_get_audio_buffer(ctx->outputs[0],
476 1 FFMAX(s->stop_silence, 1));
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->stop_silence_hold)
478 return AVERROR(ENOMEM);
479
480 1 s->stop_holdoff_offset = 0;
481 1 s->stop_holdoff_end = 0;
482 1 s->stop_found_periods = 0;
483
484
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (s->start_periods) {
485 s->mode = SILENCE_TRIM;
486 s->one_period = 1;
487 } else {
488 1 s->mode = SILENCE_COPY;
489 }
490
491
1/5
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
1 switch (inlink->format) {
492 case AV_SAMPLE_FMT_DBL:
493 s->copy = copy_double;
494 switch (s->detection) {
495 case D_PEAK:
496 s->update = update_peak_double;
497 s->compute = compute_peak_double;
498 break;
499 case D_RMS:
500 s->update = update_rms_double;
501 s->compute = compute_rms_double;
502 break;
503 }
504 break;
505 case AV_SAMPLE_FMT_FLT:
506 s->copy = copy_float;
507 switch (s->detection) {
508 case D_PEAK:
509 s->update = update_peak_float;
510 s->compute = compute_peak_float;
511 break;
512 case D_RMS:
513 s->update = update_rms_float;
514 s->compute = compute_rms_float;
515 break;
516 }
517 break;
518 1 case AV_SAMPLE_FMT_DBLP:
519 1 s->copy = copy_doublep;
520
1/3
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
1 switch (s->detection) {
521 1 case D_PEAK:
522 1 s->update = update_peak_doublep;
523 1 s->compute = compute_peak_doublep;
524 1 break;
525 case D_RMS:
526 s->update = update_rms_doublep;
527 s->compute = compute_rms_doublep;
528 break;
529 }
530 1 break;
531 case AV_SAMPLE_FMT_FLTP:
532 s->copy = copy_floatp;
533 switch (s->detection) {
534 case D_PEAK:
535 s->update = update_peak_floatp;
536 s->compute = compute_peak_floatp;
537 break;
538 case D_RMS:
539 s->update = update_rms_floatp;
540 s->compute = compute_rms_floatp;
541 break;
542 }
543 break;
544 default:
545 return AVERROR_BUG;
546 }
547
548 1 s->fifo = av_audio_fifo_alloc(inlink->format, inlink->ch_layout.nb_channels, 1024);
549
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (!s->fifo)
550 return AVERROR(ENOMEM);
551
552 1 return 0;
553 }
554
555 44124 static void flush(SilenceRemoveContext *s,
556 AVFrame *out, AVFilterLink *outlink,
557 int *nb_samples_written, int flush_silence)
558 {
559 AVFrame *silence;
560
561
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 44100 times.
44124 if (*nb_samples_written) {
562 24 out->nb_samples = *nb_samples_written;
563
564 24 av_audio_fifo_write(s->fifo, (void **)out->extended_data, out->nb_samples);
565 24 *nb_samples_written = 0;
566 }
567
568 44124 av_frame_free(&out);
569
570
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 44124 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
44124 if (s->stop_silence_end <= 0 || !flush_silence)
571 44124 return;
572
573 silence = ff_get_audio_buffer(outlink, s->stop_silence_end);
574 if (!silence)
575 return;
576
577 if (s->stop_silence_offset < s->stop_silence_end) {
578 av_samples_copy(silence->extended_data, s->stop_silence_hold->extended_data, 0,
579 s->stop_silence_offset,
580 s->stop_silence_end - s->stop_silence_offset,
581 outlink->ch_layout.nb_channels, outlink->format);
582 }
583
584 if (s->stop_silence_offset > 0) {
585 av_samples_copy(silence->extended_data, s->stop_silence_hold->extended_data,
586 s->stop_silence_end - s->stop_silence_offset,
587 0, s->stop_silence_offset,
588 outlink->ch_layout.nb_channels, outlink->format);
589 }
590
591 s->stop_silence_offset = 0;
592 s->stop_silence_end = 0;
593
594 av_audio_fifo_write(s->fifo, (void **)silence->extended_data, silence->nb_samples);
595 av_frame_free(&silence);
596 }
597
/*
 * Process one input frame through the silence-removal state machine.
 *
 * The frame is walked sample by sample according to s->mode; the labels
 * inside the switch let one frame pass through several states. Samples that
 * are kept are written to s->fifo. After the switch the input frame is
 * freed and everything queued in the FIFO is sent downstream as one frame
 * stamped with s->next_pts.
 *
 * Returns the ff_filter_frame() result when a frame is emitted, 0 when
 * nothing is emitted, AVERROR(ENOMEM) on allocation failure and AVERROR_BUG
 * for an unknown mode. The input frame is freed on every path.
 */
598 54 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
599 {
600 54 AVFilterContext *ctx = inlink->dst;
601 54 AVFilterLink *outlink = ctx->outputs[0];
602 54 SilenceRemoveContext *s = ctx->priv;
603 int nbs, nb_samples_read, nb_samples_written;
604 54 int i, j, threshold, ret = 0;
605 AVFrame *out;
606
607 54 nb_samples_read = nb_samples_written = 0;
608
609
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 53 times.
/* First frame seen: output timestamps start at the first input pts. */
54 if (s->next_pts == AV_NOPTS_VALUE)
610 1 s->next_pts = in->pts;
611
612
2/6
✓ Branch 0 taken 27 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 27 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
54 switch (s->mode) {
/*
 * Trim state: walk the unread input samples until start_duration
 * consecutive samples are above start_threshold.
 */
613 case SILENCE_TRIM:
614 30 silence_trim:
615 30 nbs = in->nb_samples - nb_samples_read;
616
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
30 if (!nbs)
617 break;
618
619
2/2
✓ Branch 0 taken 220496 times.
✓ Branch 1 taken 28 times.
220524 for (i = 0; i < nbs; i++) {
620
1/2
✓ Branch 0 taken 220496 times.
✗ Branch 1 not taken.
/* T_ANY: one channel above threshold is enough; otherwise all channels must be. */
220496 if (s->start_mode == T_ANY) {
621 220496 threshold = 0;
622
2/2
✓ Branch 0 taken 220496 times.
✓ Branch 1 taken 220496 times.
440992 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
623 220496 threshold |= s->compute(s, in, j, nb_samples_read) > s->start_threshold;
624 }
625 } else {
626 threshold = 1;
627 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
628 threshold &= s->compute(s, in, j, nb_samples_read) > s->start_threshold;
629 }
630 }
631
632
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 220494 times.
/* Above threshold: commit the sample to the window and hold it in start_holdoff. */
220496 if (threshold) {
633
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
634 2 s->update(s, in, j, nb_samples_read);
635 2 s->copy(s, s->start_holdoff, in, j, s->start_holdoff_end, nb_samples_read);
636 }
637
638 2 s->window_offset++;
639
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (s->window_offset >= s->window_duration)
640 2 s->window_offset = 0;
641 2 s->start_holdoff_end++;
642 2 nb_samples_read++;
643
644
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
/* The run is long enough: count it as a period at most once. */
2 if (s->start_holdoff_end >= s->start_duration) {
645 2 s->start_found_periods += s->one_period >= 1;
646 2 s->one_period = 0;
647
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (s->start_found_periods >= s->start_periods) {
648 2 s->mode = SILENCE_TRIM_FLUSH;
649 2 goto silence_trim_flush;
650 }
651
652 s->start_holdoff_offset = 0;
653 s->start_holdoff_end = 0;
654 s->start_silence_offset = 0;
655 s->start_silence_end = 0;
656 }
657 } else {
/*
 * Not above threshold: the held run is discarded. The sample still goes
 * into the window and, when start_silence is set, into the
 * start_silence_hold ring.
 */
658 220494 s->start_holdoff_end = 0;
659 220494 s->one_period++;
660
661
2/2
✓ Branch 0 taken 220494 times.
✓ Branch 1 taken 220494 times.
440988 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
662 220494 s->update(s, in, j, nb_samples_read);
663
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 220494 times.
220494 if (s->start_silence)
664 s->copy(s, s->start_silence_hold, in, j, s->start_silence_offset, nb_samples_read);
665 }
666
667 220494 s->window_offset++;
668
1/2
✓ Branch 0 taken 220494 times.
✗ Branch 1 not taken.
220494 if (s->window_offset >= s->window_duration)
669 220494 s->window_offset = 0;
670 220494 nb_samples_read++;
671 220494 s->start_silence_offset++;
672
673
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 220494 times.
220494 if (s->start_silence) {
674 s->start_silence_end = FFMIN(s->start_silence_end + 1, s->start_silence);
675 if (s->start_silence_offset >= s->start_silence)
676 s->start_silence_offset = 0;
677 }
678 }
679 }
680 28 break;
681
/*
 * Trim-flush state: write the kept start silence (ring contents from the
 * current offset, then the wrapped part) followed by the held samples to
 * the FIFO, then continue in the copy state.
 */
682 case SILENCE_TRIM_FLUSH:
683 2 silence_trim_flush:
684 2 nbs = s->start_holdoff_end - s->start_holdoff_offset;
685
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (!nbs)
686 break;
687
688 2 out = ff_get_audio_buffer(outlink, nbs + s->start_silence_end);
689
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (!out) {
690 av_frame_free(&in);
691 return AVERROR(ENOMEM);
692 }
693
694
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (s->start_silence_end > 0) {
695 if (s->start_silence_offset < s->start_silence_end) {
696 av_samples_copy(out->extended_data, s->start_silence_hold->extended_data, 0,
697 s->start_silence_offset,
698 s->start_silence_end - s->start_silence_offset,
699 outlink->ch_layout.nb_channels, outlink->format);
700 }
701
702 if (s->start_silence_offset > 0) {
703 av_samples_copy(out->extended_data, s->start_silence_hold->extended_data,
704 s->start_silence_end - s->start_silence_offset,
705 0, s->start_silence_offset,
706 outlink->ch_layout.nb_channels, outlink->format);
707 }
708 }
709
710 2 av_samples_copy(out->extended_data, s->start_holdoff->extended_data,
711 2 s->start_silence_end,
712 2 s->start_holdoff_offset, nbs,
713 2 outlink->ch_layout.nb_channels, outlink->format);
714
715 2 s->start_holdoff_offset += nbs;
716
717 2 av_audio_fifo_write(s->fifo, (void **)out->extended_data, out->nb_samples);
718 2 av_frame_free(&out);
719
720
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (s->start_holdoff_offset == s->start_holdoff_end) {
721 2 s->start_holdoff_offset = 0;
722 2 s->start_holdoff_end = 0;
723 2 s->start_silence_offset = 0;
724 2 s->start_silence_end = 0;
725 2 s->mode = SILENCE_COPY;
726 2 goto silence_copy;
727 }
728 break;
729
/*
 * Copy state: pass the unread input samples on. With stop_periods set,
 * every sample is checked against stop_threshold; without it the rest of
 * the frame is copied in one go.
 */
730 case SILENCE_COPY:
731 44129 silence_copy:
732 44129 nbs = in->nb_samples - nb_samples_read;
733
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 44124 times.
44129 if (!nbs)
734 5 break;
735
736 44124 out = ff_get_audio_buffer(outlink, nbs);
737
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44124 times.
44124 if (!out) {
738 av_frame_free(&in);
739 return AVERROR(ENOMEM);
740 }
741
742
1/2
✓ Branch 0 taken 44124 times.
✗ Branch 1 not taken.
44124 if (s->stop_periods) {
743
2/2
✓ Branch 0 taken 220504 times.
✓ Branch 1 taken 21 times.
220525 for (i = 0; i < nbs; i++) {
744
1/2
✓ Branch 0 taken 220504 times.
✗ Branch 1 not taken.
220504 if (s->stop_mode == T_ANY) {
745 220504 threshold = 0;
746
2/2
✓ Branch 0 taken 220504 times.
✓ Branch 1 taken 220504 times.
441008 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
747 220504 threshold |= s->compute(s, in, j, nb_samples_read) > s->stop_threshold;
748 }
749 } else {
750 threshold = 1;
751 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
752 threshold &= s->compute(s, in, j, nb_samples_read) > s->stop_threshold;
753 }
754 }
755
756
3/6
✓ Branch 0 taken 176401 times.
✓ Branch 1 taken 44103 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 176401 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
/*
 * Above threshold while samples are held and stop_silence is 0: emit the
 * held samples through the copy-flush state. flush() frees out; the
 * current sample is not consumed here.
 */
220504 if (threshold && s->stop_holdoff_end && !s->stop_silence) {
757 s->mode = SILENCE_COPY_FLUSH;
758 flush(s, out, outlink, &nb_samples_written, 0);
759 s->one_period++;
760 goto silence_copy_flush;
761
2/2
✓ Branch 0 taken 176401 times.
✓ Branch 1 taken 44103 times.
/* Above threshold: commit the sample to the window and copy it to out. */
220504 } else if (threshold) {
762
2/2
✓ Branch 0 taken 176401 times.
✓ Branch 1 taken 176401 times.
352802 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
763 176401 s->update(s, in, j, nb_samples_read);
764 176401 s->copy(s, out, in, j, nb_samples_written, nb_samples_read);
765 }
766
767 176401 s->window_offset++;
768
1/2
✓ Branch 0 taken 176401 times.
✗ Branch 1 not taken.
176401 if (s->window_offset >= s->window_duration)
769 176401 s->window_offset = 0;
770 176401 nb_samples_read++;
771 176401 nb_samples_written++;
772 176401 s->one_period++;
773
1/2
✓ Branch 0 taken 44103 times.
✗ Branch 1 not taken.
/*
 * Not above threshold: hold the sample in stop_holdoff and, when
 * stop_silence is set, also in the stop_silence_hold ring.
 */
44103 } else if (!threshold) {
774
2/2
✓ Branch 0 taken 44103 times.
✓ Branch 1 taken 44103 times.
88206 for (j = 0; j < outlink->ch_layout.nb_channels; j++) {
775 44103 s->update(s, in, j, nb_samples_read);
776
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44103 times.
44103 if (s->stop_silence)
777 s->copy(s, s->stop_silence_hold, in, j, s->stop_silence_offset, nb_samples_read);
778
779 44103 s->copy(s, s->stop_holdoff, in, j, s->stop_holdoff_end, nb_samples_read);
780 }
781
782
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44103 times.
44103 if (s->stop_silence) {
783 s->stop_silence_offset++;
784 s->stop_silence_end = FFMIN(s->stop_silence_end + 1, s->stop_silence);
785 if (s->stop_silence_offset >= s->stop_silence) {
786 s->stop_silence_offset = 0;
787 }
788 }
789
790 44103 s->window_offset++;
791
1/2
✓ Branch 0 taken 44103 times.
✗ Branch 1 not taken.
44103 if (s->window_offset >= s->window_duration)
792 44103 s->window_offset = 0;
793 44103 nb_samples_read++;
794 44103 s->stop_holdoff_end++;
795
796
1/2
✓ Branch 0 taken 44103 times.
✗ Branch 1 not taken.
/* The held run has reached stop_duration: count it as a period at most once. */
44103 if (s->stop_holdoff_end >= s->stop_duration) {
797 44103 s->stop_found_periods += s->one_period >= 1;
798 44103 s->one_period = 0;
799
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 44100 times.
/*
 * Enough stop periods: the held samples are dropped. Without restart the
 * filter stops; with restart the start-side state and the window are
 * reset and the rest of the frame is trimmed again.
 */
44103 if (s->stop_found_periods >= s->stop_periods) {
800 3 s->stop_holdoff_offset = 0;
801 3 s->stop_holdoff_end = 0;
802
803
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (!s->restart) {
804 s->mode = SILENCE_STOP;
805 flush(s, out, outlink, &nb_samples_written, 1);
806 goto silence_stop;
807 } else {
808 3 s->stop_found_periods = 0;
809 3 s->start_found_periods = 0;
810 3 s->start_holdoff_offset = 0;
811 3 s->start_holdoff_end = 0;
812 3 s->start_silence_offset = 0;
813 3 s->start_silence_end = 0;
814 3 clear_window(s);
815 3 s->mode = SILENCE_TRIM;
816 3 flush(s, out, outlink, &nb_samples_written, 1);
817 3 goto silence_trim;
818 }
819 }
/* Not enough periods yet: emit the held samples through the copy-flush state. */
820 44100 s->mode = SILENCE_COPY_FLUSH;
821 44100 flush(s, out, outlink, &nb_samples_written, 0);
822 44100 goto silence_copy_flush;
823 }
824 }
825 }
826 21 s->one_period++;
827 21 flush(s, out, outlink, &nb_samples_written, 0);
828 } else {
829 av_samples_copy(out->extended_data, in->extended_data,
830 nb_samples_written,
831 nb_samples_read, nbs,
832 outlink->ch_layout.nb_channels, outlink->format);
833
834 av_audio_fifo_write(s->fifo, (void **)out->extended_data, out->nb_samples);
835 av_frame_free(&out);
836 }
837 21 break;
838
/*
 * Copy-flush state: write the samples held in stop_holdoff to the FIFO,
 * reset the stop-side positions and return to the copy state.
 */
839 case SILENCE_COPY_FLUSH:
840 44100 silence_copy_flush:
841 44100 nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
842
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44100 times.
44100 if (!nbs)
843 break;
844
845 44100 out = ff_get_audio_buffer(outlink, nbs);
846
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44100 times.
44100 if (!out) {
847 av_frame_free(&in);
848 return AVERROR(ENOMEM);
849 }
850
851 44100 av_samples_copy(out->extended_data, s->stop_holdoff->extended_data, 0,
852 44100 s->stop_holdoff_offset, nbs,
853 44100 outlink->ch_layout.nb_channels, outlink->format);
854
855 44100 s->stop_holdoff_offset += nbs;
856
857 44100 av_audio_fifo_write(s->fifo, (void **)out->extended_data, out->nb_samples);
858 44100 av_frame_free(&out);
859
860
1/2
✓ Branch 0 taken 44100 times.
✗ Branch 1 not taken.
44100 if (s->stop_holdoff_offset == s->stop_holdoff_end) {
861 44100 s->stop_holdoff_offset = 0;
862 44100 s->stop_holdoff_end = 0;
863 44100 s->stop_silence_offset = 0;
864 44100 s->stop_silence_end = 0;
865 44100 s->mode = SILENCE_COPY;
866 44100 goto silence_copy;
867 }
868 break;
/* Stop state: the remaining input is ignored. */
869 case SILENCE_STOP:
870 silence_stop:
871 break;
872 default:
873 ret = AVERROR_BUG;
874 }
875
876 54 av_frame_free(&in);
877
878
2/2
✓ Branch 1 taken 29 times.
✓ Branch 2 taken 25 times.
/* Send everything queued in the FIFO downstream as one frame and advance next_pts. */
54 if (av_audio_fifo_size(s->fifo) > 0) {
879 29 out = ff_get_audio_buffer(outlink, av_audio_fifo_size(s->fifo));
880
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 29 times.
29 if (!out)
881 return AVERROR(ENOMEM);
882
883 29 av_audio_fifo_read(s->fifo, (void **)out->extended_data, out->nb_samples);
884 29 out->pts = s->next_pts;
885 29 s->next_pts += av_rescale_q(out->nb_samples,
886 29 (AVRational){1, outlink->sample_rate},
887 outlink->time_base);
888
889 29 ret = ff_filter_frame(outlink, out);
890 }
891
892 54 return ret;
893 }
894
895 56 static int request_frame(AVFilterLink *outlink)
896 {
897 56 AVFilterContext *ctx = outlink->src;
898 56 SilenceRemoveContext *s = ctx->priv;
899 int ret;
900
901 56 ret = ff_request_frame(ctx->inputs[0]);
902
3/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 55 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
56 if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
903
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 s->mode == SILENCE_COPY)) {
904 int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
905 if (nbs) {
906 AVFrame *frame;
907
908 frame = ff_get_audio_buffer(outlink, nbs);
909 if (!frame)
910 return AVERROR(ENOMEM);
911
912 av_samples_copy(frame->extended_data, s->stop_holdoff->extended_data, 0,
913 s->stop_holdoff_offset, nbs,
914 outlink->ch_layout.nb_channels, outlink->format);
915
916 frame->pts = s->next_pts;
917 s->next_pts += av_rescale_q(frame->nb_samples,
918 (AVRational){1, outlink->sample_rate},
919 outlink->time_base);
920
921 ret = ff_filter_frame(outlink, frame);
922 }
923 s->mode = SILENCE_STOP;
924 }
925 56 return ret;
926 }
927
928 1 static av_cold void uninit(AVFilterContext *ctx)
929 {
930 1 SilenceRemoveContext *s = ctx->priv;
931
932 1 av_frame_free(&s->start_holdoff);
933 1 av_frame_free(&s->start_silence_hold);
934 1 av_frame_free(&s->stop_holdoff);
935 1 av_frame_free(&s->stop_silence_hold);
936 1 av_frame_free(&s->window);
937
938 1 av_audio_fifo_free(s->fifo);
939 1 s->fifo = NULL;
940 1 }
941
/* Single audio input pad: configured by config_input(), frames handled by filter_frame(). */
942 static const AVFilterPad silenceremove_inputs[] = {
943 {
944 .name = "default",
945 .type = AVMEDIA_TYPE_AUDIO,
946 .config_props = config_input,
947 .filter_frame = filter_frame,
948 },
949 };
950
/* Single audio output pad: frame requests go through request_frame(). */
951 static const AVFilterPad silenceremove_outputs[] = {
952 {
953 .name = "default",
954 .type = AVMEDIA_TYPE_AUDIO,
955 .request_frame = request_frame,
956 },
957 };
958
/*
 * Filter definition. The accepted sample formats are the four float and
 * double variants (interleaved and planar) handled by the format switch in
 * config_input().
 */
959 const AVFilter ff_af_silenceremove = {
960 .name = "silenceremove",
961 .description = NULL_IF_CONFIG_SMALL("Remove silence."),
962 .priv_size = sizeof(SilenceRemoveContext),
963 .priv_class = &silenceremove_class,
964 .init = init,
965 .uninit = uninit,
966 FILTER_INPUTS(silenceremove_inputs),
967 FILTER_OUTPUTS(silenceremove_outputs),
968 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
969 AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP),
970 };
971