/*
 * Copyright (c) 2001 Heikki Leinonen
 * Copyright (c) 2001 Chris Bagwell
 * Copyright (c) 2003 Donnie Smith
 * Copyright (c) 2014 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <float.h> /* DBL_MAX */

#include "libavutil/opt.h"
#include "libavutil/timestamp.h"
#include "audio.h"
#include "formats.h"
#include "avfilter.h"
#include "internal.h"

enum SilenceDetect {
    D_PEAK,
    D_RMS,
};

enum ThresholdMode {
    T_ANY,
    T_ALL,
};

enum SilenceMode {
    SILENCE_TRIM,
    SILENCE_TRIM_FLUSH,
    SILENCE_COPY,
    SILENCE_COPY_FLUSH,
    SILENCE_STOP
};

typedef struct SilenceRemoveContext {
    const AVClass *class;

    enum SilenceMode mode;

    int start_periods;
    int64_t start_duration;
    int64_t start_duration_opt;
    double start_threshold;
    int64_t start_silence;
    int64_t start_silence_opt;
    int start_mode;

    int stop_periods;
    int64_t stop_duration;
    int64_t stop_duration_opt;
    double stop_threshold;
    int64_t stop_silence;
    int64_t stop_silence_opt;
    int stop_mode;

    double *start_holdoff;
    double *start_silence_hold;
    size_t start_holdoff_offset;
    size_t start_holdoff_end;
    size_t start_silence_offset;
    size_t start_silence_end;
    int start_found_periods;

    double *stop_holdoff;
    double *stop_silence_hold;
    size_t stop_holdoff_offset;
    size_t stop_holdoff_end;
    size_t stop_silence_offset;
    size_t stop_silence_end;
    int stop_found_periods;

    double window_ratio;
    double *window;
    double *window_current;
    double *window_end;
    int window_size;
    double sum;

    int restart;
    int64_t next_pts;

    int detection;
    void (*update)(struct SilenceRemoveContext *s, double sample);
    double(*compute)(struct SilenceRemoveContext *s, double sample);
} SilenceRemoveContext;

#define OFFSET(x) offsetof(SilenceRemoveContext, x)
#define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM

static const AVOption silenceremove_options[] = {
    { "start_periods",   NULL, OFFSET(start_periods), AV_OPT_TYPE_INT, {.i64=0}, 0, 9000, AF },
    { "start_duration",  "set start duration of non-silence part", OFFSET(start_duration_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
    { "start_threshold", "set threshold for start silence detection", OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF },
    { "start_silence",   "set start duration of silence part to keep", OFFSET(start_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
    { "start_mode",      "set which channel will trigger trimming from start", OFFSET(start_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
    {   "any",           0, 0, AV_OPT_TYPE_CONST, {.i64=T_ANY}, 0, 0, AF, "mode" },
    {   "all",           0, 0, AV_OPT_TYPE_CONST, {.i64=T_ALL}, 0, 0, AF, "mode" },
    { "stop_periods",    NULL, OFFSET(stop_periods), AV_OPT_TYPE_INT, {.i64=0}, -9000, 9000, AF },
    { "stop_duration",   "set stop duration of non-silence part", OFFSET(stop_duration_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
    { "stop_threshold",  "set threshold for stop silence detection", OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF },
    { "stop_silence",    "set stop duration of silence part to keep", OFFSET(stop_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
    { "stop_mode",       "set which channel will trigger trimming from end", OFFSET(stop_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
    { "detection",       "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, D_PEAK, D_RMS, AF, "detection" },
    {   "peak",          "use absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK}, 0, 0, AF, "detection" },
    {   "rms",           "use squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" },
    { "window",          "set duration of window in seconds", OFFSET(window_ratio), AV_OPT_TYPE_DOUBLE, {.dbl=0.02}, 0, 10, AF },
    { NULL }
};

AVFILTER_DEFINE_CLASS(silenceremove);

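/*
 * Sliding-window level measurement over interleaved samples.
 * compute_*() returns the window level as if the given sample replaced
 * the oldest entry, without changing any state; update_*() stores the
 * sample in the circular window and updates the running sum.
 * "peak" detection works on absolute sample values, "rms" on squared values.
 */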
|
|
static double compute_peak(SilenceRemoveContext *s, double sample)
{
    double new_sum;

    new_sum  = s->sum;
    new_sum -= *s->window_current;
    new_sum += fabs(sample);

    return new_sum / s->window_size;
}

static void update_peak(SilenceRemoveContext *s, double sample)
{
    s->sum -= *s->window_current;
    *s->window_current = fabs(sample);
    s->sum += *s->window_current;

    s->window_current++;
    if (s->window_current >= s->window_end)
        s->window_current = s->window;
}

static double compute_rms(SilenceRemoveContext *s, double sample)
{
    double new_sum;

    new_sum  = s->sum;
    new_sum -= *s->window_current;
    new_sum += sample * sample;

    return sqrt(new_sum / s->window_size);
}

static void update_rms(SilenceRemoveContext *s, double sample)
{
    s->sum -= *s->window_current;
    *s->window_current = sample * sample;
    s->sum += *s->window_current;

    s->window_current++;
    if (s->window_current >= s->window_end)
        s->window_current = s->window;
}

static av_cold int init(AVFilterContext *ctx)
{
    SilenceRemoveContext *s = ctx->priv;

    if (s->stop_periods < 0) {
        s->stop_periods = -s->stop_periods;
        s->restart = 1;
    }

    switch (s->detection) {
    case D_PEAK:
        s->update = update_peak;
        s->compute = compute_peak;
        break;
    case D_RMS:
        s->update = update_rms;
        s->compute = compute_rms;
        break;
    }

    return 0;
}

static void clear_window(SilenceRemoveContext *s)
{
    memset(s->window, 0, s->window_size * sizeof(*s->window));

    s->window_current = s->window;
    s->window_end = s->window + s->window_size;
    s->sum = 0;
}

static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SilenceRemoveContext *s = ctx->priv;

    s->next_pts = AV_NOPTS_VALUE;
    s->window_size = FFMAX((inlink->sample_rate * s->window_ratio), 1) * inlink->channels;
    s->window = av_malloc_array(s->window_size, sizeof(*s->window));
    if (!s->window)
        return AVERROR(ENOMEM);

    clear_window(s);

    s->start_duration = av_rescale(s->start_duration_opt, inlink->sample_rate,
                                   AV_TIME_BASE);
    s->start_silence  = av_rescale(s->start_silence_opt, inlink->sample_rate,
                                   AV_TIME_BASE);
    s->stop_duration  = av_rescale(s->stop_duration_opt, inlink->sample_rate,
                                   AV_TIME_BASE);
    s->stop_silence   = av_rescale(s->stop_silence_opt, inlink->sample_rate,
                                   AV_TIME_BASE);

    s->start_holdoff = av_malloc_array(FFMAX(s->start_duration, 1),
                                       sizeof(*s->start_holdoff) *
                                       inlink->channels);
    if (!s->start_holdoff)
        return AVERROR(ENOMEM);

    s->start_silence_hold = av_malloc_array(FFMAX(s->start_silence, 1),
                                            sizeof(*s->start_silence_hold) *
                                            inlink->channels);
    if (!s->start_silence_hold)
        return AVERROR(ENOMEM);

    s->start_holdoff_offset = 0;
    s->start_holdoff_end    = 0;
    s->start_found_periods  = 0;

    s->stop_holdoff = av_malloc_array(FFMAX(s->stop_duration, 1),
                                      sizeof(*s->stop_holdoff) *
                                      inlink->channels);
    if (!s->stop_holdoff)
        return AVERROR(ENOMEM);

    s->stop_silence_hold = av_malloc_array(FFMAX(s->stop_silence, 1),
                                           sizeof(*s->stop_silence_hold) *
                                           inlink->channels);
    if (!s->stop_silence_hold)
        return AVERROR(ENOMEM);

    s->stop_holdoff_offset = 0;
    s->stop_holdoff_end    = 0;
    s->stop_found_periods  = 0;

    if (s->start_periods)
        s->mode = SILENCE_TRIM;
    else
        s->mode = SILENCE_COPY;

    return 0;
}

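/*
 * Emit any samples buffered in "out" and, when flush_silence is set,
 * also emit the kept tail of silence stored in the stop_silence_hold
 * ring buffer (the two memcpy calls handle the wrap-around).
 */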
|
static void flush(SilenceRemoveContext *s,
                  AVFrame *out, AVFilterLink *outlink,
                  int *nb_samples_written, int *ret, int flush_silence)
{
    AVFrame *silence;

    if (*nb_samples_written) {
        out->nb_samples = *nb_samples_written / outlink->channels;

        out->pts = s->next_pts;
        s->next_pts += av_rescale_q(out->nb_samples,
                                    (AVRational){1, outlink->sample_rate},
                                    outlink->time_base);

        *ret = ff_filter_frame(outlink, out);
        if (*ret < 0)
            return;
        *nb_samples_written = 0;
    } else {
        av_frame_free(&out);
    }

    if (s->stop_silence_end <= 0 || !flush_silence)
        return;

    silence = ff_get_audio_buffer(outlink, s->stop_silence_end / outlink->channels);
    if (!silence) {
        *ret = AVERROR(ENOMEM);
        return;
    }

    if (s->stop_silence_offset < s->stop_silence_end) {
        memcpy(silence->data[0],
               &s->stop_silence_hold[s->stop_silence_offset],
               (s->stop_silence_end - s->stop_silence_offset) * sizeof(double));
    }

    if (s->stop_silence_offset > 0) {
        memcpy(silence->data[0] + (s->stop_silence_end - s->stop_silence_offset) * sizeof(double),
               &s->stop_silence_hold[0],
               s->stop_silence_offset * sizeof(double));
    }

    s->stop_silence_offset = 0;
    s->stop_silence_end = 0;

    silence->pts = s->next_pts;
    s->next_pts += av_rescale_q(silence->nb_samples,
                                (AVRational){1, outlink->sample_rate},
                                outlink->time_base);

    *ret = ff_filter_frame(outlink, silence);
}

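/*
 * Per-frame processing is a state machine:
 *   SILENCE_TRIM       - drop leading silence, collect candidate
 *                        non-silence into start_holdoff
 *   SILENCE_TRIM_FLUSH - output the collected start_holdoff samples
 *                        (plus any kept leading silence)
 *   SILENCE_COPY       - pass audio through while watching for silence
 *   SILENCE_COPY_FLUSH - output the collected stop_holdoff samples
 *   SILENCE_STOP       - drop everything after the detected stop point
 * All hold buffers store interleaved double-precision samples.
 */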
|
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    SilenceRemoveContext *s = ctx->priv;
    int i, j, threshold, ret = 0;
    int nbs, nb_samples_read, nb_samples_written;
    double *obuf, *ibuf = (double *)in->data[0];
    AVFrame *out;

    nb_samples_read = nb_samples_written = 0;

    if (s->next_pts == AV_NOPTS_VALUE)
        s->next_pts = in->pts;

    switch (s->mode) {
    case SILENCE_TRIM:
silence_trim:
        nbs = in->nb_samples - nb_samples_read / outlink->channels;
        if (!nbs)
            break;

        for (i = 0; i < nbs; i++) {
            if (s->start_mode == T_ANY) {
                threshold = 0;
                for (j = 0; j < outlink->channels; j++) {
                    threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
                }
            } else {
                threshold = 1;
                for (j = 0; j < outlink->channels; j++) {
                    threshold &= s->compute(s, ibuf[j]) > s->start_threshold;
                }
            }

            if (threshold) {
                for (j = 0; j < outlink->channels; j++) {
                    s->update(s, *ibuf);
                    s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
                }
                nb_samples_read += outlink->channels;

                if (s->start_holdoff_end >= s->start_duration * outlink->channels) {
                    if (++s->start_found_periods >= s->start_periods) {
                        s->mode = SILENCE_TRIM_FLUSH;
                        goto silence_trim_flush;
                    }

                    s->start_holdoff_offset = 0;
                    s->start_holdoff_end = 0;
                    s->start_silence_offset = 0;
                    s->start_silence_end = 0;
                }
            } else {
                s->start_holdoff_end = 0;

                for (j = 0; j < outlink->channels; j++) {
                    s->update(s, ibuf[j]);
                    if (s->start_silence) {
                        s->start_silence_hold[s->start_silence_offset++] = ibuf[j];
                        s->start_silence_end = FFMIN(s->start_silence_end + 1, outlink->channels * s->start_silence);
                        if (s->start_silence_offset >= outlink->channels * s->start_silence) {
                            s->start_silence_offset = 0;
                        }
                    }
                }

                ibuf += outlink->channels;
                nb_samples_read += outlink->channels;
            }
        }
        break;

    case SILENCE_TRIM_FLUSH:
silence_trim_flush:
        nbs = s->start_holdoff_end - s->start_holdoff_offset;
        nbs -= nbs % outlink->channels;
        if (!nbs)
            break;

        out = ff_get_audio_buffer(outlink, nbs / outlink->channels + s->start_silence_end / outlink->channels);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        if (s->start_silence_end > 0) {
            if (s->start_silence_offset < s->start_silence_end) {
                memcpy(out->data[0],
                       &s->start_silence_hold[s->start_silence_offset],
                       (s->start_silence_end - s->start_silence_offset) * sizeof(double));
            }

            if (s->start_silence_offset > 0) {
                memcpy(out->data[0] + (s->start_silence_end - s->start_silence_offset) * sizeof(double),
                       &s->start_silence_hold[0],
                       s->start_silence_offset * sizeof(double));
            }
        }

        memcpy(out->data[0] + s->start_silence_end * sizeof(double),
               &s->start_holdoff[s->start_holdoff_offset],
               nbs * sizeof(double));

        out->pts = s->next_pts;
        s->next_pts += av_rescale_q(out->nb_samples,
                                    (AVRational){1, outlink->sample_rate},
                                    outlink->time_base);

        s->start_holdoff_offset += nbs;

        ret = ff_filter_frame(outlink, out);

        if (s->start_holdoff_offset == s->start_holdoff_end) {
            s->start_holdoff_offset = 0;
            s->start_holdoff_end = 0;
            s->start_silence_offset = 0;
            s->start_silence_end = 0;
            s->mode = SILENCE_COPY;
            goto silence_copy;
        }
        break;

    case SILENCE_COPY:
silence_copy:
        nbs = in->nb_samples - nb_samples_read / outlink->channels;
        if (!nbs)
            break;

        out = ff_get_audio_buffer(outlink, nbs);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        obuf = (double *)out->data[0];

        if (s->stop_periods) {
            for (i = 0; i < nbs; i++) {
                if (s->stop_mode == T_ANY) {
                    threshold = 0;
                    for (j = 0; j < outlink->channels; j++) {
                        threshold |= s->compute(s, ibuf[j]) > s->stop_threshold;
                    }
                } else {
                    threshold = 1;
                    for (j = 0; j < outlink->channels; j++) {
                        threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
                    }
                }

                if (threshold && s->stop_holdoff_end && !s->stop_silence) {
                    s->mode = SILENCE_COPY_FLUSH;
                    flush(s, out, outlink, &nb_samples_written, &ret, 0);
                    goto silence_copy_flush;
                } else if (threshold) {
                    for (j = 0; j < outlink->channels; j++) {
                        s->update(s, *ibuf);
                        *obuf++ = *ibuf++;
                    }
                    nb_samples_read    += outlink->channels;
                    nb_samples_written += outlink->channels;
                } else if (!threshold) {
                    for (j = 0; j < outlink->channels; j++) {
                        s->update(s, *ibuf);
                        if (s->stop_silence) {
                            s->stop_silence_hold[s->stop_silence_offset++] = *ibuf;
                            s->stop_silence_end = FFMIN(s->stop_silence_end + 1, outlink->channels * s->stop_silence);
                            if (s->stop_silence_offset >= outlink->channels * s->stop_silence) {
                                s->stop_silence_offset = 0;
                            }
                        }

                        s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
                    }
                    nb_samples_read += outlink->channels;

                    if (s->stop_holdoff_end >= s->stop_duration * outlink->channels) {
                        if (++s->stop_found_periods >= s->stop_periods) {
                            s->stop_holdoff_offset = 0;
                            s->stop_holdoff_end = 0;

                            if (!s->restart) {
                                s->mode = SILENCE_STOP;
                                flush(s, out, outlink, &nb_samples_written, &ret, 1);
                                goto silence_stop;
                            } else {
                                s->stop_found_periods = 0;
                                s->start_found_periods = 0;
                                s->start_holdoff_offset = 0;
                                s->start_holdoff_end = 0;
                                s->start_silence_offset = 0;
                                s->start_silence_end = 0;
                                clear_window(s);
                                s->mode = SILENCE_TRIM;
                                flush(s, out, outlink, &nb_samples_written, &ret, 1);
                                goto silence_trim;
                            }
                        }
                        s->mode = SILENCE_COPY_FLUSH;
                        flush(s, out, outlink, &nb_samples_written, &ret, 0);
                        goto silence_copy_flush;
                    }
                }
            }
            flush(s, out, outlink, &nb_samples_written, &ret, 0);
        } else {
            memcpy(obuf, ibuf, sizeof(double) * nbs * outlink->channels);

            out->pts = s->next_pts;
            s->next_pts += av_rescale_q(out->nb_samples,
                                        (AVRational){1, outlink->sample_rate},
                                        outlink->time_base);

            ret = ff_filter_frame(outlink, out);
        }
        break;

    case SILENCE_COPY_FLUSH:
silence_copy_flush:
        nbs  = s->stop_holdoff_end - s->stop_holdoff_offset;
        nbs -= nbs % outlink->channels;
        if (!nbs)
            break;

        out = ff_get_audio_buffer(outlink, nbs / outlink->channels);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        memcpy(out->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
               nbs * sizeof(double));
        s->stop_holdoff_offset += nbs;

        out->pts = s->next_pts;
        s->next_pts += av_rescale_q(out->nb_samples,
                                    (AVRational){1, outlink->sample_rate},
                                    outlink->time_base);

        ret = ff_filter_frame(outlink, out);

        if (s->stop_holdoff_offset == s->stop_holdoff_end) {
            s->stop_holdoff_offset = 0;
            s->stop_holdoff_end = 0;
            s->stop_silence_offset = 0;
            s->stop_silence_end = 0;
            s->mode = SILENCE_COPY;
            goto silence_copy;
        }
        break;
    case SILENCE_STOP:
silence_stop:
        break;
    }

    av_frame_free(&in);

    return ret;
}

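/*
 * At EOF, samples still held back in stop_holdoff (silence that never
 * grew long enough to trigger removal) are sent downstream before the
 * filter switches to SILENCE_STOP.
 */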
|
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    SilenceRemoveContext *s = ctx->priv;
    int ret;

    ret = ff_request_frame(ctx->inputs[0]);
    if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
                               s->mode == SILENCE_COPY)) {
        int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
        if (nbs) {
            AVFrame *frame;

            frame = ff_get_audio_buffer(outlink, nbs / outlink->channels);
            if (!frame)
                return AVERROR(ENOMEM);

            memcpy(frame->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
                   nbs * sizeof(double));

            frame->pts = s->next_pts;
            s->next_pts += av_rescale_q(frame->nb_samples,
                                        (AVRational){1, outlink->sample_rate},
                                        outlink->time_base);

            ret = ff_filter_frame(outlink, frame);
        }
        s->mode = SILENCE_STOP;
    }
    return ret;
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats = NULL;
    AVFilterChannelLayouts *layouts = NULL;
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    formats = ff_all_samplerates();
    if (!formats)
        return AVERROR(ENOMEM);
    return ff_set_common_samplerates(ctx, formats);
}

static av_cold void uninit(AVFilterContext *ctx)
{
    SilenceRemoveContext *s = ctx->priv;

    av_freep(&s->start_holdoff);
    av_freep(&s->start_silence_hold);
    av_freep(&s->stop_holdoff);
    av_freep(&s->stop_silence_hold);
    av_freep(&s->window);
}

static const AVFilterPad silenceremove_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};

static const AVFilterPad silenceremove_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_af_silenceremove = {
    .name          = "silenceremove",
    .description   = NULL_IF_CONFIG_SMALL("Remove silence."),
    .priv_size     = sizeof(SilenceRemoveContext),
    .priv_class    = &silenceremove_class,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = silenceremove_inputs,
    .outputs       = silenceremove_outputs,
};