Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com> | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file | ||
23 | * tempo scaling audio filter -- an implementation of WSOLA algorithm | ||
24 | * | ||
25 | * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h | ||
26 | * from Apprentice Video player by Pavel Koshevoy. | ||
27 | * https://sourceforge.net/projects/apprenticevideo/ | ||
28 | * | ||
29 | * An explanation of SOLA algorithm is available at | ||
30 | * http://www.surina.net/article/time-and-pitch-scaling.html | ||
31 | * | ||
32 | * WSOLA is very similar to SOLA, only one major difference exists between | ||
33 | * these algorithms. SOLA shifts audio fragments along the output stream, | ||
34 | * where as WSOLA shifts audio fragments along the input stream. | ||
35 | * | ||
36 | * The advantage of WSOLA algorithm is that the overlap region size is | ||
37 | * always the same, therefore the blending function is constant and | ||
38 | * can be precomputed. | ||
39 | */ | ||
40 | |||
41 | #include <float.h> | ||
42 | #include "libavutil/avassert.h" | ||
43 | #include "libavutil/channel_layout.h" | ||
44 | #include "libavutil/mem.h" | ||
45 | #include "libavutil/opt.h" | ||
46 | #include "libavutil/samplefmt.h" | ||
47 | #include "libavutil/tx.h" | ||
48 | #include "avfilter.h" | ||
49 | #include "audio.h" | ||
50 | #include "filters.h" | ||
51 | |||
52 | /** | ||
53 | * A fragment of audio waveform | ||
54 | */ | ||
55 | typedef struct AudioFragment { | ||
56 | // index of the first sample of this fragment in the overall waveform; | ||
57 | // 0: input sample position | ||
58 | // 1: output sample position | ||
59 | int64_t position[2]; | ||
60 | |||
61 | // original packed multi-channel samples: | ||
62 | uint8_t *data; | ||
63 | |||
64 | // number of samples in this fragment: | ||
65 | int nsamples; | ||
66 | |||
67 | // rDFT transform of the down-mixed mono fragment, used for | ||
68 | // fast waveform alignment via correlation in frequency domain: | ||
69 | float *xdat_in; | ||
70 | float *xdat; | ||
71 | } AudioFragment; | ||
72 | |||
73 | /** | ||
74 | * Filter state machine states | ||
75 | */ | ||
76 | typedef enum { | ||
77 | YAE_LOAD_FRAGMENT, | ||
78 | YAE_ADJUST_POSITION, | ||
79 | YAE_RELOAD_FRAGMENT, | ||
80 | YAE_OUTPUT_OVERLAP_ADD, | ||
81 | YAE_FLUSH_OUTPUT, | ||
82 | } FilterState; | ||
83 | |||
84 | /** | ||
85 | * Filter state machine | ||
86 | */ | ||
87 | typedef struct ATempoContext { | ||
88 | const AVClass *class; | ||
89 | |||
90 | // ring-buffer of input samples, necessary because some times | ||
91 | // input fragment position may be adjusted backwards: | ||
92 | uint8_t *buffer; | ||
93 | |||
94 | // ring-buffer maximum capacity, expressed in sample rate time base: | ||
95 | int ring; | ||
96 | |||
97 | // ring-buffer house keeping: | ||
98 | int size; | ||
99 | int head; | ||
100 | int tail; | ||
101 | |||
102 | // 0: input sample position corresponding to the ring buffer tail | ||
103 | // 1: output sample position | ||
104 | int64_t position[2]; | ||
105 | |||
106 | // first input timestamp, all other timestamps are offset by this one | ||
107 | int64_t start_pts; | ||
108 | |||
109 | // sample format: | ||
110 | enum AVSampleFormat format; | ||
111 | |||
112 | // number of channels: | ||
113 | int channels; | ||
114 | |||
115 | // row of bytes to skip from one sample to next, across multple channels; | ||
116 | // stride = (number-of-channels * bits-per-sample-per-channel) / 8 | ||
117 | int stride; | ||
118 | |||
119 | // fragment window size, power-of-two integer: | ||
120 | int window; | ||
121 | |||
122 | // Hann window coefficients, for feathering | ||
123 | // (blending) the overlapping fragment region: | ||
124 | float *hann; | ||
125 | |||
126 | // tempo scaling factor: | ||
127 | double tempo; | ||
128 | |||
129 | // a snapshot of previous fragment input and output position values | ||
130 | // captured when the tempo scale factor was set most recently: | ||
131 | int64_t origin[2]; | ||
132 | |||
133 | // current/previous fragment ring-buffer: | ||
134 | AudioFragment frag[2]; | ||
135 | |||
136 | // current fragment index: | ||
137 | uint64_t nfrag; | ||
138 | |||
139 | // current state: | ||
140 | FilterState state; | ||
141 | |||
142 | // for fast correlation calculation in frequency domain: | ||
143 | AVTXContext *real_to_complex; | ||
144 | AVTXContext *complex_to_real; | ||
145 | av_tx_fn r2c_fn, c2r_fn; | ||
146 | float *correlation_in; | ||
147 | float *correlation; | ||
148 | |||
149 | // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame | ||
150 | AVFrame *dst_buffer; | ||
151 | uint8_t *dst; | ||
152 | uint8_t *dst_end; | ||
153 | uint64_t nsamples_in; | ||
154 | uint64_t nsamples_out; | ||
155 | } ATempoContext; | ||
156 | |||
157 | #define YAE_ATEMPO_MIN 0.5 | ||
158 | #define YAE_ATEMPO_MAX 100.0 | ||
159 | |||
160 | #define OFFSET(x) offsetof(ATempoContext, x) | ||
161 | |||
162 | static const AVOption atempo_options[] = { | ||
163 | { "tempo", "set tempo scale factor", | ||
164 | OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, | ||
165 | YAE_ATEMPO_MIN, | ||
166 | YAE_ATEMPO_MAX, | ||
167 | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM }, | ||
168 | { NULL } | ||
169 | }; | ||
170 | |||
171 | AVFILTER_DEFINE_CLASS(atempo); | ||
172 | |||
173 | 1546 | inline static AudioFragment *yae_curr_frag(ATempoContext *atempo) | |
174 | { | ||
175 | 1546 | return &atempo->frag[atempo->nfrag % 2]; | |
176 | } | ||
177 | |||
178 | 450 | inline static AudioFragment *yae_prev_frag(ATempoContext *atempo) | |
179 | { | ||
180 | 450 | return &atempo->frag[(atempo->nfrag + 1) % 2]; | |
181 | } | ||
182 | |||
183 | /** | ||
184 | * Reset filter to initial state, do not deallocate existing local buffers. | ||
185 | */ | ||
186 | 3 | static void yae_clear(ATempoContext *atempo) | |
187 | { | ||
188 | 3 | atempo->size = 0; | |
189 | 3 | atempo->head = 0; | |
190 | 3 | atempo->tail = 0; | |
191 | |||
192 | 3 | atempo->nfrag = 0; | |
193 | 3 | atempo->state = YAE_LOAD_FRAGMENT; | |
194 | 3 | atempo->start_pts = AV_NOPTS_VALUE; | |
195 | |||
196 | 3 | atempo->position[0] = 0; | |
197 | 3 | atempo->position[1] = 0; | |
198 | |||
199 | 3 | atempo->origin[0] = 0; | |
200 | 3 | atempo->origin[1] = 0; | |
201 | |||
202 | 3 | atempo->frag[0].position[0] = 0; | |
203 | 3 | atempo->frag[0].position[1] = 0; | |
204 | 3 | atempo->frag[0].nsamples = 0; | |
205 | |||
206 | 3 | atempo->frag[1].position[0] = 0; | |
207 | 3 | atempo->frag[1].position[1] = 0; | |
208 | 3 | atempo->frag[1].nsamples = 0; | |
209 | |||
210 | // shift left position of 1st fragment by half a window | ||
211 | // so that no re-normalization would be required for | ||
212 | // the left half of the 1st fragment: | ||
213 | 3 | atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2); | |
214 | 3 | atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2); | |
215 | |||
216 | 3 | av_frame_free(&atempo->dst_buffer); | |
217 | 3 | atempo->dst = NULL; | |
218 | 3 | atempo->dst_end = NULL; | |
219 | |||
220 | 3 | atempo->nsamples_in = 0; | |
221 | 3 | atempo->nsamples_out = 0; | |
222 | 3 | } | |
223 | |||
224 | /** | ||
225 | * Reset filter to initial state and deallocate all buffers. | ||
226 | */ | ||
227 | 3 | static void yae_release_buffers(ATempoContext *atempo) | |
228 | { | ||
229 | 3 | yae_clear(atempo); | |
230 | |||
231 | 3 | av_freep(&atempo->frag[0].data); | |
232 | 3 | av_freep(&atempo->frag[1].data); | |
233 | 3 | av_freep(&atempo->frag[0].xdat_in); | |
234 | 3 | av_freep(&atempo->frag[1].xdat_in); | |
235 | 3 | av_freep(&atempo->frag[0].xdat); | |
236 | 3 | av_freep(&atempo->frag[1].xdat); | |
237 | |||
238 | 3 | av_freep(&atempo->buffer); | |
239 | 3 | av_freep(&atempo->hann); | |
240 | 3 | av_freep(&atempo->correlation_in); | |
241 | 3 | av_freep(&atempo->correlation); | |
242 | |||
243 | 3 | av_tx_uninit(&atempo->real_to_complex); | |
244 | 3 | av_tx_uninit(&atempo->complex_to_real); | |
245 | 3 | } | |
246 | |||
247 | /** | ||
248 | * Prepare filter for processing audio data of given format, | ||
249 | * sample rate and number of channels. | ||
250 | */ | ||
251 | 1 | static int yae_reset(ATempoContext *atempo, | |
252 | enum AVSampleFormat format, | ||
253 | int sample_rate, | ||
254 | int channels) | ||
255 | { | ||
256 | 1 | const int sample_size = av_get_bytes_per_sample(format); | |
257 | 1 | uint32_t nlevels = 0; | |
258 | 1 | float scale = 1.f, iscale = 1.f; | |
259 | uint32_t pot; | ||
260 | int ret; | ||
261 | int i; | ||
262 | |||
263 | 1 | atempo->format = format; | |
264 | 1 | atempo->channels = channels; | |
265 | 1 | atempo->stride = sample_size * channels; | |
266 | |||
267 | // pick a segment window size: | ||
268 | 1 | atempo->window = sample_rate / 24; | |
269 | |||
270 | // adjust window size to be a power-of-two integer: | ||
271 | 1 | nlevels = av_log2(atempo->window); | |
272 | 1 | pot = 1 << nlevels; | |
273 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | av_assert0(pot <= atempo->window); |
274 | |||
275 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (pot < atempo->window) { |
276 | 1 | atempo->window = pot * 2; | |
277 | 1 | nlevels++; | |
278 | } | ||
279 | |||
280 | /* av_realloc is not aligned enough, so simply discard all the old buffers | ||
281 | * (fortunately, their data does not need to be preserved) */ | ||
282 | 1 | yae_release_buffers(atempo); | |
283 | |||
284 | // initialize audio fragment buffers: | ||
285 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (!(atempo->frag[0].data = av_calloc(atempo->window, atempo->stride)) || |
286 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[1].data = av_calloc(atempo->window, atempo->stride)) || |
287 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[0].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
288 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[1].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
289 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[0].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
290 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | !(atempo->frag[1].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat)))) { |
291 | ✗ | ret = AVERROR(ENOMEM); | |
292 | ✗ | goto fail; | |
293 | } | ||
294 | |||
295 | // initialize rDFT contexts: | ||
296 | 1 | ret = av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, | |
297 | 1 | AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0); | |
298 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
299 | ✗ | goto fail; | |
300 | |||
301 | 1 | ret = av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, | |
302 | 1 | AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0); | |
303 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
304 | ✗ | goto fail; | |
305 | |||
306 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (!(atempo->correlation_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
307 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | !(atempo->correlation = av_calloc(atempo->window, sizeof(AVComplexFloat)))) { |
308 | ✗ | ret = AVERROR(ENOMEM); | |
309 | ✗ | goto fail; | |
310 | } | ||
311 | |||
312 | 1 | atempo->ring = atempo->window * 3; | |
313 | 1 | atempo->buffer = av_calloc(atempo->ring, atempo->stride); | |
314 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->buffer) { |
315 | ✗ | ret = AVERROR(ENOMEM); | |
316 | ✗ | goto fail; | |
317 | } | ||
318 | |||
319 | // initialize the Hann window function: | ||
320 | 1 | atempo->hann = av_malloc_array(atempo->window, sizeof(float)); | |
321 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->hann) { |
322 | ✗ | ret = AVERROR(ENOMEM); | |
323 | ✗ | goto fail; | |
324 | } | ||
325 | |||
326 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 1 times.
|
2049 | for (i = 0; i < atempo->window; i++) { |
327 | 2048 | double t = (double)i / (double)(atempo->window - 1); | |
328 | 2048 | double h = 0.5 * (1.0 - cos(2.0 * M_PI * t)); | |
329 | 2048 | atempo->hann[i] = (float)h; | |
330 | } | ||
331 | |||
332 | 1 | return 0; | |
333 | ✗ | fail: | |
334 | ✗ | yae_release_buffers(atempo); | |
335 | ✗ | return ret; | |
336 | } | ||
337 | |||
338 | ✗ | static int yae_update(AVFilterContext *ctx) | |
339 | { | ||
340 | const AudioFragment *prev; | ||
341 | ✗ | ATempoContext *atempo = ctx->priv; | |
342 | |||
343 | ✗ | prev = yae_prev_frag(atempo); | |
344 | ✗ | atempo->origin[0] = prev->position[0] + atempo->window / 2; | |
345 | ✗ | atempo->origin[1] = prev->position[1] + atempo->window / 2; | |
346 | ✗ | return 0; | |
347 | } | ||
348 | |||
349 | /** | ||
350 | * A helper macro for initializing complex data buffer with scalar data | ||
351 | * of a given type. | ||
352 | */ | ||
353 | #define yae_init_xdat(scalar_type, scalar_max) \ | ||
354 | do { \ | ||
355 | const uint8_t *src_end = src + \ | ||
356 | frag->nsamples * atempo->channels * sizeof(scalar_type); \ | ||
357 | \ | ||
358 | float *xdat = frag->xdat_in; \ | ||
359 | scalar_type tmp; \ | ||
360 | \ | ||
361 | if (atempo->channels == 1) { \ | ||
362 | for (; src < src_end; xdat++) { \ | ||
363 | tmp = *(const scalar_type *)src; \ | ||
364 | src += sizeof(scalar_type); \ | ||
365 | \ | ||
366 | *xdat = (float)tmp; \ | ||
367 | } \ | ||
368 | } else { \ | ||
369 | float s, max, ti, si; \ | ||
370 | int i; \ | ||
371 | \ | ||
372 | for (; src < src_end; xdat++) { \ | ||
373 | tmp = *(const scalar_type *)src; \ | ||
374 | src += sizeof(scalar_type); \ | ||
375 | \ | ||
376 | max = (float)tmp; \ | ||
377 | s = FFMIN((float)scalar_max, \ | ||
378 | (float)fabsf(max)); \ | ||
379 | \ | ||
380 | for (i = 1; i < atempo->channels; i++) { \ | ||
381 | tmp = *(const scalar_type *)src; \ | ||
382 | src += sizeof(scalar_type); \ | ||
383 | \ | ||
384 | ti = (float)tmp; \ | ||
385 | si = FFMIN((float)scalar_max, \ | ||
386 | (float)fabsf(ti)); \ | ||
387 | \ | ||
388 | if (s < si) { \ | ||
389 | s = si; \ | ||
390 | max = ti; \ | ||
391 | } \ | ||
392 | } \ | ||
393 | \ | ||
394 | *xdat = max; \ | ||
395 | } \ | ||
396 | } \ | ||
397 | } while (0) | ||
398 | |||
399 | /** | ||
400 | * Initialize complex data buffer of a given audio fragment | ||
401 | * with down-mixed mono data of appropriate scalar type. | ||
402 | */ | ||
403 | 258 | static void yae_downmix(ATempoContext *atempo, AudioFragment *frag) | |
404 | { | ||
405 | // shortcuts: | ||
406 | 258 | const uint8_t *src = frag->data; | |
407 | |||
408 | // init complex data buffer used for FFT and Correlation: | ||
409 | 258 | memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1)); | |
410 | |||
411 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 258 times.
|
258 | if (atempo->format == AV_SAMPLE_FMT_U8) { |
412 | ✗ | yae_init_xdat(uint8_t, 127); | |
413 |
1/2✓ Branch 0 taken 258 times.
✗ Branch 1 not taken.
|
258 | } else if (atempo->format == AV_SAMPLE_FMT_S16) { |
414 |
3/14✓ Branch 0 taken 258 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 527650 times.
✓ Branch 3 taken 258 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
527908 | yae_init_xdat(int16_t, 32767); |
415 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_S32) { | |
416 | ✗ | yae_init_xdat(int, 2147483647); | |
417 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_FLT) { | |
418 | ✗ | yae_init_xdat(float, 1); | |
419 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_DBL) { | |
420 | ✗ | yae_init_xdat(double, 1); | |
421 | } | ||
422 | 258 | } | |
423 | |||
424 | /** | ||
425 | * Populate the internal data buffer on as-needed basis. | ||
426 | * | ||
427 | * @return | ||
428 | * 0 if requested data was already available or was successfully loaded, | ||
429 | * AVERROR(EAGAIN) if more input data is required. | ||
430 | */ | ||
431 | 322 | static int yae_load_data(ATempoContext *atempo, | |
432 | const uint8_t **src_ref, | ||
433 | const uint8_t *src_end, | ||
434 | int64_t stop_here) | ||
435 | { | ||
436 | // shortcut: | ||
437 | 322 | const uint8_t *src = *src_ref; | |
438 | 322 | const int read_size = stop_here - atempo->position[0]; | |
439 | |||
440 |
2/2✓ Branch 0 taken 53 times.
✓ Branch 1 taken 269 times.
|
322 | if (stop_here <= atempo->position[0]) { |
441 | 53 | return 0; | |
442 | } | ||
443 | |||
444 | // samples are not expected to be skipped, unless tempo is greater than 2: | ||
445 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 269 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
269 | av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0); |
446 | |||
447 |
4/4✓ Branch 0 taken 334 times.
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 269 times.
✓ Branch 3 taken 65 times.
|
807 | while (atempo->position[0] < stop_here && src < src_end) { |
448 | 269 | int src_samples = (src_end - src) / atempo->stride; | |
449 | |||
450 | // load data piece-wise, in order to avoid complicating the logic: | ||
451 | 269 | int nsamples = FFMIN(read_size, src_samples); | |
452 | int na; | ||
453 | int nb; | ||
454 | |||
455 | 269 | nsamples = FFMIN(nsamples, atempo->ring); | |
456 | 269 | na = FFMIN(nsamples, atempo->ring - atempo->tail); | |
457 | 269 | nb = FFMIN(nsamples - na, atempo->ring); | |
458 | |||
459 |
1/2✓ Branch 0 taken 269 times.
✗ Branch 1 not taken.
|
269 | if (na) { |
460 | 269 | uint8_t *a = atempo->buffer + atempo->tail * atempo->stride; | |
461 | 269 | memcpy(a, src, na * atempo->stride); | |
462 | |||
463 | 269 | src += na * atempo->stride; | |
464 | 269 | atempo->position[0] += na; | |
465 | |||
466 | 269 | atempo->size = FFMIN(atempo->size + na, atempo->ring); | |
467 | 269 | atempo->tail = (atempo->tail + na) % atempo->ring; | |
468 | 269 | atempo->head = | |
469 | 269 | atempo->size < atempo->ring ? | |
470 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 264 times.
|
269 | atempo->tail - atempo->size : |
471 | atempo->tail; | ||
472 | } | ||
473 | |||
474 |
2/2✓ Branch 0 taken 247 times.
✓ Branch 1 taken 22 times.
|
269 | if (nb) { |
475 | 22 | uint8_t *b = atempo->buffer; | |
476 | 22 | memcpy(b, src, nb * atempo->stride); | |
477 | |||
478 | 22 | src += nb * atempo->stride; | |
479 | 22 | atempo->position[0] += nb; | |
480 | |||
481 | 22 | atempo->size = FFMIN(atempo->size + nb, atempo->ring); | |
482 | 22 | atempo->tail = (atempo->tail + nb) % atempo->ring; | |
483 | 22 | atempo->head = | |
484 | 22 | atempo->size < atempo->ring ? | |
485 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
|
22 | atempo->tail - atempo->size : |
486 | atempo->tail; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | // pass back the updated source buffer pointer: | ||
491 | 269 | *src_ref = src; | |
492 | |||
493 | // sanity check: | ||
494 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 269 times.
|
269 | av_assert0(atempo->position[0] <= stop_here); |
495 | |||
496 |
2/2✓ Branch 0 taken 204 times.
✓ Branch 1 taken 65 times.
|
269 | return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN); |
497 | } | ||
498 | |||
499 | /** | ||
500 | * Populate current audio fragment data buffer. | ||
501 | * | ||
502 | * @return | ||
503 | * 0 when the fragment is ready, | ||
504 | * AVERROR(EAGAIN) if more input data is required. | ||
505 | */ | ||
506 | 324 | static int yae_load_frag(ATempoContext *atempo, | |
507 | const uint8_t **src_ref, | ||
508 | const uint8_t *src_end) | ||
509 | { | ||
510 | // shortcuts: | ||
511 | 324 | AudioFragment *frag = yae_curr_frag(atempo); | |
512 | uint8_t *dst; | ||
513 | int64_t missing, start, zeros; | ||
514 | uint32_t nsamples; | ||
515 | const uint8_t *a, *b; | ||
516 | int i0, i1, n0, n1, na, nb; | ||
517 | |||
518 | 324 | int64_t stop_here = frag->position[0] + atempo->window; | |
519 |
4/4✓ Branch 0 taken 322 times.
✓ Branch 1 taken 2 times.
✓ Branch 3 taken 65 times.
✓ Branch 4 taken 257 times.
|
324 | if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) { |
520 | 65 | return AVERROR(EAGAIN); | |
521 | } | ||
522 | |||
523 | // calculate the number of samples we don't have: | ||
524 | 259 | missing = | |
525 | 259 | stop_here > atempo->position[0] ? | |
526 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 257 times.
|
259 | stop_here - atempo->position[0] : 0; |
527 | |||
528 | 259 | nsamples = | |
529 | 259 | missing < (int64_t)atempo->window ? | |
530 |
1/2✓ Branch 0 taken 259 times.
✗ Branch 1 not taken.
|
259 | (uint32_t)(atempo->window - missing) : 0; |
531 | |||
532 | // setup the output buffer: | ||
533 | 259 | frag->nsamples = nsamples; | |
534 | 259 | dst = frag->data; | |
535 | |||
536 | 259 | start = atempo->position[0] - atempo->size; | |
537 | |||
538 | // what we don't have we substitute with zeros: | ||
539 | 259 | zeros = | |
540 | 259 | frag->position[0] < start ? | |
541 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 258 times.
|
259 | FFMIN(start - frag->position[0], (int64_t)nsamples) : 0; |
542 | |||
543 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 259 times.
|
259 | if (zeros == nsamples) { |
544 | ✗ | return 0; | |
545 | } | ||
546 | |||
547 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 258 times.
|
259 | if (frag->position[0] < start) { |
548 | 1 | memset(dst, 0, zeros * atempo->stride); | |
549 | 1 | dst += zeros * atempo->stride; | |
550 | } | ||
551 | |||
552 | // get the remaining data from the ring buffer: | ||
553 | 518 | na = (atempo->head < atempo->tail ? | |
554 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 254 times.
|
259 | atempo->tail - atempo->head : |
555 | 254 | atempo->ring - atempo->head); | |
556 | |||
557 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 5 times.
|
259 | nb = atempo->head < atempo->tail ? 0 : atempo->tail; |
558 | |||
559 | // sanity check: | ||
560 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 259 times.
|
259 | av_assert0(nsamples <= zeros + na + nb); |
561 | |||
562 | 259 | a = atempo->buffer + atempo->head * atempo->stride; | |
563 | 259 | b = atempo->buffer; | |
564 | |||
565 | 259 | i0 = frag->position[0] + zeros - start; | |
566 |
2/2✓ Branch 0 taken 168 times.
✓ Branch 1 taken 91 times.
|
259 | i1 = i0 < na ? 0 : i0 - na; |
567 | |||
568 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 168 times.
|
259 | n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0; |
569 | 259 | n1 = nsamples - zeros - n0; | |
570 | |||
571 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 168 times.
|
259 | if (n0) { |
572 | 91 | memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride); | |
573 | 91 | dst += n0 * atempo->stride; | |
574 | } | ||
575 | |||
576 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 5 times.
|
259 | if (n1) { |
577 | 254 | memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride); | |
578 | } | ||
579 | |||
580 | 259 | return 0; | |
581 | } | ||
582 | |||
583 | /** | ||
584 | * Prepare for loading next audio fragment. | ||
585 | */ | ||
586 | 129 | static void yae_advance_to_next_frag(ATempoContext *atempo) | |
587 | { | ||
588 | 129 | const double fragment_step = atempo->tempo * (double)(atempo->window / 2); | |
589 | |||
590 | const AudioFragment *prev; | ||
591 | AudioFragment *frag; | ||
592 | |||
593 | 129 | atempo->nfrag++; | |
594 | 129 | prev = yae_prev_frag(atempo); | |
595 | 129 | frag = yae_curr_frag(atempo); | |
596 | |||
597 | 129 | frag->position[0] = prev->position[0] + (int64_t)fragment_step; | |
598 | 129 | frag->position[1] = prev->position[1] + atempo->window / 2; | |
599 | 129 | frag->nsamples = 0; | |
600 | 129 | } | |
601 | |||
602 | /** | ||
603 | * Calculate cross-correlation via rDFT. | ||
604 | * | ||
605 | * Multiply two vectors of complex numbers (result of real_to_complex rDFT) | ||
606 | * and transform back via complex_to_real rDFT. | ||
607 | */ | ||
608 | 129 | static void yae_xcorr_via_rdft(float *xcorr_in, | |
609 | float *xcorr, | ||
610 | AVTXContext *complex_to_real, | ||
611 | av_tx_fn c2r_fn, | ||
612 | const AVComplexFloat *xa, | ||
613 | const AVComplexFloat *xb, | ||
614 | const int window) | ||
615 | { | ||
616 | 129 | AVComplexFloat *xc = (AVComplexFloat *)xcorr_in; | |
617 | int i; | ||
618 | |||
619 |
2/2✓ Branch 0 taken 264321 times.
✓ Branch 1 taken 129 times.
|
264450 | for (i = 0; i <= window; i++, xa++, xb++, xc++) { |
620 | 264321 | xc->re = (xa->re * xb->re + xa->im * xb->im); | |
621 | 264321 | xc->im = (xa->im * xb->re - xa->re * xb->im); | |
622 | } | ||
623 | |||
624 | // apply inverse rDFT: | ||
625 | 129 | c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(*xc)); | |
626 | 129 | } | |
627 | |||
628 | /** | ||
629 | * Calculate alignment offset for given fragment | ||
630 | * relative to the previous fragment. | ||
631 | * | ||
632 | * @return alignment offset of current fragment relative to previous. | ||
633 | */ | ||
634 | 129 | static int yae_align(AudioFragment *frag, | |
635 | const AudioFragment *prev, | ||
636 | const int window, | ||
637 | const int delta_max, | ||
638 | const int drift, | ||
639 | float *correlation_in, | ||
640 | float *correlation, | ||
641 | AVTXContext *complex_to_real, | ||
642 | av_tx_fn c2r_fn) | ||
643 | { | ||
644 | 129 | int best_offset = -drift; | |
645 | 129 | float best_metric = -FLT_MAX; | |
646 | float *xcorr; | ||
647 | |||
648 | int i0; | ||
649 | int i1; | ||
650 | int i; | ||
651 | |||
652 | 129 | yae_xcorr_via_rdft(correlation_in, | |
653 | correlation, | ||
654 | complex_to_real, | ||
655 | c2r_fn, | ||
656 | 129 | (const AVComplexFloat *)prev->xdat, | |
657 | 129 | (const AVComplexFloat *)frag->xdat, | |
658 | window); | ||
659 | |||
660 | // identify search window boundaries: | ||
661 | 129 | i0 = FFMAX(window / 2 - delta_max - drift, 0); | |
662 | 129 | i0 = FFMIN(i0, window); | |
663 | |||
664 | 129 | i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16); | |
665 | 129 | i1 = FFMAX(i1, 0); | |
666 | |||
667 | // identify cross-correlation peaks within search window: | ||
668 | 129 | xcorr = correlation + i0; | |
669 | |||
670 |
2/2✓ Branch 0 taken 219329 times.
✓ Branch 1 taken 129 times.
|
219458 | for (i = i0; i < i1; i++, xcorr++) { |
671 | 219329 | float metric = *xcorr; | |
672 | |||
673 | // normalize: | ||
674 | 219329 | float drifti = (float)(drift + i); | |
675 | 219329 | metric *= drifti * (float)(i - i0) * (float)(i1 - i); | |
676 | |||
677 |
2/2✓ Branch 0 taken 11612 times.
✓ Branch 1 taken 207717 times.
|
219329 | if (metric > best_metric) { |
678 | 11612 | best_metric = metric; | |
679 | 11612 | best_offset = i - window / 2; | |
680 | } | ||
681 | } | ||
682 | |||
683 | 129 | return best_offset; | |
684 | } | ||
685 | |||
686 | /** | ||
687 | * Adjust current fragment position for better alignment | ||
688 | * with previous fragment. | ||
689 | * | ||
690 | * @return alignment correction. | ||
691 | */ | ||
692 | 129 | static int yae_adjust_position(ATempoContext *atempo) | |
693 | { | ||
694 | 129 | const AudioFragment *prev = yae_prev_frag(atempo); | |
695 | 129 | AudioFragment *frag = yae_curr_frag(atempo); | |
696 | |||
697 | 129 | const double prev_output_position = | |
698 | 129 | (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) * | |
699 | 129 | atempo->tempo; | |
700 | |||
701 | 129 | const double ideal_output_position = | |
702 | 129 | (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2); | |
703 | |||
704 | 129 | const int drift = (int)(prev_output_position - ideal_output_position); | |
705 | |||
706 | 129 | const int delta_max = atempo->window / 2; | |
707 | 129 | const int correction = yae_align(frag, | |
708 | prev, | ||
709 | atempo->window, | ||
710 | delta_max, | ||
711 | drift, | ||
712 | atempo->correlation_in, | ||
713 | atempo->correlation, | ||
714 | atempo->complex_to_real, | ||
715 | atempo->c2r_fn); | ||
716 | |||
717 |
1/2✓ Branch 0 taken 129 times.
✗ Branch 1 not taken.
|
129 | if (correction) { |
718 | // adjust fragment position: | ||
719 | 129 | frag->position[0] -= correction; | |
720 | |||
721 | // clear so that the fragment can be reloaded: | ||
722 | 129 | frag->nsamples = 0; | |
723 | } | ||
724 | |||
725 | 129 | return correction; | |
726 | } | ||
727 | |||
728 | /** | ||
729 | * A helper macro for blending the overlap region of previous | ||
730 | * and current audio fragment. | ||
731 | */ | ||
732 | #define yae_blend(scalar_type) \ | ||
733 | do { \ | ||
734 | const scalar_type *aaa = (const scalar_type *)a; \ | ||
735 | const scalar_type *bbb = (const scalar_type *)b; \ | ||
736 | \ | ||
737 | scalar_type *out = (scalar_type *)dst; \ | ||
738 | scalar_type *out_end = (scalar_type *)dst_end; \ | ||
739 | int64_t i; \ | ||
740 | \ | ||
741 | for (i = 0; i < overlap && out < out_end; \ | ||
742 | i++, atempo->position[1]++, wa++, wb++) { \ | ||
743 | float w0 = *wa; \ | ||
744 | float w1 = *wb; \ | ||
745 | int j; \ | ||
746 | \ | ||
747 | for (j = 0; j < atempo->channels; \ | ||
748 | j++, aaa++, bbb++, out++) { \ | ||
749 | float t0 = (float)*aaa; \ | ||
750 | float t1 = (float)*bbb; \ | ||
751 | \ | ||
752 | *out = \ | ||
753 | frag->position[0] + i < 0 ? \ | ||
754 | *aaa : \ | ||
755 | (scalar_type)(t0 * w0 + t1 * w1); \ | ||
756 | } \ | ||
757 | } \ | ||
758 | dst = (uint8_t *)out; \ | ||
759 | } while (0) | ||
760 | |||
761 | /** | ||
762 | * Blend the overlap region of previous and current audio fragment | ||
763 | * and output the results to the given destination buffer. | ||
764 | * | ||
765 | * @return | ||
766 | * 0 if the overlap region was completely stored in the dst buffer, | ||
767 | * AVERROR(EAGAIN) if more destination buffer space is required. | ||
768 | */ | ||
769 | 192 | static int yae_overlap_add(ATempoContext *atempo, | |
770 | uint8_t **dst_ref, | ||
771 | uint8_t *dst_end) | ||
772 | { | ||
773 | // shortcuts: | ||
774 | 192 | const AudioFragment *prev = yae_prev_frag(atempo); | |
775 | 192 | const AudioFragment *frag = yae_curr_frag(atempo); | |
776 | |||
777 | 192 | const int64_t start_here = FFMAX(atempo->position[1], | |
778 | frag->position[1]); | ||
779 | |||
780 | 192 | const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples, | |
781 | frag->position[1] + frag->nsamples); | ||
782 | |||
783 | 192 | const int64_t overlap = stop_here - start_here; | |
784 | |||
785 | 192 | const int64_t ia = start_here - prev->position[1]; | |
786 | 192 | const int64_t ib = start_here - frag->position[1]; | |
787 | |||
788 | 192 | const float *wa = atempo->hann + ia; | |
789 | 192 | const float *wb = atempo->hann + ib; | |
790 | |||
791 | 192 | const uint8_t *a = prev->data + ia * atempo->stride; | |
792 | 192 | const uint8_t *b = frag->data + ib * atempo->stride; | |
793 | |||
794 | 192 | uint8_t *dst = *dst_ref; | |
795 | |||
796 |
3/6✓ Branch 0 taken 192 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 192 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 192 times.
|
192 | av_assert0(start_here <= stop_here && |
797 | frag->position[1] <= start_here && | ||
798 | overlap <= frag->nsamples); | ||
799 | |||
800 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 192 times.
|
192 | if (atempo->format == AV_SAMPLE_FMT_U8) { |
801 | ✗ | yae_blend(uint8_t); | |
802 |
1/2✓ Branch 0 taken 192 times.
✗ Branch 1 not taken.
|
192 | } else if (atempo->format == AV_SAMPLE_FMT_S16) { |
803 |
7/8✗ Branch 0 not taken.
✓ Branch 1 taken 132096 times.
✓ Branch 2 taken 132096 times.
✓ Branch 3 taken 132096 times.
✓ Branch 4 taken 132159 times.
✓ Branch 5 taken 129 times.
✓ Branch 6 taken 132096 times.
✓ Branch 7 taken 63 times.
|
264384 | yae_blend(int16_t); |
804 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_S32) { | |
805 | ✗ | yae_blend(int); | |
806 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_FLT) { | |
807 | ✗ | yae_blend(float); | |
808 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_DBL) { | |
809 | ✗ | yae_blend(double); | |
810 | } | ||
811 | |||
812 | // pass-back the updated destination buffer pointer: | ||
813 | 192 | *dst_ref = dst; | |
814 | |||
815 |
2/2✓ Branch 0 taken 129 times.
✓ Branch 1 taken 63 times.
|
192 | return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN); |
816 | } | ||
817 | |||
818 | /** | ||
819 | * Feed as much data to the filter as it is able to consume | ||
820 | * and receive as much processed data in the destination buffer | ||
821 | * as it is able to produce or store. | ||
822 | */ | ||
823 | static void | ||
824 | 128 | yae_apply(ATempoContext *atempo, | |
825 | const uint8_t **src_ref, | ||
826 | const uint8_t *src_end, | ||
827 | uint8_t **dst_ref, | ||
828 | uint8_t *dst_end) | ||
829 | { | ||
830 | while (1) { | ||
831 |
2/2✓ Branch 0 taken 194 times.
✓ Branch 1 taken 63 times.
|
257 | if (atempo->state == YAE_LOAD_FRAGMENT) { |
832 | // load additional data for the current fragment: | ||
833 |
2/2✓ Branch 1 taken 65 times.
✓ Branch 2 taken 129 times.
|
194 | if (yae_load_frag(atempo, src_ref, src_end) != 0) { |
834 | 65 | break; | |
835 | } | ||
836 | |||
837 | // down-mix to mono: | ||
838 | 129 | yae_downmix(atempo, yae_curr_frag(atempo)); | |
839 | |||
840 | // apply rDFT: | ||
841 | 129 | atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float)); | |
842 | |||
843 | // must load the second fragment before alignment can start: | ||
844 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 128 times.
|
129 | if (!atempo->nfrag) { |
845 | 1 | yae_advance_to_next_frag(atempo); | |
846 | 1 | continue; | |
847 | } | ||
848 | |||
849 | 128 | atempo->state = YAE_ADJUST_POSITION; | |
850 | } | ||
851 | |||
852 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 63 times.
|
191 | if (atempo->state == YAE_ADJUST_POSITION) { |
853 | // adjust position for better alignment: | ||
854 |
1/2✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
|
128 | if (yae_adjust_position(atempo)) { |
855 | // reload the fragment at the corrected position, so that the | ||
856 | // Hann window blending would not require normalization: | ||
857 | 128 | atempo->state = YAE_RELOAD_FRAGMENT; | |
858 | } else { | ||
859 | ✗ | atempo->state = YAE_OUTPUT_OVERLAP_ADD; | |
860 | } | ||
861 | } | ||
862 | |||
863 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 63 times.
|
191 | if (atempo->state == YAE_RELOAD_FRAGMENT) { |
864 | // load additional data if necessary due to position adjustment: | ||
865 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 128 times.
|
128 | if (yae_load_frag(atempo, src_ref, src_end) != 0) { |
866 | ✗ | break; | |
867 | } | ||
868 | |||
869 | // down-mix to mono: | ||
870 | 128 | yae_downmix(atempo, yae_curr_frag(atempo)); | |
871 | |||
872 | // apply rDFT: | ||
873 | 128 | atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float)); | |
874 | |||
875 | 128 | atempo->state = YAE_OUTPUT_OVERLAP_ADD; | |
876 | } | ||
877 | |||
878 |
1/2✓ Branch 0 taken 191 times.
✗ Branch 1 not taken.
|
191 | if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) { |
879 | // overlap-add and output the result: | ||
880 |
2/2✓ Branch 1 taken 63 times.
✓ Branch 2 taken 128 times.
|
191 | if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) { |
881 | 63 | break; | |
882 | } | ||
883 | |||
884 | // advance to the next fragment, repeat: | ||
885 | 128 | yae_advance_to_next_frag(atempo); | |
886 | 128 | atempo->state = YAE_LOAD_FRAGMENT; | |
887 | } | ||
888 | } | ||
889 | 128 | } | |
890 | |||
891 | /** | ||
892 | * Flush any buffered data from the filter. | ||
893 | * | ||
894 | * @return | ||
895 | * 0 if all data was completely stored in the dst buffer, | ||
896 | * AVERROR(EAGAIN) if more destination buffer space is required. | ||
897 | */ | ||
898 | 1 | static int yae_flush(ATempoContext *atempo, | |
899 | uint8_t **dst_ref, | ||
900 | uint8_t *dst_end) | ||
901 | { | ||
902 | 1 | AudioFragment *frag = yae_curr_frag(atempo); | |
903 | int64_t overlap_end; | ||
904 | int64_t start_here; | ||
905 | int64_t stop_here; | ||
906 | int64_t offset; | ||
907 | |||
908 | const uint8_t *src; | ||
909 | uint8_t *dst; | ||
910 | |||
911 | int src_size; | ||
912 | int dst_size; | ||
913 | int nbytes; | ||
914 | |||
915 | 1 | atempo->state = YAE_FLUSH_OUTPUT; | |
916 | |||
917 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->nfrag) { |
918 | // there is nothing to flush: | ||
919 | ✗ | return 0; | |
920 | } | ||
921 | |||
922 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (atempo->position[0] == frag->position[0] + frag->nsamples && |
923 | ✗ | atempo->position[1] == frag->position[1] + frag->nsamples) { | |
924 | // the current fragment is already flushed: | ||
925 | ✗ | return 0; | |
926 | } | ||
927 | |||
928 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (frag->position[0] + frag->nsamples < atempo->position[0]) { |
929 | // finish loading the current (possibly partial) fragment: | ||
930 | 1 | yae_load_frag(atempo, NULL, NULL); | |
931 | |||
932 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (atempo->nfrag) { |
933 | // down-mix to mono: | ||
934 | 1 | yae_downmix(atempo, frag); | |
935 | |||
936 | // apply rDFT: | ||
937 | 1 | atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float)); | |
938 | |||
939 | // align current fragment to previous fragment: | ||
940 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (yae_adjust_position(atempo)) { |
941 | // reload the current fragment due to adjusted position: | ||
942 | 1 | yae_load_frag(atempo, NULL, NULL); | |
943 | } | ||
944 | } | ||
945 | } | ||
946 | |||
947 | // flush the overlap region: | ||
948 | 1 | overlap_end = frag->position[1] + FFMIN(atempo->window / 2, | |
949 | frag->nsamples); | ||
950 | |||
951 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | while (atempo->position[1] < overlap_end) { |
952 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) { |
953 | ✗ | return AVERROR(EAGAIN); | |
954 | } | ||
955 | } | ||
956 | |||
957 | // check whether all of the input samples have been consumed: | ||
958 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (frag->position[0] + frag->nsamples < atempo->position[0]) { |
959 | ✗ | yae_advance_to_next_frag(atempo); | |
960 | ✗ | return AVERROR(EAGAIN); | |
961 | } | ||
962 | |||
963 | // flush the remainder of the current fragment: | ||
964 | 1 | start_here = FFMAX(atempo->position[1], overlap_end); | |
965 | 1 | stop_here = frag->position[1] + frag->nsamples; | |
966 | 1 | offset = start_here - frag->position[1]; | |
967 |
2/4✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
|
1 | av_assert0(start_here <= stop_here && frag->position[1] <= start_here); |
968 | |||
969 | 1 | src = frag->data + offset * atempo->stride; | |
970 | 1 | dst = (uint8_t *)*dst_ref; | |
971 | |||
972 | 1 | src_size = (int)(stop_here - start_here) * atempo->stride; | |
973 | 1 | dst_size = dst_end - dst; | |
974 | 1 | nbytes = FFMIN(src_size, dst_size); | |
975 | |||
976 | 1 | memcpy(dst, src, nbytes); | |
977 | 1 | dst += nbytes; | |
978 | |||
979 | 1 | atempo->position[1] += (nbytes / atempo->stride); | |
980 | |||
981 | // pass-back the updated destination buffer pointer: | ||
982 | 1 | *dst_ref = (uint8_t *)dst; | |
983 | |||
984 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN); |
985 | } | ||
986 | |||
987 | 2 | static av_cold int init(AVFilterContext *ctx) | |
988 | { | ||
989 | 2 | ATempoContext *atempo = ctx->priv; | |
990 | 2 | atempo->format = AV_SAMPLE_FMT_NONE; | |
991 | 2 | atempo->state = YAE_LOAD_FRAGMENT; | |
992 | 2 | return 0; | |
993 | } | ||
994 | |||
995 | 2 | static av_cold void uninit(AVFilterContext *ctx) | |
996 | { | ||
997 | 2 | ATempoContext *atempo = ctx->priv; | |
998 | 2 | yae_release_buffers(atempo); | |
999 | 2 | } | |
1000 | |||
1001 | // WSOLA necessitates an internal sliding window ring buffer | ||
1002 | // for incoming audio stream. | ||
1003 | // | ||
1004 | // Planar sample formats are too cumbersome to store in a ring buffer, | ||
1005 | // therefore planar sample formats are not supported. | ||
1006 | // | ||
1007 | static const enum AVSampleFormat sample_fmts[] = { | ||
1008 | AV_SAMPLE_FMT_U8, | ||
1009 | AV_SAMPLE_FMT_S16, | ||
1010 | AV_SAMPLE_FMT_S32, | ||
1011 | AV_SAMPLE_FMT_FLT, | ||
1012 | AV_SAMPLE_FMT_DBL, | ||
1013 | AV_SAMPLE_FMT_NONE | ||
1014 | }; | ||
1015 | |||
1016 | 1 | static int config_props(AVFilterLink *inlink) | |
1017 | { | ||
1018 | 1 | AVFilterContext *ctx = inlink->dst; | |
1019 | 1 | ATempoContext *atempo = ctx->priv; | |
1020 | |||
1021 | 1 | enum AVSampleFormat format = inlink->format; | |
1022 | 1 | int sample_rate = (int)inlink->sample_rate; | |
1023 | |||
1024 | 1 | return yae_reset(atempo, format, sample_rate, inlink->ch_layout.nb_channels); | |
1025 | } | ||
1026 | |||
1027 | 65 | static int push_samples(ATempoContext *atempo, | |
1028 | AVFilterLink *outlink, | ||
1029 | int n_out) | ||
1030 | { | ||
1031 | int ret; | ||
1032 | |||
1033 | 65 | atempo->dst_buffer->sample_rate = outlink->sample_rate; | |
1034 | 65 | atempo->dst_buffer->nb_samples = n_out; | |
1035 | |||
1036 | // adjust the PTS: | ||
1037 | 65 | atempo->dst_buffer->pts = atempo->start_pts + | |
1038 | 65 | av_rescale_q(atempo->nsamples_out, | |
1039 | 65 | (AVRational){ 1, outlink->sample_rate }, | |
1040 | outlink->time_base); | ||
1041 | |||
1042 | 65 | ret = ff_filter_frame(outlink, atempo->dst_buffer); | |
1043 | 65 | atempo->dst_buffer = NULL; | |
1044 | 65 | atempo->dst = NULL; | |
1045 | 65 | atempo->dst_end = NULL; | |
1046 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 65 times.
|
65 | if (ret < 0) |
1047 | ✗ | return ret; | |
1048 | |||
1049 | 65 | atempo->nsamples_out += n_out; | |
1050 | 65 | return 0; | |
1051 | } | ||
1052 | |||
1053 | 65 | static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer) | |
1054 | { | ||
1055 | 65 | AVFilterContext *ctx = inlink->dst; | |
1056 | 65 | ATempoContext *atempo = ctx->priv; | |
1057 | 65 | AVFilterLink *outlink = ctx->outputs[0]; | |
1058 | |||
1059 | 65 | int ret = 0; | |
1060 | 65 | int n_in = src_buffer->nb_samples; | |
1061 | 65 | int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo); | |
1062 | |||
1063 | 65 | const uint8_t *src = src_buffer->data[0]; | |
1064 | 65 | const uint8_t *src_end = src + n_in * atempo->stride; | |
1065 | |||
1066 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 64 times.
|
65 | if (atempo->start_pts == AV_NOPTS_VALUE) |
1067 | 1 | atempo->start_pts = av_rescale_q(src_buffer->pts, | |
1068 | inlink->time_base, | ||
1069 | outlink->time_base); | ||
1070 | |||
1071 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 65 times.
|
258 | while (src < src_end) { |
1072 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
|
128 | if (!atempo->dst_buffer) { |
1073 | 64 | atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out); | |
1074 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (!atempo->dst_buffer) { |
1075 | ✗ | av_frame_free(&src_buffer); | |
1076 | ✗ | return AVERROR(ENOMEM); | |
1077 | } | ||
1078 | 64 | av_frame_copy_props(atempo->dst_buffer, src_buffer); | |
1079 | |||
1080 | 64 | atempo->dst = atempo->dst_buffer->data[0]; | |
1081 | 64 | atempo->dst_end = atempo->dst + n_out * atempo->stride; | |
1082 | } | ||
1083 | |||
1084 | 128 | yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end); | |
1085 | |||
1086 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
|
128 | if (atempo->dst == atempo->dst_end) { |
1087 | 64 | int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) / | |
1088 | 64 | atempo->stride); | |
1089 | 64 | ret = push_samples(atempo, outlink, n_samples); | |
1090 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (ret < 0) |
1091 | ✗ | goto end; | |
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | 65 | atempo->nsamples_in += n_in; | |
1096 | 65 | end: | |
1097 | 65 | av_frame_free(&src_buffer); | |
1098 | 65 | return ret; | |
1099 | } | ||
1100 | |||
1101 | 66 | static int request_frame(AVFilterLink *outlink) | |
1102 | { | ||
1103 | 66 | AVFilterContext *ctx = outlink->src; | |
1104 | 66 | ATempoContext *atempo = ctx->priv; | |
1105 | int ret; | ||
1106 | |||
1107 | 66 | ret = ff_request_frame(ctx->inputs[0]); | |
1108 | |||
1109 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
|
66 | if (ret == AVERROR_EOF) { |
1110 | // flush the filter: | ||
1111 | 1 | int n_max = atempo->ring; | |
1112 | int n_out; | ||
1113 | 1 | int err = AVERROR(EAGAIN); | |
1114 | |||
1115 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | while (err == AVERROR(EAGAIN)) { |
1116 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!atempo->dst_buffer) { |
1117 | 1 | atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max); | |
1118 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->dst_buffer) |
1119 | ✗ | return AVERROR(ENOMEM); | |
1120 | |||
1121 | 1 | atempo->dst = atempo->dst_buffer->data[0]; | |
1122 | 1 | atempo->dst_end = atempo->dst + n_max * atempo->stride; | |
1123 | } | ||
1124 | |||
1125 | 1 | err = yae_flush(atempo, &atempo->dst, atempo->dst_end); | |
1126 | |||
1127 | 1 | n_out = ((atempo->dst - atempo->dst_buffer->data[0]) / | |
1128 | 1 | atempo->stride); | |
1129 | |||
1130 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (n_out) { |
1131 | 1 | ret = push_samples(atempo, outlink, n_out); | |
1132 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
1133 | ✗ | return ret; | |
1134 | } | ||
1135 | } | ||
1136 | |||
1137 | 1 | av_frame_free(&atempo->dst_buffer); | |
1138 | 1 | atempo->dst = NULL; | |
1139 | 1 | atempo->dst_end = NULL; | |
1140 | |||
1141 | 1 | return AVERROR_EOF; | |
1142 | } | ||
1143 | |||
1144 | 65 | return ret; | |
1145 | } | ||
1146 | |||
1147 | ✗ | static int process_command(AVFilterContext *ctx, | |
1148 | const char *cmd, | ||
1149 | const char *arg, | ||
1150 | char *res, | ||
1151 | int res_len, | ||
1152 | int flags) | ||
1153 | { | ||
1154 | ✗ | int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); | |
1155 | |||
1156 | ✗ | if (ret < 0) | |
1157 | ✗ | return ret; | |
1158 | |||
1159 | ✗ | return yae_update(ctx); | |
1160 | } | ||
1161 | |||
1162 | static const AVFilterPad atempo_inputs[] = { | ||
1163 | { | ||
1164 | .name = "default", | ||
1165 | .type = AVMEDIA_TYPE_AUDIO, | ||
1166 | .filter_frame = filter_frame, | ||
1167 | .config_props = config_props, | ||
1168 | }, | ||
1169 | }; | ||
1170 | |||
1171 | static const AVFilterPad atempo_outputs[] = { | ||
1172 | { | ||
1173 | .name = "default", | ||
1174 | .request_frame = request_frame, | ||
1175 | .type = AVMEDIA_TYPE_AUDIO, | ||
1176 | }, | ||
1177 | }; | ||
1178 | |||
1179 | const AVFilter ff_af_atempo = { | ||
1180 | .name = "atempo", | ||
1181 | .description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."), | ||
1182 | .init = init, | ||
1183 | .uninit = uninit, | ||
1184 | .process_command = process_command, | ||
1185 | .priv_size = sizeof(ATempoContext), | ||
1186 | .priv_class = &atempo_class, | ||
1187 | FILTER_INPUTS(atempo_inputs), | ||
1188 | FILTER_OUTPUTS(atempo_outputs), | ||
1189 | FILTER_SAMPLEFMTS_ARRAY(sample_fmts), | ||
1190 | }; | ||
1191 |