| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com> | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | /** | ||
| 22 | * @file | ||
| 23 | * tempo scaling audio filter -- an implementation of WSOLA algorithm | ||
| 24 | * | ||
| 25 | * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h | ||
| 26 | * from Apprentice Video player by Pavel Koshevoy. | ||
| 27 | * https://sourceforge.net/projects/apprenticevideo/ | ||
| 28 | * | ||
| 29 | * An explanation of SOLA algorithm is available at | ||
| 30 | * http://www.surina.net/article/time-and-pitch-scaling.html | ||
| 31 | * | ||
| 32 | * WSOLA is very similar to SOLA, only one major difference exists between | ||
| 33 | * these algorithms. SOLA shifts audio fragments along the output stream, | ||
| 34 | * whereas WSOLA shifts audio fragments along the input stream. | ||
| 35 | * | ||
| 36 | * The advantage of WSOLA algorithm is that the overlap region size is | ||
| 37 | * always the same, therefore the blending function is constant and | ||
| 38 | * can be precomputed. | ||
| 39 | */ | ||
| 40 | |||
| 41 | #include <float.h> | ||
| 42 | #include "libavutil/avassert.h" | ||
| 43 | #include "libavutil/channel_layout.h" | ||
| 44 | #include "libavutil/mem.h" | ||
| 45 | #include "libavutil/opt.h" | ||
| 46 | #include "libavutil/samplefmt.h" | ||
| 47 | #include "libavutil/tx.h" | ||
| 48 | #include "avfilter.h" | ||
| 49 | #include "audio.h" | ||
| 50 | #include "filters.h" | ||
| 51 | |||
| 52 | /** | ||
| 53 | * A fragment of audio waveform | ||
| 54 | */ | ||
| 55 | typedef struct AudioFragment { | ||
| 56 | // index of the first sample of this fragment in the overall waveform; | ||
| 57 | // 0: input sample position | ||
| 58 | // 1: output sample position | ||
| 59 | int64_t position[2]; | ||
| 60 | |||
| 61 | // original packed multi-channel samples: | ||
| 62 | uint8_t *data; | ||
| 63 | |||
| 64 | // number of samples in this fragment: | ||
| 65 | int nsamples; | ||
| 66 | |||
| 67 | // rDFT transform of the down-mixed mono fragment, used for | ||
| 68 | // fast waveform alignment via correlation in frequency domain: | ||
| 69 | float *xdat_in; | ||
| 70 | float *xdat; | ||
| 71 | } AudioFragment; | ||
| 72 | |||
| 73 | /** | ||
| 74 | * Filter state machine states | ||
| 75 | */ | ||
| 76 | typedef enum { | ||
| 77 | YAE_LOAD_FRAGMENT, | ||
| 78 | YAE_ADJUST_POSITION, | ||
| 79 | YAE_RELOAD_FRAGMENT, | ||
| 80 | YAE_OUTPUT_OVERLAP_ADD, | ||
| 81 | YAE_FLUSH_OUTPUT, | ||
| 82 | } FilterState; | ||
| 83 | |||
| 84 | /** | ||
| 85 | * Filter state machine | ||
| 86 | */ | ||
| 87 | typedef struct ATempoContext { | ||
| 88 | const AVClass *class; | ||
| 89 | |||
| 90 | // ring-buffer of input samples, necessary because sometimes | ||
| 91 | // input fragment position may be adjusted backwards: | ||
| 92 | uint8_t *buffer; | ||
| 93 | |||
| 94 | // ring-buffer maximum capacity, expressed in sample rate time base: | ||
| 95 | int ring; | ||
| 96 | |||
| 97 | // ring-buffer housekeeping: | ||
| 98 | int size; | ||
| 99 | int head; | ||
| 100 | int tail; | ||
| 101 | |||
| 102 | // 0: input sample position corresponding to the ring buffer tail | ||
| 103 | // 1: output sample position | ||
| 104 | int64_t position[2]; | ||
| 105 | |||
| 106 | // first input timestamp, all other timestamps are offset by this one | ||
| 107 | int64_t start_pts; | ||
| 108 | |||
| 109 | // sample format: | ||
| 110 | enum AVSampleFormat format; | ||
| 111 | |||
| 112 | // number of channels: | ||
| 113 | int channels; | ||
| 114 | |||
| 115 | // number of bytes to skip from one sample to the next, across multiple channels; | ||
| 116 | // stride = (number-of-channels * bits-per-sample-per-channel) / 8 | ||
| 117 | int stride; | ||
| 118 | |||
| 119 | // fragment window size, power-of-two integer: | ||
| 120 | int window; | ||
| 121 | |||
| 122 | // Hann window coefficients, for feathering | ||
| 123 | // (blending) the overlapping fragment region: | ||
| 124 | float *hann; | ||
| 125 | |||
| 126 | // tempo scaling factor: | ||
| 127 | double tempo; | ||
| 128 | |||
| 129 | // a snapshot of previous fragment input and output position values | ||
| 130 | // captured when the tempo scale factor was set most recently: | ||
| 131 | int64_t origin[2]; | ||
| 132 | |||
| 133 | // current/previous fragment ring-buffer: | ||
| 134 | AudioFragment frag[2]; | ||
| 135 | |||
| 136 | // current fragment index: | ||
| 137 | uint64_t nfrag; | ||
| 138 | |||
| 139 | // current state: | ||
| 140 | FilterState state; | ||
| 141 | |||
| 142 | // for fast correlation calculation in frequency domain: | ||
| 143 | AVTXContext *real_to_complex; | ||
| 144 | AVTXContext *complex_to_real; | ||
| 145 | av_tx_fn r2c_fn, c2r_fn; | ||
| 146 | float *correlation_in; | ||
| 147 | float *correlation; | ||
| 148 | |||
| 149 | // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame | ||
| 150 | AVFrame *dst_buffer; | ||
| 151 | uint8_t *dst; | ||
| 152 | uint8_t *dst_end; | ||
| 153 | uint64_t nsamples_in; | ||
| 154 | uint64_t nsamples_out; | ||
| 155 | } ATempoContext; | ||
| 156 | |||
| 157 | #define YAE_ATEMPO_MIN 0.5 | ||
| 158 | #define YAE_ATEMPO_MAX 100.0 | ||
| 159 | |||
| 160 | #define OFFSET(x) offsetof(ATempoContext, x) | ||
| 161 | |||
| 162 | static const AVOption atempo_options[] = { | ||
| 163 | { "tempo", "set tempo scale factor", | ||
| 164 | OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, | ||
| 165 | YAE_ATEMPO_MIN, | ||
| 166 | YAE_ATEMPO_MAX, | ||
| 167 | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM }, | ||
| 168 | { NULL } | ||
| 169 | }; | ||
| 170 | |||
| 171 | AVFILTER_DEFINE_CLASS(atempo); | ||
| 172 | |||
| 173 | 1546 | inline static AudioFragment *yae_curr_frag(ATempoContext *atempo) | |
| 174 | { | ||
| 175 | 1546 | return &atempo->frag[atempo->nfrag % 2]; | |
| 176 | } | ||
| 177 | |||
| 178 | 450 | inline static AudioFragment *yae_prev_frag(ATempoContext *atempo) | |
| 179 | { | ||
| 180 | 450 | return &atempo->frag[(atempo->nfrag + 1) % 2]; | |
| 181 | } | ||
| 182 | |||
| 183 | /** | ||
| 184 | * Reset filter to initial state, do not deallocate existing local buffers. | ||
| 185 | */ | ||
| 186 | 3 | static void yae_clear(ATempoContext *atempo) | |
| 187 | { | ||
| 188 | 3 | atempo->size = 0; | |
| 189 | 3 | atempo->head = 0; | |
| 190 | 3 | atempo->tail = 0; | |
| 191 | |||
| 192 | 3 | atempo->nfrag = 0; | |
| 193 | 3 | atempo->state = YAE_LOAD_FRAGMENT; | |
| 194 | 3 | atempo->start_pts = AV_NOPTS_VALUE; | |
| 195 | |||
| 196 | 3 | atempo->position[0] = 0; | |
| 197 | 3 | atempo->position[1] = 0; | |
| 198 | |||
| 199 | 3 | atempo->origin[0] = 0; | |
| 200 | 3 | atempo->origin[1] = 0; | |
| 201 | |||
| 202 | 3 | atempo->frag[0].position[0] = 0; | |
| 203 | 3 | atempo->frag[0].position[1] = 0; | |
| 204 | 3 | atempo->frag[0].nsamples = 0; | |
| 205 | |||
| 206 | 3 | atempo->frag[1].position[0] = 0; | |
| 207 | 3 | atempo->frag[1].position[1] = 0; | |
| 208 | 3 | atempo->frag[1].nsamples = 0; | |
| 209 | |||
| 210 | // shift left position of 1st fragment by half a window | ||
| 211 | // so that no re-normalization would be required for | ||
| 212 | // the left half of the 1st fragment: | ||
| 213 | 3 | atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2); | |
| 214 | 3 | atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2); | |
| 215 | |||
| 216 | 3 | av_frame_free(&atempo->dst_buffer); | |
| 217 | 3 | atempo->dst = NULL; | |
| 218 | 3 | atempo->dst_end = NULL; | |
| 219 | |||
| 220 | 3 | atempo->nsamples_in = 0; | |
| 221 | 3 | atempo->nsamples_out = 0; | |
| 222 | 3 | } | |
| 223 | |||
| 224 | /** | ||
| 225 | * Reset filter to initial state and deallocate all buffers. | ||
| 226 | */ | ||
| 227 | 3 | static void yae_release_buffers(ATempoContext *atempo) | |
| 228 | { | ||
| 229 | 3 | yae_clear(atempo); | |
| 230 | |||
| 231 | 3 | av_freep(&atempo->frag[0].data); | |
| 232 | 3 | av_freep(&atempo->frag[1].data); | |
| 233 | 3 | av_freep(&atempo->frag[0].xdat_in); | |
| 234 | 3 | av_freep(&atempo->frag[1].xdat_in); | |
| 235 | 3 | av_freep(&atempo->frag[0].xdat); | |
| 236 | 3 | av_freep(&atempo->frag[1].xdat); | |
| 237 | |||
| 238 | 3 | av_freep(&atempo->buffer); | |
| 239 | 3 | av_freep(&atempo->hann); | |
| 240 | 3 | av_freep(&atempo->correlation_in); | |
| 241 | 3 | av_freep(&atempo->correlation); | |
| 242 | |||
| 243 | 3 | av_tx_uninit(&atempo->real_to_complex); | |
| 244 | 3 | av_tx_uninit(&atempo->complex_to_real); | |
| 245 | 3 | } | |
| 246 | |||
| 247 | /** | ||
| 248 | * Prepare filter for processing audio data of given format, | ||
| 249 | * sample rate and number of channels. | ||
| 250 | */ | ||
| 251 | 1 | static int yae_reset(ATempoContext *atempo, | |
| 252 | enum AVSampleFormat format, | ||
| 253 | int sample_rate, | ||
| 254 | int channels) | ||
| 255 | { | ||
| 256 | 1 | const int sample_size = av_get_bytes_per_sample(format); | |
| 257 | 1 | uint32_t nlevels = 0; | |
| 258 | 1 | float scale = 1.f, iscale = 1.f; | |
| 259 | uint32_t pot; | ||
| 260 | int ret; | ||
| 261 | int i; | ||
| 262 | |||
| 263 | 1 | atempo->format = format; | |
| 264 | 1 | atempo->channels = channels; | |
| 265 | 1 | atempo->stride = sample_size * channels; | |
| 266 | |||
| 267 | // pick a segment window size: | ||
| 268 | 1 | atempo->window = sample_rate / 24; | |
| 269 | |||
| 270 | // adjust window size to be a power-of-two integer: | ||
| 271 | 1 | nlevels = av_log2(atempo->window); | |
| 272 | 1 | pot = 1 << nlevels; | |
| 273 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | av_assert0(pot <= atempo->window); |
| 274 | |||
| 275 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (pot < atempo->window) { |
| 276 | 1 | atempo->window = pot * 2; | |
| 277 | 1 | nlevels++; | |
| 278 | } | ||
| 279 | |||
| 280 | /* av_realloc is not aligned enough, so simply discard all the old buffers | ||
| 281 | * (fortunately, their data does not need to be preserved) */ | ||
| 282 | 1 | yae_release_buffers(atempo); | |
| 283 | |||
| 284 | // initialize audio fragment buffers: | ||
| 285 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (!(atempo->frag[0].data = av_calloc(atempo->window, atempo->stride)) || |
| 286 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[1].data = av_calloc(atempo->window, atempo->stride)) || |
| 287 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[0].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
| 288 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[1].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
| 289 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | !(atempo->frag[0].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
| 290 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | !(atempo->frag[1].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat)))) { |
| 291 | ✗ | ret = AVERROR(ENOMEM); | |
| 292 | ✗ | goto fail; | |
| 293 | } | ||
| 294 | |||
| 295 | // initialize rDFT contexts: | ||
| 296 | 1 | ret = av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, | |
| 297 | 1 | AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0); | |
| 298 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
| 299 | ✗ | goto fail; | |
| 300 | |||
| 301 | 1 | ret = av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, | |
| 302 | 1 | AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0); | |
| 303 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
| 304 | ✗ | goto fail; | |
| 305 | |||
| 306 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (!(atempo->correlation_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) || |
| 307 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | !(atempo->correlation = av_calloc(atempo->window, sizeof(AVComplexFloat)))) { |
| 308 | ✗ | ret = AVERROR(ENOMEM); | |
| 309 | ✗ | goto fail; | |
| 310 | } | ||
| 311 | |||
| 312 | 1 | atempo->ring = atempo->window * 3; | |
| 313 | 1 | atempo->buffer = av_calloc(atempo->ring, atempo->stride); | |
| 314 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->buffer) { |
| 315 | ✗ | ret = AVERROR(ENOMEM); | |
| 316 | ✗ | goto fail; | |
| 317 | } | ||
| 318 | |||
| 319 | // initialize the Hann window function: | ||
| 320 | 1 | atempo->hann = av_malloc_array(atempo->window, sizeof(float)); | |
| 321 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->hann) { |
| 322 | ✗ | ret = AVERROR(ENOMEM); | |
| 323 | ✗ | goto fail; | |
| 324 | } | ||
| 325 | |||
| 326 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 1 times.
|
2049 | for (i = 0; i < atempo->window; i++) { |
| 327 | 2048 | double t = (double)i / (double)(atempo->window - 1); | |
| 328 | 2048 | double h = 0.5 * (1.0 - cos(2.0 * M_PI * t)); | |
| 329 | 2048 | atempo->hann[i] = (float)h; | |
| 330 | } | ||
| 331 | |||
| 332 | 1 | return 0; | |
| 333 | ✗ | fail: | |
| 334 | ✗ | yae_release_buffers(atempo); | |
| 335 | ✗ | return ret; | |
| 336 | } | ||
| 337 | |||
| 338 | ✗ | static int yae_update(AVFilterContext *ctx) | |
| 339 | { | ||
| 340 | const AudioFragment *prev; | ||
| 341 | ✗ | ATempoContext *atempo = ctx->priv; | |
| 342 | |||
| 343 | ✗ | prev = yae_prev_frag(atempo); | |
| 344 | ✗ | atempo->origin[0] = prev->position[0] + atempo->window / 2; | |
| 345 | ✗ | atempo->origin[1] = prev->position[1] + atempo->window / 2; | |
| 346 | ✗ | return 0; | |
| 347 | } | ||
| 348 | |||
| 349 | /** | ||
| 350 | * A helper macro for initializing complex data buffer with scalar data | ||
| 351 | * of a given type. | ||
| 352 | */ | ||
| 353 | #define yae_init_xdat(scalar_type, scalar_max) \ | ||
| 354 | do { \ | ||
| 355 | const uint8_t *src_end = src + \ | ||
| 356 | frag->nsamples * atempo->channels * sizeof(scalar_type); \ | ||
| 357 | \ | ||
| 358 | float *xdat = frag->xdat_in; \ | ||
| 359 | scalar_type tmp; \ | ||
| 360 | \ | ||
| 361 | if (atempo->channels == 1) { \ | ||
| 362 | for (; src < src_end; xdat++) { \ | ||
| 363 | tmp = *(const scalar_type *)src; \ | ||
| 364 | src += sizeof(scalar_type); \ | ||
| 365 | \ | ||
| 366 | *xdat = (float)tmp; \ | ||
| 367 | } \ | ||
| 368 | } else { \ | ||
| 369 | float s, max, ti, si; \ | ||
| 370 | int i; \ | ||
| 371 | \ | ||
| 372 | for (; src < src_end; xdat++) { \ | ||
| 373 | tmp = *(const scalar_type *)src; \ | ||
| 374 | src += sizeof(scalar_type); \ | ||
| 375 | \ | ||
| 376 | max = (float)tmp; \ | ||
| 377 | s = FFMIN((float)scalar_max, \ | ||
| 378 | (float)fabsf(max)); \ | ||
| 379 | \ | ||
| 380 | for (i = 1; i < atempo->channels; i++) { \ | ||
| 381 | tmp = *(const scalar_type *)src; \ | ||
| 382 | src += sizeof(scalar_type); \ | ||
| 383 | \ | ||
| 384 | ti = (float)tmp; \ | ||
| 385 | si = FFMIN((float)scalar_max, \ | ||
| 386 | (float)fabsf(ti)); \ | ||
| 387 | \ | ||
| 388 | if (s < si) { \ | ||
| 389 | s = si; \ | ||
| 390 | max = ti; \ | ||
| 391 | } \ | ||
| 392 | } \ | ||
| 393 | \ | ||
| 394 | *xdat = max; \ | ||
| 395 | } \ | ||
| 396 | } \ | ||
| 397 | } while (0) | ||
| 398 | |||
| 399 | /** | ||
| 400 | * Initialize complex data buffer of a given audio fragment | ||
| 401 | * with down-mixed mono data of appropriate scalar type. | ||
| 402 | */ | ||
| 403 | 258 | static void yae_downmix(ATempoContext *atempo, AudioFragment *frag) | |
| 404 | { | ||
| 405 | // shortcuts: | ||
| 406 | 258 | const uint8_t *src = frag->data; | |
| 407 | |||
| 408 | // init complex data buffer used for FFT and Correlation: | ||
| 409 | 258 | memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1)); | |
| 410 | |||
| 411 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 258 times.
|
258 | if (atempo->format == AV_SAMPLE_FMT_U8) { |
| 412 | ✗ | yae_init_xdat(uint8_t, 127); | |
| 413 |
1/2✓ Branch 0 taken 258 times.
✗ Branch 1 not taken.
|
258 | } else if (atempo->format == AV_SAMPLE_FMT_S16) { |
| 414 |
3/14✓ Branch 0 taken 258 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 527650 times.
✓ Branch 3 taken 258 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
527908 | yae_init_xdat(int16_t, 32767); |
| 415 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_S32) { | |
| 416 | ✗ | yae_init_xdat(int, 2147483647); | |
| 417 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_FLT) { | |
| 418 | ✗ | yae_init_xdat(float, 1); | |
| 419 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_DBL) { | |
| 420 | ✗ | yae_init_xdat(double, 1); | |
| 421 | } | ||
| 422 | 258 | } | |
| 423 | |||
| 424 | /** | ||
| 425 | * Populate the internal data buffer on as-needed basis. | ||
| 426 | * | ||
| 427 | * @return | ||
| 428 | * 0 if requested data was already available or was successfully loaded, | ||
| 429 | * AVERROR(EAGAIN) if more input data is required. | ||
| 430 | */ | ||
| 431 | 322 | static int yae_load_data(ATempoContext *atempo, | |
| 432 | const uint8_t **src_ref, | ||
| 433 | const uint8_t *src_end, | ||
| 434 | int64_t stop_here) | ||
| 435 | { | ||
| 436 | // shortcut: | ||
| 437 | 322 | const uint8_t *src = *src_ref; | |
| 438 | 322 | const int read_size = stop_here - atempo->position[0]; | |
| 439 | |||
| 440 |
2/2✓ Branch 0 taken 53 times.
✓ Branch 1 taken 269 times.
|
322 | if (stop_here <= atempo->position[0]) { |
| 441 | 53 | return 0; | |
| 442 | } | ||
| 443 | |||
| 444 | // samples are not expected to be skipped, unless tempo is greater than 2: | ||
| 445 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 269 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
269 | av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0); |
| 446 | |||
| 447 |
4/4✓ Branch 0 taken 334 times.
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 269 times.
✓ Branch 3 taken 65 times.
|
807 | while (atempo->position[0] < stop_here && src < src_end) { |
| 448 | 269 | int src_samples = (src_end - src) / atempo->stride; | |
| 449 | |||
| 450 | // load data piece-wise, in order to avoid complicating the logic: | ||
| 451 | 269 | int nsamples = FFMIN(read_size, src_samples); | |
| 452 | int na; | ||
| 453 | int nb; | ||
| 454 | |||
| 455 | 269 | nsamples = FFMIN(nsamples, atempo->ring); | |
| 456 | 269 | na = FFMIN(nsamples, atempo->ring - atempo->tail); | |
| 457 | 269 | nb = FFMIN(nsamples - na, atempo->ring); | |
| 458 | |||
| 459 |
1/2✓ Branch 0 taken 269 times.
✗ Branch 1 not taken.
|
269 | if (na) { |
| 460 | 269 | uint8_t *a = atempo->buffer + atempo->tail * atempo->stride; | |
| 461 | 269 | memcpy(a, src, na * atempo->stride); | |
| 462 | |||
| 463 | 269 | src += na * atempo->stride; | |
| 464 | 269 | atempo->position[0] += na; | |
| 465 | |||
| 466 | 269 | atempo->size = FFMIN(atempo->size + na, atempo->ring); | |
| 467 | 269 | atempo->tail = (atempo->tail + na) % atempo->ring; | |
| 468 | 269 | atempo->head = | |
| 469 | 269 | atempo->size < atempo->ring ? | |
| 470 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 264 times.
|
269 | atempo->tail - atempo->size : |
| 471 | atempo->tail; | ||
| 472 | } | ||
| 473 | |||
| 474 |
2/2✓ Branch 0 taken 247 times.
✓ Branch 1 taken 22 times.
|
269 | if (nb) { |
| 475 | 22 | uint8_t *b = atempo->buffer; | |
| 476 | 22 | memcpy(b, src, nb * atempo->stride); | |
| 477 | |||
| 478 | 22 | src += nb * atempo->stride; | |
| 479 | 22 | atempo->position[0] += nb; | |
| 480 | |||
| 481 | 22 | atempo->size = FFMIN(atempo->size + nb, atempo->ring); | |
| 482 | 22 | atempo->tail = (atempo->tail + nb) % atempo->ring; | |
| 483 | 22 | atempo->head = | |
| 484 | 22 | atempo->size < atempo->ring ? | |
| 485 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
|
22 | atempo->tail - atempo->size : |
| 486 | atempo->tail; | ||
| 487 | } | ||
| 488 | } | ||
| 489 | |||
| 490 | // pass back the updated source buffer pointer: | ||
| 491 | 269 | *src_ref = src; | |
| 492 | |||
| 493 | // sanity check: | ||
| 494 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 269 times.
|
269 | av_assert0(atempo->position[0] <= stop_here); |
| 495 | |||
| 496 |
2/2✓ Branch 0 taken 204 times.
✓ Branch 1 taken 65 times.
|
269 | return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN); |
| 497 | } | ||
| 498 | |||
| 499 | /** | ||
| 500 | * Populate current audio fragment data buffer. | ||
| 501 | * | ||
| 502 | * @return | ||
| 503 | * 0 when the fragment is ready, | ||
| 504 | * AVERROR(EAGAIN) if more input data is required. | ||
| 505 | */ | ||
| 506 | 324 | static int yae_load_frag(ATempoContext *atempo, | |
| 507 | const uint8_t **src_ref, | ||
| 508 | const uint8_t *src_end) | ||
| 509 | { | ||
| 510 | // shortcuts: | ||
| 511 | 324 | AudioFragment *frag = yae_curr_frag(atempo); | |
| 512 | uint8_t *dst; | ||
| 513 | int64_t missing, start, zeros; | ||
| 514 | uint32_t nsamples; | ||
| 515 | const uint8_t *a, *b; | ||
| 516 | int i0, i1, n0, n1, na, nb; | ||
| 517 | |||
| 518 | 324 | int64_t stop_here = frag->position[0] + atempo->window; | |
| 519 |
4/4✓ Branch 0 taken 322 times.
✓ Branch 1 taken 2 times.
✓ Branch 3 taken 65 times.
✓ Branch 4 taken 257 times.
|
324 | if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) { |
| 520 | 65 | return AVERROR(EAGAIN); | |
| 521 | } | ||
| 522 | |||
| 523 | // calculate the number of samples we don't have: | ||
| 524 | 259 | missing = | |
| 525 | 259 | stop_here > atempo->position[0] ? | |
| 526 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 257 times.
|
259 | stop_here - atempo->position[0] : 0; |
| 527 | |||
| 528 | 259 | nsamples = | |
| 529 | 259 | missing < (int64_t)atempo->window ? | |
| 530 |
1/2✓ Branch 0 taken 259 times.
✗ Branch 1 not taken.
|
259 | (uint32_t)(atempo->window - missing) : 0; |
| 531 | |||
| 532 | // setup the output buffer: | ||
| 533 | 259 | frag->nsamples = nsamples; | |
| 534 | 259 | dst = frag->data; | |
| 535 | |||
| 536 | 259 | start = atempo->position[0] - atempo->size; | |
| 537 | |||
| 538 | // what we don't have we substitute with zeros: | ||
| 539 | 259 | zeros = | |
| 540 | 259 | frag->position[0] < start ? | |
| 541 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 258 times.
|
259 | FFMIN(start - frag->position[0], (int64_t)nsamples) : 0; |
| 542 | |||
| 543 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 259 times.
|
259 | if (zeros == nsamples) { |
| 544 | ✗ | return 0; | |
| 545 | } | ||
| 546 | |||
| 547 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 258 times.
|
259 | if (frag->position[0] < start) { |
| 548 | 1 | memset(dst, 0, zeros * atempo->stride); | |
| 549 | 1 | dst += zeros * atempo->stride; | |
| 550 | } | ||
| 551 | |||
| 552 | // get the remaining data from the ring buffer: | ||
| 553 | 518 | na = (atempo->head < atempo->tail ? | |
| 554 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 254 times.
|
259 | atempo->tail - atempo->head : |
| 555 | 254 | atempo->ring - atempo->head); | |
| 556 | |||
| 557 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 5 times.
|
259 | nb = atempo->head < atempo->tail ? 0 : atempo->tail; |
| 558 | |||
| 559 | // sanity check: | ||
| 560 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 259 times.
|
259 | av_assert0(nsamples <= zeros + na + nb); |
| 561 | |||
| 562 | 259 | a = atempo->buffer + atempo->head * atempo->stride; | |
| 563 | 259 | b = atempo->buffer; | |
| 564 | |||
| 565 | 259 | i0 = frag->position[0] + zeros - start; | |
| 566 |
2/2✓ Branch 0 taken 168 times.
✓ Branch 1 taken 91 times.
|
259 | i1 = i0 < na ? 0 : i0 - na; |
| 567 | |||
| 568 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 168 times.
|
259 | n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0; |
| 569 | 259 | n1 = nsamples - zeros - n0; | |
| 570 | |||
| 571 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 168 times.
|
259 | if (n0) { |
| 572 | 91 | memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride); | |
| 573 | 91 | dst += n0 * atempo->stride; | |
| 574 | } | ||
| 575 | |||
| 576 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 5 times.
|
259 | if (n1) { |
| 577 | 254 | memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride); | |
| 578 | } | ||
| 579 | |||
| 580 | 259 | return 0; | |
| 581 | } | ||
| 582 | |||
| 583 | /** | ||
| 584 | * Prepare for loading next audio fragment. | ||
| 585 | */ | ||
| 586 | 129 | static void yae_advance_to_next_frag(ATempoContext *atempo) | |
| 587 | { | ||
| 588 | 129 | const double fragment_step = atempo->tempo * (double)(atempo->window / 2); | |
| 589 | |||
| 590 | const AudioFragment *prev; | ||
| 591 | AudioFragment *frag; | ||
| 592 | |||
| 593 | 129 | atempo->nfrag++; | |
| 594 | 129 | prev = yae_prev_frag(atempo); | |
| 595 | 129 | frag = yae_curr_frag(atempo); | |
| 596 | |||
| 597 | 129 | frag->position[0] = prev->position[0] + (int64_t)fragment_step; | |
| 598 | 129 | frag->position[1] = prev->position[1] + atempo->window / 2; | |
| 599 | 129 | frag->nsamples = 0; | |
| 600 | 129 | } | |
| 601 | |||
| 602 | /** | ||
| 603 | * Calculate cross-correlation via rDFT. | ||
| 604 | * | ||
| 605 | * Multiply two vectors of complex numbers (result of real_to_complex rDFT) | ||
| 606 | * and transform back via complex_to_real rDFT. | ||
| 607 | */ | ||
| 608 | 129 | static void yae_xcorr_via_rdft(float *xcorr_in, | |
| 609 | float *xcorr, | ||
| 610 | AVTXContext *complex_to_real, | ||
| 611 | av_tx_fn c2r_fn, | ||
| 612 | const AVComplexFloat *xa, | ||
| 613 | const AVComplexFloat *xb, | ||
| 614 | const int window) | ||
| 615 | { | ||
| 616 | 129 | AVComplexFloat *xc = (AVComplexFloat *)xcorr_in; | |
| 617 | int i; | ||
| 618 | |||
| 619 |
2/2✓ Branch 0 taken 264321 times.
✓ Branch 1 taken 129 times.
|
264450 | for (i = 0; i <= window; i++, xa++, xb++, xc++) { |
| 620 | 264321 | xc->re = (xa->re * xb->re + xa->im * xb->im); | |
| 621 | 264321 | xc->im = (xa->im * xb->re - xa->re * xb->im); | |
| 622 | } | ||
| 623 | |||
| 624 | // apply inverse rDFT: | ||
| 625 | 129 | c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(*xc)); | |
| 626 | 129 | } | |
| 627 | |||
| 628 | /** | ||
| 629 | * Calculate alignment offset for given fragment | ||
| 630 | * relative to the previous fragment. | ||
| 631 | * | ||
| 632 | * @return alignment offset of current fragment relative to previous. | ||
| 633 | */ | ||
| 634 | 129 | static int yae_align(AudioFragment *frag, | |
| 635 | const AudioFragment *prev, | ||
| 636 | const int window, | ||
| 637 | const int delta_max, | ||
| 638 | const int drift, | ||
| 639 | float *correlation_in, | ||
| 640 | float *correlation, | ||
| 641 | AVTXContext *complex_to_real, | ||
| 642 | av_tx_fn c2r_fn) | ||
| 643 | { | ||
| 644 | 129 | int best_offset = -drift; | |
| 645 | 129 | float best_metric = -FLT_MAX; | |
| 646 | float *xcorr; | ||
| 647 | |||
| 648 | int i0; | ||
| 649 | int i1; | ||
| 650 | int i; | ||
| 651 | |||
| 652 | 129 | yae_xcorr_via_rdft(correlation_in, | |
| 653 | correlation, | ||
| 654 | complex_to_real, | ||
| 655 | c2r_fn, | ||
| 656 | 129 | (const AVComplexFloat *)prev->xdat, | |
| 657 | 129 | (const AVComplexFloat *)frag->xdat, | |
| 658 | window); | ||
| 659 | |||
| 660 | // identify search window boundaries: | ||
| 661 | 129 | i0 = FFMAX(window / 2 - delta_max - drift, 0); | |
| 662 | 129 | i0 = FFMIN(i0, window); | |
| 663 | |||
| 664 | 129 | i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16); | |
| 665 | 129 | i1 = FFMAX(i1, 0); | |
| 666 | |||
| 667 | // identify cross-correlation peaks within search window: | ||
| 668 | 129 | xcorr = correlation + i0; | |
| 669 | |||
| 670 |
2/2✓ Branch 0 taken 219329 times.
✓ Branch 1 taken 129 times.
|
219458 | for (i = i0; i < i1; i++, xcorr++) { |
| 671 | 219329 | float metric = *xcorr; | |
| 672 | |||
| 673 | // normalize: | ||
| 674 | 219329 | float drifti = (float)(drift + i); | |
| 675 | 219329 | metric *= drifti * (float)(i - i0) * (float)(i1 - i); | |
| 676 | |||
| 677 |
2/2✓ Branch 0 taken 11612 times.
✓ Branch 1 taken 207717 times.
|
219329 | if (metric > best_metric) { |
| 678 | 11612 | best_metric = metric; | |
| 679 | 11612 | best_offset = i - window / 2; | |
| 680 | } | ||
| 681 | } | ||
| 682 | |||
| 683 | 129 | return best_offset; | |
| 684 | } | ||
| 685 | |||
| 686 | /** | ||
| 687 | * Adjust current fragment position for better alignment | ||
| 688 | * with previous fragment. | ||
| 689 | * | ||
| 690 | * @return alignment correction. | ||
| 691 | */ | ||
| 692 | 129 | static int yae_adjust_position(ATempoContext *atempo) | |
| 693 | { | ||
| 694 | 129 | const AudioFragment *prev = yae_prev_frag(atempo); | |
| 695 | 129 | AudioFragment *frag = yae_curr_frag(atempo); | |
| 696 | |||
| 697 | 129 | const double prev_output_position = | |
| 698 | 129 | (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) * | |
| 699 | 129 | atempo->tempo; | |
| 700 | |||
| 701 | 129 | const double ideal_output_position = | |
| 702 | 129 | (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2); | |
| 703 | |||
| 704 | 129 | const int drift = (int)(prev_output_position - ideal_output_position); | |
| 705 | |||
| 706 | 129 | const int delta_max = atempo->window / 2; | |
| 707 | 129 | const int correction = yae_align(frag, | |
| 708 | prev, | ||
| 709 | atempo->window, | ||
| 710 | delta_max, | ||
| 711 | drift, | ||
| 712 | atempo->correlation_in, | ||
| 713 | atempo->correlation, | ||
| 714 | atempo->complex_to_real, | ||
| 715 | atempo->c2r_fn); | ||
| 716 | |||
| 717 |
1/2✓ Branch 0 taken 129 times.
✗ Branch 1 not taken.
|
129 | if (correction) { |
| 718 | // adjust fragment position: | ||
| 719 | 129 | frag->position[0] -= correction; | |
| 720 | |||
| 721 | // clear so that the fragment can be reloaded: | ||
| 722 | 129 | frag->nsamples = 0; | |
| 723 | } | ||
| 724 | |||
| 725 | 129 | return correction; | |
| 726 | } | ||
| 727 | |||
| 728 | /** | ||
| 729 | * A helper macro for blending the overlap region of previous | ||
| 730 | * and current audio fragment. | ||
| 731 | */ | ||
| 732 | #define yae_blend(scalar_type) \ | ||
| 733 | do { \ | ||
| 734 | const scalar_type *aaa = (const scalar_type *)a; \ | ||
| 735 | const scalar_type *bbb = (const scalar_type *)b; \ | ||
| 736 | \ | ||
| 737 | scalar_type *out = (scalar_type *)dst; \ | ||
| 738 | scalar_type *out_end = (scalar_type *)dst_end; \ | ||
| 739 | int64_t i; \ | ||
| 740 | \ | ||
| 741 | for (i = 0; i < overlap && out < out_end; \ | ||
| 742 | i++, atempo->position[1]++, wa++, wb++) { \ | ||
| 743 | float w0 = *wa; \ | ||
| 744 | float w1 = *wb; \ | ||
| 745 | int j; \ | ||
| 746 | \ | ||
| 747 | for (j = 0; j < atempo->channels; \ | ||
| 748 | j++, aaa++, bbb++, out++) { \ | ||
| 749 | float t0 = (float)*aaa; \ | ||
| 750 | float t1 = (float)*bbb; \ | ||
| 751 | \ | ||
| 752 | *out = \ | ||
| 753 | frag->position[0] + i < 0 ? \ | ||
| 754 | *aaa : \ | ||
| 755 | (scalar_type)(t0 * w0 + t1 * w1); \ | ||
| 756 | } \ | ||
| 757 | } \ | ||
| 758 | dst = (uint8_t *)out; \ | ||
| 759 | } while (0) | ||
| 760 | |||
| 761 | /** | ||
| 762 | * Blend the overlap region of previous and current audio fragment | ||
| 763 | * and output the results to the given destination buffer. | ||
| 764 | * | ||
| 765 | * @return | ||
| 766 | * 0 if the overlap region was completely stored in the dst buffer, | ||
| 767 | * AVERROR(EAGAIN) if more destination buffer space is required. | ||
| 768 | */ | ||
| 769 | 192 | static int yae_overlap_add(ATempoContext *atempo, | |
| 770 | uint8_t **dst_ref, | ||
| 771 | uint8_t *dst_end) | ||
| 772 | { | ||
| 773 | // shortcuts: | ||
| 774 | 192 | const AudioFragment *prev = yae_prev_frag(atempo); | |
| 775 | 192 | const AudioFragment *frag = yae_curr_frag(atempo); | |
| 776 | |||
| 777 | 192 | const int64_t start_here = FFMAX(atempo->position[1], | |
| 778 | frag->position[1]); | ||
| 779 | |||
| 780 | 192 | const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples, | |
| 781 | frag->position[1] + frag->nsamples); | ||
| 782 | |||
| 783 | 192 | const int64_t overlap = stop_here - start_here; | |
| 784 | |||
| 785 | 192 | const int64_t ia = start_here - prev->position[1]; | |
| 786 | 192 | const int64_t ib = start_here - frag->position[1]; | |
| 787 | |||
| 788 | 192 | const float *wa = atempo->hann + ia; | |
| 789 | 192 | const float *wb = atempo->hann + ib; | |
| 790 | |||
| 791 | 192 | const uint8_t *a = prev->data + ia * atempo->stride; | |
| 792 | 192 | const uint8_t *b = frag->data + ib * atempo->stride; | |
| 793 | |||
| 794 | 192 | uint8_t *dst = *dst_ref; | |
| 795 | |||
| 796 |
3/6✓ Branch 0 taken 192 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 192 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 192 times.
|
192 | av_assert0(start_here <= stop_here && |
| 797 | frag->position[1] <= start_here && | ||
| 798 | overlap <= frag->nsamples); | ||
| 799 | |||
| 800 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 192 times.
|
192 | if (atempo->format == AV_SAMPLE_FMT_U8) { |
| 801 | ✗ | yae_blend(uint8_t); | |
| 802 |
1/2✓ Branch 0 taken 192 times.
✗ Branch 1 not taken.
|
192 | } else if (atempo->format == AV_SAMPLE_FMT_S16) { |
| 803 |
7/8✗ Branch 0 not taken.
✓ Branch 1 taken 132096 times.
✓ Branch 2 taken 132096 times.
✓ Branch 3 taken 132096 times.
✓ Branch 4 taken 132159 times.
✓ Branch 5 taken 129 times.
✓ Branch 6 taken 132096 times.
✓ Branch 7 taken 63 times.
|
264384 | yae_blend(int16_t); |
| 804 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_S32) { | |
| 805 | ✗ | yae_blend(int); | |
| 806 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_FLT) { | |
| 807 | ✗ | yae_blend(float); | |
| 808 | ✗ | } else if (atempo->format == AV_SAMPLE_FMT_DBL) { | |
| 809 | ✗ | yae_blend(double); | |
| 810 | } | ||
| 811 | |||
| 812 | // pass-back the updated destination buffer pointer: | ||
| 813 | 192 | *dst_ref = dst; | |
| 814 | |||
| 815 |
2/2✓ Branch 0 taken 129 times.
✓ Branch 1 taken 63 times.
|
192 | return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN); |
| 816 | } | ||
| 817 | |||
| 818 | /** | ||
| 819 | * Feed as much data to the filter as it is able to consume | ||
| 820 | * and receive as much processed data in the destination buffer | ||
| 821 | * as it is able to produce or store. | ||
| 822 | */ | ||
| 823 | static void | ||
| 824 | 128 | yae_apply(ATempoContext *atempo, | |
| 825 | const uint8_t **src_ref, | ||
| 826 | const uint8_t *src_end, | ||
| 827 | uint8_t **dst_ref, | ||
| 828 | uint8_t *dst_end) | ||
| 829 | { | ||
| 830 | while (1) { | ||
| 831 |
2/2✓ Branch 0 taken 194 times.
✓ Branch 1 taken 63 times.
|
257 | if (atempo->state == YAE_LOAD_FRAGMENT) { |
| 832 | // load additional data for the current fragment: | ||
| 833 |
2/2✓ Branch 1 taken 65 times.
✓ Branch 2 taken 129 times.
|
194 | if (yae_load_frag(atempo, src_ref, src_end) != 0) { |
| 834 | 65 | break; | |
| 835 | } | ||
| 836 | |||
| 837 | // down-mix to mono: | ||
| 838 | 129 | yae_downmix(atempo, yae_curr_frag(atempo)); | |
| 839 | |||
| 840 | // apply rDFT: | ||
| 841 | 129 | atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float)); | |
| 842 | |||
| 843 | // must load the second fragment before alignment can start: | ||
| 844 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 128 times.
|
129 | if (!atempo->nfrag) { |
| 845 | 1 | yae_advance_to_next_frag(atempo); | |
| 846 | 1 | continue; | |
| 847 | } | ||
| 848 | |||
| 849 | 128 | atempo->state = YAE_ADJUST_POSITION; | |
| 850 | } | ||
| 851 | |||
| 852 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 63 times.
|
191 | if (atempo->state == YAE_ADJUST_POSITION) { |
| 853 | // adjust position for better alignment: | ||
| 854 |
1/2✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
|
128 | if (yae_adjust_position(atempo)) { |
| 855 | // reload the fragment at the corrected position, so that the | ||
| 856 | // Hann window blending would not require normalization: | ||
| 857 | 128 | atempo->state = YAE_RELOAD_FRAGMENT; | |
| 858 | } else { | ||
| 859 | ✗ | atempo->state = YAE_OUTPUT_OVERLAP_ADD; | |
| 860 | } | ||
| 861 | } | ||
| 862 | |||
| 863 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 63 times.
|
191 | if (atempo->state == YAE_RELOAD_FRAGMENT) { |
| 864 | // load additional data if necessary due to position adjustment: | ||
| 865 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 128 times.
|
128 | if (yae_load_frag(atempo, src_ref, src_end) != 0) { |
| 866 | ✗ | break; | |
| 867 | } | ||
| 868 | |||
| 869 | // down-mix to mono: | ||
| 870 | 128 | yae_downmix(atempo, yae_curr_frag(atempo)); | |
| 871 | |||
| 872 | // apply rDFT: | ||
| 873 | 128 | atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float)); | |
| 874 | |||
| 875 | 128 | atempo->state = YAE_OUTPUT_OVERLAP_ADD; | |
| 876 | } | ||
| 877 | |||
| 878 |
1/2✓ Branch 0 taken 191 times.
✗ Branch 1 not taken.
|
191 | if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) { |
| 879 | // overlap-add and output the result: | ||
| 880 |
2/2✓ Branch 1 taken 63 times.
✓ Branch 2 taken 128 times.
|
191 | if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) { |
| 881 | 63 | break; | |
| 882 | } | ||
| 883 | |||
| 884 | // advance to the next fragment, repeat: | ||
| 885 | 128 | yae_advance_to_next_frag(atempo); | |
| 886 | 128 | atempo->state = YAE_LOAD_FRAGMENT; | |
| 887 | } | ||
| 888 | } | ||
| 889 | 128 | } | |
| 890 | |||
| 891 | /** | ||
| 892 | * Flush any buffered data from the filter. | ||
| 893 | * | ||
| 894 | * @return | ||
| 895 | * 0 if all data was completely stored in the dst buffer, | ||
| 896 | * AVERROR(EAGAIN) if more destination buffer space is required. | ||
| 897 | */ | ||
| 898 | 1 | static int yae_flush(ATempoContext *atempo, | |
| 899 | uint8_t **dst_ref, | ||
| 900 | uint8_t *dst_end) | ||
| 901 | { | ||
| 902 | 1 | AudioFragment *frag = yae_curr_frag(atempo); | |
| 903 | int64_t overlap_end; | ||
| 904 | int64_t start_here; | ||
| 905 | int64_t stop_here; | ||
| 906 | int64_t offset; | ||
| 907 | |||
| 908 | const uint8_t *src; | ||
| 909 | uint8_t *dst; | ||
| 910 | |||
| 911 | int src_size; | ||
| 912 | int dst_size; | ||
| 913 | int nbytes; | ||
| 914 | |||
| 915 | 1 | atempo->state = YAE_FLUSH_OUTPUT; | |
| 916 | |||
| 917 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->nfrag) { |
| 918 | // there is nothing to flush: | ||
| 919 | ✗ | return 0; | |
| 920 | } | ||
| 921 | |||
| 922 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (atempo->position[0] == frag->position[0] + frag->nsamples && |
| 923 | ✗ | atempo->position[1] == frag->position[1] + frag->nsamples) { | |
| 924 | // the current fragment is already flushed: | ||
| 925 | ✗ | return 0; | |
| 926 | } | ||
| 927 | |||
| 928 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (frag->position[0] + frag->nsamples < atempo->position[0]) { |
| 929 | // finish loading the current (possibly partial) fragment: | ||
| 930 | 1 | yae_load_frag(atempo, NULL, NULL); | |
| 931 | |||
| 932 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (atempo->nfrag) { |
| 933 | // down-mix to mono: | ||
| 934 | 1 | yae_downmix(atempo, frag); | |
| 935 | |||
| 936 | // apply rDFT: | ||
| 937 | 1 | atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float)); | |
| 938 | |||
| 939 | // align current fragment to previous fragment: | ||
| 940 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | if (yae_adjust_position(atempo)) { |
| 941 | // reload the current fragment due to adjusted position: | ||
| 942 | 1 | yae_load_frag(atempo, NULL, NULL); | |
| 943 | } | ||
| 944 | } | ||
| 945 | } | ||
| 946 | |||
| 947 | // flush the overlap region: | ||
| 948 | 1 | overlap_end = frag->position[1] + FFMIN(atempo->window / 2, | |
| 949 | frag->nsamples); | ||
| 950 | |||
| 951 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | while (atempo->position[1] < overlap_end) { |
| 952 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) { |
| 953 | ✗ | return AVERROR(EAGAIN); | |
| 954 | } | ||
| 955 | } | ||
| 956 | |||
| 957 | // check whether all of the input samples have been consumed: | ||
| 958 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (frag->position[0] + frag->nsamples < atempo->position[0]) { |
| 959 | ✗ | yae_advance_to_next_frag(atempo); | |
| 960 | ✗ | return AVERROR(EAGAIN); | |
| 961 | } | ||
| 962 | |||
| 963 | // flush the remainder of the current fragment: | ||
| 964 | 1 | start_here = FFMAX(atempo->position[1], overlap_end); | |
| 965 | 1 | stop_here = frag->position[1] + frag->nsamples; | |
| 966 | 1 | offset = start_here - frag->position[1]; | |
| 967 |
2/4✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
|
1 | av_assert0(start_here <= stop_here && frag->position[1] <= start_here); |
| 968 | |||
| 969 | 1 | src = frag->data + offset * atempo->stride; | |
| 970 | 1 | dst = (uint8_t *)*dst_ref; | |
| 971 | |||
| 972 | 1 | src_size = (int)(stop_here - start_here) * atempo->stride; | |
| 973 | 1 | dst_size = dst_end - dst; | |
| 974 | 1 | nbytes = FFMIN(src_size, dst_size); | |
| 975 | |||
| 976 | 1 | memcpy(dst, src, nbytes); | |
| 977 | 1 | dst += nbytes; | |
| 978 | |||
| 979 | 1 | atempo->position[1] += (nbytes / atempo->stride); | |
| 980 | |||
| 981 | // pass-back the updated destination buffer pointer: | ||
| 982 | 1 | *dst_ref = (uint8_t *)dst; | |
| 983 | |||
| 984 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN); |
| 985 | } | ||
| 986 | |||
| 987 | 2 | static av_cold int init(AVFilterContext *ctx) | |
| 988 | { | ||
| 989 | 2 | ATempoContext *atempo = ctx->priv; | |
| 990 | 2 | atempo->format = AV_SAMPLE_FMT_NONE; | |
| 991 | 2 | atempo->state = YAE_LOAD_FRAGMENT; | |
| 992 | 2 | return 0; | |
| 993 | } | ||
| 994 | |||
| 995 | 2 | static av_cold void uninit(AVFilterContext *ctx) | |
| 996 | { | ||
| 997 | 2 | ATempoContext *atempo = ctx->priv; | |
| 998 | 2 | yae_release_buffers(atempo); | |
| 999 | 2 | } | |
| 1000 | |||
| 1001 | // WSOLA necessitates an internal sliding window ring buffer | ||
| 1002 | // for incoming audio stream. | ||
| 1003 | // | ||
| 1004 | // Planar sample formats are too cumbersome to store in a ring buffer, | ||
| 1005 | // therefore planar sample formats are not supported. | ||
| 1006 | // | ||
| 1007 | static const enum AVSampleFormat sample_fmts[] = { | ||
| 1008 | AV_SAMPLE_FMT_U8, | ||
| 1009 | AV_SAMPLE_FMT_S16, | ||
| 1010 | AV_SAMPLE_FMT_S32, | ||
| 1011 | AV_SAMPLE_FMT_FLT, | ||
| 1012 | AV_SAMPLE_FMT_DBL, | ||
| 1013 | AV_SAMPLE_FMT_NONE | ||
| 1014 | }; | ||
| 1015 | |||
| 1016 | 1 | static int config_props(AVFilterLink *inlink) | |
| 1017 | { | ||
| 1018 | 1 | AVFilterContext *ctx = inlink->dst; | |
| 1019 | 1 | ATempoContext *atempo = ctx->priv; | |
| 1020 | |||
| 1021 | 1 | enum AVSampleFormat format = inlink->format; | |
| 1022 | 1 | int sample_rate = (int)inlink->sample_rate; | |
| 1023 | |||
| 1024 | 1 | return yae_reset(atempo, format, sample_rate, inlink->ch_layout.nb_channels); | |
| 1025 | } | ||
| 1026 | |||
| 1027 | 65 | static int push_samples(ATempoContext *atempo, | |
| 1028 | AVFilterLink *outlink, | ||
| 1029 | int n_out) | ||
| 1030 | { | ||
| 1031 | int ret; | ||
| 1032 | |||
| 1033 | 65 | atempo->dst_buffer->sample_rate = outlink->sample_rate; | |
| 1034 | 65 | atempo->dst_buffer->nb_samples = n_out; | |
| 1035 | |||
| 1036 | // adjust the PTS: | ||
| 1037 | 65 | atempo->dst_buffer->pts = atempo->start_pts + | |
| 1038 | 65 | av_rescale_q(atempo->nsamples_out, | |
| 1039 | 65 | (AVRational){ 1, outlink->sample_rate }, | |
| 1040 | outlink->time_base); | ||
| 1041 | |||
| 1042 | 65 | ret = ff_filter_frame(outlink, atempo->dst_buffer); | |
| 1043 | 65 | atempo->dst_buffer = NULL; | |
| 1044 | 65 | atempo->dst = NULL; | |
| 1045 | 65 | atempo->dst_end = NULL; | |
| 1046 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 65 times.
|
65 | if (ret < 0) |
| 1047 | ✗ | return ret; | |
| 1048 | |||
| 1049 | 65 | atempo->nsamples_out += n_out; | |
| 1050 | 65 | return 0; | |
| 1051 | } | ||
| 1052 | |||
| 1053 | 65 | static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer) | |
| 1054 | { | ||
| 1055 | 65 | AVFilterContext *ctx = inlink->dst; | |
| 1056 | 65 | ATempoContext *atempo = ctx->priv; | |
| 1057 | 65 | AVFilterLink *outlink = ctx->outputs[0]; | |
| 1058 | |||
| 1059 | 65 | int ret = 0; | |
| 1060 | 65 | int n_in = src_buffer->nb_samples; | |
| 1061 | 65 | int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo); | |
| 1062 | |||
| 1063 | 65 | const uint8_t *src = src_buffer->data[0]; | |
| 1064 | 65 | const uint8_t *src_end = src + n_in * atempo->stride; | |
| 1065 | |||
| 1066 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 64 times.
|
65 | if (atempo->start_pts == AV_NOPTS_VALUE) |
| 1067 | 1 | atempo->start_pts = av_rescale_q(src_buffer->pts, | |
| 1068 | inlink->time_base, | ||
| 1069 | outlink->time_base); | ||
| 1070 | |||
| 1071 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 65 times.
|
258 | while (src < src_end) { |
| 1072 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
|
128 | if (!atempo->dst_buffer) { |
| 1073 | 64 | atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out); | |
| 1074 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (!atempo->dst_buffer) { |
| 1075 | ✗ | av_frame_free(&src_buffer); | |
| 1076 | ✗ | return AVERROR(ENOMEM); | |
| 1077 | } | ||
| 1078 | 64 | av_frame_copy_props(atempo->dst_buffer, src_buffer); | |
| 1079 | |||
| 1080 | 64 | atempo->dst = atempo->dst_buffer->data[0]; | |
| 1081 | 64 | atempo->dst_end = atempo->dst + n_out * atempo->stride; | |
| 1082 | } | ||
| 1083 | |||
| 1084 | 128 | yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end); | |
| 1085 | |||
| 1086 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
|
128 | if (atempo->dst == atempo->dst_end) { |
| 1087 | 64 | int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) / | |
| 1088 | 64 | atempo->stride); | |
| 1089 | 64 | ret = push_samples(atempo, outlink, n_samples); | |
| 1090 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (ret < 0) |
| 1091 | ✗ | goto end; | |
| 1092 | } | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | 65 | atempo->nsamples_in += n_in; | |
| 1096 | 65 | end: | |
| 1097 | 65 | av_frame_free(&src_buffer); | |
| 1098 | 65 | return ret; | |
| 1099 | } | ||
| 1100 | |||
| 1101 | 66 | static int request_frame(AVFilterLink *outlink) | |
| 1102 | { | ||
| 1103 | 66 | AVFilterContext *ctx = outlink->src; | |
| 1104 | 66 | ATempoContext *atempo = ctx->priv; | |
| 1105 | int ret; | ||
| 1106 | |||
| 1107 | 66 | ret = ff_request_frame(ctx->inputs[0]); | |
| 1108 | |||
| 1109 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
|
66 | if (ret == AVERROR_EOF) { |
| 1110 | // flush the filter: | ||
| 1111 | 1 | int n_max = atempo->ring; | |
| 1112 | int n_out; | ||
| 1113 | 1 | int err = AVERROR(EAGAIN); | |
| 1114 | |||
| 1115 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | while (err == AVERROR(EAGAIN)) { |
| 1116 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!atempo->dst_buffer) { |
| 1117 | 1 | atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max); | |
| 1118 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (!atempo->dst_buffer) |
| 1119 | ✗ | return AVERROR(ENOMEM); | |
| 1120 | |||
| 1121 | 1 | atempo->dst = atempo->dst_buffer->data[0]; | |
| 1122 | 1 | atempo->dst_end = atempo->dst + n_max * atempo->stride; | |
| 1123 | } | ||
| 1124 | |||
| 1125 | 1 | err = yae_flush(atempo, &atempo->dst, atempo->dst_end); | |
| 1126 | |||
| 1127 | 1 | n_out = ((atempo->dst - atempo->dst_buffer->data[0]) / | |
| 1128 | 1 | atempo->stride); | |
| 1129 | |||
| 1130 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (n_out) { |
| 1131 | 1 | ret = push_samples(atempo, outlink, n_out); | |
| 1132 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (ret < 0) |
| 1133 | ✗ | return ret; | |
| 1134 | } | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | 1 | av_frame_free(&atempo->dst_buffer); | |
| 1138 | 1 | atempo->dst = NULL; | |
| 1139 | 1 | atempo->dst_end = NULL; | |
| 1140 | |||
| 1141 | 1 | return AVERROR_EOF; | |
| 1142 | } | ||
| 1143 | |||
| 1144 | 65 | return ret; | |
| 1145 | } | ||
| 1146 | |||
| 1147 | ✗ | static int process_command(AVFilterContext *ctx, | |
| 1148 | const char *cmd, | ||
| 1149 | const char *arg, | ||
| 1150 | char *res, | ||
| 1151 | int res_len, | ||
| 1152 | int flags) | ||
| 1153 | { | ||
| 1154 | ✗ | int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); | |
| 1155 | |||
| 1156 | ✗ | if (ret < 0) | |
| 1157 | ✗ | return ret; | |
| 1158 | |||
| 1159 | ✗ | return yae_update(ctx); | |
| 1160 | } | ||
| 1161 | |||
| 1162 | static const AVFilterPad atempo_inputs[] = { | ||
| 1163 | { | ||
| 1164 | .name = "default", | ||
| 1165 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 1166 | .filter_frame = filter_frame, | ||
| 1167 | .config_props = config_props, | ||
| 1168 | }, | ||
| 1169 | }; | ||
| 1170 | |||
| 1171 | static const AVFilterPad atempo_outputs[] = { | ||
| 1172 | { | ||
| 1173 | .name = "default", | ||
| 1174 | .request_frame = request_frame, | ||
| 1175 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 1176 | }, | ||
| 1177 | }; | ||
| 1178 | |||
| 1179 | const FFFilter ff_af_atempo = { | ||
| 1180 | .p.name = "atempo", | ||
| 1181 | .p.description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."), | ||
| 1182 | .p.priv_class = &atempo_class, | ||
| 1183 | .init = init, | ||
| 1184 | .uninit = uninit, | ||
| 1185 | .process_command = process_command, | ||
| 1186 | .priv_size = sizeof(ATempoContext), | ||
| 1187 | FILTER_INPUTS(atempo_inputs), | ||
| 1188 | FILTER_OUTPUTS(atempo_outputs), | ||
| 1189 | FILTER_SAMPLEFMTS_ARRAY(sample_fmts), | ||
| 1190 | }; | ||
| 1191 |