FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_axcorrelate.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 0 120 0.0%
Functions: 0 15 0.0%
Branches: 0 156 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2019 Paul B Mahol
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/audio_fifo.h"
22 #include "libavutil/channel_layout.h"
23 #include "libavutil/common.h"
24 #include "libavutil/opt.h"
25
26 #include "audio.h"
27 #include "avfilter.h"
28 #include "filters.h"
29
30 typedef struct AudioXCorrelateContext {
31 const AVClass *class;
32
33 int size;
34 int algo;
35 int64_t pts;
36
37 AVAudioFifo *fifo[2];
38 AVFrame *cache[2];
39 AVFrame *mean_sum[2];
40 AVFrame *num_sum;
41 AVFrame *den_sum[2];
42 int used;
43 int eof;
44
45 int (*xcorrelate)(AVFilterContext *ctx, AVFrame *out, int available);
46 } AudioXCorrelateContext;
47
/**
 * Generate mean_sum_<suffix>(): the plain sum of the first `size` samples
 * of `in`, accumulated front to back. No division is performed here.
 */
#define MEAN_SUM(suffix, type, zero)                    \
static type mean_sum_##suffix(const type *in,           \
                              int size)                 \
{                                                       \
    type total = zero;                                  \
    int pos = 0;                                        \
                                                        \
    while (pos < size) {                                \
        total += in[pos];                               \
        pos++;                                          \
    }                                                   \
                                                        \
    return total;                                       \
}

MEAN_SUM(f, float,  0.f)
MEAN_SUM(d, double, 0.0)
62
/**
 * Generate square_sum_<suffix>(): the sum of x[i] * y[i] over the first
 * `size` samples, accumulated front to back. Passing the same pointer
 * for both arguments yields the sum of squares.
 */
#define SQUARE_SUM(suffix, type, zero)                  \
static type square_sum_##suffix(const type *x,          \
                                const type *y,          \
                                int size)               \
{                                                       \
    type acc = zero;                                    \
    int pos = 0;                                        \
                                                        \
    while (pos < size) {                                \
        acc += x[pos] * y[pos];                         \
        pos++;                                          \
    }                                                   \
                                                        \
    return acc;                                         \
}

SQUARE_SUM(f, float,  0.f)
SQUARE_SUM(d, double, 0.0)
78
79 #define XCORRELATE(suffix, type, zero, small, sqrtfun)\
80 static type xcorrelate_##suffix(const type *x, \
81 const type *y, \
82 type sumx, \
83 type sumy, int size) \
84 { \
85 const type xm = sumx / size, ym = sumy / size; \
86 type num = zero, den, den0 = zero, den1 = zero; \
87 \
88 for (int i = 0; i < size; i++) { \
89 type xd = x[i] - xm; \
90 type yd = y[i] - ym; \
91 \
92 num += xd * yd; \
93 den0 += xd * xd; \
94 den1 += yd * yd; \
95 } \
96 \
97 num /= size; \
98 den = sqrtfun((den0 * den1) / size / size); \
99 \
100 return den <= small ? zero : num / den; \
101 }
102
103 XCORRELATE(f, float, 0.f, 1e-6f, sqrtf)
104 XCORRELATE(d, double, 0.0, 1e-9, sqrt)
105
106 #define XCORRELATE_SLOW(suffix, type) \
107 static int xcorrelate_slow_##suffix(AVFilterContext *ctx, \
108 AVFrame *out, int available) \
109 { \
110 AudioXCorrelateContext *s = ctx->priv; \
111 const int size = s->size; \
112 int used; \
113 \
114 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
115 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
116 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
117 type *sumx = (type *)s->mean_sum[0]->extended_data[ch]; \
118 type *sumy = (type *)s->mean_sum[1]->extended_data[ch]; \
119 type *dst = (type *)out->extended_data[ch]; \
120 \
121 used = s->used; \
122 if (!used) { \
123 sumx[0] = mean_sum_##suffix(x, size); \
124 sumy[0] = mean_sum_##suffix(y, size); \
125 used = 1; \
126 } \
127 \
128 for (int n = 0; n < out->nb_samples; n++) { \
129 const int idx = n + size; \
130 \
131 dst[n] = xcorrelate_##suffix(x + n, y + n, \
132 sumx[0], sumy[0],\
133 size); \
134 \
135 sumx[0] -= x[n]; \
136 sumx[0] += x[idx]; \
137 sumy[0] -= y[n]; \
138 sumy[0] += y[idx]; \
139 } \
140 } \
141 \
142 return used; \
143 }
144
145 XCORRELATE_SLOW(f, float)
146 XCORRELATE_SLOW(d, double)
147
148 #define clipf(x) (av_clipf(x, -1.f, 1.f))
149 #define clipd(x) (av_clipd(x, -1.0, 1.0))
150
151 #define XCORRELATE_FAST(suffix, type, zero, small, sqrtfun, CLIP) \
152 static int xcorrelate_fast_##suffix(AVFilterContext *ctx, AVFrame *out, \
153 int available) \
154 { \
155 AudioXCorrelateContext *s = ctx->priv; \
156 const int size = s->size; \
157 int used; \
158 \
159 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
160 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
161 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
162 type *num_sum = (type *)s->num_sum->extended_data[ch]; \
163 type *den_sumx = (type *)s->den_sum[0]->extended_data[ch]; \
164 type *den_sumy = (type *)s->den_sum[1]->extended_data[ch]; \
165 type *dst = (type *)out->extended_data[ch]; \
166 \
167 used = s->used; \
168 if (!used) { \
169 num_sum[0] = square_sum_##suffix(x, y, size); \
170 den_sumx[0] = square_sum_##suffix(x, x, size); \
171 den_sumy[0] = square_sum_##suffix(y, y, size); \
172 used = 1; \
173 } \
174 \
175 for (int n = 0; n < out->nb_samples; n++) { \
176 const int idx = n + size; \
177 type num, den; \
178 \
179 num = num_sum[0] / size; \
180 den = sqrtfun((den_sumx[0] * den_sumy[0]) / size / size); \
181 \
182 dst[n] = den <= small ? zero : CLIP(num / den); \
183 \
184 num_sum[0] -= x[n] * y[n]; \
185 num_sum[0] += x[idx] * y[idx]; \
186 den_sumx[0] -= x[n] * x[n]; \
187 den_sumx[0] += x[idx] * x[idx]; \
188 den_sumx[0] = FFMAX(den_sumx[0], zero); \
189 den_sumy[0] -= y[n] * y[n]; \
190 den_sumy[0] += y[idx] * y[idx]; \
191 den_sumy[0] = FFMAX(den_sumy[0], zero); \
192 } \
193 } \
194 \
195 return used; \
196 }
197
198 XCORRELATE_FAST(f, float, 0.f, 1e-6f, sqrtf, clipf)
199 XCORRELATE_FAST(d, double, 0.0, 1e-9, sqrt, clipd)
200
201 #define XCORRELATE_BEST(suffix, type, zero, small, sqrtfun, FMAX, CLIP) \
202 static int xcorrelate_best_##suffix(AVFilterContext *ctx, AVFrame *out, \
203 int available) \
204 { \
205 AudioXCorrelateContext *s = ctx->priv; \
206 const int size = s->size; \
207 int used; \
208 \
209 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
210 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
211 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
212 type *mean_sumx = (type *)s->mean_sum[0]->extended_data[ch]; \
213 type *mean_sumy = (type *)s->mean_sum[1]->extended_data[ch]; \
214 type *num_sum = (type *)s->num_sum->extended_data[ch]; \
215 type *den_sumx = (type *)s->den_sum[0]->extended_data[ch]; \
216 type *den_sumy = (type *)s->den_sum[1]->extended_data[ch]; \
217 type *dst = (type *)out->extended_data[ch]; \
218 \
219 used = s->used; \
220 if (!used) { \
221 num_sum[0] = square_sum_##suffix(x, y, size); \
222 den_sumx[0] = square_sum_##suffix(x, x, size); \
223 den_sumy[0] = square_sum_##suffix(y, y, size); \
224 mean_sumx[0] = mean_sum_##suffix(x, size); \
225 mean_sumy[0] = mean_sum_##suffix(y, size); \
226 used = 1; \
227 } \
228 \
229 for (int n = 0; n < out->nb_samples; n++) { \
230 const int idx = n + size; \
231 type num, den, xm, ym; \
232 \
233 xm = mean_sumx[0] / size; \
234 ym = mean_sumy[0] / size; \
235 num = num_sum[0] - size * xm * ym; \
236 den = sqrtfun(FMAX(den_sumx[0] - size * xm * xm, zero)) * \
237 sqrtfun(FMAX(den_sumy[0] - size * ym * ym, zero)); \
238 \
239 dst[n] = den <= small ? zero : CLIP(num / den); \
240 \
241 mean_sumx[0]-= x[n]; \
242 mean_sumx[0]+= x[idx]; \
243 mean_sumy[0]-= y[n]; \
244 mean_sumy[0]+= y[idx]; \
245 num_sum[0] -= x[n] * y[n]; \
246 num_sum[0] += x[idx] * y[idx]; \
247 den_sumx[0] -= x[n] * x[n]; \
248 den_sumx[0] += x[idx] * x[idx]; \
249 den_sumx[0] = FMAX(den_sumx[0], zero); \
250 den_sumy[0] -= y[n] * y[n]; \
251 den_sumy[0] += y[idx] * y[idx]; \
252 den_sumy[0] = FMAX(den_sumy[0], zero); \
253 } \
254 } \
255 \
256 return used; \
257 }
258
259 XCORRELATE_BEST(f, float, 0.f, 1e-6f, sqrtf, fmaxf, clipf)
260 XCORRELATE_BEST(d, double, 0.0, 1e-9, sqrt, fmax, clipd)
261
262 static int activate(AVFilterContext *ctx)
263 {
264 AudioXCorrelateContext *s = ctx->priv;
265 AVFilterLink *outlink = ctx->outputs[0];
266 AVFrame *frame = NULL;
267 int ret, status;
268 int available;
269 int64_t pts;
270
271 FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
272
273 for (int i = 0; i < 2 && !s->eof; i++) {
274 ret = ff_inlink_consume_frame(ctx->inputs[i], &frame);
275 if (ret > 0) {
276 if (s->pts == AV_NOPTS_VALUE)
277 s->pts = frame->pts;
278 ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
279 frame->nb_samples);
280 av_frame_free(&frame);
281 if (ret < 0)
282 return ret;
283 }
284 }
285
286 available = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
287 if (available > s->size) {
288 const int out_samples = available - s->size;
289 AVFrame *out;
290
291 if (!s->cache[0] || s->cache[0]->nb_samples < available) {
292 av_frame_free(&s->cache[0]);
293 s->cache[0] = ff_get_audio_buffer(outlink, available);
294 if (!s->cache[0])
295 return AVERROR(ENOMEM);
296 }
297
298 if (!s->cache[1] || s->cache[1]->nb_samples < available) {
299 av_frame_free(&s->cache[1]);
300 s->cache[1] = ff_get_audio_buffer(outlink, available);
301 if (!s->cache[1])
302 return AVERROR(ENOMEM);
303 }
304
305 ret = av_audio_fifo_peek(s->fifo[0], (void **)s->cache[0]->extended_data, available);
306 if (ret < 0)
307 return ret;
308
309 ret = av_audio_fifo_peek(s->fifo[1], (void **)s->cache[1]->extended_data, available);
310 if (ret < 0)
311 return ret;
312
313 out = ff_get_audio_buffer(outlink, out_samples);
314 if (!out)
315 return AVERROR(ENOMEM);
316
317 s->used = s->xcorrelate(ctx, out, available);
318
319 out->pts = s->pts;
320 s->pts += out_samples;
321
322 av_audio_fifo_drain(s->fifo[0], out_samples);
323 av_audio_fifo_drain(s->fifo[1], out_samples);
324
325 return ff_filter_frame(outlink, out);
326 }
327
328 for (int i = 0; i < 2 && !s->eof; i++) {
329 if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
330 AVFrame *silence = ff_get_audio_buffer(outlink, s->size);
331
332 s->eof = 1;
333 if (!silence)
334 return AVERROR(ENOMEM);
335
336 av_audio_fifo_write(s->fifo[0], (void **)silence->extended_data,
337 silence->nb_samples);
338
339 av_audio_fifo_write(s->fifo[1], (void **)silence->extended_data,
340 silence->nb_samples);
341
342 av_frame_free(&silence);
343 }
344 }
345
346 if (s->eof &&
347 (av_audio_fifo_size(s->fifo[0]) <= s->size ||
348 av_audio_fifo_size(s->fifo[1]) <= s->size)) {
349 ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
350 return 0;
351 }
352
353 if ((av_audio_fifo_size(s->fifo[0]) > s->size &&
354 av_audio_fifo_size(s->fifo[1]) > s->size) || s->eof) {
355 ff_filter_set_ready(ctx, 10);
356 return 0;
357 }
358
359 if (ff_outlink_frame_wanted(outlink) && !s->eof) {
360 for (int i = 0; i < 2; i++) {
361 if (av_audio_fifo_size(s->fifo[i]) > s->size)
362 continue;
363 ff_inlink_request_frame(ctx->inputs[i]);
364 return 0;
365 }
366 }
367
368 return FFERROR_NOT_READY;
369 }
370
371 static int config_output(AVFilterLink *outlink)
372 {
373 AVFilterContext *ctx = outlink->src;
374 AudioXCorrelateContext *s = ctx->priv;
375
376 s->pts = AV_NOPTS_VALUE;
377
378 s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->ch_layout.nb_channels, s->size);
379 s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->ch_layout.nb_channels, s->size);
380 if (!s->fifo[0] || !s->fifo[1])
381 return AVERROR(ENOMEM);
382
383 s->mean_sum[0] = ff_get_audio_buffer(outlink, 1);
384 s->mean_sum[1] = ff_get_audio_buffer(outlink, 1);
385 s->num_sum = ff_get_audio_buffer(outlink, 1);
386 s->den_sum[0] = ff_get_audio_buffer(outlink, 1);
387 s->den_sum[1] = ff_get_audio_buffer(outlink, 1);
388 if (!s->mean_sum[0] || !s->mean_sum[1] || !s->num_sum ||
389 !s->den_sum[0] || !s->den_sum[1])
390 return AVERROR(ENOMEM);
391
392 switch (s->algo) {
393 case 0: s->xcorrelate = xcorrelate_slow_f; break;
394 case 1: s->xcorrelate = xcorrelate_fast_f; break;
395 case 2: s->xcorrelate = xcorrelate_best_f; break;
396 }
397
398 if (outlink->format == AV_SAMPLE_FMT_DBLP) {
399 switch (s->algo) {
400 case 0: s->xcorrelate = xcorrelate_slow_d; break;
401 case 1: s->xcorrelate = xcorrelate_fast_d; break;
402 case 2: s->xcorrelate = xcorrelate_best_d; break;
403 }
404 }
405
406 return 0;
407 }
408
409 static av_cold void uninit(AVFilterContext *ctx)
410 {
411 AudioXCorrelateContext *s = ctx->priv;
412
413 av_audio_fifo_free(s->fifo[0]);
414 av_audio_fifo_free(s->fifo[1]);
415 av_frame_free(&s->cache[0]);
416 av_frame_free(&s->cache[1]);
417 av_frame_free(&s->mean_sum[0]);
418 av_frame_free(&s->mean_sum[1]);
419 av_frame_free(&s->num_sum);
420 av_frame_free(&s->den_sum[0]);
421 av_frame_free(&s->den_sum[1]);
422 }
423
424 static const AVFilterPad inputs[] = {
425 {
426 .name = "axcorrelate0",
427 .type = AVMEDIA_TYPE_AUDIO,
428 },
429 {
430 .name = "axcorrelate1",
431 .type = AVMEDIA_TYPE_AUDIO,
432 },
433 };
434
435 static const AVFilterPad outputs[] = {
436 {
437 .name = "default",
438 .type = AVMEDIA_TYPE_AUDIO,
439 .config_props = config_output,
440 },
441 };
442
443 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
444 #define OFFSET(x) offsetof(AudioXCorrelateContext, x)
445
446 static const AVOption axcorrelate_options[] = {
447 { "size", "set the segment size", OFFSET(size), AV_OPT_TYPE_INT, {.i64=256}, 2, 131072, AF },
448 { "algo", "set the algorithm", OFFSET(algo), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, AF, .unit = "algo" },
449 { "slow", "slow algorithm", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, .unit = "algo" },
450 { "fast", "fast algorithm", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, .unit = "algo" },
451 { "best", "best algorithm", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, AF, .unit = "algo" },
452 { NULL }
453 };
454
455 AVFILTER_DEFINE_CLASS(axcorrelate);
456
457 const FFFilter ff_af_axcorrelate = {
458 .p.name = "axcorrelate",
459 .p.description = NULL_IF_CONFIG_SMALL("Cross-correlate two audio streams."),
460 .p.priv_class = &axcorrelate_class,
461 .priv_size = sizeof(AudioXCorrelateContext),
462 .activate = activate,
463 .uninit = uninit,
464 FILTER_INPUTS(inputs),
465 FILTER_OUTPUTS(outputs),
466 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
467 };
468