FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_axcorrelate.c
Date: 2024-04-26 14:42:52
Exec Total Coverage
Lines: 0 120 0.0%
Functions: 0 15 0.0%
Branches: 0 156 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2019 Paul B Mahol
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/audio_fifo.h"
22 #include "libavutil/channel_layout.h"
23 #include "libavutil/common.h"
24 #include "libavutil/opt.h"
25
26 #include "audio.h"
27 #include "avfilter.h"
28 #include "filters.h"
29 #include "internal.h"
30
31 typedef struct AudioXCorrelateContext { /* private state of one axcorrelate filter instance */
32 const AVClass *class;
33
34 int size; /* "size" option: number of samples in each correlation window */
35 int algo; /* "algo" option: 0 = slow, 1 = fast, 2 = best */
36 int64_t pts; /* timestamp given to the next output frame; AV_NOPTS_VALUE until the first input frame is consumed */
37
38 AVAudioFifo *fifo[2]; /* samples queued from input 0 and input 1 */
39 AVFrame *cache[2]; /* scratch frames the FIFO contents are peeked into before a kernel runs */
40 AVFrame *mean_sum[2]; /* one-sample frames: per-channel running sum of the current window of each input */
41 AVFrame *num_sum; /* one-sample frame: per-channel running sum of x*y over the current window */
42 AVFrame *den_sum[2]; /* one-sample frames: per-channel running sums of x*x and of y*y */
43 int used; /* nonzero once the running sums have been seeded from the first window */
44 int eof; /* set in activate() when either input reports a status */
45
46 int (*xcorrelate)(AVFilterContext *ctx, AVFrame *out, int available); /* kernel selected in config_output(); its return value is stored in used */
47 } AudioXCorrelateContext;
48
/*
 * MEAN_SUM(suffix, type, zero) generates mean_sum_<suffix>(), which adds up
 * the first `size` elements of `in` in index order and returns the total.
 * No division is performed here; the users of the result divide by the
 * window length themselves.
 */
#define MEAN_SUM(suffix, type, zero) \
static type mean_sum_##suffix(const type *in, \
                              int size) \
{ \
    type total = zero; \
    int pos = 0; \
 \
    while (pos < size) { \
        total += in[pos]; \
        pos++; \
    } \
 \
    return total; \
}

MEAN_SUM(f, float, 0.f)
MEAN_SUM(d, double, 0.0)
63
/*
 * SQUARE_SUM(suffix, type, zero) generates square_sum_<suffix>(), which
 * returns the sum of the element-wise products x[i] * y[i] for the first
 * `size` elements, accumulated in index order. Passing the same pointer for
 * both arguments yields the sum of squares.
 */
#define SQUARE_SUM(suffix, type, zero) \
static type square_sum_##suffix(const type *x, \
                                const type *y, \
                                int size) \
{ \
    type acc = zero; \
    int pos = 0; \
 \
    while (pos < size) { \
        acc += x[pos] * y[pos]; \
        pos++; \
    } \
 \
    return acc; \
}

SQUARE_SUM(f, float, 0.f)
SQUARE_SUM(d, double, 0.0)
79
80 #define XCORRELATE(suffix, type, zero, small, sqrtfun)\
81 static type xcorrelate_##suffix(const type *x, \
82 const type *y, \
83 type sumx, \
84 type sumy, int size) \
85 { \
86 const type xm = sumx / size, ym = sumy / size; \
87 type num = zero, den, den0 = zero, den1 = zero; \
88 \
89 for (int i = 0; i < size; i++) { \
90 type xd = x[i] - xm; \
91 type yd = y[i] - ym; \
92 \
93 num += xd * yd; \
94 den0 += xd * xd; \
95 den1 += yd * yd; \
96 } \
97 \
98 num /= size; \
99 den = sqrtfun((den0 * den1) / size / size); \
100 \
101 return den <= small ? zero : num / den; \
102 }
103
104 XCORRELATE(f, float, 0.f, 1e-6f, sqrtf)
105 XCORRELATE(d, double, 0.0, 1e-9, sqrt)
106
107 #define XCORRELATE_SLOW(suffix, type) \
108 static int xcorrelate_slow_##suffix(AVFilterContext *ctx, \
109 AVFrame *out, int available) \
110 { \
111 AudioXCorrelateContext *s = ctx->priv; \
112 const int size = s->size; \
113 int used; \
114 \
115 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
116 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
117 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
118 type *sumx = (type *)s->mean_sum[0]->extended_data[ch]; \
119 type *sumy = (type *)s->mean_sum[1]->extended_data[ch]; \
120 type *dst = (type *)out->extended_data[ch]; \
121 \
122 used = s->used; \
123 if (!used) { \
124 sumx[0] = mean_sum_##suffix(x, size); \
125 sumy[0] = mean_sum_##suffix(y, size); \
126 used = 1; \
127 } \
128 \
129 for (int n = 0; n < out->nb_samples; n++) { \
130 const int idx = n + size; \
131 \
132 dst[n] = xcorrelate_##suffix(x + n, y + n, \
133 sumx[0], sumy[0],\
134 size); \
135 \
136 sumx[0] -= x[n]; \
137 sumx[0] += x[idx]; \
138 sumy[0] -= y[n]; \
139 sumy[0] += y[idx]; \
140 } \
141 } \
142 \
143 return used; \
144 }
145
146 XCORRELATE_SLOW(f, float)
147 XCORRELATE_SLOW(d, double)
148
149 #define clipf(x) (av_clipf(x, -1.f, 1.f))
150 #define clipd(x) (av_clipd(x, -1.0, 1.0))
151
152 #define XCORRELATE_FAST(suffix, type, zero, small, sqrtfun, CLIP) \
153 static int xcorrelate_fast_##suffix(AVFilterContext *ctx, AVFrame *out, \
154 int available) \
155 { \
156 AudioXCorrelateContext *s = ctx->priv; \
157 const int size = s->size; \
158 int used; \
159 \
160 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
161 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
162 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
163 type *num_sum = (type *)s->num_sum->extended_data[ch]; \
164 type *den_sumx = (type *)s->den_sum[0]->extended_data[ch]; \
165 type *den_sumy = (type *)s->den_sum[1]->extended_data[ch]; \
166 type *dst = (type *)out->extended_data[ch]; \
167 \
168 used = s->used; \
169 if (!used) { \
170 num_sum[0] = square_sum_##suffix(x, y, size); \
171 den_sumx[0] = square_sum_##suffix(x, x, size); \
172 den_sumy[0] = square_sum_##suffix(y, y, size); \
173 used = 1; \
174 } \
175 \
176 for (int n = 0; n < out->nb_samples; n++) { \
177 const int idx = n + size; \
178 type num, den; \
179 \
180 num = num_sum[0] / size; \
181 den = sqrtfun((den_sumx[0] * den_sumy[0]) / size / size); \
182 \
183 dst[n] = den <= small ? zero : CLIP(num / den); \
184 \
185 num_sum[0] -= x[n] * y[n]; \
186 num_sum[0] += x[idx] * y[idx]; \
187 den_sumx[0] -= x[n] * x[n]; \
188 den_sumx[0] += x[idx] * x[idx]; \
189 den_sumx[0] = FFMAX(den_sumx[0], zero); \
190 den_sumy[0] -= y[n] * y[n]; \
191 den_sumy[0] += y[idx] * y[idx]; \
192 den_sumy[0] = FFMAX(den_sumy[0], zero); \
193 } \
194 } \
195 \
196 return used; \
197 }
198
199 XCORRELATE_FAST(f, float, 0.f, 1e-6f, sqrtf, clipf)
200 XCORRELATE_FAST(d, double, 0.0, 1e-9, sqrt, clipd)
201
202 #define XCORRELATE_BEST(suffix, type, zero, small, sqrtfun, FMAX, CLIP) \
203 static int xcorrelate_best_##suffix(AVFilterContext *ctx, AVFrame *out, \
204 int available) \
205 { \
206 AudioXCorrelateContext *s = ctx->priv; \
207 const int size = s->size; \
208 int used; \
209 \
210 for (int ch = 0; ch < out->ch_layout.nb_channels; ch++) { \
211 const type *x = (const type *)s->cache[0]->extended_data[ch]; \
212 const type *y = (const type *)s->cache[1]->extended_data[ch]; \
213 type *mean_sumx = (type *)s->mean_sum[0]->extended_data[ch]; \
214 type *mean_sumy = (type *)s->mean_sum[1]->extended_data[ch]; \
215 type *num_sum = (type *)s->num_sum->extended_data[ch]; \
216 type *den_sumx = (type *)s->den_sum[0]->extended_data[ch]; \
217 type *den_sumy = (type *)s->den_sum[1]->extended_data[ch]; \
218 type *dst = (type *)out->extended_data[ch]; \
219 \
220 used = s->used; \
221 if (!used) { \
222 num_sum[0] = square_sum_##suffix(x, y, size); \
223 den_sumx[0] = square_sum_##suffix(x, x, size); \
224 den_sumy[0] = square_sum_##suffix(y, y, size); \
225 mean_sumx[0] = mean_sum_##suffix(x, size); \
226 mean_sumy[0] = mean_sum_##suffix(y, size); \
227 used = 1; \
228 } \
229 \
230 for (int n = 0; n < out->nb_samples; n++) { \
231 const int idx = n + size; \
232 type num, den, xm, ym; \
233 \
234 xm = mean_sumx[0] / size; \
235 ym = mean_sumy[0] / size; \
236 num = num_sum[0] - size * xm * ym; \
237 den = sqrtfun(FMAX(den_sumx[0] - size * xm * xm, zero)) * \
238 sqrtfun(FMAX(den_sumy[0] - size * ym * ym, zero)); \
239 \
240 dst[n] = den <= small ? zero : CLIP(num / den); \
241 \
242 mean_sumx[0]-= x[n]; \
243 mean_sumx[0]+= x[idx]; \
244 mean_sumy[0]-= y[n]; \
245 mean_sumy[0]+= y[idx]; \
246 num_sum[0] -= x[n] * y[n]; \
247 num_sum[0] += x[idx] * y[idx]; \
248 den_sumx[0] -= x[n] * x[n]; \
249 den_sumx[0] += x[idx] * x[idx]; \
250 den_sumx[0] = FMAX(den_sumx[0], zero); \
251 den_sumy[0] -= y[n] * y[n]; \
252 den_sumy[0] += y[idx] * y[idx]; \
253 den_sumy[0] = FMAX(den_sumy[0], zero); \
254 } \
255 } \
256 \
257 return used; \
258 }
259
260 XCORRELATE_BEST(f, float, 0.f, 1e-6f, sqrtf, fmaxf, clipf)
261 XCORRELATE_BEST(d, double, 0.0, 1e-9, sqrt, fmax, clipd)
262
263 static int activate(AVFilterContext *ctx)
264 {
265 AudioXCorrelateContext *s = ctx->priv;
266 AVFilterLink *outlink = ctx->outputs[0];
267 AVFrame *frame = NULL;
268 int ret, status;
269 int available;
270 int64_t pts;
271
272 FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
273
274 for (int i = 0; i < 2 && !s->eof; i++) {
275 ret = ff_inlink_consume_frame(ctx->inputs[i], &frame);
276 if (ret > 0) {
277 if (s->pts == AV_NOPTS_VALUE)
278 s->pts = frame->pts;
279 ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
280 frame->nb_samples);
281 av_frame_free(&frame);
282 if (ret < 0)
283 return ret;
284 }
285 }
286
287 available = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
288 if (available > s->size) {
289 const int out_samples = available - s->size;
290 AVFrame *out;
291
292 if (!s->cache[0] || s->cache[0]->nb_samples < available) {
293 av_frame_free(&s->cache[0]);
294 s->cache[0] = ff_get_audio_buffer(outlink, available);
295 if (!s->cache[0])
296 return AVERROR(ENOMEM);
297 }
298
299 if (!s->cache[1] || s->cache[1]->nb_samples < available) {
300 av_frame_free(&s->cache[1]);
301 s->cache[1] = ff_get_audio_buffer(outlink, available);
302 if (!s->cache[1])
303 return AVERROR(ENOMEM);
304 }
305
306 ret = av_audio_fifo_peek(s->fifo[0], (void **)s->cache[0]->extended_data, available);
307 if (ret < 0)
308 return ret;
309
310 ret = av_audio_fifo_peek(s->fifo[1], (void **)s->cache[1]->extended_data, available);
311 if (ret < 0)
312 return ret;
313
314 out = ff_get_audio_buffer(outlink, out_samples);
315 if (!out)
316 return AVERROR(ENOMEM);
317
318 s->used = s->xcorrelate(ctx, out, available);
319
320 out->pts = s->pts;
321 s->pts += out_samples;
322
323 av_audio_fifo_drain(s->fifo[0], out_samples);
324 av_audio_fifo_drain(s->fifo[1], out_samples);
325
326 return ff_filter_frame(outlink, out);
327 }
328
329 for (int i = 0; i < 2 && !s->eof; i++) {
330 if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
331 AVFrame *silence = ff_get_audio_buffer(outlink, s->size);
332
333 s->eof = 1;
334 if (!silence)
335 return AVERROR(ENOMEM);
336
337 av_audio_fifo_write(s->fifo[0], (void **)silence->extended_data,
338 silence->nb_samples);
339
340 av_audio_fifo_write(s->fifo[1], (void **)silence->extended_data,
341 silence->nb_samples);
342
343 av_frame_free(&silence);
344 }
345 }
346
347 if (s->eof &&
348 (av_audio_fifo_size(s->fifo[0]) <= s->size ||
349 av_audio_fifo_size(s->fifo[1]) <= s->size)) {
350 ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
351 return 0;
352 }
353
354 if ((av_audio_fifo_size(s->fifo[0]) > s->size &&
355 av_audio_fifo_size(s->fifo[1]) > s->size) || s->eof) {
356 ff_filter_set_ready(ctx, 10);
357 return 0;
358 }
359
360 if (ff_outlink_frame_wanted(outlink) && !s->eof) {
361 for (int i = 0; i < 2; i++) {
362 if (av_audio_fifo_size(s->fifo[i]) > s->size)
363 continue;
364 ff_inlink_request_frame(ctx->inputs[i]);
365 return 0;
366 }
367 }
368
369 return FFERROR_NOT_READY;
370 }
371
372 static int config_output(AVFilterLink *outlink)
373 {
374 AVFilterContext *ctx = outlink->src;
375 AudioXCorrelateContext *s = ctx->priv;
376
377 s->pts = AV_NOPTS_VALUE;
378
379 s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->ch_layout.nb_channels, s->size);
380 s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->ch_layout.nb_channels, s->size);
381 if (!s->fifo[0] || !s->fifo[1])
382 return AVERROR(ENOMEM);
383
384 s->mean_sum[0] = ff_get_audio_buffer(outlink, 1);
385 s->mean_sum[1] = ff_get_audio_buffer(outlink, 1);
386 s->num_sum = ff_get_audio_buffer(outlink, 1);
387 s->den_sum[0] = ff_get_audio_buffer(outlink, 1);
388 s->den_sum[1] = ff_get_audio_buffer(outlink, 1);
389 if (!s->mean_sum[0] || !s->mean_sum[1] || !s->num_sum ||
390 !s->den_sum[0] || !s->den_sum[1])
391 return AVERROR(ENOMEM);
392
393 switch (s->algo) {
394 case 0: s->xcorrelate = xcorrelate_slow_f; break;
395 case 1: s->xcorrelate = xcorrelate_fast_f; break;
396 case 2: s->xcorrelate = xcorrelate_best_f; break;
397 }
398
399 if (outlink->format == AV_SAMPLE_FMT_DBLP) {
400 switch (s->algo) {
401 case 0: s->xcorrelate = xcorrelate_slow_d; break;
402 case 1: s->xcorrelate = xcorrelate_fast_d; break;
403 case 2: s->xcorrelate = xcorrelate_best_d; break;
404 }
405 }
406
407 return 0;
408 }
409
410 static av_cold void uninit(AVFilterContext *ctx)
411 {
412 AudioXCorrelateContext *s = ctx->priv;
413
414 av_audio_fifo_free(s->fifo[0]);
415 av_audio_fifo_free(s->fifo[1]);
416 av_frame_free(&s->cache[0]);
417 av_frame_free(&s->cache[1]);
418 av_frame_free(&s->mean_sum[0]);
419 av_frame_free(&s->mean_sum[1]);
420 av_frame_free(&s->num_sum);
421 av_frame_free(&s->den_sum[0]);
422 av_frame_free(&s->den_sum[1]);
423 }
424
425 static const AVFilterPad inputs[] = { /* the two audio input pads, registered via FILTER_INPUTS() in ff_af_axcorrelate */
426 {
427 .name = "axcorrelate0", /* first input; read as ctx->inputs[0] in activate() */
428 .type = AVMEDIA_TYPE_AUDIO,
429 },
430 {
431 .name = "axcorrelate1", /* second input; read as ctx->inputs[1] in activate() */
432 .type = AVMEDIA_TYPE_AUDIO,
433 },
434 };
435
436 static const AVFilterPad outputs[] = { /* the single audio output pad, registered via FILTER_OUTPUTS() in ff_af_axcorrelate */
437 {
438 .name = "default",
439 .type = AVMEDIA_TYPE_AUDIO,
440 .config_props = config_output, /* allocates FIFOs and accumulators and selects the kernel */
441 },
442 };
443
444 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* flag set shared by every option below */
445 #define OFFSET(x) offsetof(AudioXCorrelateContext, x) /* byte offset of field x in the private context */
446
447 static const AVOption axcorrelate_options[] = { /* user-settable options of the filter */
448 { "size", "set the segment size", OFFSET(size), AV_OPT_TYPE_INT, {.i64=256}, 2, 131072, AF }, /* window length: default 256, allowed 2..131072 */
449 { "algo", "set the algorithm", OFFSET(algo), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, AF, .unit = "algo" }, /* kernel selector: default 2 (best), allowed 0..2 */
450 { "slow", "slow algorithm", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, .unit = "algo" }, /* named value 0 for "algo" */
451 { "fast", "fast algorithm", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, .unit = "algo" }, /* named value 1 for "algo" */
452 { "best", "best algorithm", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, AF, .unit = "algo" }, /* named value 2 for "algo" */
453 { NULL } /* table terminator */
454 };
455
456 AVFILTER_DEFINE_CLASS(axcorrelate); /* presumably defines axcorrelate_class, which the filter definition references -- confirm against the macro */
457
458 const AVFilter ff_af_axcorrelate = { /* filter definition: two audio inputs, one audio output, driven by activate() */
459 .name = "axcorrelate",
460 .description = NULL_IF_CONFIG_SMALL("Cross-correlate two audio streams."),
461 .priv_size = sizeof(AudioXCorrelateContext),
462 .priv_class = &axcorrelate_class,
463 .activate = activate,
464 .uninit = uninit,
465 FILTER_INPUTS(inputs),
466 FILTER_OUTPUTS(outputs),
467 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP), /* the _f kernels serve FLTP, the _d kernels serve DBLP (see config_output) */
468 };
469