FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_loudnorm.c
Date: 2022-12-05 20:26:17
Exec Total Coverage
Lines: 0 472 0.0%
Functions: 0 12 0.0%
Branches: 0 318 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
22
23 #include "libavutil/opt.h"
24 #include "avfilter.h"
25 #include "filters.h"
26 #include "internal.h"
27 #include "audio.h"
28 #include "ebur128.h"
29
/**
 * Processing mode applied to the next frame handed to filter_frame().
 * init() starts in FIRST_FRAME (or LINEAR_MODE), filter_frame() moves on to
 * INNER_FRAME and flush_frame() selects FINAL_FRAME.
 */
enum FrameType {
    FIRST_FRAME = 0,
    INNER_FRAME = 1,
    FINAL_FRAME = 2,
    LINEAR_MODE = 3,
    FRAME_NB    = 4  /* number of frame types, not a mode itself */
};
37
/** States of the state machine run by true_peak_limiter(). */
enum LimiterState {
    OUT      = 0,  /* no gain reduction active; looking for a peak */
    ATTACK   = 1,  /* ramping the gain towards the new reduction */
    SUSTAIN  = 2,  /* holding the current reduction */
    RELEASE  = 3,  /* ramping the gain back to unity */
    STATE_NB = 4   /* number of states, not a state itself */
};
45
/** Values accepted by the "print_format" option; selects what uninit() logs. */
enum PrintFormat {
    NONE    = 0,
    JSON    = 1,
    SUMMARY = 2,
    PF_NB   = 3  /* number of formats; PF_NB - 1 is the option's upper bound */
};
52
53 typedef struct LoudNormContext {
54 const AVClass *class;
55 double target_i;
56 double target_lra;
57 double target_tp;
58 double measured_i;
59 double measured_lra;
60 double measured_tp;
61 double measured_thresh;
62 double offset;
63 int linear;
64 int dual_mono;
65 enum PrintFormat print_format;
66
67 double *buf;
68 int buf_size;
69 int buf_index;
70 int prev_buf_index;
71
72 double delta[30];
73 double weights[21];
74 double prev_delta;
75 int index;
76
77 double gain_reduction[2];
78 double *limiter_buf;
79 double *prev_smp;
80 int limiter_buf_index;
81 int limiter_buf_size;
82 enum LimiterState limiter_state;
83 int peak_index;
84 int env_index;
85 int env_cnt;
86 int attack_length;
87 int release_length;
88
89 int64_t pts[30];
90 enum FrameType frame_type;
91 int above_threshold;
92 int prev_nb_samples;
93 int channels;
94
95 FFEBUR128State *r128_in;
96 FFEBUR128State *r128_out;
97 } LoudNormContext;
98
/* Byte offset of a LoudNormContext field, for the AVOption table. */
#define OFFSET(x) offsetof(LoudNormContext, x)
/* Flags shared by every loudnorm option. Parenthesized so the expansion is a
 * single operand no matter which operators surround it at the use site. */
#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
101
102 static const AVOption loudnorm_options[] = {
103 { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
104 { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
105 { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
106 { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
107 { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
108 { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
109 { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
110 { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
111 { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
112 { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
113 { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
114 { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
115 { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
116 { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
117 { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
118 { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
119 { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
120 { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
121 { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
122 { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
123 { NULL }
124 };
125
126 AVFILTER_DEFINE_CLASS(loudnorm);
127
128 static inline int frame_size(int sample_rate, int frame_len_msec)
129 {
130 const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
131 return frame_size + (frame_size % 2);
132 }
133
134 static void init_gaussian_filter(LoudNormContext *s)
135 {
136 double total_weight = 0.0;
137 const double sigma = 3.5;
138 double adjust;
139 int i;
140
141 const int offset = 21 / 2;
142 const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
143 const double c2 = 2.0 * pow(sigma, 2.0);
144
145 for (i = 0; i < 21; i++) {
146 const int x = i - offset;
147 s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
148 total_weight += s->weights[i];
149 }
150
151 adjust = 1.0 / total_weight;
152 for (i = 0; i < 21; i++)
153 s->weights[i] *= adjust;
154 }
155
156 static double gaussian_filter(LoudNormContext *s, int index)
157 {
158 double result = 0.;
159 int i;
160
161 index = index - 10 > 0 ? index - 10 : index + 20;
162 for (i = 0; i < 21; i++)
163 result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
164
165 return result;
166 }
167
/**
 * Look ahead in the limiter ring buffer for the next peak above the ceiling
 * (s->target_tp).
 *
 * The scan starts (offset + 1920) sample frames after s->limiter_buf_index
 * and visits nb_samples positions. A position counts as a peak when, for some
 * channel, the absolute sample is not smaller than its predecessor and its
 * successor, is above the ceiling, and none of the following samples at
 * distances 2..11 is larger.
 *
 * @param s           filter context; s->prev_smp is updated while scanning
 *                    and s->peak_index is set when a peak is found
 * @param offset      extra start offset, in sample frames
 * @param nb_samples  number of positions to scan
 * @param channels    number of interleaved channels
 * @param peak_delta  set to -1 if nothing was found, otherwise to the scan
 *                    position of the peak
 * @param peak_value  written only when a peak is found: the largest absolute
 *                    sample over all channels at the peak position
 */
168 static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
169 {
170 int n, c, i, index;
171 double ceiling;
172 double *buf;
173
174 *peak_delta = -1;
175 buf = s->limiter_buf;
176 ceiling = s->target_tp;
177
/* Start position of the scan, wrapped once around the ring buffer. */
178 index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
179 if (index >= s->limiter_buf_size)
180 index -= s->limiter_buf_size;
181
/* On the first frame, seed prev_smp from the sample frame just before the
 * start position (this read is not wrapped). */
182 if (s->frame_type == FIRST_FRAME) {
183 for (c = 0; c < channels; c++)
184 s->prev_smp[c] = fabs(buf[index + c - channels]);
185 }
186
187 for (n = 0; n < nb_samples; n++) {
188 for (c = 0; c < channels; c++) {
189 double this, next, max_peak;
190
191 this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
192 next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
193
/* Local maximum above the ceiling; n > 0 means the very first scanned
 * position is never reported. */
194 if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
195 int detected;
196
/* Reject the candidate if any of the samples 2..11 frames ahead is larger. */
197 detected = 1;
198 for (i = 2; i < 12; i++) {
199 next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
200 if (next > this) {
201 detected = 0;
202 break;
203 }
204 }
205
/* Note: a rejected candidate skips the prev_smp update at the end of the
 * channel loop body. */
206 if (!detected)
207 continue;
208
/* Peak confirmed. This loop reuses c, the outer loop variable; that is
 * harmless only because the function returns right after it. */
209 for (c = 0; c < channels; c++) {
210 if (c == 0 || fabs(buf[index + c]) > max_peak)
211 max_peak = fabs(buf[index + c]);
212
213 s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
214 }
215
216 *peak_delta = n;
217 s->peak_index = index;
218 *peak_value = max_peak;
219 return;
220 }
221
222 s->prev_smp[c] = this;
223 }
224
/* Advance one sample frame, wrapping around the ring buffer. */
225 index += channels;
226 if (index >= s->limiter_buf_size)
227 index -= s->limiter_buf_size;
228 }
229 }
230
/**
 * Run the look-ahead limiter over the limiter ring buffer and write
 * nb_samples sample frames to out.
 *
 * Gain reduction is applied in place to s->limiter_buf by a four-state
 * machine (OUT, ATTACK, SUSTAIN, RELEASE) whose state is kept in the context
 * across calls. Afterwards nb_samples frames are copied from the ring buffer,
 * starting at the value s->limiter_buf_index had on entry, and any sample
 * still above the ceiling (s->target_tp) is hard-clipped to it.
 *
 * @param s           filter context
 * @param out         destination, interleaved, nb_samples * channels values
 * @param nb_samples  number of sample frames to produce
 * @param channels    number of interleaved channels
 */
231 static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
232 {
233 int n, c, index, peak_delta, smp_cnt;
234 double ceiling, peak_value;
235 double *buf;
236
237 buf = s->limiter_buf;
238 ceiling = s->target_tp;
239 index = s->limiter_buf_index;
240 smp_cnt = 0;
241
/* First frame: the first 1920 sample frames lie before the point where
 * detect_peak() starts scanning, so handle them here. Find their largest
 * absolute sample; if it exceeds the ceiling, scale all of them by
 * ceiling / max and start in SUSTAIN with that gain. */
242 if (s->frame_type == FIRST_FRAME) {
243 double max;
244
245 max = 0.;
246 for (n = 0; n < 1920; n++) {
247 for (c = 0; c < channels; c++) {
248 max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
249 }
250 buf += channels;
251 }
252
253 if (max > ceiling) {
254 s->gain_reduction[1] = ceiling / max;
255 s->limiter_state = SUSTAIN;
256 buf = s->limiter_buf;
257
258 for (n = 0; n < 1920; n++) {
259 for (c = 0; c < channels; c++) {
260 double env;
261 env = s->gain_reduction[1];
262 buf[c] *= env;
263 }
264 buf += channels;
265 }
266 }
267
268 buf = s->limiter_buf;
269 }
270
/* Run the state machine until smp_cnt has reached nb_samples. */
271 do {
272
273 switch(s->limiter_state) {
/* OUT: no reduction active. Look for the next peak; if there is one, set up
 * a gain ramp from 1 to ceiling / peak_value that starts attack_length
 * sample frames before the peak. */
274 case OUT:
275 detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
276 if (peak_delta != -1) {
277 s->env_cnt = 0;
278 smp_cnt += (peak_delta - s->attack_length);
279 s->gain_reduction[0] = 1.;
280 s->gain_reduction[1] = ceiling / peak_value;
281 s->limiter_state = ATTACK;
282
283 s->env_index = s->peak_index - (s->attack_length * channels);
284 if (s->env_index < 0)
285 s->env_index += s->limiter_buf_size;
286
/* env_cnt was just reset to 0, so this adds nothing here.
 * NOTE(review): the wrap test below uses '>' while every other wrap test in
 * this file uses '>=' - confirm whether '>=' was intended. */
287 s->env_index += (s->env_cnt * channels);
288 if (s->env_index > s->limiter_buf_size)
289 s->env_index -= s->limiter_buf_size;
290
291 } else {
292 smp_cnt = nb_samples;
293 }
294 break;
295
/* ATTACK: linear ramp from gain_reduction[0] to gain_reduction[1] over
 * attack_length sample frames. If the frame ends mid-ramp, env_cnt is
 * advanced and the ramp resumes on the next call. */
296 case ATTACK:
297 for (; s->env_cnt < s->attack_length; s->env_cnt++) {
298 for (c = 0; c < channels; c++) {
299 double env;
300 env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
301 buf[s->env_index + c] *= env;
302 }
303
304 s->env_index += channels;
305 if (s->env_index >= s->limiter_buf_size)
306 s->env_index -= s->limiter_buf_size;
307
308 smp_cnt++;
309 if (smp_cnt >= nb_samples) {
310 s->env_cnt++;
311 break;
312 }
313 }
314
/* Ramp finished inside this frame: restore the attack length to 1920 and
 * hold the gain. */
315 if (smp_cnt < nb_samples) {
316 s->env_cnt = 0;
317 s->attack_length = 1920;
318 s->limiter_state = SUSTAIN;
319 }
320 break;
321
/* SUSTAIN: hold gain_reduction[1] while looking for further peaks.
 * NOTE(review): unlike the OUT case, detect_peak() is given nb_samples here
 * rather than nb_samples - smp_cnt - confirm this is intended. */
322 case SUSTAIN:
323 detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
/* No further peak: ramp back towards unity gain. */
324 if (peak_delta == -1) {
325 s->limiter_state = RELEASE;
326 s->gain_reduction[0] = s->gain_reduction[1];
327 s->gain_reduction[1] = 1.;
328 s->env_cnt = 0;
329 break;
330 } else {
331 double gain_reduction;
332 gain_reduction = ceiling / peak_value;
333
/* The new peak needs more reduction than is currently applied: start
 * another attack that reaches it in peak_delta sample frames (at least 2,
 * which keeps attack_length - 1 non-zero in the ATTACK ramp). */
334 if (gain_reduction < s->gain_reduction[1]) {
335 s->limiter_state = ATTACK;
336
337 s->attack_length = peak_delta;
338 if (s->attack_length <= 1)
339 s->attack_length = 2;
340
341 s->gain_reduction[0] = s->gain_reduction[1];
342 s->gain_reduction[1] = gain_reduction;
343 s->env_cnt = 0;
344 break;
345 }
346
/* Otherwise keep the current gain up to the new peak. */
347 for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
348 for (c = 0; c < channels; c++) {
349 double env;
350 env = s->gain_reduction[1];
351 buf[s->env_index + c] *= env;
352 }
353
354 s->env_index += channels;
355 if (s->env_index >= s->limiter_buf_size)
356 s->env_index -= s->limiter_buf_size;
357
358 smp_cnt++;
359 if (smp_cnt >= nb_samples) {
360 s->env_cnt++;
361 break;
362 }
363 }
364 }
365 break;
366
/* RELEASE: linear ramp from gain_reduction[0] to gain_reduction[1] (set to
 * 1 when entering this state) over release_length sample frames, then back
 * to OUT. */
367 case RELEASE:
368 for (; s->env_cnt < s->release_length; s->env_cnt++) {
369 for (c = 0; c < channels; c++) {
370 double env;
371 env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
372 buf[s->env_index + c] *= env;
373 }
374
375 s->env_index += channels;
376 if (s->env_index >= s->limiter_buf_size)
377 s->env_index -= s->limiter_buf_size;
378
379 smp_cnt++;
380 if (smp_cnt >= nb_samples) {
381 s->env_cnt++;
382 break;
383 }
384 }
385
386 if (smp_cnt < nb_samples) {
387 s->env_cnt = 0;
388 s->limiter_state = OUT;
389 }
390
391 break;
392 }
393
394 } while (smp_cnt < nb_samples);
395
/* Copy the processed frames out of the ring buffer; anything still above
 * the ceiling is clipped to +/- ceiling. */
396 for (n = 0; n < nb_samples; n++) {
397 for (c = 0; c < channels; c++) {
398 out[c] = buf[index + c];
399 if (fabs(out[c]) > ceiling) {
400 out[c] = ceiling * (out[c] < 0 ? -1 : 1);
401 }
402 }
403 out += channels;
404 index += channels;
405 if (index >= s->limiter_buf_size)
406 index -= s->limiter_buf_size;
407 }
408 }
409
/**
 * Process one input frame according to s->frame_type and send the result
 * downstream.
 *
 * The input frame is reused as the output frame when it is writable;
 * otherwise a new output buffer is allocated and the input is freed at the
 * end. The output timestamp is taken from the head of the s->pts queue.
 *
 * @param inlink  input link of the filter
 * @param in      input frame (interleaved doubles); ownership is taken
 * @return the result of ff_filter_frame(), or AVERROR(ENOMEM) if the output
 *         buffer cannot be allocated
 */
410 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
411 {
412 AVFilterContext *ctx = inlink->dst;
413 LoudNormContext *s = ctx->priv;
414 AVFilterLink *outlink = ctx->outputs[0];
415 AVFrame *out;
416 const double *src;
417 double *dst;
418 double *buf;
419 double *limiter_buf;
420 int i, n, c, subframe_length, src_index;
421 double gain, gain_next, env_global, env_shortterm,
422 global, shortterm, lra, relative_threshold;
423
424 if (av_frame_is_writable(in)) {
425 out = in;
426 } else {
427 out = ff_get_audio_buffer(outlink, in->nb_samples);
428 if (!out) {
429 av_frame_free(&in);
430 return AVERROR(ENOMEM);
431 }
432 av_frame_copy_props(out, in);
433 }
434
/* Pop the next output timestamp from the head of the queue. */
435 out->pts = s->pts[0];
436 memmove(s->pts, &s->pts[1], (FF_ARRAY_ELEMS(s->pts) - 1) * sizeof(s->pts[0]));
437
438 src = (const double *)in->data[0];
439 dst = (double *)out->data[0];
440 buf = s->buf;
441 limiter_buf = s->limiter_buf;
442
/* Every input sample goes through the input loudness meter. */
443 ff_ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
444
/* The first frame is shorter than 3000 ms worth of samples: switch to
 * LINEAR_MODE with a single gain computed from the measured global loudness,
 * limited so that the largest sample peak times the gain stays below
 * target_tp.
 * NOTE(review): if true_peak is 0 (all-zero input) the division below is a
 * division by zero - confirm how silent short inputs should be handled. */
445 if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
446 double offset, offset_tp, true_peak;
447
448 ff_ebur128_loudness_global(s->r128_in, &global);
449 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
450 double tmp;
451 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
452 if (c == 0 || tmp > true_peak)
453 true_peak = tmp;
454 }
455
456 offset = pow(10., (s->target_i - global) / 20.);
457 offset_tp = true_peak * offset;
458 s->offset = offset_tp < s->target_tp ? offset : s->target_tp / true_peak;
459 s->frame_type = LINEAR_MODE;
460 }
461
462 switch (s->frame_type) {
/* FIRST_FRAME: store the whole input in the delay buffer, derive the initial
 * gain from the short-term loudness, prime the limiter buffer and output the
 * first 100 ms. */
463 case FIRST_FRAME:
464 for (n = 0; n < in->nb_samples; n++) {
465 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
466 buf[s->buf_index + c] = src[c];
467 }
468 src += inlink->ch_layout.nb_channels;
469 s->buf_index += inlink->ch_layout.nb_channels;
470 }
471
472 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
473
474 if (shortterm < s->measured_thresh) {
475 s->above_threshold = 0;
476 env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
477 } else {
478 s->above_threshold = 1;
479 env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
480 }
481
/* Every slot of the gain history starts with the same linear gain. */
482 for (n = 0; n < 30; n++)
483 s->delta[n] = pow(10., env_shortterm / 20.);
484 s->prev_delta = s->delta[s->index];
485
/* Rewind both indices and fill the limiter buffer from the start of the
 * delay buffer, applying the initial gain and the offset. */
486 s->buf_index =
487 s->limiter_buf_index = 0;
488
489 for (n = 0; n < (s->limiter_buf_size / inlink->ch_layout.nb_channels); n++) {
490 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
491 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
492 }
493 s->limiter_buf_index += inlink->ch_layout.nb_channels;
494 if (s->limiter_buf_index >= s->limiter_buf_size)
495 s->limiter_buf_index -= s->limiter_buf_size;
496
497 s->buf_index += inlink->ch_layout.nb_channels;
498 }
499
500 subframe_length = frame_size(inlink->sample_rate, 100);
501 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
502 ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);
503
/* Only the first 100 ms are output for the first input frame. */
504 out->nb_samples = subframe_length;
505
506 s->frame_type = INNER_FRAME;
507 break;
508
/* INNER_FRAME: steady state. The gain is interpolated across the frame
 * between two consecutive smoothed values of the gain history. */
509 case INNER_FRAME:
510 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
511 gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
512
/* New input is written at prev_buf_index while older audio is read at
 * buf_index, scaled, and pushed into the limiter buffer. */
513 for (n = 0; n < in->nb_samples; n++) {
514 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
515 buf[s->prev_buf_index + c] = src[c];
516 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
517 }
518 src += inlink->ch_layout.nb_channels;
519
520 s->limiter_buf_index += inlink->ch_layout.nb_channels;
521 if (s->limiter_buf_index >= s->limiter_buf_size)
522 s->limiter_buf_index -= s->limiter_buf_size;
523
524 s->prev_buf_index += inlink->ch_layout.nb_channels;
525 if (s->prev_buf_index >= s->buf_size)
526 s->prev_buf_index -= s->buf_size;
527
528 s->buf_index += inlink->ch_layout.nb_channels;
529 if (s->buf_index >= s->buf_size)
530 s->buf_index -= s->buf_size;
531 }
532
/* If the frame was shorter than 100 ms, advance limiter_buf_index by the
 * missing amount (zero for a full frame), with wrap-around. */
533 subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->ch_layout.nb_channels;
534 s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
535
536 true_peak_limiter(s, dst, in->nb_samples, inlink->ch_layout.nb_channels);
537 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
538
539 ff_ebur128_loudness_range(s->r128_in, &lra);
540 ff_ebur128_loudness_global(s->r128_in, &global);
541 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
542 ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);
543
/* While still below threshold: raise the previous gain by a factor of 1.0058
 * per frame when the input short-term loudness exceeds measured_thresh, and
 * leave this mode once the output short-term loudness reaches target_i. */
544 if (s->above_threshold == 0) {
545 double shortterm_out;
546
547 if (shortterm > s->measured_thresh)
548 s->prev_delta *= 1.0058;
549
550 ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
551 if (shortterm_out >= s->target_i)
552 s->above_threshold = 1;
553 }
554
/* Store this frame's gain: either repeat the previous gain, or compute a new
 * one from the short-term loudness, with the deviation from the global
 * loudness limited to +/- target_lra / 2. */
555 if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
556 s->delta[s->index] = s->prev_delta;
557 } else {
558 env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
559 env_shortterm = s->target_i - shortterm;
560 s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
561 }
562
563 s->prev_delta = s->delta[s->index];
564 s->index++;
565 if (s->index >= 30)
566 s->index -= 30;
567 s->prev_nb_samples = in->nb_samples;
568 break;
569
/* FINAL_FRAME: the frame built by flush_frame(). One constant gain is used;
 * the limiter buffer is refilled from the start of the frame and the frame
 * is then processed in 100 ms subframes. */
570 case FINAL_FRAME:
571 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
572 s->limiter_buf_index = 0;
573 src_index = 0;
574
575 for (n = 0; n < s->limiter_buf_size / inlink->ch_layout.nb_channels; n++) {
576 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
577 s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
578 }
579 src_index += inlink->ch_layout.nb_channels;
580
581 s->limiter_buf_index += inlink->ch_layout.nb_channels;
582 if (s->limiter_buf_index >= s->limiter_buf_size)
583 s->limiter_buf_index -= s->limiter_buf_size;
584 }
585
586 subframe_length = frame_size(inlink->sample_rate, 100);
587 for (i = 0; i < in->nb_samples / subframe_length; i++) {
588 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
589
/* Refill the limiter buffer with the next subframe; once the source is
 * exhausted, pad with zeros. */
590 for (n = 0; n < subframe_length; n++) {
591 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
592 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels)) {
593 limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
594 } else {
595 limiter_buf[s->limiter_buf_index + c] = 0.;
596 }
597 }
598
599 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels))
600 src_index += inlink->ch_layout.nb_channels;
601
602 s->limiter_buf_index += inlink->ch_layout.nb_channels;
603 if (s->limiter_buf_index >= s->limiter_buf_size)
604 s->limiter_buf_index -= s->limiter_buf_size;
605 }
606
607 dst += (subframe_length * inlink->ch_layout.nb_channels);
608 }
609
610 dst = (double *)out->data[0];
611 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
612 break;
613
/* LINEAR_MODE: multiply every sample by the constant offset gain. */
614 case LINEAR_MODE:
615 for (n = 0; n < in->nb_samples; n++) {
616 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
617 dst[c] = src[c] * s->offset;
618 }
619 src += inlink->ch_layout.nb_channels;
620 dst += inlink->ch_layout.nb_channels;
621 }
622
623 dst = (double *)out->data[0];
624 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
625 break;
626 }
627
/* The input is only freed when a separate output buffer was allocated. */
628 if (in != out)
629 av_frame_free(&in);
630 return ff_filter_frame(outlink, out);
631 }
632
633 static int flush_frame(AVFilterLink *outlink)
634 {
635 AVFilterContext *ctx = outlink->src;
636 AVFilterLink *inlink = ctx->inputs[0];
637 LoudNormContext *s = ctx->priv;
638 int ret = 0;
639
640 if (s->frame_type == INNER_FRAME) {
641 double *src;
642 double *buf;
643 int nb_samples, n, c, offset;
644 AVFrame *frame;
645
646 nb_samples = (s->buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples;
647 nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
648
649 frame = ff_get_audio_buffer(outlink, nb_samples);
650 if (!frame)
651 return AVERROR(ENOMEM);
652 frame->nb_samples = nb_samples;
653
654 buf = s->buf;
655 src = (double *)frame->data[0];
656
657 offset = ((s->limiter_buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
658 offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
659 s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
660
661 for (n = 0; n < nb_samples; n++) {
662 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
663 src[c] = buf[s->buf_index + c];
664 }
665 src += inlink->ch_layout.nb_channels;
666 s->buf_index += inlink->ch_layout.nb_channels;
667 if (s->buf_index >= s->buf_size)
668 s->buf_index -= s->buf_size;
669 }
670
671 s->frame_type = FINAL_FRAME;
672 ret = filter_frame(inlink, frame);
673 }
674 return ret;
675 }
676
677 static int activate(AVFilterContext *ctx)
678 {
679 AVFilterLink *inlink = ctx->inputs[0];
680 AVFilterLink *outlink = ctx->outputs[0];
681 LoudNormContext *s = ctx->priv;
682 AVFrame *in = NULL;
683 int ret = 0, status;
684 int64_t pts;
685
686 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
687
688 if (s->frame_type != LINEAR_MODE) {
689 int nb_samples;
690
691 if (s->frame_type == FIRST_FRAME) {
692 nb_samples = frame_size(inlink->sample_rate, 3000);
693 } else {
694 nb_samples = frame_size(inlink->sample_rate, 100);
695 }
696
697 ret = ff_inlink_consume_samples(inlink, nb_samples, nb_samples, &in);
698 } else {
699 ret = ff_inlink_consume_frame(inlink, &in);
700 }
701
702 if (ret < 0)
703 return ret;
704 if (ret > 0) {
705 if (s->frame_type == FIRST_FRAME) {
706 const int nb_samples = frame_size(inlink->sample_rate, 100);
707
708 for (int i = 0; i < FF_ARRAY_ELEMS(s->pts); i++)
709 s->pts[i] = in->pts + i * nb_samples;
710 } else if (s->frame_type == LINEAR_MODE) {
711 s->pts[0] = in->pts;
712 } else {
713 s->pts[FF_ARRAY_ELEMS(s->pts) - 1] = in->pts;
714 }
715 ret = filter_frame(inlink, in);
716 }
717 if (ret < 0)
718 return ret;
719
720 if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
721 ff_outlink_set_status(outlink, status, pts);
722 return flush_frame(outlink);
723 }
724
725 FF_FILTER_FORWARD_WANTED(outlink, inlink);
726
727 return FFERROR_NOT_READY;
728 }
729
730 static int query_formats(AVFilterContext *ctx)
731 {
732 LoudNormContext *s = ctx->priv;
733 AVFilterFormats *formats = NULL;
734 static const int input_srate[] = {192000, -1};
735 int ret = ff_set_common_all_channel_counts(ctx);
736 if (ret < 0)
737 return ret;
738
739 ret = ff_add_format(&formats, AV_SAMPLE_FMT_DBL);
740 if (ret)
741 return ret;
742 ret = ff_set_common_formats(ctx, formats);
743 if (ret)
744 return ret;
745
746 if (s->frame_type != LINEAR_MODE) {
747 formats = ff_make_format_list(input_srate);
748 } else {
749 formats = ff_all_samplerates();
750 }
751
752 return ff_set_common_samplerates(ctx, formats);
753 }
754
755 static int config_input(AVFilterLink *inlink)
756 {
757 AVFilterContext *ctx = inlink->dst;
758 LoudNormContext *s = ctx->priv;
759
760 s->r128_in = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
761 if (!s->r128_in)
762 return AVERROR(ENOMEM);
763
764 s->r128_out = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
765 if (!s->r128_out)
766 return AVERROR(ENOMEM);
767
768 if (inlink->ch_layout.nb_channels == 1 && s->dual_mono) {
769 ff_ebur128_set_channel(s->r128_in, 0, FF_EBUR128_DUAL_MONO);
770 ff_ebur128_set_channel(s->r128_out, 0, FF_EBUR128_DUAL_MONO);
771 }
772
773 s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->ch_layout.nb_channels;
774 s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
775 if (!s->buf)
776 return AVERROR(ENOMEM);
777
778 s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->ch_layout.nb_channels;
779 s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
780 if (!s->limiter_buf)
781 return AVERROR(ENOMEM);
782
783 s->prev_smp = av_malloc_array(inlink->ch_layout.nb_channels, sizeof(*s->prev_smp));
784 if (!s->prev_smp)
785 return AVERROR(ENOMEM);
786
787 init_gaussian_filter(s);
788
789 s->buf_index =
790 s->prev_buf_index =
791 s->limiter_buf_index = 0;
792 s->channels = inlink->ch_layout.nb_channels;
793 s->index = 1;
794 s->limiter_state = OUT;
795 s->offset = pow(10., s->offset / 20.);
796 s->target_tp = pow(10., s->target_tp / 20.);
797 s->attack_length = frame_size(inlink->sample_rate, 10);
798 s->release_length = frame_size(inlink->sample_rate, 100);
799
800 return 0;
801 }
802
803 static av_cold int init(AVFilterContext *ctx)
804 {
805 LoudNormContext *s = ctx->priv;
806 s->frame_type = FIRST_FRAME;
807
808 if (s->linear) {
809 double offset, offset_tp;
810 offset = s->target_i - s->measured_i;
811 offset_tp = s->measured_tp + offset;
812
813 if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
814 if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
815 s->frame_type = LINEAR_MODE;
816 s->offset = offset;
817 }
818 }
819 }
820
821 return 0;
822 }
823
824 static av_cold void uninit(AVFilterContext *ctx)
825 {
826 LoudNormContext *s = ctx->priv;
827 double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
828 int c;
829
830 if (!s->r128_in || !s->r128_out)
831 goto end;
832
833 ff_ebur128_loudness_range(s->r128_in, &lra_in);
834 ff_ebur128_loudness_global(s->r128_in, &i_in);
835 ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
836 for (c = 0; c < s->channels; c++) {
837 double tmp;
838 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
839 if ((c == 0) || (tmp > tp_in))
840 tp_in = tmp;
841 }
842
843 ff_ebur128_loudness_range(s->r128_out, &lra_out);
844 ff_ebur128_loudness_global(s->r128_out, &i_out);
845 ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
846 for (c = 0; c < s->channels; c++) {
847 double tmp;
848 ff_ebur128_sample_peak(s->r128_out, c, &tmp);
849 if ((c == 0) || (tmp > tp_out))
850 tp_out = tmp;
851 }
852
853 switch(s->print_format) {
854 case NONE:
855 break;
856
857 case JSON:
858 av_log(ctx, AV_LOG_INFO,
859 "\n{\n"
860 "\t\"input_i\" : \"%.2f\",\n"
861 "\t\"input_tp\" : \"%.2f\",\n"
862 "\t\"input_lra\" : \"%.2f\",\n"
863 "\t\"input_thresh\" : \"%.2f\",\n"
864 "\t\"output_i\" : \"%.2f\",\n"
865 "\t\"output_tp\" : \"%+.2f\",\n"
866 "\t\"output_lra\" : \"%.2f\",\n"
867 "\t\"output_thresh\" : \"%.2f\",\n"
868 "\t\"normalization_type\" : \"%s\",\n"
869 "\t\"target_offset\" : \"%.2f\"\n"
870 "}\n",
871 i_in,
872 20. * log10(tp_in),
873 lra_in,
874 thresh_in,
875 i_out,
876 20. * log10(tp_out),
877 lra_out,
878 thresh_out,
879 s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
880 s->target_i - i_out
881 );
882 break;
883
884 case SUMMARY:
885 av_log(ctx, AV_LOG_INFO,
886 "\n"
887 "Input Integrated: %+6.1f LUFS\n"
888 "Input True Peak: %+6.1f dBTP\n"
889 "Input LRA: %6.1f LU\n"
890 "Input Threshold: %+6.1f LUFS\n"
891 "\n"
892 "Output Integrated: %+6.1f LUFS\n"
893 "Output True Peak: %+6.1f dBTP\n"
894 "Output LRA: %6.1f LU\n"
895 "Output Threshold: %+6.1f LUFS\n"
896 "\n"
897 "Normalization Type: %s\n"
898 "Target Offset: %+6.1f LU\n",
899 i_in,
900 20. * log10(tp_in),
901 lra_in,
902 thresh_in,
903 i_out,
904 20. * log10(tp_out),
905 lra_out,
906 thresh_out,
907 s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
908 s->target_i - i_out
909 );
910 break;
911 }
912
913 end:
914 if (s->r128_in)
915 ff_ebur128_destroy(&s->r128_in);
916 if (s->r128_out)
917 ff_ebur128_destroy(&s->r128_out);
918 av_freep(&s->limiter_buf);
919 av_freep(&s->prev_smp);
920 av_freep(&s->buf);
921 }
922
923 static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
924 {
925 .name = "default",
926 .type = AVMEDIA_TYPE_AUDIO,
927 .config_props = config_input,
928 },
929 };
930
931 static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
932 {
933 .name = "default",
934 .type = AVMEDIA_TYPE_AUDIO,
935 },
936 };
937
938 const AVFilter ff_af_loudnorm = {
939 .name = "loudnorm",
940 .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
941 .priv_size = sizeof(LoudNormContext),
942 .priv_class = &loudnorm_class,
943 .init = init,
944 .activate = activate,
945 .uninit = uninit,
946 FILTER_INPUTS(avfilter_af_loudnorm_inputs),
947 FILTER_OUTPUTS(avfilter_af_loudnorm_outputs),
948 FILTER_QUERY_FUNC(query_formats),
949 };
950