FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_loudnorm.c
Date: 2024-09-07 18:49:03
Exec Total Coverage
Lines: 0 470 0.0%
Functions: 0 12 0.0%
Branches: 0 316 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
22
23 #include "libavutil/mem.h"
24 #include "libavutil/opt.h"
25 #include "avfilter.h"
26 #include "filters.h"
27 #include "formats.h"
28 #include "audio.h"
29 #include "ebur128.h"
30
/**
 * Processing stage of the filter. filter_frame() switches on this value and
 * activate() uses it to decide how many samples to request.
 */
enum FrameType {
    FIRST_FRAME = 0, /* state set by init(); the first buffered chunk is pending */
    INNER_FRAME = 1, /* steady state, entered after the first chunk was processed */
    FINAL_FRAME = 2, /* set by flush_frame() to drain what is still buffered */
    LINEAR_MODE = 3, /* a single static gain is applied to every sample */
    FRAME_NB    = 4  /* number of frame types */
};
38
/**
 * State of the true-peak limiter state machine driven by true_peak_limiter().
 */
enum LimiterState {
    OUT      = 0, /* no gain reduction active; scanning for a peak */
    ATTACK   = 1, /* ramping from gain_reduction[0] to gain_reduction[1] */
    SUSTAIN  = 2, /* holding gain_reduction[1] */
    RELEASE  = 3, /* ramping the gain back over release_length samples */
    STATE_NB = 4  /* number of limiter states */
};
46
/**
 * Layout of the statistics that uninit() logs (the "print_format" option).
 */
enum PrintFormat {
    NONE    = 0, /* log nothing */
    JSON    = 1, /* log a JSON object */
    SUMMARY = 2, /* log a human-readable table */
    PF_NB   = 3  /* number of print formats; PF_NB - 1 is the option maximum */
};
53
54 typedef struct LoudNormContext {
55 const AVClass *class;
56 double target_i;
57 double target_lra;
58 double target_tp;
59 double measured_i;
60 double measured_lra;
61 double measured_tp;
62 double measured_thresh;
63 double offset;
64 int linear;
65 int dual_mono;
66 enum PrintFormat print_format;
67
68 double *buf;
69 int buf_size;
70 int buf_index;
71 int prev_buf_index;
72
73 double delta[30];
74 double weights[21];
75 double prev_delta;
76 int index;
77
78 double gain_reduction[2];
79 double *limiter_buf;
80 double *prev_smp;
81 int limiter_buf_index;
82 int limiter_buf_size;
83 enum LimiterState limiter_state;
84 int peak_index;
85 int env_index;
86 int env_cnt;
87 int attack_length;
88 int release_length;
89
90 int64_t pts[30];
91 enum FrameType frame_type;
92 int above_threshold;
93 int prev_nb_samples;
94 int channels;
95
96 FFEBUR128State *r128_in;
97 FFEBUR128State *r128_out;
98 } LoudNormContext;
99
100 #define OFFSET(x) offsetof(LoudNormContext, x)
101 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
102
103 static const AVOption loudnorm_options[] = {
104 { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
105 { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
106 { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
107 { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
108 { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
109 { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
110 { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
111 { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
112 { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
113 { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
114 { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
115 { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
116 { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
117 { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
118 { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
119 { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
120 { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, .unit = "print_format" },
121 { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, .unit = "print_format" },
122 { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, .unit = "print_format" },
123 { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, .unit = "print_format" },
124 { NULL }
125 };
126
127 AVFILTER_DEFINE_CLASS(loudnorm);
128
129 static inline int frame_size(int sample_rate, int frame_len_msec)
130 {
131 const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
132 return frame_size + (frame_size % 2);
133 }
134
135 static void init_gaussian_filter(LoudNormContext *s)
136 {
137 double total_weight = 0.0;
138 const double sigma = 3.5;
139 double adjust;
140 int i;
141
142 const int offset = 21 / 2;
143 const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
144 const double c2 = 2.0 * pow(sigma, 2.0);
145
146 for (i = 0; i < 21; i++) {
147 const int x = i - offset;
148 s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
149 total_weight += s->weights[i];
150 }
151
152 adjust = 1.0 / total_weight;
153 for (i = 0; i < 21; i++)
154 s->weights[i] *= adjust;
155 }
156
157 static double gaussian_filter(LoudNormContext *s, int index)
158 {
159 double result = 0.;
160 int i;
161
162 index = index - 10 > 0 ? index - 10 : index + 20;
163 for (i = 0; i < 21; i++)
164 result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
165
166 return result;
167 }
168
169 static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
170 {
171 int n, c, i, index;
172 double ceiling;
173 double *buf;
174
175 *peak_delta = -1;
176 buf = s->limiter_buf;
177 ceiling = s->target_tp;
178
179 index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
180 if (index >= s->limiter_buf_size)
181 index -= s->limiter_buf_size;
182
183 if (s->frame_type == FIRST_FRAME) {
184 for (c = 0; c < channels; c++)
185 s->prev_smp[c] = fabs(buf[index + c - channels]);
186 }
187
188 for (n = 0; n < nb_samples; n++) {
189 for (c = 0; c < channels; c++) {
190 double this, next, max_peak;
191
192 this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
193 next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
194
195 if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
196 int detected;
197
198 detected = 1;
199 for (i = 2; i < 12; i++) {
200 next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
201 if (next > this) {
202 detected = 0;
203 break;
204 }
205 }
206
207 if (!detected)
208 continue;
209
210 for (c = 0; c < channels; c++) {
211 if (c == 0 || fabs(buf[index + c]) > max_peak)
212 max_peak = fabs(buf[index + c]);
213
214 s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
215 }
216
217 *peak_delta = n;
218 s->peak_index = index;
219 *peak_value = max_peak;
220 return;
221 }
222
223 s->prev_smp[c] = this;
224 }
225
226 index += channels;
227 if (index >= s->limiter_buf_size)
228 index -= s->limiter_buf_size;
229 }
230 }
231
232 static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
233 {
234 int n, c, index, peak_delta, smp_cnt;
235 double ceiling, peak_value;
236 double *buf;
237
238 buf = s->limiter_buf;
239 ceiling = s->target_tp;
240 index = s->limiter_buf_index;
241 smp_cnt = 0;
242
243 if (s->frame_type == FIRST_FRAME) {
244 double max;
245
246 max = 0.;
247 for (n = 0; n < 1920; n++) {
248 for (c = 0; c < channels; c++) {
249 max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
250 }
251 buf += channels;
252 }
253
254 if (max > ceiling) {
255 s->gain_reduction[1] = ceiling / max;
256 s->limiter_state = SUSTAIN;
257 buf = s->limiter_buf;
258
259 for (n = 0; n < 1920; n++) {
260 for (c = 0; c < channels; c++) {
261 double env;
262 env = s->gain_reduction[1];
263 buf[c] *= env;
264 }
265 buf += channels;
266 }
267 }
268
269 buf = s->limiter_buf;
270 }
271
272 do {
273
274 switch(s->limiter_state) {
275 case OUT:
276 detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
277 if (peak_delta != -1) {
278 s->env_cnt = 0;
279 smp_cnt += (peak_delta - s->attack_length);
280 s->gain_reduction[0] = 1.;
281 s->gain_reduction[1] = ceiling / peak_value;
282 s->limiter_state = ATTACK;
283
284 s->env_index = s->peak_index - (s->attack_length * channels);
285 if (s->env_index < 0)
286 s->env_index += s->limiter_buf_size;
287
288 s->env_index += (s->env_cnt * channels);
289 if (s->env_index > s->limiter_buf_size)
290 s->env_index -= s->limiter_buf_size;
291
292 } else {
293 smp_cnt = nb_samples;
294 }
295 break;
296
297 case ATTACK:
298 for (; s->env_cnt < s->attack_length; s->env_cnt++) {
299 for (c = 0; c < channels; c++) {
300 double env;
301 env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
302 buf[s->env_index + c] *= env;
303 }
304
305 s->env_index += channels;
306 if (s->env_index >= s->limiter_buf_size)
307 s->env_index -= s->limiter_buf_size;
308
309 smp_cnt++;
310 if (smp_cnt >= nb_samples) {
311 s->env_cnt++;
312 break;
313 }
314 }
315
316 if (smp_cnt < nb_samples) {
317 s->env_cnt = 0;
318 s->attack_length = 1920;
319 s->limiter_state = SUSTAIN;
320 }
321 break;
322
323 case SUSTAIN:
324 detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
325 if (peak_delta == -1) {
326 s->limiter_state = RELEASE;
327 s->gain_reduction[0] = s->gain_reduction[1];
328 s->gain_reduction[1] = 1.;
329 s->env_cnt = 0;
330 break;
331 } else {
332 double gain_reduction;
333 gain_reduction = ceiling / peak_value;
334
335 if (gain_reduction < s->gain_reduction[1]) {
336 s->limiter_state = ATTACK;
337
338 s->attack_length = peak_delta;
339 if (s->attack_length <= 1)
340 s->attack_length = 2;
341
342 s->gain_reduction[0] = s->gain_reduction[1];
343 s->gain_reduction[1] = gain_reduction;
344 s->env_cnt = 0;
345 break;
346 }
347
348 for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
349 for (c = 0; c < channels; c++) {
350 double env;
351 env = s->gain_reduction[1];
352 buf[s->env_index + c] *= env;
353 }
354
355 s->env_index += channels;
356 if (s->env_index >= s->limiter_buf_size)
357 s->env_index -= s->limiter_buf_size;
358
359 smp_cnt++;
360 if (smp_cnt >= nb_samples) {
361 s->env_cnt++;
362 break;
363 }
364 }
365 }
366 break;
367
368 case RELEASE:
369 for (; s->env_cnt < s->release_length; s->env_cnt++) {
370 for (c = 0; c < channels; c++) {
371 double env;
372 env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
373 buf[s->env_index + c] *= env;
374 }
375
376 s->env_index += channels;
377 if (s->env_index >= s->limiter_buf_size)
378 s->env_index -= s->limiter_buf_size;
379
380 smp_cnt++;
381 if (smp_cnt >= nb_samples) {
382 s->env_cnt++;
383 break;
384 }
385 }
386
387 if (smp_cnt < nb_samples) {
388 s->env_cnt = 0;
389 s->limiter_state = OUT;
390 }
391
392 break;
393 }
394
395 } while (smp_cnt < nb_samples);
396
397 for (n = 0; n < nb_samples; n++) {
398 for (c = 0; c < channels; c++) {
399 out[c] = buf[index + c];
400 if (fabs(out[c]) > ceiling) {
401 out[c] = ceiling * (out[c] < 0 ? -1 : 1);
402 }
403 }
404 out += channels;
405 index += channels;
406 if (index >= s->limiter_buf_size)
407 index -= s->limiter_buf_size;
408 }
409 }
410
411 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
412 {
413 AVFilterContext *ctx = inlink->dst;
414 LoudNormContext *s = ctx->priv;
415 AVFilterLink *outlink = ctx->outputs[0];
416 AVFrame *out;
417 const double *src;
418 double *dst;
419 double *buf;
420 double *limiter_buf;
421 int i, n, c, subframe_length, src_index;
422 double gain, gain_next, env_global, env_shortterm,
423 global, shortterm, lra, relative_threshold;
424
425 if (av_frame_is_writable(in)) {
426 out = in;
427 } else {
428 out = ff_get_audio_buffer(outlink, in->nb_samples);
429 if (!out) {
430 av_frame_free(&in);
431 return AVERROR(ENOMEM);
432 }
433 av_frame_copy_props(out, in);
434 }
435
436 out->pts = s->pts[0];
437 memmove(s->pts, &s->pts[1], (FF_ARRAY_ELEMS(s->pts) - 1) * sizeof(s->pts[0]));
438
439 src = (const double *)in->data[0];
440 dst = (double *)out->data[0];
441 buf = s->buf;
442 limiter_buf = s->limiter_buf;
443
444 ff_ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
445
446 if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
447 double offset, offset_tp, true_peak;
448
449 ff_ebur128_loudness_global(s->r128_in, &global);
450 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
451 double tmp;
452 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
453 if (c == 0 || tmp > true_peak)
454 true_peak = tmp;
455 }
456
457 offset = pow(10., (s->target_i - global) / 20.);
458 offset_tp = true_peak * offset;
459 s->offset = offset_tp < s->target_tp ? offset : s->target_tp / true_peak;
460 s->frame_type = LINEAR_MODE;
461 }
462
463 switch (s->frame_type) {
464 case FIRST_FRAME:
465 for (n = 0; n < in->nb_samples; n++) {
466 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
467 buf[s->buf_index + c] = src[c];
468 }
469 src += inlink->ch_layout.nb_channels;
470 s->buf_index += inlink->ch_layout.nb_channels;
471 }
472
473 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
474
475 if (shortterm < s->measured_thresh) {
476 s->above_threshold = 0;
477 env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
478 } else {
479 s->above_threshold = 1;
480 env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
481 }
482
483 for (n = 0; n < 30; n++)
484 s->delta[n] = pow(10., env_shortterm / 20.);
485 s->prev_delta = s->delta[s->index];
486
487 s->buf_index =
488 s->limiter_buf_index = 0;
489
490 for (n = 0; n < (s->limiter_buf_size / inlink->ch_layout.nb_channels); n++) {
491 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
492 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
493 }
494 s->limiter_buf_index += inlink->ch_layout.nb_channels;
495 if (s->limiter_buf_index >= s->limiter_buf_size)
496 s->limiter_buf_index -= s->limiter_buf_size;
497
498 s->buf_index += inlink->ch_layout.nb_channels;
499 }
500
501 subframe_length = frame_size(inlink->sample_rate, 100);
502 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
503 ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);
504
505 out->nb_samples = subframe_length;
506
507 s->frame_type = INNER_FRAME;
508 break;
509
510 case INNER_FRAME:
511 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
512 gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
513
514 for (n = 0; n < in->nb_samples; n++) {
515 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
516 buf[s->prev_buf_index + c] = src[c];
517 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
518 }
519 src += inlink->ch_layout.nb_channels;
520
521 s->limiter_buf_index += inlink->ch_layout.nb_channels;
522 if (s->limiter_buf_index >= s->limiter_buf_size)
523 s->limiter_buf_index -= s->limiter_buf_size;
524
525 s->prev_buf_index += inlink->ch_layout.nb_channels;
526 if (s->prev_buf_index >= s->buf_size)
527 s->prev_buf_index -= s->buf_size;
528
529 s->buf_index += inlink->ch_layout.nb_channels;
530 if (s->buf_index >= s->buf_size)
531 s->buf_index -= s->buf_size;
532 }
533
534 subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->ch_layout.nb_channels;
535 s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
536
537 true_peak_limiter(s, dst, in->nb_samples, inlink->ch_layout.nb_channels);
538 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
539
540 ff_ebur128_loudness_range(s->r128_in, &lra);
541 ff_ebur128_loudness_global(s->r128_in, &global);
542 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
543 ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);
544
545 if (s->above_threshold == 0) {
546 double shortterm_out;
547
548 if (shortterm > s->measured_thresh)
549 s->prev_delta *= 1.0058;
550
551 ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
552 if (shortterm_out >= s->target_i)
553 s->above_threshold = 1;
554 }
555
556 if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
557 s->delta[s->index] = s->prev_delta;
558 } else {
559 env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
560 env_shortterm = s->target_i - shortterm;
561 s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
562 }
563
564 s->prev_delta = s->delta[s->index];
565 s->index++;
566 if (s->index >= 30)
567 s->index -= 30;
568 s->prev_nb_samples = in->nb_samples;
569 break;
570
571 case FINAL_FRAME:
572 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
573 s->limiter_buf_index = 0;
574 src_index = 0;
575
576 for (n = 0; n < s->limiter_buf_size / inlink->ch_layout.nb_channels; n++) {
577 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
578 s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
579 }
580 src_index += inlink->ch_layout.nb_channels;
581
582 s->limiter_buf_index += inlink->ch_layout.nb_channels;
583 if (s->limiter_buf_index >= s->limiter_buf_size)
584 s->limiter_buf_index -= s->limiter_buf_size;
585 }
586
587 subframe_length = frame_size(inlink->sample_rate, 100);
588 for (i = 0; i < in->nb_samples / subframe_length; i++) {
589 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
590
591 for (n = 0; n < subframe_length; n++) {
592 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
593 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels)) {
594 limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
595 } else {
596 limiter_buf[s->limiter_buf_index + c] = 0.;
597 }
598 }
599
600 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels))
601 src_index += inlink->ch_layout.nb_channels;
602
603 s->limiter_buf_index += inlink->ch_layout.nb_channels;
604 if (s->limiter_buf_index >= s->limiter_buf_size)
605 s->limiter_buf_index -= s->limiter_buf_size;
606 }
607
608 dst += (subframe_length * inlink->ch_layout.nb_channels);
609 }
610
611 dst = (double *)out->data[0];
612 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
613 break;
614
615 case LINEAR_MODE:
616 for (n = 0; n < in->nb_samples; n++) {
617 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
618 dst[c] = src[c] * s->offset;
619 }
620 src += inlink->ch_layout.nb_channels;
621 dst += inlink->ch_layout.nb_channels;
622 }
623
624 dst = (double *)out->data[0];
625 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
626 break;
627 }
628
629 if (in != out)
630 av_frame_free(&in);
631 return ff_filter_frame(outlink, out);
632 }
633
634 static int flush_frame(AVFilterLink *outlink)
635 {
636 AVFilterContext *ctx = outlink->src;
637 AVFilterLink *inlink = ctx->inputs[0];
638 LoudNormContext *s = ctx->priv;
639 int ret = 0;
640
641 if (s->frame_type == INNER_FRAME) {
642 double *src;
643 double *buf;
644 int nb_samples, n, c, offset;
645 AVFrame *frame;
646
647 nb_samples = (s->buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples;
648 nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
649
650 frame = ff_get_audio_buffer(outlink, nb_samples);
651 if (!frame)
652 return AVERROR(ENOMEM);
653 frame->nb_samples = nb_samples;
654
655 buf = s->buf;
656 src = (double *)frame->data[0];
657
658 offset = ((s->limiter_buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
659 offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
660 s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
661
662 for (n = 0; n < nb_samples; n++) {
663 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
664 src[c] = buf[s->buf_index + c];
665 }
666 src += inlink->ch_layout.nb_channels;
667 s->buf_index += inlink->ch_layout.nb_channels;
668 if (s->buf_index >= s->buf_size)
669 s->buf_index -= s->buf_size;
670 }
671
672 s->frame_type = FINAL_FRAME;
673 ret = filter_frame(inlink, frame);
674 }
675 return ret;
676 }
677
678 static int activate(AVFilterContext *ctx)
679 {
680 AVFilterLink *inlink = ctx->inputs[0];
681 AVFilterLink *outlink = ctx->outputs[0];
682 LoudNormContext *s = ctx->priv;
683 AVFrame *in = NULL;
684 int ret = 0, status;
685 int64_t pts;
686
687 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
688
689 if (s->frame_type != LINEAR_MODE) {
690 int nb_samples;
691
692 if (s->frame_type == FIRST_FRAME) {
693 nb_samples = frame_size(inlink->sample_rate, 3000);
694 } else {
695 nb_samples = frame_size(inlink->sample_rate, 100);
696 }
697
698 ret = ff_inlink_consume_samples(inlink, nb_samples, nb_samples, &in);
699 } else {
700 ret = ff_inlink_consume_frame(inlink, &in);
701 }
702
703 if (ret < 0)
704 return ret;
705 if (ret > 0) {
706 if (s->frame_type == FIRST_FRAME) {
707 const int nb_samples = frame_size(inlink->sample_rate, 100);
708
709 for (int i = 0; i < FF_ARRAY_ELEMS(s->pts); i++)
710 s->pts[i] = in->pts + i * nb_samples;
711 } else if (s->frame_type == LINEAR_MODE) {
712 s->pts[0] = in->pts;
713 } else {
714 s->pts[FF_ARRAY_ELEMS(s->pts) - 1] = in->pts;
715 }
716 ret = filter_frame(inlink, in);
717 }
718 if (ret < 0)
719 return ret;
720
721 if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
722 ff_outlink_set_status(outlink, status, pts);
723 return flush_frame(outlink);
724 }
725
726 FF_FILTER_FORWARD_WANTED(outlink, inlink);
727
728 return FFERROR_NOT_READY;
729 }
730
731 static int query_formats(AVFilterContext *ctx)
732 {
733 LoudNormContext *s = ctx->priv;
734 static const int input_srate[] = {192000, -1};
735 static const enum AVSampleFormat sample_fmts[] = {
736 AV_SAMPLE_FMT_DBL,
737 AV_SAMPLE_FMT_NONE
738 };
739 int ret = ff_set_common_all_channel_counts(ctx);
740 if (ret < 0)
741 return ret;
742
743 ret = ff_set_common_formats_from_list(ctx, sample_fmts);
744 if (ret < 0)
745 return ret;
746
747 if (s->frame_type == LINEAR_MODE) {
748 return ff_set_common_all_samplerates(ctx);
749 } else {
750 return ff_set_common_samplerates_from_list(ctx, input_srate);
751 }
752 }
753
754 static int config_input(AVFilterLink *inlink)
755 {
756 AVFilterContext *ctx = inlink->dst;
757 LoudNormContext *s = ctx->priv;
758
759 s->r128_in = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
760 if (!s->r128_in)
761 return AVERROR(ENOMEM);
762
763 s->r128_out = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
764 if (!s->r128_out)
765 return AVERROR(ENOMEM);
766
767 if (inlink->ch_layout.nb_channels == 1 && s->dual_mono) {
768 ff_ebur128_set_channel(s->r128_in, 0, FF_EBUR128_DUAL_MONO);
769 ff_ebur128_set_channel(s->r128_out, 0, FF_EBUR128_DUAL_MONO);
770 }
771
772 s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->ch_layout.nb_channels;
773 s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
774 if (!s->buf)
775 return AVERROR(ENOMEM);
776
777 s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->ch_layout.nb_channels;
778 s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
779 if (!s->limiter_buf)
780 return AVERROR(ENOMEM);
781
782 s->prev_smp = av_malloc_array(inlink->ch_layout.nb_channels, sizeof(*s->prev_smp));
783 if (!s->prev_smp)
784 return AVERROR(ENOMEM);
785
786 init_gaussian_filter(s);
787
788 s->buf_index =
789 s->prev_buf_index =
790 s->limiter_buf_index = 0;
791 s->channels = inlink->ch_layout.nb_channels;
792 s->index = 1;
793 s->limiter_state = OUT;
794 s->offset = pow(10., s->offset / 20.);
795 s->target_tp = pow(10., s->target_tp / 20.);
796 s->attack_length = frame_size(inlink->sample_rate, 10);
797 s->release_length = frame_size(inlink->sample_rate, 100);
798
799 return 0;
800 }
801
802 static av_cold int init(AVFilterContext *ctx)
803 {
804 LoudNormContext *s = ctx->priv;
805 s->frame_type = FIRST_FRAME;
806
807 if (s->linear) {
808 double offset, offset_tp;
809 offset = s->target_i - s->measured_i;
810 offset_tp = s->measured_tp + offset;
811
812 if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
813 if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
814 s->frame_type = LINEAR_MODE;
815 s->offset = offset;
816 }
817 }
818 }
819
820 return 0;
821 }
822
823 static av_cold void uninit(AVFilterContext *ctx)
824 {
825 LoudNormContext *s = ctx->priv;
826 double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
827 int c;
828
829 if (!s->r128_in || !s->r128_out)
830 goto end;
831
832 ff_ebur128_loudness_range(s->r128_in, &lra_in);
833 ff_ebur128_loudness_global(s->r128_in, &i_in);
834 ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
835 for (c = 0; c < s->channels; c++) {
836 double tmp;
837 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
838 if ((c == 0) || (tmp > tp_in))
839 tp_in = tmp;
840 }
841
842 ff_ebur128_loudness_range(s->r128_out, &lra_out);
843 ff_ebur128_loudness_global(s->r128_out, &i_out);
844 ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
845 for (c = 0; c < s->channels; c++) {
846 double tmp;
847 ff_ebur128_sample_peak(s->r128_out, c, &tmp);
848 if ((c == 0) || (tmp > tp_out))
849 tp_out = tmp;
850 }
851
852 switch(s->print_format) {
853 case NONE:
854 break;
855
856 case JSON:
857 av_log(ctx, AV_LOG_INFO,
858 "\n{\n"
859 "\t\"input_i\" : \"%.2f\",\n"
860 "\t\"input_tp\" : \"%.2f\",\n"
861 "\t\"input_lra\" : \"%.2f\",\n"
862 "\t\"input_thresh\" : \"%.2f\",\n"
863 "\t\"output_i\" : \"%.2f\",\n"
864 "\t\"output_tp\" : \"%+.2f\",\n"
865 "\t\"output_lra\" : \"%.2f\",\n"
866 "\t\"output_thresh\" : \"%.2f\",\n"
867 "\t\"normalization_type\" : \"%s\",\n"
868 "\t\"target_offset\" : \"%.2f\"\n"
869 "}\n",
870 i_in,
871 20. * log10(tp_in),
872 lra_in,
873 thresh_in,
874 i_out,
875 20. * log10(tp_out),
876 lra_out,
877 thresh_out,
878 s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
879 s->target_i - i_out
880 );
881 break;
882
883 case SUMMARY:
884 av_log(ctx, AV_LOG_INFO,
885 "\n"
886 "Input Integrated: %+6.1f LUFS\n"
887 "Input True Peak: %+6.1f dBTP\n"
888 "Input LRA: %6.1f LU\n"
889 "Input Threshold: %+6.1f LUFS\n"
890 "\n"
891 "Output Integrated: %+6.1f LUFS\n"
892 "Output True Peak: %+6.1f dBTP\n"
893 "Output LRA: %6.1f LU\n"
894 "Output Threshold: %+6.1f LUFS\n"
895 "\n"
896 "Normalization Type: %s\n"
897 "Target Offset: %+6.1f LU\n",
898 i_in,
899 20. * log10(tp_in),
900 lra_in,
901 thresh_in,
902 i_out,
903 20. * log10(tp_out),
904 lra_out,
905 thresh_out,
906 s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
907 s->target_i - i_out
908 );
909 break;
910 }
911
912 end:
913 if (s->r128_in)
914 ff_ebur128_destroy(&s->r128_in);
915 if (s->r128_out)
916 ff_ebur128_destroy(&s->r128_out);
917 av_freep(&s->limiter_buf);
918 av_freep(&s->prev_smp);
919 av_freep(&s->buf);
920 }
921
922 static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
923 {
924 .name = "default",
925 .type = AVMEDIA_TYPE_AUDIO,
926 .config_props = config_input,
927 },
928 };
929
930 const AVFilter ff_af_loudnorm = {
931 .name = "loudnorm",
932 .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
933 .priv_size = sizeof(LoudNormContext),
934 .priv_class = &loudnorm_class,
935 .init = init,
936 .activate = activate,
937 .uninit = uninit,
938 FILTER_INPUTS(avfilter_af_loudnorm_inputs),
939 FILTER_OUTPUTS(ff_audio_default_filterpad),
940 FILTER_QUERY_FUNC(query_formats),
941 };
942