FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_loudnorm.c
Date: 2024-04-25 15:36:26
Exec Total Coverage
Lines: 0 470 0.0%
Functions: 0 12 0.0%
Branches: 0 316 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
22
23 #include "libavutil/mem.h"
24 #include "libavutil/opt.h"
25 #include "avfilter.h"
26 #include "filters.h"
27 #include "formats.h"
28 #include "internal.h"
29 #include "audio.h"
30 #include "ebur128.h"
31
/* Processing stage of the filter; filter_frame() switches on this value. */
enum FrameType {
    FIRST_FRAME = 0, /* initial stage, selected in init() */
    INNER_FRAME,     /* steady-state stage entered after the first frame */
    FINAL_FRAME,     /* flush stage, selected by flush_frame() */
    LINEAR_MODE,     /* a single constant gain is applied to every sample */
    FRAME_NB         /* number of frame types */
};
39
/* State machine of the true-peak limiter (see true_peak_limiter()). */
enum LimiterState {
    OUT = 0,  /* no gain reduction active; the limiter scans for peaks */
    ATTACK,   /* gain ramps from gain_reduction[0] to gain_reduction[1] */
    SUSTAIN,  /* gain_reduction[1] is applied unchanged */
    RELEASE,  /* gain ramps from gain_reduction[0] back to gain_reduction[1] (set to 1.0) */
    STATE_NB  /* number of limiter states */
};
47
/* Format of the statistics logged by uninit(). */
enum PrintFormat {
    NONE = 0, /* log nothing */
    JSON,     /* log a JSON object */
    SUMMARY,  /* log a human-readable summary */
    PF_NB     /* number of print formats */
};
54
55 typedef struct LoudNormContext {
56 const AVClass *class;
57 double target_i;
58 double target_lra;
59 double target_tp;
60 double measured_i;
61 double measured_lra;
62 double measured_tp;
63 double measured_thresh;
64 double offset;
65 int linear;
66 int dual_mono;
67 enum PrintFormat print_format;
68
69 double *buf;
70 int buf_size;
71 int buf_index;
72 int prev_buf_index;
73
74 double delta[30];
75 double weights[21];
76 double prev_delta;
77 int index;
78
79 double gain_reduction[2];
80 double *limiter_buf;
81 double *prev_smp;
82 int limiter_buf_index;
83 int limiter_buf_size;
84 enum LimiterState limiter_state;
85 int peak_index;
86 int env_index;
87 int env_cnt;
88 int attack_length;
89 int release_length;
90
91 int64_t pts[30];
92 enum FrameType frame_type;
93 int above_threshold;
94 int prev_nb_samples;
95 int channels;
96
97 FFEBUR128State *r128_in;
98 FFEBUR128State *r128_out;
99 } LoudNormContext;
100
101 #define OFFSET(x) offsetof(LoudNormContext, x)
102 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
103
104 static const AVOption loudnorm_options[] = {
105 { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
106 { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
107 { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
108 { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 50., FLAGS },
109 { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
110 { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
111 { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
112 { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
113 { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
114 { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
115 { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
116 { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
117 { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
118 { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
119 { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
120 { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
121 { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, .unit = "print_format" },
122 { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, .unit = "print_format" },
123 { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, .unit = "print_format" },
124 { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, .unit = "print_format" },
125 { NULL }
126 };
127
128 AVFILTER_DEFINE_CLASS(loudnorm);
129
130 static inline int frame_size(int sample_rate, int frame_len_msec)
131 {
132 const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
133 return frame_size + (frame_size % 2);
134 }
135
136 static void init_gaussian_filter(LoudNormContext *s)
137 {
138 double total_weight = 0.0;
139 const double sigma = 3.5;
140 double adjust;
141 int i;
142
143 const int offset = 21 / 2;
144 const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
145 const double c2 = 2.0 * pow(sigma, 2.0);
146
147 for (i = 0; i < 21; i++) {
148 const int x = i - offset;
149 s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
150 total_weight += s->weights[i];
151 }
152
153 adjust = 1.0 / total_weight;
154 for (i = 0; i < 21; i++)
155 s->weights[i] *= adjust;
156 }
157
158 static double gaussian_filter(LoudNormContext *s, int index)
159 {
160 double result = 0.;
161 int i;
162
163 index = index - 10 > 0 ? index - 10 : index + 20;
164 for (i = 0; i < 21; i++)
165 result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
166
167 return result;
168 }
169
170 static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
171 {
172 int n, c, i, index;
173 double ceiling;
174 double *buf;
175
176 *peak_delta = -1;
177 buf = s->limiter_buf;
178 ceiling = s->target_tp;
179
180 index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
181 if (index >= s->limiter_buf_size)
182 index -= s->limiter_buf_size;
183
184 if (s->frame_type == FIRST_FRAME) {
185 for (c = 0; c < channels; c++)
186 s->prev_smp[c] = fabs(buf[index + c - channels]);
187 }
188
189 for (n = 0; n < nb_samples; n++) {
190 for (c = 0; c < channels; c++) {
191 double this, next, max_peak;
192
193 this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
194 next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
195
196 if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
197 int detected;
198
199 detected = 1;
200 for (i = 2; i < 12; i++) {
201 next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
202 if (next > this) {
203 detected = 0;
204 break;
205 }
206 }
207
208 if (!detected)
209 continue;
210
211 for (c = 0; c < channels; c++) {
212 if (c == 0 || fabs(buf[index + c]) > max_peak)
213 max_peak = fabs(buf[index + c]);
214
215 s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
216 }
217
218 *peak_delta = n;
219 s->peak_index = index;
220 *peak_value = max_peak;
221 return;
222 }
223
224 s->prev_smp[c] = this;
225 }
226
227 index += channels;
228 if (index >= s->limiter_buf_size)
229 index -= s->limiter_buf_size;
230 }
231 }
232
233 static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
234 {
235 int n, c, index, peak_delta, smp_cnt;
236 double ceiling, peak_value;
237 double *buf;
238
239 buf = s->limiter_buf;
240 ceiling = s->target_tp;
241 index = s->limiter_buf_index;
242 smp_cnt = 0;
243
244 if (s->frame_type == FIRST_FRAME) {
245 double max;
246
247 max = 0.;
248 for (n = 0; n < 1920; n++) {
249 for (c = 0; c < channels; c++) {
250 max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
251 }
252 buf += channels;
253 }
254
255 if (max > ceiling) {
256 s->gain_reduction[1] = ceiling / max;
257 s->limiter_state = SUSTAIN;
258 buf = s->limiter_buf;
259
260 for (n = 0; n < 1920; n++) {
261 for (c = 0; c < channels; c++) {
262 double env;
263 env = s->gain_reduction[1];
264 buf[c] *= env;
265 }
266 buf += channels;
267 }
268 }
269
270 buf = s->limiter_buf;
271 }
272
273 do {
274
275 switch(s->limiter_state) {
276 case OUT:
277 detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
278 if (peak_delta != -1) {
279 s->env_cnt = 0;
280 smp_cnt += (peak_delta - s->attack_length);
281 s->gain_reduction[0] = 1.;
282 s->gain_reduction[1] = ceiling / peak_value;
283 s->limiter_state = ATTACK;
284
285 s->env_index = s->peak_index - (s->attack_length * channels);
286 if (s->env_index < 0)
287 s->env_index += s->limiter_buf_size;
288
289 s->env_index += (s->env_cnt * channels);
290 if (s->env_index > s->limiter_buf_size)
291 s->env_index -= s->limiter_buf_size;
292
293 } else {
294 smp_cnt = nb_samples;
295 }
296 break;
297
298 case ATTACK:
299 for (; s->env_cnt < s->attack_length; s->env_cnt++) {
300 for (c = 0; c < channels; c++) {
301 double env;
302 env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
303 buf[s->env_index + c] *= env;
304 }
305
306 s->env_index += channels;
307 if (s->env_index >= s->limiter_buf_size)
308 s->env_index -= s->limiter_buf_size;
309
310 smp_cnt++;
311 if (smp_cnt >= nb_samples) {
312 s->env_cnt++;
313 break;
314 }
315 }
316
317 if (smp_cnt < nb_samples) {
318 s->env_cnt = 0;
319 s->attack_length = 1920;
320 s->limiter_state = SUSTAIN;
321 }
322 break;
323
324 case SUSTAIN:
325 detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
326 if (peak_delta == -1) {
327 s->limiter_state = RELEASE;
328 s->gain_reduction[0] = s->gain_reduction[1];
329 s->gain_reduction[1] = 1.;
330 s->env_cnt = 0;
331 break;
332 } else {
333 double gain_reduction;
334 gain_reduction = ceiling / peak_value;
335
336 if (gain_reduction < s->gain_reduction[1]) {
337 s->limiter_state = ATTACK;
338
339 s->attack_length = peak_delta;
340 if (s->attack_length <= 1)
341 s->attack_length = 2;
342
343 s->gain_reduction[0] = s->gain_reduction[1];
344 s->gain_reduction[1] = gain_reduction;
345 s->env_cnt = 0;
346 break;
347 }
348
349 for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
350 for (c = 0; c < channels; c++) {
351 double env;
352 env = s->gain_reduction[1];
353 buf[s->env_index + c] *= env;
354 }
355
356 s->env_index += channels;
357 if (s->env_index >= s->limiter_buf_size)
358 s->env_index -= s->limiter_buf_size;
359
360 smp_cnt++;
361 if (smp_cnt >= nb_samples) {
362 s->env_cnt++;
363 break;
364 }
365 }
366 }
367 break;
368
369 case RELEASE:
370 for (; s->env_cnt < s->release_length; s->env_cnt++) {
371 for (c = 0; c < channels; c++) {
372 double env;
373 env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
374 buf[s->env_index + c] *= env;
375 }
376
377 s->env_index += channels;
378 if (s->env_index >= s->limiter_buf_size)
379 s->env_index -= s->limiter_buf_size;
380
381 smp_cnt++;
382 if (smp_cnt >= nb_samples) {
383 s->env_cnt++;
384 break;
385 }
386 }
387
388 if (smp_cnt < nb_samples) {
389 s->env_cnt = 0;
390 s->limiter_state = OUT;
391 }
392
393 break;
394 }
395
396 } while (smp_cnt < nb_samples);
397
398 for (n = 0; n < nb_samples; n++) {
399 for (c = 0; c < channels; c++) {
400 out[c] = buf[index + c];
401 if (fabs(out[c]) > ceiling) {
402 out[c] = ceiling * (out[c] < 0 ? -1 : 1);
403 }
404 }
405 out += channels;
406 index += channels;
407 if (index >= s->limiter_buf_size)
408 index -= s->limiter_buf_size;
409 }
410 }
411
412 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
413 {
414 AVFilterContext *ctx = inlink->dst;
415 LoudNormContext *s = ctx->priv;
416 AVFilterLink *outlink = ctx->outputs[0];
417 AVFrame *out;
418 const double *src;
419 double *dst;
420 double *buf;
421 double *limiter_buf;
422 int i, n, c, subframe_length, src_index;
423 double gain, gain_next, env_global, env_shortterm,
424 global, shortterm, lra, relative_threshold;
425
426 if (av_frame_is_writable(in)) {
427 out = in;
428 } else {
429 out = ff_get_audio_buffer(outlink, in->nb_samples);
430 if (!out) {
431 av_frame_free(&in);
432 return AVERROR(ENOMEM);
433 }
434 av_frame_copy_props(out, in);
435 }
436
437 out->pts = s->pts[0];
438 memmove(s->pts, &s->pts[1], (FF_ARRAY_ELEMS(s->pts) - 1) * sizeof(s->pts[0]));
439
440 src = (const double *)in->data[0];
441 dst = (double *)out->data[0];
442 buf = s->buf;
443 limiter_buf = s->limiter_buf;
444
445 ff_ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
446
447 if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
448 double offset, offset_tp, true_peak;
449
450 ff_ebur128_loudness_global(s->r128_in, &global);
451 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
452 double tmp;
453 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
454 if (c == 0 || tmp > true_peak)
455 true_peak = tmp;
456 }
457
458 offset = pow(10., (s->target_i - global) / 20.);
459 offset_tp = true_peak * offset;
460 s->offset = offset_tp < s->target_tp ? offset : s->target_tp / true_peak;
461 s->frame_type = LINEAR_MODE;
462 }
463
464 switch (s->frame_type) {
465 case FIRST_FRAME:
466 for (n = 0; n < in->nb_samples; n++) {
467 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
468 buf[s->buf_index + c] = src[c];
469 }
470 src += inlink->ch_layout.nb_channels;
471 s->buf_index += inlink->ch_layout.nb_channels;
472 }
473
474 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
475
476 if (shortterm < s->measured_thresh) {
477 s->above_threshold = 0;
478 env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
479 } else {
480 s->above_threshold = 1;
481 env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
482 }
483
484 for (n = 0; n < 30; n++)
485 s->delta[n] = pow(10., env_shortterm / 20.);
486 s->prev_delta = s->delta[s->index];
487
488 s->buf_index =
489 s->limiter_buf_index = 0;
490
491 for (n = 0; n < (s->limiter_buf_size / inlink->ch_layout.nb_channels); n++) {
492 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
493 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
494 }
495 s->limiter_buf_index += inlink->ch_layout.nb_channels;
496 if (s->limiter_buf_index >= s->limiter_buf_size)
497 s->limiter_buf_index -= s->limiter_buf_size;
498
499 s->buf_index += inlink->ch_layout.nb_channels;
500 }
501
502 subframe_length = frame_size(inlink->sample_rate, 100);
503 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
504 ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);
505
506 out->nb_samples = subframe_length;
507
508 s->frame_type = INNER_FRAME;
509 break;
510
511 case INNER_FRAME:
512 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
513 gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
514
515 for (n = 0; n < in->nb_samples; n++) {
516 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
517 buf[s->prev_buf_index + c] = src[c];
518 limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
519 }
520 src += inlink->ch_layout.nb_channels;
521
522 s->limiter_buf_index += inlink->ch_layout.nb_channels;
523 if (s->limiter_buf_index >= s->limiter_buf_size)
524 s->limiter_buf_index -= s->limiter_buf_size;
525
526 s->prev_buf_index += inlink->ch_layout.nb_channels;
527 if (s->prev_buf_index >= s->buf_size)
528 s->prev_buf_index -= s->buf_size;
529
530 s->buf_index += inlink->ch_layout.nb_channels;
531 if (s->buf_index >= s->buf_size)
532 s->buf_index -= s->buf_size;
533 }
534
535 subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->ch_layout.nb_channels;
536 s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
537
538 true_peak_limiter(s, dst, in->nb_samples, inlink->ch_layout.nb_channels);
539 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
540
541 ff_ebur128_loudness_range(s->r128_in, &lra);
542 ff_ebur128_loudness_global(s->r128_in, &global);
543 ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
544 ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);
545
546 if (s->above_threshold == 0) {
547 double shortterm_out;
548
549 if (shortterm > s->measured_thresh)
550 s->prev_delta *= 1.0058;
551
552 ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
553 if (shortterm_out >= s->target_i)
554 s->above_threshold = 1;
555 }
556
557 if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
558 s->delta[s->index] = s->prev_delta;
559 } else {
560 env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
561 env_shortterm = s->target_i - shortterm;
562 s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
563 }
564
565 s->prev_delta = s->delta[s->index];
566 s->index++;
567 if (s->index >= 30)
568 s->index -= 30;
569 s->prev_nb_samples = in->nb_samples;
570 break;
571
572 case FINAL_FRAME:
573 gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
574 s->limiter_buf_index = 0;
575 src_index = 0;
576
577 for (n = 0; n < s->limiter_buf_size / inlink->ch_layout.nb_channels; n++) {
578 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
579 s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
580 }
581 src_index += inlink->ch_layout.nb_channels;
582
583 s->limiter_buf_index += inlink->ch_layout.nb_channels;
584 if (s->limiter_buf_index >= s->limiter_buf_size)
585 s->limiter_buf_index -= s->limiter_buf_size;
586 }
587
588 subframe_length = frame_size(inlink->sample_rate, 100);
589 for (i = 0; i < in->nb_samples / subframe_length; i++) {
590 true_peak_limiter(s, dst, subframe_length, inlink->ch_layout.nb_channels);
591
592 for (n = 0; n < subframe_length; n++) {
593 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
594 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels)) {
595 limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
596 } else {
597 limiter_buf[s->limiter_buf_index + c] = 0.;
598 }
599 }
600
601 if (src_index < (in->nb_samples * inlink->ch_layout.nb_channels))
602 src_index += inlink->ch_layout.nb_channels;
603
604 s->limiter_buf_index += inlink->ch_layout.nb_channels;
605 if (s->limiter_buf_index >= s->limiter_buf_size)
606 s->limiter_buf_index -= s->limiter_buf_size;
607 }
608
609 dst += (subframe_length * inlink->ch_layout.nb_channels);
610 }
611
612 dst = (double *)out->data[0];
613 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
614 break;
615
616 case LINEAR_MODE:
617 for (n = 0; n < in->nb_samples; n++) {
618 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
619 dst[c] = src[c] * s->offset;
620 }
621 src += inlink->ch_layout.nb_channels;
622 dst += inlink->ch_layout.nb_channels;
623 }
624
625 dst = (double *)out->data[0];
626 ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
627 break;
628 }
629
630 if (in != out)
631 av_frame_free(&in);
632 return ff_filter_frame(outlink, out);
633 }
634
635 static int flush_frame(AVFilterLink *outlink)
636 {
637 AVFilterContext *ctx = outlink->src;
638 AVFilterLink *inlink = ctx->inputs[0];
639 LoudNormContext *s = ctx->priv;
640 int ret = 0;
641
642 if (s->frame_type == INNER_FRAME) {
643 double *src;
644 double *buf;
645 int nb_samples, n, c, offset;
646 AVFrame *frame;
647
648 nb_samples = (s->buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples;
649 nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
650
651 frame = ff_get_audio_buffer(outlink, nb_samples);
652 if (!frame)
653 return AVERROR(ENOMEM);
654 frame->nb_samples = nb_samples;
655
656 buf = s->buf;
657 src = (double *)frame->data[0];
658
659 offset = ((s->limiter_buf_size / inlink->ch_layout.nb_channels) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
660 offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->ch_layout.nb_channels;
661 s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
662
663 for (n = 0; n < nb_samples; n++) {
664 for (c = 0; c < inlink->ch_layout.nb_channels; c++) {
665 src[c] = buf[s->buf_index + c];
666 }
667 src += inlink->ch_layout.nb_channels;
668 s->buf_index += inlink->ch_layout.nb_channels;
669 if (s->buf_index >= s->buf_size)
670 s->buf_index -= s->buf_size;
671 }
672
673 s->frame_type = FINAL_FRAME;
674 ret = filter_frame(inlink, frame);
675 }
676 return ret;
677 }
678
679 static int activate(AVFilterContext *ctx)
680 {
681 AVFilterLink *inlink = ctx->inputs[0];
682 AVFilterLink *outlink = ctx->outputs[0];
683 LoudNormContext *s = ctx->priv;
684 AVFrame *in = NULL;
685 int ret = 0, status;
686 int64_t pts;
687
688 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
689
690 if (s->frame_type != LINEAR_MODE) {
691 int nb_samples;
692
693 if (s->frame_type == FIRST_FRAME) {
694 nb_samples = frame_size(inlink->sample_rate, 3000);
695 } else {
696 nb_samples = frame_size(inlink->sample_rate, 100);
697 }
698
699 ret = ff_inlink_consume_samples(inlink, nb_samples, nb_samples, &in);
700 } else {
701 ret = ff_inlink_consume_frame(inlink, &in);
702 }
703
704 if (ret < 0)
705 return ret;
706 if (ret > 0) {
707 if (s->frame_type == FIRST_FRAME) {
708 const int nb_samples = frame_size(inlink->sample_rate, 100);
709
710 for (int i = 0; i < FF_ARRAY_ELEMS(s->pts); i++)
711 s->pts[i] = in->pts + i * nb_samples;
712 } else if (s->frame_type == LINEAR_MODE) {
713 s->pts[0] = in->pts;
714 } else {
715 s->pts[FF_ARRAY_ELEMS(s->pts) - 1] = in->pts;
716 }
717 ret = filter_frame(inlink, in);
718 }
719 if (ret < 0)
720 return ret;
721
722 if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
723 ff_outlink_set_status(outlink, status, pts);
724 return flush_frame(outlink);
725 }
726
727 FF_FILTER_FORWARD_WANTED(outlink, inlink);
728
729 return FFERROR_NOT_READY;
730 }
731
732 static int query_formats(AVFilterContext *ctx)
733 {
734 LoudNormContext *s = ctx->priv;
735 static const int input_srate[] = {192000, -1};
736 static const enum AVSampleFormat sample_fmts[] = {
737 AV_SAMPLE_FMT_DBL,
738 AV_SAMPLE_FMT_NONE
739 };
740 int ret = ff_set_common_all_channel_counts(ctx);
741 if (ret < 0)
742 return ret;
743
744 ret = ff_set_common_formats_from_list(ctx, sample_fmts);
745 if (ret < 0)
746 return ret;
747
748 if (s->frame_type == LINEAR_MODE) {
749 return ff_set_common_all_samplerates(ctx);
750 } else {
751 return ff_set_common_samplerates_from_list(ctx, input_srate);
752 }
753 }
754
755 static int config_input(AVFilterLink *inlink)
756 {
757 AVFilterContext *ctx = inlink->dst;
758 LoudNormContext *s = ctx->priv;
759
760 s->r128_in = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
761 if (!s->r128_in)
762 return AVERROR(ENOMEM);
763
764 s->r128_out = ff_ebur128_init(inlink->ch_layout.nb_channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
765 if (!s->r128_out)
766 return AVERROR(ENOMEM);
767
768 if (inlink->ch_layout.nb_channels == 1 && s->dual_mono) {
769 ff_ebur128_set_channel(s->r128_in, 0, FF_EBUR128_DUAL_MONO);
770 ff_ebur128_set_channel(s->r128_out, 0, FF_EBUR128_DUAL_MONO);
771 }
772
773 s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->ch_layout.nb_channels;
774 s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
775 if (!s->buf)
776 return AVERROR(ENOMEM);
777
778 s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->ch_layout.nb_channels;
779 s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
780 if (!s->limiter_buf)
781 return AVERROR(ENOMEM);
782
783 s->prev_smp = av_malloc_array(inlink->ch_layout.nb_channels, sizeof(*s->prev_smp));
784 if (!s->prev_smp)
785 return AVERROR(ENOMEM);
786
787 init_gaussian_filter(s);
788
789 s->buf_index =
790 s->prev_buf_index =
791 s->limiter_buf_index = 0;
792 s->channels = inlink->ch_layout.nb_channels;
793 s->index = 1;
794 s->limiter_state = OUT;
795 s->offset = pow(10., s->offset / 20.);
796 s->target_tp = pow(10., s->target_tp / 20.);
797 s->attack_length = frame_size(inlink->sample_rate, 10);
798 s->release_length = frame_size(inlink->sample_rate, 100);
799
800 return 0;
801 }
802
803 static av_cold int init(AVFilterContext *ctx)
804 {
805 LoudNormContext *s = ctx->priv;
806 s->frame_type = FIRST_FRAME;
807
808 if (s->linear) {
809 double offset, offset_tp;
810 offset = s->target_i - s->measured_i;
811 offset_tp = s->measured_tp + offset;
812
813 if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
814 if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
815 s->frame_type = LINEAR_MODE;
816 s->offset = offset;
817 }
818 }
819 }
820
821 return 0;
822 }
823
824 static av_cold void uninit(AVFilterContext *ctx)
825 {
826 LoudNormContext *s = ctx->priv;
827 double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
828 int c;
829
830 if (!s->r128_in || !s->r128_out)
831 goto end;
832
833 ff_ebur128_loudness_range(s->r128_in, &lra_in);
834 ff_ebur128_loudness_global(s->r128_in, &i_in);
835 ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
836 for (c = 0; c < s->channels; c++) {
837 double tmp;
838 ff_ebur128_sample_peak(s->r128_in, c, &tmp);
839 if ((c == 0) || (tmp > tp_in))
840 tp_in = tmp;
841 }
842
843 ff_ebur128_loudness_range(s->r128_out, &lra_out);
844 ff_ebur128_loudness_global(s->r128_out, &i_out);
845 ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
846 for (c = 0; c < s->channels; c++) {
847 double tmp;
848 ff_ebur128_sample_peak(s->r128_out, c, &tmp);
849 if ((c == 0) || (tmp > tp_out))
850 tp_out = tmp;
851 }
852
853 switch(s->print_format) {
854 case NONE:
855 break;
856
857 case JSON:
858 av_log(ctx, AV_LOG_INFO,
859 "\n{\n"
860 "\t\"input_i\" : \"%.2f\",\n"
861 "\t\"input_tp\" : \"%.2f\",\n"
862 "\t\"input_lra\" : \"%.2f\",\n"
863 "\t\"input_thresh\" : \"%.2f\",\n"
864 "\t\"output_i\" : \"%.2f\",\n"
865 "\t\"output_tp\" : \"%+.2f\",\n"
866 "\t\"output_lra\" : \"%.2f\",\n"
867 "\t\"output_thresh\" : \"%.2f\",\n"
868 "\t\"normalization_type\" : \"%s\",\n"
869 "\t\"target_offset\" : \"%.2f\"\n"
870 "}\n",
871 i_in,
872 20. * log10(tp_in),
873 lra_in,
874 thresh_in,
875 i_out,
876 20. * log10(tp_out),
877 lra_out,
878 thresh_out,
879 s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
880 s->target_i - i_out
881 );
882 break;
883
884 case SUMMARY:
885 av_log(ctx, AV_LOG_INFO,
886 "\n"
887 "Input Integrated: %+6.1f LUFS\n"
888 "Input True Peak: %+6.1f dBTP\n"
889 "Input LRA: %6.1f LU\n"
890 "Input Threshold: %+6.1f LUFS\n"
891 "\n"
892 "Output Integrated: %+6.1f LUFS\n"
893 "Output True Peak: %+6.1f dBTP\n"
894 "Output LRA: %6.1f LU\n"
895 "Output Threshold: %+6.1f LUFS\n"
896 "\n"
897 "Normalization Type: %s\n"
898 "Target Offset: %+6.1f LU\n",
899 i_in,
900 20. * log10(tp_in),
901 lra_in,
902 thresh_in,
903 i_out,
904 20. * log10(tp_out),
905 lra_out,
906 thresh_out,
907 s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
908 s->target_i - i_out
909 );
910 break;
911 }
912
913 end:
914 if (s->r128_in)
915 ff_ebur128_destroy(&s->r128_in);
916 if (s->r128_out)
917 ff_ebur128_destroy(&s->r128_out);
918 av_freep(&s->limiter_buf);
919 av_freep(&s->prev_smp);
920 av_freep(&s->buf);
921 }
922
923 static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
924 {
925 .name = "default",
926 .type = AVMEDIA_TYPE_AUDIO,
927 .config_props = config_input,
928 },
929 };
930
931 const AVFilter ff_af_loudnorm = {
932 .name = "loudnorm",
933 .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
934 .priv_size = sizeof(LoudNormContext),
935 .priv_class = &loudnorm_class,
936 .init = init,
937 .activate = activate,
938 .uninit = uninit,
939 FILTER_INPUTS(avfilter_af_loudnorm_inputs),
940 FILTER_OUTPUTS(ff_audio_default_filterpad),
941 FILTER_QUERY_FUNC(query_formats),
942 };
943