FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/af_dialoguenhance.c
Date: 2024-04-19 17:50:32
Exec Total Coverage
Lines: 0 86 0.0%
Functions: 0 5 0.0%
Branches: 0 47 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2022 Paul B Mahol
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public License
8 * as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/channel_layout.h"
22 #include "libavutil/mem.h"
23 #include "libavutil/opt.h"
24 #include "libavutil/tx.h"
25 #include "audio.h"
26 #include "avfilter.h"
27 #include "filters.h"
28 #include "formats.h"
29 #include "internal.h"
30
31 #include <float.h>
32
33 typedef struct AudioDialogueEnhancementContext {
34 const AVClass *class;
35
36 double original, enhance, voice;
37
38 int fft_size;
39 int overlap;
40
41 void *window;
42 float *window_float;
43 double *window_double;
44 float prev_vad_float;
45 double prev_vad_double;
46
47 AVFrame *in;
48 AVFrame *in_frame;
49 AVFrame *out_dist_frame;
50 AVFrame *windowed_frame;
51 AVFrame *windowed_out;
52 AVFrame *windowed_prev;
53 AVFrame *center_frame;
54
55 int (*de_stereo)(AVFilterContext *ctx, AVFrame *out);
56
57 AVTXContext *tx_ctx[2], *itx_ctx;
58 av_tx_fn tx_fn, itx_fn;
59 } AudioDialogueEnhanceContext;
60
/* Byte offset of option field x inside the filter's private context. */
#define OFFSET(x) offsetof(AudioDialogueEnhanceContext, x)
/*
 * Option flags shared by every entry of the option table.
 * The expansion is parenthesized so it always behaves as one expression,
 * whatever operators surround the macro at the point of use.
 */
#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
63
64 static const AVOption dialoguenhance_options[] = {
65 { "original", "set original center factor", OFFSET(original), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 1, FLAGS },
66 { "enhance", "set dialogue enhance factor",OFFSET(enhance), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 3, FLAGS },
67 { "voice", "set voice detection factor", OFFSET(voice), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 2,32, FLAGS },
68 {NULL}
69 };
70
71 AVFILTER_DEFINE_CLASS(dialoguenhance);
72
73 static int query_formats(AVFilterContext *ctx)
74 {
75 AVFilterFormats *formats = NULL;
76 AVFilterChannelLayouts *in_layout = NULL, *out_layout = NULL;
77 int ret;
78
79 if ((ret = ff_add_format (&formats, AV_SAMPLE_FMT_FLTP )) < 0 ||
80 (ret = ff_add_format (&formats, AV_SAMPLE_FMT_DBLP )) < 0 ||
81 (ret = ff_set_common_formats (ctx , formats )) < 0 ||
82 (ret = ff_add_channel_layout (&in_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)) < 0 ||
83 (ret = ff_channel_layouts_ref(in_layout, &ctx->inputs[0]->outcfg.channel_layouts)) < 0 ||
84 (ret = ff_add_channel_layout (&out_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_SURROUND)) < 0 ||
85 (ret = ff_channel_layouts_ref(out_layout, &ctx->outputs[0]->incfg.channel_layouts)) < 0)
86 return ret;
87
88 return ff_set_common_all_samplerates(ctx);
89 }
90
91 #define DEPTH 32
92 #include "dialoguenhance_template.c"
93
94 #undef DEPTH
95 #define DEPTH 64
96 #include "dialoguenhance_template.c"
97
98 static int config_input(AVFilterLink *inlink)
99 {
100 AVFilterContext *ctx = inlink->dst;
101 AudioDialogueEnhanceContext *s = ctx->priv;
102 int ret;
103
104 s->fft_size = inlink->sample_rate > 100000 ? 8192 : inlink->sample_rate > 50000 ? 4096 : 2048;
105 s->overlap = s->fft_size / 4;
106
107 s->in_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
108 s->center_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
109 s->out_dist_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
110 s->windowed_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
111 s->windowed_out = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
112 s->windowed_prev = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
113 if (!s->in_frame || !s->windowed_out || !s->windowed_prev ||
114 !s->out_dist_frame || !s->windowed_frame || !s->center_frame)
115 return AVERROR(ENOMEM);
116
117 switch (inlink->format) {
118 case AV_SAMPLE_FMT_FLTP:
119 s->de_stereo = de_stereo_float;
120 ret = de_tx_init_float(ctx);
121 break;
122 case AV_SAMPLE_FMT_DBLP:
123 s->de_stereo = de_stereo_double;
124 ret = de_tx_init_double(ctx);
125 break;
126 }
127
128 return ret;
129 }
130
131 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
132 {
133 AVFilterContext *ctx = inlink->dst;
134 AVFilterLink *outlink = ctx->outputs[0];
135 AudioDialogueEnhanceContext *s = ctx->priv;
136 AVFrame *out;
137 int ret;
138
139 out = ff_get_audio_buffer(outlink, s->overlap);
140 if (!out) {
141 ret = AVERROR(ENOMEM);
142 goto fail;
143 }
144
145 s->in = in;
146 s->de_stereo(ctx, out);
147
148 av_frame_copy_props(out, in);
149 out->nb_samples = in->nb_samples;
150 ret = ff_filter_frame(outlink, out);
151 fail:
152 av_frame_free(&in);
153 s->in = NULL;
154 return ret < 0 ? ret : 0;
155 }
156
157 static int activate(AVFilterContext *ctx)
158 {
159 AVFilterLink *inlink = ctx->inputs[0];
160 AVFilterLink *outlink = ctx->outputs[0];
161 AudioDialogueEnhanceContext *s = ctx->priv;
162 AVFrame *in = NULL;
163 int ret = 0, status;
164 int64_t pts;
165
166 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
167
168 ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in);
169 if (ret < 0)
170 return ret;
171
172 if (ret > 0) {
173 return filter_frame(inlink, in);
174 } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
175 ff_outlink_set_status(outlink, status, pts);
176 return 0;
177 } else {
178 if (ff_inlink_queued_samples(inlink) >= s->overlap) {
179 ff_filter_set_ready(ctx, 10);
180 } else if (ff_outlink_frame_wanted(outlink)) {
181 ff_inlink_request_frame(inlink);
182 }
183 return 0;
184 }
185 }
186
187 static av_cold void uninit(AVFilterContext *ctx)
188 {
189 AudioDialogueEnhanceContext *s = ctx->priv;
190
191 av_freep(&s->window);
192
193 av_frame_free(&s->in_frame);
194 av_frame_free(&s->center_frame);
195 av_frame_free(&s->out_dist_frame);
196 av_frame_free(&s->windowed_frame);
197 av_frame_free(&s->windowed_out);
198 av_frame_free(&s->windowed_prev);
199
200 av_tx_uninit(&s->tx_ctx[0]);
201 av_tx_uninit(&s->tx_ctx[1]);
202 av_tx_uninit(&s->itx_ctx);
203 }
204
205 static const AVFilterPad inputs[] = {
206 {
207 .name = "default",
208 .type = AVMEDIA_TYPE_AUDIO,
209 .config_props = config_input,
210 },
211 };
212
213 const AVFilter ff_af_dialoguenhance = {
214 .name = "dialoguenhance",
215 .description = NULL_IF_CONFIG_SMALL("Audio Dialogue Enhancement."),
216 .priv_size = sizeof(AudioDialogueEnhanceContext),
217 .priv_class = &dialoguenhance_class,
218 .uninit = uninit,
219 FILTER_INPUTS(inputs),
220 FILTER_OUTPUTS(ff_audio_default_filterpad),
221 FILTER_QUERY_FUNC(query_formats),
222 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
223 .activate = activate,
224 .process_command = ff_filter_process_command,
225 };
226