| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2022 Paul B Mahol | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public License | ||
| 8 | * as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/channel_layout.h" | ||
| 22 | #include "libavutil/mem.h" | ||
| 23 | #include "libavutil/opt.h" | ||
| 24 | #include "libavutil/tx.h" | ||
| 25 | #include "audio.h" | ||
| 26 | #include "avfilter.h" | ||
| 27 | #include "filters.h" | ||
| 28 | #include "formats.h" | ||
| 29 | |||
| 30 | #include <float.h> | ||
| 31 | |||
| 32 | typedef struct AudioDialogueEnhancementContext { | ||
| 33 | const AVClass *class; | ||
| 34 | |||
| 35 | double original, enhance, voice; | ||
| 36 | |||
| 37 | int fft_size; | ||
| 38 | int overlap; | ||
| 39 | |||
| 40 | void *window; | ||
| 41 | float *window_float; | ||
| 42 | double *window_double; | ||
| 43 | float prev_vad_float; | ||
| 44 | double prev_vad_double; | ||
| 45 | |||
| 46 | AVFrame *in; | ||
| 47 | AVFrame *in_frame; | ||
| 48 | AVFrame *out_dist_frame; | ||
| 49 | AVFrame *windowed_frame; | ||
| 50 | AVFrame *windowed_out; | ||
| 51 | AVFrame *windowed_prev; | ||
| 52 | AVFrame *center_frame; | ||
| 53 | |||
| 54 | int (*de_stereo)(AVFilterContext *ctx, AVFrame *out); | ||
| 55 | |||
| 56 | AVTXContext *tx_ctx[2], *itx_ctx; | ||
| 57 | av_tx_fn tx_fn, itx_fn; | ||
| 58 | } AudioDialogueEnhanceContext; | ||
| 59 | |||
| 60 | #define OFFSET(x) offsetof(AudioDialogueEnhanceContext, x) | ||
| 61 | #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM | ||
| 62 | |||
| 63 | static const AVOption dialoguenhance_options[] = { | ||
| 64 | { "original", "set original center factor", OFFSET(original), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 1, FLAGS }, | ||
| 65 | { "enhance", "set dialogue enhance factor",OFFSET(enhance), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 3, FLAGS }, | ||
| 66 | { "voice", "set voice detection factor", OFFSET(voice), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 2,32, FLAGS }, | ||
| 67 | {NULL} | ||
| 68 | }; | ||
| 69 | |||
| 70 | AVFILTER_DEFINE_CLASS(dialoguenhance); | ||
| 71 | |||
| 72 | ✗ | static int query_formats(const AVFilterContext *ctx, | |
| 73 | AVFilterFormatsConfig **cfg_in, | ||
| 74 | AVFilterFormatsConfig **cfg_out) | ||
| 75 | { | ||
| 76 | static const enum AVSampleFormat formats[] = { | ||
| 77 | AV_SAMPLE_FMT_FLTP, | ||
| 78 | AV_SAMPLE_FMT_DBLP, | ||
| 79 | AV_SAMPLE_FMT_NONE, | ||
| 80 | }; | ||
| 81 | |||
| 82 | ✗ | AVFilterChannelLayouts *in_layout = NULL, *out_layout = NULL; | |
| 83 | int ret; | ||
| 84 | |||
| 85 | ✗ | ret = ff_set_sample_formats_from_list2(ctx, cfg_in, cfg_out, formats); | |
| 86 | ✗ | if (ret < 0) | |
| 87 | ✗ | return ret; | |
| 88 | |||
| 89 | ✗ | if ((ret = ff_add_channel_layout (&in_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)) < 0 || | |
| 90 | ✗ | (ret = ff_channel_layouts_ref(in_layout, &cfg_in[0]->channel_layouts)) < 0 || | |
| 91 | ✗ | (ret = ff_add_channel_layout (&out_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_SURROUND)) < 0 || | |
| 92 | ✗ | (ret = ff_channel_layouts_ref(out_layout, &cfg_out[0]->channel_layouts)) < 0) | |
| 93 | ✗ | return ret; | |
| 94 | |||
| 95 | ✗ | return 0; | |
| 96 | } | ||
| 97 | |||
| 98 | #define DEPTH 32 | ||
| 99 | #include "dialoguenhance_template.c" | ||
| 100 | |||
| 101 | #undef DEPTH | ||
| 102 | #define DEPTH 64 | ||
| 103 | #include "dialoguenhance_template.c" | ||
| 104 | |||
| 105 | ✗ | static int config_input(AVFilterLink *inlink) | |
| 106 | { | ||
| 107 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 108 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 109 | int ret; | ||
| 110 | |||
| 111 | ✗ | s->fft_size = inlink->sample_rate > 100000 ? 8192 : inlink->sample_rate > 50000 ? 4096 : 2048; | |
| 112 | ✗ | s->overlap = s->fft_size / 4; | |
| 113 | |||
| 114 | ✗ | s->in_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 115 | ✗ | s->center_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 116 | ✗ | s->out_dist_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 117 | ✗ | s->windowed_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 118 | ✗ | s->windowed_out = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 119 | ✗ | s->windowed_prev = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2); | |
| 120 | ✗ | if (!s->in_frame || !s->windowed_out || !s->windowed_prev || | |
| 121 | ✗ | !s->out_dist_frame || !s->windowed_frame || !s->center_frame) | |
| 122 | ✗ | return AVERROR(ENOMEM); | |
| 123 | |||
| 124 | ✗ | switch (inlink->format) { | |
| 125 | ✗ | case AV_SAMPLE_FMT_FLTP: | |
| 126 | ✗ | s->de_stereo = de_stereo_float; | |
| 127 | ✗ | ret = de_tx_init_float(ctx); | |
| 128 | ✗ | break; | |
| 129 | ✗ | case AV_SAMPLE_FMT_DBLP: | |
| 130 | ✗ | s->de_stereo = de_stereo_double; | |
| 131 | ✗ | ret = de_tx_init_double(ctx); | |
| 132 | ✗ | break; | |
| 133 | } | ||
| 134 | |||
| 135 | ✗ | return ret; | |
| 136 | } | ||
| 137 | |||
| 138 | ✗ | static int filter_frame(AVFilterLink *inlink, AVFrame *in) | |
| 139 | { | ||
| 140 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 141 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 142 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 143 | AVFrame *out; | ||
| 144 | int ret; | ||
| 145 | |||
| 146 | ✗ | out = ff_get_audio_buffer(outlink, s->overlap); | |
| 147 | ✗ | if (!out) { | |
| 148 | ✗ | ret = AVERROR(ENOMEM); | |
| 149 | ✗ | goto fail; | |
| 150 | } | ||
| 151 | |||
| 152 | ✗ | s->in = in; | |
| 153 | ✗ | s->de_stereo(ctx, out); | |
| 154 | |||
| 155 | ✗ | av_frame_copy_props(out, in); | |
| 156 | ✗ | out->nb_samples = in->nb_samples; | |
| 157 | ✗ | ret = ff_filter_frame(outlink, out); | |
| 158 | ✗ | fail: | |
| 159 | ✗ | av_frame_free(&in); | |
| 160 | ✗ | s->in = NULL; | |
| 161 | ✗ | return ret < 0 ? ret : 0; | |
| 162 | } | ||
| 163 | |||
| 164 | ✗ | static int activate(AVFilterContext *ctx) | |
| 165 | { | ||
| 166 | ✗ | AVFilterLink *inlink = ctx->inputs[0]; | |
| 167 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 168 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 169 | ✗ | AVFrame *in = NULL; | |
| 170 | ✗ | int ret = 0, status; | |
| 171 | int64_t pts; | ||
| 172 | |||
| 173 | ✗ | FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); | |
| 174 | |||
| 175 | ✗ | ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in); | |
| 176 | ✗ | if (ret < 0) | |
| 177 | ✗ | return ret; | |
| 178 | |||
| 179 | ✗ | if (ret > 0) { | |
| 180 | ✗ | return filter_frame(inlink, in); | |
| 181 | ✗ | } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { | |
| 182 | ✗ | ff_outlink_set_status(outlink, status, pts); | |
| 183 | ✗ | return 0; | |
| 184 | } else { | ||
| 185 | ✗ | if (ff_inlink_queued_samples(inlink) >= s->overlap) { | |
| 186 | ✗ | ff_filter_set_ready(ctx, 10); | |
| 187 | ✗ | } else if (ff_outlink_frame_wanted(outlink)) { | |
| 188 | ✗ | ff_inlink_request_frame(inlink); | |
| 189 | } | ||
| 190 | ✗ | return 0; | |
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 195 | { | ||
| 196 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 197 | |||
| 198 | ✗ | av_freep(&s->window); | |
| 199 | |||
| 200 | ✗ | av_frame_free(&s->in_frame); | |
| 201 | ✗ | av_frame_free(&s->center_frame); | |
| 202 | ✗ | av_frame_free(&s->out_dist_frame); | |
| 203 | ✗ | av_frame_free(&s->windowed_frame); | |
| 204 | ✗ | av_frame_free(&s->windowed_out); | |
| 205 | ✗ | av_frame_free(&s->windowed_prev); | |
| 206 | |||
| 207 | ✗ | av_tx_uninit(&s->tx_ctx[0]); | |
| 208 | ✗ | av_tx_uninit(&s->tx_ctx[1]); | |
| 209 | ✗ | av_tx_uninit(&s->itx_ctx); | |
| 210 | ✗ | } | |
| 211 | |||
| 212 | static const AVFilterPad inputs[] = { | ||
| 213 | { | ||
| 214 | .name = "default", | ||
| 215 | .type = AVMEDIA_TYPE_AUDIO, | ||
| 216 | .config_props = config_input, | ||
| 217 | }, | ||
| 218 | }; | ||
| 219 | |||
| 220 | const FFFilter ff_af_dialoguenhance = { | ||
| 221 | .p.name = "dialoguenhance", | ||
| 222 | .p.description = NULL_IF_CONFIG_SMALL("Audio Dialogue Enhancement."), | ||
| 223 | .p.priv_class = &dialoguenhance_class, | ||
| 224 | .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, | ||
| 225 | .priv_size = sizeof(AudioDialogueEnhanceContext), | ||
| 226 | .uninit = uninit, | ||
| 227 | FILTER_INPUTS(inputs), | ||
| 228 | FILTER_OUTPUTS(ff_audio_default_filterpad), | ||
| 229 | FILTER_QUERY_FUNC2(query_formats), | ||
| 230 | .activate = activate, | ||
| 231 | .process_command = ff_filter_process_command, | ||
| 232 | }; | ||
| 233 |