| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * This file is part of FFmpeg. | ||
| 3 | * | ||
| 4 | * FFmpeg is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU Lesser General Public | ||
| 6 | * License as published by the Free Software Foundation; either | ||
| 7 | * version 2.1 of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 12 | * Lesser General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU Lesser General Public | ||
| 15 | * License along with FFmpeg; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "libavutil/mem.h" | ||
| 20 | #include "libavutil/tx.h" | ||
| 21 | #include "avfilter.h" | ||
| 22 | #include "audio.h" | ||
| 23 | |||
/*
 * Precision-dependent template parameters.
 *
 * Every name is #undef'd first so that any earlier definition is discarded
 * before the DEPTH-specific one is installed. DEPTH == 32 selects the
 * single-precision set; any other value selects the double-precision set.
 */

/* sample and transform types */
#undef SAMPLE_FORMAT
#undef ftype
#undef ctype
#undef TX_TYPE
/* numeric constants */
#undef ZERO
#undef HALF
#undef ONE
#undef EPSILON
/* math helpers */
#undef SQRT
#undef HYPOT
#undef SIN
#undef CLIP

#if DEPTH == 32
/* single precision */
#define SAMPLE_FORMAT float
#define ftype float
#define ctype AVComplexFloat
#define TX_TYPE AV_TX_FLOAT_RDFT
#define ZERO 0.f
#define HALF 0.5f
#define ONE 1.f
#define EPSILON FLT_EPSILON
#define SQRT sqrtf
#define HYPOT hypotf
#define SIN sinf
#define CLIP av_clipf
#else
/* double precision */
#define SAMPLE_FORMAT double
#define ftype double
#define ctype AVComplexDouble
#define TX_TYPE AV_TX_DOUBLE_RDFT
#define ZERO 0.0
#define HALF 0.5
#define ONE 1.0
#define EPSILON DBL_EPSILON
#define SQRT sqrt
#define HYPOT hypot
#define SIN sin
#define CLIP av_clipd
#endif

/*
 * fn(name) pastes "_<SAMPLE_FORMAT>" onto the last token of its argument,
 * e.g. fn(sqr) becomes sqr_float or sqr_double. The two-level indirection
 * makes sure SAMPLE_FORMAT is expanded before the tokens are pasted.
 */
#define fn3(a,b) a##_##b
#define fn2(a,b) fn3(a,b)
#define fn(a) fn2(a, SAMPLE_FORMAT)
| 67 | |||
| 68 | ✗ | static int fn(de_tx_init)(AVFilterContext *ctx) | |
| 69 | { | ||
| 70 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 71 | ✗ | ftype scale = ONE, iscale = ONE / (s->fft_size * 1.5f); | |
| 72 | int ret; | ||
| 73 | |||
| 74 | ✗ | s->window = av_calloc(s->fft_size, sizeof(ftype)); | |
| 75 | ✗ | if (!s->window) | |
| 76 | ✗ | return AVERROR(ENOMEM); | |
| 77 | ✗ | fn(s->window) = s->window; | |
| 78 | ✗ | for (int n = 0; n < s->fft_size; n++) | |
| 79 | ✗ | fn(s->window)[n] = SIN(M_PI*n/(s->fft_size-1)); | |
| 80 | |||
| 81 | ✗ | ret = av_tx_init(&s->tx_ctx[0], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0); | |
| 82 | ✗ | if (ret < 0) | |
| 83 | ✗ | return ret; | |
| 84 | |||
| 85 | ✗ | ret = av_tx_init(&s->tx_ctx[1], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0); | |
| 86 | ✗ | if (ret < 0) | |
| 87 | ✗ | return ret; | |
| 88 | |||
| 89 | ✗ | ret = av_tx_init(&s->itx_ctx, &s->itx_fn, TX_TYPE, 1, s->fft_size, &iscale, 0); | |
| 90 | ✗ | if (ret < 0) | |
| 91 | ✗ | return ret; | |
| 92 | |||
| 93 | ✗ | return 0; | |
| 94 | } | ||
| 95 | |||
| 96 | ✗ | static void fn(apply_window)(AudioDialogueEnhanceContext *s, | |
| 97 | const ftype *in_frame, ftype *out_frame, const int add_to_out_frame) | ||
| 98 | { | ||
| 99 | ✗ | const ftype *window = fn(s->window); | |
| 100 | ✗ | const int fft_size = s->fft_size; | |
| 101 | |||
| 102 | ✗ | if (add_to_out_frame) { | |
| 103 | ✗ | for (int i = 0; i < fft_size; i++) | |
| 104 | ✗ | out_frame[i] += in_frame[i] * window[i]; | |
| 105 | } else { | ||
| 106 | ✗ | for (int i = 0; i < fft_size; i++) | |
| 107 | ✗ | out_frame[i] = in_frame[i] * window[i]; | |
| 108 | } | ||
| 109 | ✗ | } | |
| 110 | |||
| 111 | ✗ | static ftype fn(sqr)(ftype x) | |
| 112 | { | ||
| 113 | ✗ | return x * x; | |
| 114 | } | ||
| 115 | |||
| 116 | ✗ | static void fn(get_centere)(ctype *left, ctype *right, | |
| 117 | ctype *center, int N) | ||
| 118 | { | ||
| 119 | ✗ | for (int i = 0; i < N; i++) { | |
| 120 | ✗ | const ftype l_re = left[i].re; | |
| 121 | ✗ | const ftype l_im = left[i].im; | |
| 122 | ✗ | const ftype r_re = right[i].re; | |
| 123 | ✗ | const ftype r_im = right[i].im; | |
| 124 | ✗ | const ftype a = HALF * (ONE - SQRT((fn(sqr)(l_re - r_re) + fn(sqr)(l_im - r_im))/ | |
| 125 | ✗ | (fn(sqr)(l_re + r_re) + fn(sqr)(l_im + r_im) + EPSILON))); | |
| 126 | |||
| 127 | ✗ | center[i].re = a * (l_re + r_re); | |
| 128 | ✗ | center[i].im = a * (l_im + r_im); | |
| 129 | } | ||
| 130 | ✗ | } | |
| 131 | |||
| 132 | ✗ | static ftype fn(flux)(ftype *curf, ftype *prevf, int N) | |
| 133 | { | ||
| 134 | ✗ | ctype *cur = (ctype *)curf; | |
| 135 | ✗ | ctype *prev = (ctype *)prevf; | |
| 136 | ✗ | ftype sum = ZERO; | |
| 137 | |||
| 138 | ✗ | for (int i = 0; i < N; i++) { | |
| 139 | ✗ | ftype c_re = cur[i].re; | |
| 140 | ✗ | ftype c_im = cur[i].im; | |
| 141 | ✗ | ftype p_re = prev[i].re; | |
| 142 | ✗ | ftype p_im = prev[i].im; | |
| 143 | |||
| 144 | ✗ | sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im)); | |
| 145 | } | ||
| 146 | |||
| 147 | ✗ | return sum; | |
| 148 | } | ||
| 149 | |||
| 150 | ✗ | static ftype fn(fluxlr)(ftype *lf, ftype *lpf, | |
| 151 | ftype *rf, ftype *rpf, | ||
| 152 | int N) | ||
| 153 | { | ||
| 154 | ✗ | ctype *l = (ctype *)lf; | |
| 155 | ✗ | ctype *lp = (ctype *)lpf; | |
| 156 | ✗ | ctype *r = (ctype *)rf; | |
| 157 | ✗ | ctype *rp = (ctype *)rpf; | |
| 158 | ✗ | ftype sum = ZERO; | |
| 159 | |||
| 160 | ✗ | for (int i = 0; i < N; i++) { | |
| 161 | ✗ | ftype c_re = l[i].re - r[i].re; | |
| 162 | ✗ | ftype c_im = l[i].im - r[i].im; | |
| 163 | ✗ | ftype p_re = lp[i].re - rp[i].re; | |
| 164 | ✗ | ftype p_im = lp[i].im - rp[i].im; | |
| 165 | |||
| 166 | ✗ | sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im)); | |
| 167 | } | ||
| 168 | |||
| 169 | ✗ | return sum; | |
| 170 | } | ||
| 171 | |||
| 172 | ✗ | static ftype fn(calc_vad)(ftype fc, ftype flr, ftype a) | |
| 173 | { | ||
| 174 | ✗ | const ftype vad = a * (fc / (fc + flr) - HALF); | |
| 175 | |||
| 176 | ✗ | return CLIP(vad, ZERO, ONE); | |
| 177 | } | ||
| 178 | |||
| 179 | ✗ | static void fn(get_final)(ftype *c, ftype *l, | |
| 180 | ftype *r, ftype vad, int N, | ||
| 181 | ftype original, ftype enhance) | ||
| 182 | { | ||
| 183 | ✗ | ctype *center = (ctype *)c; | |
| 184 | ✗ | ctype *left = (ctype *)l; | |
| 185 | ✗ | ctype *right = (ctype *)r; | |
| 186 | |||
| 187 | ✗ | for (int i = 0; i < N; i++) { | |
| 188 | ✗ | ftype cP = fn(sqr)(center[i].re) + fn(sqr)(center[i].im); | |
| 189 | ✗ | ftype lrP = fn(sqr)(left[i].re - right[i].re) + fn(sqr)(left[i].im - right[i].im); | |
| 190 | ✗ | ftype G = cP / (cP + lrP + EPSILON); | |
| 191 | ftype re, im; | ||
| 192 | |||
| 193 | ✗ | re = center[i].re * (original + vad * G * enhance); | |
| 194 | ✗ | im = center[i].im * (original + vad * G * enhance); | |
| 195 | |||
| 196 | ✗ | center[i].re = re; | |
| 197 | ✗ | center[i].im = im; | |
| 198 | } | ||
| 199 | ✗ | } | |
| 200 | |||
| 201 | ✗ | static int fn(de_stereo)(AVFilterContext *ctx, AVFrame *out) | |
| 202 | { | ||
| 203 | ✗ | AudioDialogueEnhanceContext *s = ctx->priv; | |
| 204 | ✗ | ftype *center = (ftype *)s->center_frame->extended_data[0]; | |
| 205 | ✗ | ftype *center_prev = (ftype *)s->center_frame->extended_data[1]; | |
| 206 | ✗ | ftype *left_in = (ftype *)s->in_frame->extended_data[0]; | |
| 207 | ✗ | ftype *right_in = (ftype *)s->in_frame->extended_data[1]; | |
| 208 | ✗ | ftype *left_out = (ftype *)s->out_dist_frame->extended_data[0]; | |
| 209 | ✗ | ftype *right_out = (ftype *)s->out_dist_frame->extended_data[1]; | |
| 210 | ✗ | ftype *left_samples = (ftype *)s->in->extended_data[0]; | |
| 211 | ✗ | ftype *right_samples = (ftype *)s->in->extended_data[1]; | |
| 212 | ✗ | ftype *windowed_left = (ftype *)s->windowed_frame->extended_data[0]; | |
| 213 | ✗ | ftype *windowed_right = (ftype *)s->windowed_frame->extended_data[1]; | |
| 214 | ✗ | ftype *windowed_oleft = (ftype *)s->windowed_out->extended_data[0]; | |
| 215 | ✗ | ftype *windowed_oright = (ftype *)s->windowed_out->extended_data[1]; | |
| 216 | ✗ | ftype *windowed_pleft = (ftype *)s->windowed_prev->extended_data[0]; | |
| 217 | ✗ | ftype *windowed_pright = (ftype *)s->windowed_prev->extended_data[1]; | |
| 218 | ✗ | ftype *left_osamples = (ftype *)out->extended_data[0]; | |
| 219 | ✗ | ftype *right_osamples = (ftype *)out->extended_data[1]; | |
| 220 | ✗ | ftype *center_osamples = (ftype *)out->extended_data[2]; | |
| 221 | ✗ | const int overlap = s->overlap; | |
| 222 | ✗ | const int offset = s->fft_size - overlap; | |
| 223 | ✗ | const int nb_samples = FFMIN(overlap, s->in->nb_samples); | |
| 224 | ftype vad; | ||
| 225 | |||
| 226 | // shift in/out buffers | ||
| 227 | ✗ | memmove(left_in, &left_in[overlap], offset * sizeof(ftype)); | |
| 228 | ✗ | memmove(right_in, &right_in[overlap], offset * sizeof(ftype)); | |
| 229 | ✗ | memmove(left_out, &left_out[overlap], offset * sizeof(ftype)); | |
| 230 | ✗ | memmove(right_out, &right_out[overlap], offset * sizeof(ftype)); | |
| 231 | |||
| 232 | ✗ | memcpy(&left_in[offset], left_samples, nb_samples * sizeof(ftype)); | |
| 233 | ✗ | memcpy(&right_in[offset], right_samples, nb_samples * sizeof(ftype)); | |
| 234 | ✗ | memset(&left_out[offset], 0, overlap * sizeof(ftype)); | |
| 235 | ✗ | memset(&right_out[offset], 0, overlap * sizeof(ftype)); | |
| 236 | |||
| 237 | ✗ | fn(apply_window)(s, left_in, windowed_left, 0); | |
| 238 | ✗ | fn(apply_window)(s, right_in, windowed_right, 0); | |
| 239 | |||
| 240 | ✗ | s->tx_fn(s->tx_ctx[0], windowed_oleft, windowed_left, sizeof(ftype)); | |
| 241 | ✗ | s->tx_fn(s->tx_ctx[1], windowed_oright, windowed_right, sizeof(ftype)); | |
| 242 | |||
| 243 | ✗ | fn(get_centere)((ctype *)windowed_oleft, | |
| 244 | (ctype *)windowed_oright, | ||
| 245 | (ctype *)center, | ||
| 246 | ✗ | s->fft_size / 2 + 1); | |
| 247 | |||
| 248 | ✗ | vad = fn(calc_vad)(fn(flux)(center, center_prev, s->fft_size / 2 + 1), | |
| 249 | fn(fluxlr)(windowed_oleft, windowed_pleft, | ||
| 250 | ✗ | windowed_oright, windowed_pright, s->fft_size / 2 + 1), s->voice); | |
| 251 | ✗ | vad = vad * 0.1 + 0.9 * fn(s->prev_vad); | |
| 252 | ✗ | fn(s->prev_vad) = vad; | |
| 253 | |||
| 254 | ✗ | memcpy(center_prev, center, s->fft_size * sizeof(ftype)); | |
| 255 | ✗ | memcpy(windowed_pleft, windowed_oleft, s->fft_size * sizeof(ftype)); | |
| 256 | ✗ | memcpy(windowed_pright, windowed_oright, s->fft_size * sizeof(ftype)); | |
| 257 | |||
| 258 | ✗ | fn(get_final)(center, windowed_oleft, windowed_oright, vad, s->fft_size / 2 + 1, | |
| 259 | ✗ | s->original, s->enhance); | |
| 260 | |||
| 261 | ✗ | s->itx_fn(s->itx_ctx, windowed_oleft, center, sizeof(ctype)); | |
| 262 | |||
| 263 | ✗ | fn(apply_window)(s, windowed_oleft, left_out, 1); | |
| 264 | |||
| 265 | ✗ | memcpy(left_osamples, left_in, overlap * sizeof(ftype)); | |
| 266 | ✗ | memcpy(right_osamples, right_in, overlap * sizeof(ftype)); | |
| 267 | |||
| 268 | ✗ | if (ctx->is_disabled) | |
| 269 | ✗ | memset(center_osamples, 0, overlap * sizeof(ftype)); | |
| 270 | else | ||
| 271 | ✗ | memcpy(center_osamples, left_out, overlap * sizeof(ftype)); | |
| 272 | |||
| 273 | ✗ | return 0; | |
| 274 | } | ||
| 275 |