| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2021 Paul B Mahol | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <float.h> | ||
| 22 | #include <math.h> | ||
| 23 | |||
| 24 | #include "libavutil/mem.h" | ||
| 25 | #include "libavutil/opt.h" | ||
| 26 | #include "libavutil/tx.h" | ||
| 27 | #include "audio.h" | ||
| 28 | #include "avfilter.h" | ||
| 29 | #include "filters.h" | ||
| 30 | #include "window_func.h" | ||
| 31 | |||
/*
 * Bit flags for the "measure" option. Each flag selects one field of
 * ChannelSpectralStats to be computed in filter_channel() and exported
 * as frame metadata in set_metadata().
 */
#define MEASURE_ALL UINT_MAX /* every bit set: all statistics enabled */
#define MEASURE_NONE 0 /* no bit set: nothing is computed or exported */
#define MEASURE_MEAN (1 << 0)
#define MEASURE_VARIANCE (1 << 1)
#define MEASURE_CENTROID (1 << 2)
#define MEASURE_SPREAD (1 << 3)
#define MEASURE_SKEWNESS (1 << 4)
#define MEASURE_KURTOSIS (1 << 5)
#define MEASURE_ENTROPY (1 << 6)
#define MEASURE_FLATNESS (1 << 7)
#define MEASURE_CREST (1 << 8)
#define MEASURE_FLUX (1 << 9)
#define MEASURE_SLOPE (1 << 10)
#define MEASURE_DECREASE (1 << 11)
#define MEASURE_ROLLOFF (1 << 12)
| 47 | |||
/**
 * Statistics of one channel for the most recently analysed window.
 * Written by filter_channel(), read by set_metadata(). A field is only
 * refreshed when the corresponding MEASURE_* bit (or a bit that depends
 * on it) is set in AudioSpectralStatsContext.measure.
 */
typedef struct ChannelSpectralStats {
float mean; /* arithmetic mean of the magnitude bins */
float variance; /* mean squared deviation of the bins from 'mean' */
float centroid; /* magnitude-weighted mean of the bin frequencies */
float spread; /* magnitude-weighted deviation around 'centroid' (square root taken) */
float skewness; /* third moment around 'centroid', divided by spread^3 */
float kurtosis; /* fourth moment around 'centroid', divided by spread^4 */
float entropy; /* -sum(m * log(m + eps)) / log(number of bins) */
float flatness; /* exp(mean of log bins) divided by mean of bins */
float crest; /* largest bin divided by the mean bin */
float flux; /* Euclidean distance to the previous window's magnitudes */
float slope; /* least-squares style slope of magnitude over bin index */
float decrease; /* sum((m[n] - m[0]) / n) / sum(m[n]) for n >= 1 */
float rolloff; /* frequency of the first bin where the running sum reaches 85% of the total */
} ChannelSpectralStats;
| 63 | |||
/**
 * Private context of the aspectralstats filter.
 * The first group of fields is set through AVOptions; the rest is
 * allocated in config_output() and released in uninit().
 */
typedef struct AudioSpectralStatsContext {
const AVClass *class;
unsigned measure; /* "measure" option: bitmask of MEASURE_* flags */
int win_size; /* "win_size" option: window / FFT length in samples */
int win_func; /* "win_func" option: window function id */
float overlap; /* "overlap" option; exactly 1 is replaced by the window function's own default */
int nb_channels; /* channel count of the output link */
int hop_size; /* win_size * (1 - overlap): samples consumed per analysis step */
ChannelSpectralStats *stats; /* nb_channels entries */
float *window_func_lut; /* win_size window coefficients */
av_tx_fn tx_fn; /* transform callback filled in by av_tx_init() */
AVTXContext **fft; /* one transform context per channel */
AVComplexFloat **fft_in; /* per channel: win_size windowed input samples */
AVComplexFloat **fft_out; /* per channel: win_size transform outputs */
float **prev_magnitude; /* per channel: magnitudes of the previous window (used for flux) */
float **magnitude; /* per channel: magnitudes of the current window */
AVFrame *window; /* sliding buffer holding the last win_size samples of every channel */
} AudioSpectralStatsContext;
| 82 | |||
/* Byte offset of an option field inside the private context. */
#define OFFSET(x) offsetof(AudioSpectralStatsContext, x)
/* Flags shared by every option of this filter. */
#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/*
 * User options. "measure" is a flags option; the named constants that
 * follow it share the "measure" unit and map to the MEASURE_* bits.
 */
static const AVOption aspectralstats_options[] = {
{ "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64=2048}, 32, 65536, A },
WIN_FUNC_OPTION("win_func", OFFSET(win_func), A, WFUNC_HANNING),
{ "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.5}, 0, 1, A },
{ "measure", "select the parameters which are measured", OFFSET(measure), AV_OPT_TYPE_FLAGS, {.i64=MEASURE_ALL}, 0, UINT_MAX, A, .unit = "measure" },
{ "none", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_NONE }, 0, 0, A, .unit = "measure" },
{ "all", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ALL }, 0, 0, A, .unit = "measure" },
{ "mean", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_MEAN }, 0, 0, A, .unit = "measure" },
{ "variance", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_VARIANCE}, 0, 0, A, .unit = "measure" },
{ "centroid", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CENTROID}, 0, 0, A, .unit = "measure" },
{ "spread", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SPREAD }, 0, 0, A, .unit = "measure" },
{ "skewness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SKEWNESS}, 0, 0, A, .unit = "measure" },
{ "kurtosis", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_KURTOSIS}, 0, 0, A, .unit = "measure" },
{ "entropy", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ENTROPY }, 0, 0, A, .unit = "measure" },
{ "flatness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLATNESS}, 0, 0, A, .unit = "measure" },
{ "crest", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CREST }, 0, 0, A, .unit = "measure" },
{ "flux", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLUX }, 0, 0, A, .unit = "measure" },
{ "slope", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SLOPE }, 0, 0, A, .unit = "measure" },
{ "decrease", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_DECREASE}, 0, 0, A, .unit = "measure" },
{ "rolloff", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ROLLOFF }, 0, 0, A, .unit = "measure" },
{ NULL }
};

AVFILTER_DEFINE_CLASS(aspectralstats);
| 110 | |||
| 111 | ✗ | static int config_output(AVFilterLink *outlink) | |
| 112 | { | ||
| 113 | ✗ | AudioSpectralStatsContext *s = outlink->src->priv; | |
| 114 | ✗ | float overlap, scale = 1.f; | |
| 115 | int ret; | ||
| 116 | |||
| 117 | ✗ | s->nb_channels = outlink->ch_layout.nb_channels; | |
| 118 | ✗ | s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size, | |
| 119 | sizeof(*s->window_func_lut)); | ||
| 120 | ✗ | if (!s->window_func_lut) | |
| 121 | ✗ | return AVERROR(ENOMEM); | |
| 122 | ✗ | generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap); | |
| 123 | ✗ | if (s->overlap == 1.f) | |
| 124 | ✗ | s->overlap = overlap; | |
| 125 | |||
| 126 | ✗ | s->hop_size = s->win_size * (1.f - s->overlap); | |
| 127 | ✗ | if (s->hop_size <= 0) | |
| 128 | ✗ | return AVERROR(EINVAL); | |
| 129 | |||
| 130 | ✗ | s->stats = av_calloc(s->nb_channels, sizeof(*s->stats)); | |
| 131 | ✗ | if (!s->stats) | |
| 132 | ✗ | return AVERROR(ENOMEM); | |
| 133 | |||
| 134 | ✗ | s->fft = av_calloc(s->nb_channels, sizeof(*s->fft)); | |
| 135 | ✗ | if (!s->fft) | |
| 136 | ✗ | return AVERROR(ENOMEM); | |
| 137 | |||
| 138 | ✗ | s->magnitude = av_calloc(s->nb_channels, sizeof(*s->magnitude)); | |
| 139 | ✗ | if (!s->magnitude) | |
| 140 | ✗ | return AVERROR(ENOMEM); | |
| 141 | |||
| 142 | ✗ | s->prev_magnitude = av_calloc(s->nb_channels, sizeof(*s->prev_magnitude)); | |
| 143 | ✗ | if (!s->prev_magnitude) | |
| 144 | ✗ | return AVERROR(ENOMEM); | |
| 145 | |||
| 146 | ✗ | s->fft_in = av_calloc(s->nb_channels, sizeof(*s->fft_in)); | |
| 147 | ✗ | if (!s->fft_in) | |
| 148 | ✗ | return AVERROR(ENOMEM); | |
| 149 | |||
| 150 | ✗ | s->fft_out = av_calloc(s->nb_channels, sizeof(*s->fft_out)); | |
| 151 | ✗ | if (!s->fft_out) | |
| 152 | ✗ | return AVERROR(ENOMEM); | |
| 153 | |||
| 154 | ✗ | for (int ch = 0; ch < s->nb_channels; ch++) { | |
| 155 | ✗ | ret = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->win_size, &scale, 0); | |
| 156 | ✗ | if (ret < 0) | |
| 157 | ✗ | return ret; | |
| 158 | |||
| 159 | ✗ | s->fft_in[ch] = av_calloc(s->win_size, sizeof(**s->fft_in)); | |
| 160 | ✗ | if (!s->fft_in[ch]) | |
| 161 | ✗ | return AVERROR(ENOMEM); | |
| 162 | |||
| 163 | ✗ | s->fft_out[ch] = av_calloc(s->win_size, sizeof(**s->fft_out)); | |
| 164 | ✗ | if (!s->fft_out[ch]) | |
| 165 | ✗ | return AVERROR(ENOMEM); | |
| 166 | |||
| 167 | ✗ | s->magnitude[ch] = av_calloc(s->win_size, sizeof(**s->magnitude)); | |
| 168 | ✗ | if (!s->magnitude[ch]) | |
| 169 | ✗ | return AVERROR(ENOMEM); | |
| 170 | |||
| 171 | ✗ | s->prev_magnitude[ch] = av_calloc(s->win_size, sizeof(**s->prev_magnitude)); | |
| 172 | ✗ | if (!s->prev_magnitude[ch]) | |
| 173 | ✗ | return AVERROR(ENOMEM); | |
| 174 | } | ||
| 175 | |||
| 176 | ✗ | s->window = ff_get_audio_buffer(outlink, s->win_size); | |
| 177 | ✗ | if (!s->window) | |
| 178 | ✗ | return AVERROR(ENOMEM); | |
| 179 | |||
| 180 | ✗ | return 0; | |
| 181 | } | ||
| 182 | |||
| 183 | ✗ | static void set_meta(AVDictionary **metadata, int chan, const char *key, | |
| 184 | const char *fmt, float val) | ||
| 185 | { | ||
| 186 | uint8_t value[128]; | ||
| 187 | uint8_t key2[128]; | ||
| 188 | |||
| 189 | ✗ | snprintf(value, sizeof(value), fmt, val); | |
| 190 | ✗ | if (chan) | |
| 191 | ✗ | snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%d.%s", chan, key); | |
| 192 | else | ||
| 193 | ✗ | snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%s", key); | |
| 194 | ✗ | av_dict_set(metadata, key2, value, 0); | |
| 195 | ✗ | } | |
| 196 | |||
| 197 | ✗ | static void set_metadata(AudioSpectralStatsContext *s, AVDictionary **metadata) | |
| 198 | { | ||
| 199 | ✗ | for (int ch = 0; ch < s->nb_channels; ch++) { | |
| 200 | ✗ | ChannelSpectralStats *stats = &s->stats[ch]; | |
| 201 | |||
| 202 | ✗ | if (s->measure & MEASURE_MEAN) | |
| 203 | ✗ | set_meta(metadata, ch + 1, "mean", "%g", stats->mean); | |
| 204 | ✗ | if (s->measure & MEASURE_VARIANCE) | |
| 205 | ✗ | set_meta(metadata, ch + 1, "variance", "%g", stats->variance); | |
| 206 | ✗ | if (s->measure & MEASURE_CENTROID) | |
| 207 | ✗ | set_meta(metadata, ch + 1, "centroid", "%g", stats->centroid); | |
| 208 | ✗ | if (s->measure & MEASURE_SPREAD) | |
| 209 | ✗ | set_meta(metadata, ch + 1, "spread", "%g", stats->spread); | |
| 210 | ✗ | if (s->measure & MEASURE_SKEWNESS) | |
| 211 | ✗ | set_meta(metadata, ch + 1, "skewness", "%g", stats->skewness); | |
| 212 | ✗ | if (s->measure & MEASURE_KURTOSIS) | |
| 213 | ✗ | set_meta(metadata, ch + 1, "kurtosis", "%g", stats->kurtosis); | |
| 214 | ✗ | if (s->measure & MEASURE_ENTROPY) | |
| 215 | ✗ | set_meta(metadata, ch + 1, "entropy", "%g", stats->entropy); | |
| 216 | ✗ | if (s->measure & MEASURE_FLATNESS) | |
| 217 | ✗ | set_meta(metadata, ch + 1, "flatness", "%g", stats->flatness); | |
| 218 | ✗ | if (s->measure & MEASURE_CREST) | |
| 219 | ✗ | set_meta(metadata, ch + 1, "crest", "%g", stats->crest); | |
| 220 | ✗ | if (s->measure & MEASURE_FLUX) | |
| 221 | ✗ | set_meta(metadata, ch + 1, "flux", "%g", stats->flux); | |
| 222 | ✗ | if (s->measure & MEASURE_SLOPE) | |
| 223 | ✗ | set_meta(metadata, ch + 1, "slope", "%g", stats->slope); | |
| 224 | ✗ | if (s->measure & MEASURE_DECREASE) | |
| 225 | ✗ | set_meta(metadata, ch + 1, "decrease", "%g", stats->decrease); | |
| 226 | ✗ | if (s->measure & MEASURE_ROLLOFF) | |
| 227 | ✗ | set_meta(metadata, ch + 1, "rolloff", "%g", stats->rolloff); | |
| 228 | } | ||
| 229 | ✗ | } | |
| 230 | |||
/**
 * Arithmetic mean of the magnitude bins.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return sum of the bins divided by size
 */
static float spectral_mean(const float *const spectral, int size, int max_freq)
{
    float total = 0.f;
    int bin = 0;

    while (bin < size)
        total += spectral[bin++];

    return total / size;
}
| 240 | |||
/** Square of a. */
static float sqrf(float a)
{
    const float squared = a * a;

    return squared;
}
| 245 | |||
/**
 * Mean squared deviation of the magnitude bins from a given mean.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @param mean     mean of the bins, as returned by spectral_mean()
 * @return sum of squared deviations divided by size
 */
static float spectral_variance(const float *const spectral, int size, int max_freq, float mean)
{
    float acc = 0.f;
    int bin = 0;

    while (bin < size) {
        acc += sqrf(spectral[bin] - mean);
        bin++;
    }

    return acc / size;
}
| 255 | |||
/**
 * Magnitude-weighted mean frequency of the spectrum.
 *
 * Bin n is assigned the frequency n * max_freq / size.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency corresponding to bin index 'size'
 * @return weighted mean frequency, or 1 when the magnitude sum does not
 *         exceed FLT_EPSILON
 */
static float spectral_centroid(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float weighted = 0.f;
    float total = 0.f;

    for (int bin = 0; bin < size; bin++) {
        weighted += spectral[bin] * bin * bin_width;
        total    += spectral[bin];
    }

    return total <= FLT_EPSILON ? 1.f : weighted / total;
}
| 270 | |||
/**
 * Magnitude-weighted deviation of the bin frequencies around the centroid.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency corresponding to bin index 'size'
 * @param centroid value returned by spectral_centroid()
 * @return square root of the weighted second moment, or 1 when the
 *         magnitude sum does not exceed FLT_EPSILON
 */
static float spectral_spread(const float *const spectral, int size, int max_freq, float centroid)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f;
    float total = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float mag = spectral[bin];

        moment += mag * sqrf(bin * bin_width - centroid);
        total  += mag;
    }

    return total <= FLT_EPSILON ? 1.f : sqrtf(moment / total);
}
| 285 | |||
/** Cube of a. */
static float cbrf(float a)
{
    const float squared = a * a;

    return squared * a;
}
| 290 | |||
/**
 * Third magnitude-weighted moment around the centroid, normalised by
 * the cube of the spread.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency corresponding to bin index 'size'
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return normalised third moment, or 1 when the denominator does not
 *         exceed FLT_EPSILON
 */
static float spectral_skewness(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f;
    float total = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float mag = spectral[bin];

        moment += mag * cbrf(bin * bin_width - centroid);
        total  += mag;
    }

    total *= cbrf(spread);

    return total <= FLT_EPSILON ? 1.f : moment / total;
}
| 306 | |||
/**
 * Fourth magnitude-weighted moment around the centroid, normalised by
 * the fourth power of the spread.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency corresponding to bin index 'size'
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return normalised fourth moment, or 1 when the denominator does not
 *         exceed FLT_EPSILON
 */
static float spectral_kurtosis(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f;
    float total = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float mag = spectral[bin];

        moment += mag * sqrf(sqrf(bin * bin_width - centroid));
        total  += mag;
    }

    total *= sqrf(sqrf(spread));

    return total <= FLT_EPSILON ? 1.f : moment / total;
}
| 322 | |||
| 323 | ✗ | static float spectral_entropy(const float *const spectral, int size, int max_freq) | |
| 324 | { | ||
| 325 | ✗ | float num = 0.f, den = 0.f; | |
| 326 | |||
| 327 | ✗ | for (int n = 0; n < size; n++) { | |
| 328 | ✗ | num += spectral[n] * logf(spectral[n] + FLT_EPSILON); | |
| 329 | } | ||
| 330 | |||
| 331 | ✗ | den = logf(size); | |
| 332 | ✗ | if (den <= FLT_EPSILON) | |
| 333 | ✗ | return 1.f; | |
| 334 | ✗ | return -num / den; | |
| 335 | } | ||
| 336 | |||
| 337 | ✗ | static float spectral_flatness(const float *const spectral, int size, int max_freq) | |
| 338 | { | ||
| 339 | ✗ | float num = 0.f, den = 0.f; | |
| 340 | |||
| 341 | ✗ | for (int n = 0; n < size; n++) { | |
| 342 | ✗ | float v = FLT_EPSILON + spectral[n]; | |
| 343 | ✗ | num += logf(v); | |
| 344 | ✗ | den += v; | |
| 345 | } | ||
| 346 | |||
| 347 | ✗ | num /= size; | |
| 348 | ✗ | den /= size; | |
| 349 | ✗ | num = expf(num); | |
| 350 | ✗ | if (den <= FLT_EPSILON) | |
| 351 | ✗ | return 0.f; | |
| 352 | ✗ | return num / den; | |
| 353 | } | ||
| 354 | |||
| 355 | ✗ | static float spectral_crest(const float *const spectral, int size, int max_freq) | |
| 356 | { | ||
| 357 | ✗ | float max = 0.f, mean = 0.f; | |
| 358 | |||
| 359 | ✗ | for (int n = 0; n < size; n++) { | |
| 360 | ✗ | max = fmaxf(max, spectral[n]); | |
| 361 | ✗ | mean += spectral[n]; | |
| 362 | } | ||
| 363 | |||
| 364 | ✗ | mean /= size; | |
| 365 | ✗ | if (mean <= FLT_EPSILON) | |
| 366 | ✗ | return 0.f; | |
| 367 | ✗ | return max / mean; | |
| 368 | } | ||
| 369 | |||
/**
 * Euclidean distance between the current and the previous magnitudes.
 *
 * @param spectral      current magnitude bins
 * @param prev_spectral magnitude bins of the previous window
 * @param size          number of bins in both arrays
 * @param max_freq      unused
 * @return square root of the summed squared bin differences
 */
static float spectral_flux(const float *const spectral, const float *const prev_spectral,
                           int size, int max_freq)
{
    float acc = 0.f;
    int bin = 0;

    while (bin < size) {
        acc += sqrf(spectral[bin] - prev_spectral[bin]);
        bin++;
    }

    return sqrtf(acc);
}
| 380 | |||
/**
 * Slope of magnitude against normalised bin position.
 *
 * Bin n is mapped to x = (n - size/2) / (size/2); the result is
 * sum(x * (m - mean(m))) / sum(x^2).
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return the slope, or 0 when |sum(x^2)| does not exceed FLT_EPSILON
 */
static float spectral_slope(const float *const spectral, int size, int max_freq)
{
    const float half = size * 0.5f;
    float avg = 0.f;
    float cov = 0.f;
    float var = 0.f;
    int bin;

    for (bin = 0; bin < size; bin++)
        avg += spectral[bin];
    avg /= size;

    for (bin = 0; bin < size; bin++) {
        cov += ((bin - half) / half) * (spectral[bin] - avg);
        var += sqrf((bin - half) / half);
    }

    return fabsf(var) <= FLT_EPSILON ? 0.f : cov / var;
}
| 399 | |||
/**
 * Spectral decrease: sum((m[n] - m[0]) / n) / sum(m[n]) over n >= 1.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return the ratio above, or 0 when the denominator does not exceed
 *         FLT_EPSILON
 */
static float spectral_decrease(const float *const spectral, int size, int max_freq)
{
    float diff_sum = 0.f;
    float mag_sum = 0.f;
    int bin = 1;

    /* bin 0 is the reference and is excluded from both sums */
    while (bin < size) {
        diff_sum += (spectral[bin] - spectral[0]) / bin;
        mag_sum  += spectral[bin];
        bin++;
    }

    return mag_sum <= FLT_EPSILON ? 0.f : diff_sum / mag_sum;
}
| 413 | |||
/**
 * Frequency below which 85% of the total magnitude is contained.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency corresponding to bin index 'size'
 * @return frequency of the first bin at which the running sum reaches
 *         0.85 times the total; 0 when no bin reaches it
 */
static float spectral_rolloff(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float threshold = 0.f;
    float running = 0.f;
    int hit = 0;
    int bin;

    for (bin = 0; bin < size; bin++)
        threshold += spectral[bin];
    threshold *= 0.85f;

    for (bin = 0; bin < size; bin++) {
        running += spectral[bin];
        if (running < threshold)
            continue;
        hit = bin;
        break;
    }

    return hit * bin_width;
}
| 434 | |||
| 435 | ✗ | static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) | |
| 436 | { | ||
| 437 | ✗ | AudioSpectralStatsContext *s = ctx->priv; | |
| 438 | ✗ | const float *window_func_lut = s->window_func_lut; | |
| 439 | ✗ | AVFrame *in = arg; | |
| 440 | ✗ | const int channels = s->nb_channels; | |
| 441 | ✗ | const int start = (channels * jobnr) / nb_jobs; | |
| 442 | ✗ | const int end = (channels * (jobnr+1)) / nb_jobs; | |
| 443 | ✗ | const int offset = s->win_size - s->hop_size; | |
| 444 | |||
| 445 | ✗ | for (int ch = start; ch < end; ch++) { | |
| 446 | ✗ | float *window = (float *)s->window->extended_data[ch]; | |
| 447 | ✗ | ChannelSpectralStats *stats = &s->stats[ch]; | |
| 448 | ✗ | AVComplexFloat *fft_out = s->fft_out[ch]; | |
| 449 | ✗ | AVComplexFloat *fft_in = s->fft_in[ch]; | |
| 450 | ✗ | float *magnitude = s->magnitude[ch]; | |
| 451 | ✗ | float *prev_magnitude = s->prev_magnitude[ch]; | |
| 452 | ✗ | const float scale = 1.f / s->win_size; | |
| 453 | |||
| 454 | ✗ | memmove(window, &window[s->hop_size], offset * sizeof(float)); | |
| 455 | ✗ | memcpy(&window[offset], in->extended_data[ch], in->nb_samples * sizeof(float)); | |
| 456 | ✗ | memset(&window[offset + in->nb_samples], 0, (s->hop_size - in->nb_samples) * sizeof(float)); | |
| 457 | |||
| 458 | ✗ | for (int n = 0; n < s->win_size; n++) { | |
| 459 | ✗ | fft_in[n].re = window[n] * window_func_lut[n]; | |
| 460 | ✗ | fft_in[n].im = 0; | |
| 461 | } | ||
| 462 | |||
| 463 | ✗ | s->tx_fn(s->fft[ch], fft_out, fft_in, sizeof(*fft_in)); | |
| 464 | |||
| 465 | ✗ | for (int n = 0; n < s->win_size / 2; n++) { | |
| 466 | ✗ | fft_out[n].re *= scale; | |
| 467 | ✗ | fft_out[n].im *= scale; | |
| 468 | } | ||
| 469 | |||
| 470 | ✗ | for (int n = 0; n < s->win_size / 2; n++) | |
| 471 | ✗ | magnitude[n] = hypotf(fft_out[n].re, fft_out[n].im); | |
| 472 | |||
| 473 | ✗ | if (s->measure & (MEASURE_MEAN | MEASURE_VARIANCE)) | |
| 474 | ✗ | stats->mean = spectral_mean(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 475 | ✗ | if (s->measure & MEASURE_VARIANCE) | |
| 476 | ✗ | stats->variance = spectral_variance(magnitude, s->win_size / 2, in->sample_rate / 2, stats->mean); | |
| 477 | ✗ | if (s->measure & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS | MEASURE_CENTROID)) | |
| 478 | ✗ | stats->centroid = spectral_centroid(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 479 | ✗ | if (s->measure & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS)) | |
| 480 | ✗ | stats->spread = spectral_spread(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid); | |
| 481 | ✗ | if (s->measure & MEASURE_SKEWNESS) | |
| 482 | ✗ | stats->skewness = spectral_skewness(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread); | |
| 483 | ✗ | if (s->measure & MEASURE_KURTOSIS) | |
| 484 | ✗ | stats->kurtosis = spectral_kurtosis(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread); | |
| 485 | ✗ | if (s->measure & MEASURE_ENTROPY) | |
| 486 | ✗ | stats->entropy = spectral_entropy(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 487 | ✗ | if (s->measure & MEASURE_FLATNESS) | |
| 488 | ✗ | stats->flatness = spectral_flatness(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 489 | ✗ | if (s->measure & MEASURE_CREST) | |
| 490 | ✗ | stats->crest = spectral_crest(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 491 | ✗ | if (s->measure & MEASURE_FLUX) | |
| 492 | ✗ | stats->flux = spectral_flux(magnitude, prev_magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 493 | ✗ | if (s->measure & MEASURE_SLOPE) | |
| 494 | ✗ | stats->slope = spectral_slope(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 495 | ✗ | if (s->measure & MEASURE_DECREASE) | |
| 496 | ✗ | stats->decrease = spectral_decrease(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 497 | ✗ | if (s->measure & MEASURE_ROLLOFF) | |
| 498 | ✗ | stats->rolloff = spectral_rolloff(magnitude, s->win_size / 2, in->sample_rate / 2); | |
| 499 | |||
| 500 | ✗ | memcpy(prev_magnitude, magnitude, s->win_size * sizeof(float)); | |
| 501 | } | ||
| 502 | |||
| 503 | ✗ | return 0; | |
| 504 | } | ||
| 505 | |||
| 506 | ✗ | static int filter_frame(AVFilterLink *inlink, AVFrame *in) | |
| 507 | { | ||
| 508 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 509 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 510 | ✗ | AudioSpectralStatsContext *s = ctx->priv; | |
| 511 | AVDictionary **metadata; | ||
| 512 | AVFrame *out; | ||
| 513 | int ret; | ||
| 514 | |||
| 515 | ✗ | if (av_frame_is_writable(in)) { | |
| 516 | ✗ | out = in; | |
| 517 | } else { | ||
| 518 | ✗ | out = ff_get_audio_buffer(outlink, in->nb_samples); | |
| 519 | ✗ | if (!out) { | |
| 520 | ✗ | av_frame_free(&in); | |
| 521 | ✗ | return AVERROR(ENOMEM); | |
| 522 | } | ||
| 523 | ✗ | ret = av_frame_copy_props(out, in); | |
| 524 | ✗ | if (ret < 0) | |
| 525 | ✗ | goto fail; | |
| 526 | ✗ | ret = av_frame_copy(out, in); | |
| 527 | ✗ | if (ret < 0) | |
| 528 | ✗ | goto fail; | |
| 529 | } | ||
| 530 | |||
| 531 | ✗ | metadata = &out->metadata; | |
| 532 | ✗ | ff_filter_execute(ctx, filter_channel, in, NULL, | |
| 533 | ✗ | FFMIN(inlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx))); | |
| 534 | |||
| 535 | ✗ | set_metadata(s, metadata); | |
| 536 | |||
| 537 | ✗ | if (out != in) | |
| 538 | ✗ | av_frame_free(&in); | |
| 539 | ✗ | return ff_filter_frame(outlink, out); | |
| 540 | ✗ | fail: | |
| 541 | ✗ | av_frame_free(&in); | |
| 542 | ✗ | av_frame_free(&out); | |
| 543 | ✗ | return ret; | |
| 544 | } | ||
| 545 | |||
/**
 * Activation callback: consume the input in chunks of hop_size samples
 * and pass each chunk to filter_frame().
 *
 * @param ctx filter context
 * @return a negative error code from consuming or filtering, 0 when
 *         another chunk is already queued and the filter asked to be
 *         run again, otherwise FFERROR_NOT_READY unless one of the
 *         FF_FILTER_FORWARD_* macros returned earlier
 */
static int activate(AVFilterContext *ctx)
{
AudioSpectralStatsContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
AVFilterLink *inlink = ctx->inputs[0];
AVFrame *in;
int ret;

FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

/* min and max are both hop_size: one analysis step per consumed frame */
ret = ff_inlink_consume_samples(inlink, s->hop_size, s->hop_size, &in);
if (ret < 0)
return ret;
/* 'in' is only used when the call above returned a positive value */
if (ret > 0)
ret = filter_frame(inlink, in);
if (ret < 0)
return ret;

/* at least one more full hop is queued: request another activation */
if (ff_inlink_queued_samples(inlink) >= s->hop_size) {
ff_filter_set_ready(ctx, 10);
return 0;
}

FF_FILTER_FORWARD_STATUS(inlink, outlink);
FF_FILTER_FORWARD_WANTED(outlink, inlink);

return FFERROR_NOT_READY;
}
| 574 | |||
| 575 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 576 | { | ||
| 577 | ✗ | AudioSpectralStatsContext *s = ctx->priv; | |
| 578 | |||
| 579 | ✗ | for (int ch = 0; ch < s->nb_channels; ch++) { | |
| 580 | ✗ | if (s->fft) | |
| 581 | ✗ | av_tx_uninit(&s->fft[ch]); | |
| 582 | ✗ | if (s->fft_in) | |
| 583 | ✗ | av_freep(&s->fft_in[ch]); | |
| 584 | ✗ | if (s->fft_out) | |
| 585 | ✗ | av_freep(&s->fft_out[ch]); | |
| 586 | ✗ | if (s->magnitude) | |
| 587 | ✗ | av_freep(&s->magnitude[ch]); | |
| 588 | ✗ | if (s->prev_magnitude) | |
| 589 | ✗ | av_freep(&s->prev_magnitude[ch]); | |
| 590 | } | ||
| 591 | |||
| 592 | ✗ | av_freep(&s->fft); | |
| 593 | ✗ | av_freep(&s->magnitude); | |
| 594 | ✗ | av_freep(&s->prev_magnitude); | |
| 595 | ✗ | av_freep(&s->fft_in); | |
| 596 | ✗ | av_freep(&s->fft_out); | |
| 597 | ✗ | av_freep(&s->stats); | |
| 598 | |||
| 599 | ✗ | av_freep(&s->window_func_lut); | |
| 600 | ✗ | av_frame_free(&s->window); | |
| 601 | ✗ | } | |
| 602 | |||
/* Single audio output pad; config_output() runs when the link is configured. */
static const AVFilterPad aspectralstats_outputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_AUDIO,
.config_props = config_output,
},
};
| 610 | |||
/*
 * Filter definition. Slice threading is advertised because filter_frame()
 * dispatches filter_channel() through ff_filter_execute(); the only
 * accepted sample format is planar float.
 */
const FFFilter ff_af_aspectralstats = {
.p.name = "aspectralstats",
.p.description = NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."),
.p.priv_class = &aspectralstats_class,
.p.flags = AVFILTER_FLAG_SLICE_THREADS,
.priv_size = sizeof(AudioSpectralStatsContext),
.uninit = uninit,
.activate = activate,
FILTER_INPUTS(ff_audio_default_filterpad),
FILTER_OUTPUTS(aspectralstats_outputs),
FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
};
| 623 |