Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2021 Paul B Mahol |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include <float.h> |
22 |
|
|
#include <math.h> |
23 |
|
|
|
24 |
|
|
#include "libavutil/mem.h" |
25 |
|
|
#include "libavutil/opt.h" |
26 |
|
|
#include "libavutil/tx.h" |
27 |
|
|
#include "audio.h" |
28 |
|
|
#include "avfilter.h" |
29 |
|
|
#include "filters.h" |
30 |
|
|
#include "internal.h" |
31 |
|
|
#include "window_func.h" |
32 |
|
|
|
33 |
|
|
/*
 * Bit flags for the "measure" option. Each flag enables one per-channel
 * spectral statistic; MEASURE_ALL sets every bit, MEASURE_NONE clears them.
 */
#define MEASURE_ALL       UINT_MAX
#define MEASURE_NONE      0
#define MEASURE_MEAN      (1 << 0)
#define MEASURE_VARIANCE  (1 << 1)
#define MEASURE_CENTROID  (1 << 2)
#define MEASURE_SPREAD    (1 << 3)
#define MEASURE_SKEWNESS  (1 << 4)
#define MEASURE_KURTOSIS  (1 << 5)
#define MEASURE_ENTROPY   (1 << 6)
#define MEASURE_FLATNESS  (1 << 7)
#define MEASURE_CREST     (1 << 8)
#define MEASURE_FLUX      (1 << 9)
#define MEASURE_SLOPE     (1 << 10)
#define MEASURE_DECREASE  (1 << 11)
#define MEASURE_ROLLOFF   (1 << 12)
48 |
|
|
|
49 |
|
|
/**
 * Most recent spectral statistics of one audio channel.
 *
 * Each field stores the result of the spectral_<field>() helper of the same
 * name, computed on the channel's magnitude spectrum in filter_channel().
 */
typedef struct ChannelSpectralStats {
    float mean;      /* arithmetic mean of the magnitude bins */
    float variance;  /* mean squared deviation of the bins from `mean` */
    float centroid;  /* magnitude-weighted mean frequency */
    float spread;    /* square root of the weighted second moment around the centroid */
    float skewness;  /* weighted third moment around the centroid, normalized by spread^3 */
    float kurtosis;  /* weighted fourth moment around the centroid, normalized by spread^4 */
    float entropy;   /* -sum(m * log(m)) divided by log(number of bins) */
    float flatness;  /* geometric mean over arithmetic mean of the bins */
    float crest;     /* largest bin divided by the mean bin */
    float flux;      /* Euclidean distance to the previous magnitude spectrum */
    float slope;     /* least-squares slope over the normalized bin index */
    float decrease;  /* index-weighted average difference to bin 0 */
    float rolloff;   /* frequency below which 85% of the summed magnitude lies */
} ChannelSpectralStats;
64 |
|
|
|
65 |
|
|
typedef struct AudioSpectralStatsContext { |
66 |
|
|
const AVClass *class; |
67 |
|
|
unsigned measure; |
68 |
|
|
int win_size; |
69 |
|
|
int win_func; |
70 |
|
|
float overlap; |
71 |
|
|
int nb_channels; |
72 |
|
|
int hop_size; |
73 |
|
|
ChannelSpectralStats *stats; |
74 |
|
|
float *window_func_lut; |
75 |
|
|
av_tx_fn tx_fn; |
76 |
|
|
AVTXContext **fft; |
77 |
|
|
AVComplexFloat **fft_in; |
78 |
|
|
AVComplexFloat **fft_out; |
79 |
|
|
float **prev_magnitude; |
80 |
|
|
float **magnitude; |
81 |
|
|
AVFrame *window; |
82 |
|
|
} AudioSpectralStatsContext; |
83 |
|
|
|
84 |
|
|
#define OFFSET(x) offsetof(AudioSpectralStatsContext, x) |
85 |
|
|
#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
86 |
|
|
|
87 |
|
|
static const AVOption aspectralstats_options[] = { |
88 |
|
|
{ "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64=2048}, 32, 65536, A }, |
89 |
|
|
WIN_FUNC_OPTION("win_func", OFFSET(win_func), A, WFUNC_HANNING), |
90 |
|
|
{ "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.5}, 0, 1, A }, |
91 |
|
|
{ "measure", "select the parameters which are measured", OFFSET(measure), AV_OPT_TYPE_FLAGS, {.i64=MEASURE_ALL}, 0, UINT_MAX, A, .unit = "measure" }, |
92 |
|
|
{ "none", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_NONE }, 0, 0, A, .unit = "measure" }, |
93 |
|
|
{ "all", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ALL }, 0, 0, A, .unit = "measure" }, |
94 |
|
|
{ "mean", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_MEAN }, 0, 0, A, .unit = "measure" }, |
95 |
|
|
{ "variance", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_VARIANCE}, 0, 0, A, .unit = "measure" }, |
96 |
|
|
{ "centroid", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CENTROID}, 0, 0, A, .unit = "measure" }, |
97 |
|
|
{ "spread", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SPREAD }, 0, 0, A, .unit = "measure" }, |
98 |
|
|
{ "skewness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SKEWNESS}, 0, 0, A, .unit = "measure" }, |
99 |
|
|
{ "kurtosis", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_KURTOSIS}, 0, 0, A, .unit = "measure" }, |
100 |
|
|
{ "entropy", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ENTROPY }, 0, 0, A, .unit = "measure" }, |
101 |
|
|
{ "flatness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLATNESS}, 0, 0, A, .unit = "measure" }, |
102 |
|
|
{ "crest", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CREST }, 0, 0, A, .unit = "measure" }, |
103 |
|
|
{ "flux", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLUX }, 0, 0, A, .unit = "measure" }, |
104 |
|
|
{ "slope", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SLOPE }, 0, 0, A, .unit = "measure" }, |
105 |
|
|
{ "decrease", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_DECREASE}, 0, 0, A, .unit = "measure" }, |
106 |
|
|
{ "rolloff", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ROLLOFF }, 0, 0, A, .unit = "measure" }, |
107 |
|
|
{ NULL } |
108 |
|
|
}; |
109 |
|
|
|
110 |
|
|
AVFILTER_DEFINE_CLASS(aspectralstats); |
111 |
|
|
|
112 |
|
✗ |
/**
 * Configure the output link: build the window table, derive the hop size and
 * allocate every per-channel buffer and transform context.
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(ENOMEM) when an allocation fails,
 *         AVERROR(EINVAL) when the resulting hop size is not positive, or
 *         the negative error code returned by av_tx_init()
 *
 * Nothing is released here on failure; uninit() frees whatever was allocated.
 */
static int config_output(AVFilterLink *outlink)
{
    AudioSpectralStatsContext *s = outlink->src->priv;
    float tx_scale = 1.f;
    float window_overlap;
    int err;

    s->nb_channels = outlink->ch_layout.nb_channels;

    s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
                                      sizeof(*s->window_func_lut));
    if (!s->window_func_lut)
        return AVERROR(ENOMEM);
    generate_window_func(s->window_func_lut, s->win_size, s->win_func, &window_overlap);

    /* An overlap of exactly 1 means "use the value reported for this window". */
    if (s->overlap == 1.f)
        s->overlap = window_overlap;

    s->hop_size = s->win_size * (1.f - s->overlap);
    if (s->hop_size <= 0)
        return AVERROR(EINVAL);

    /* Per-channel tables; the element buffers are allocated in the loop below. */
    if (!(s->stats = av_calloc(s->nb_channels, sizeof(*s->stats))))
        return AVERROR(ENOMEM);

    if (!(s->fft = av_calloc(s->nb_channels, sizeof(*s->fft))))
        return AVERROR(ENOMEM);

    if (!(s->magnitude = av_calloc(s->nb_channels, sizeof(*s->magnitude))))
        return AVERROR(ENOMEM);

    if (!(s->prev_magnitude = av_calloc(s->nb_channels, sizeof(*s->prev_magnitude))))
        return AVERROR(ENOMEM);

    if (!(s->fft_in = av_calloc(s->nb_channels, sizeof(*s->fft_in))))
        return AVERROR(ENOMEM);

    if (!(s->fft_out = av_calloc(s->nb_channels, sizeof(*s->fft_out))))
        return AVERROR(ENOMEM);

    for (int ch = 0; ch < s->nb_channels; ch++) {
        err = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->win_size, &tx_scale, 0);
        if (err < 0)
            return err;

        if (!(s->fft_in[ch] = av_calloc(s->win_size, sizeof(**s->fft_in))))
            return AVERROR(ENOMEM);

        if (!(s->fft_out[ch] = av_calloc(s->win_size, sizeof(**s->fft_out))))
            return AVERROR(ENOMEM);

        if (!(s->magnitude[ch] = av_calloc(s->win_size, sizeof(**s->magnitude))))
            return AVERROR(ENOMEM);

        if (!(s->prev_magnitude[ch] = av_calloc(s->win_size, sizeof(**s->prev_magnitude))))
            return AVERROR(ENOMEM);
    }

    /* Sliding analysis window holding win_size samples per channel. */
    s->window = ff_get_audio_buffer(outlink, s->win_size);
    if (!s->window)
        return AVERROR(ENOMEM);

    return 0;
}
183 |
|
|
|
184 |
|
✗ |
/**
 * Format one value and store it in the frame metadata dictionary.
 *
 * The key is "lavfi.aspectralstats.<chan>.<key>" when chan is non-zero and
 * "lavfi.aspectralstats.<key>" otherwise.
 *
 * @param metadata dictionary to update
 * @param chan     1-based channel number, or 0 for a key without channel part
 * @param key      statistic name appended to the prefix
 * @param fmt      printf-style format consuming exactly one floating-point
 *                 argument; callers in this file always pass a literal
 * @param val      value to format
 */
static void set_meta(AVDictionary **metadata, int chan, const char *key,
                     const char *fmt, float val)
{
    /* char, not uint8_t: snprintf() and av_dict_set() take (const) char *. */
    char value[128];
    char key2[128];

    snprintf(value, sizeof(value), fmt, val);
    if (chan)
        snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%d.%s", chan, key);
    else
        snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%s", key);
    av_dict_set(metadata, key2, value, 0);
}
197 |
|
|
|
198 |
|
✗ |
/**
 * Export the enabled statistics of every channel as frame metadata.
 *
 * Channels are numbered from 1 in the generated keys.
 *
 * @param s        filter context providing the measure mask and the stats
 * @param metadata dictionary that receives the entries
 */
static void set_metadata(AudioSpectralStatsContext *s, AVDictionary **metadata)
{
/* Write one statistic if its MEASURE_* bit is enabled. */
#define EXPORT_STAT(flag, label, field)                                \
    do {                                                               \
        if (s->measure & (flag))                                       \
            set_meta(metadata, ch + 1, label, "%g", stats->field);     \
    } while (0)

    for (int ch = 0; ch < s->nb_channels; ch++) {
        ChannelSpectralStats *stats = &s->stats[ch];

        EXPORT_STAT(MEASURE_MEAN,     "mean",     mean);
        EXPORT_STAT(MEASURE_VARIANCE, "variance", variance);
        EXPORT_STAT(MEASURE_CENTROID, "centroid", centroid);
        EXPORT_STAT(MEASURE_SPREAD,   "spread",   spread);
        EXPORT_STAT(MEASURE_SKEWNESS, "skewness", skewness);
        EXPORT_STAT(MEASURE_KURTOSIS, "kurtosis", kurtosis);
        EXPORT_STAT(MEASURE_ENTROPY,  "entropy",  entropy);
        EXPORT_STAT(MEASURE_FLATNESS, "flatness", flatness);
        EXPORT_STAT(MEASURE_CREST,    "crest",    crest);
        EXPORT_STAT(MEASURE_FLUX,     "flux",     flux);
        EXPORT_STAT(MEASURE_SLOPE,    "slope",    slope);
        EXPORT_STAT(MEASURE_DECREASE, "decrease", decrease);
        EXPORT_STAT(MEASURE_ROLLOFF,  "rolloff",  rolloff);
    }

#undef EXPORT_STAT
}
231 |
|
|
|
232 |
|
✗ |
/**
 * Arithmetic mean of the magnitude bins.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq unused; kept so all spectral_*() helpers share one shape
 * @return sum of the bins divided by size
 */
static float spectral_mean(const float *const spectral, int size, int max_freq)
{
    float total = 0.f;
    int bin = 0;

    while (bin < size)
        total += spectral[bin++];

    return total / size;
}
241 |
|
|
|
242 |
|
✗ |
/** Return the square of a. */
static float sqrf(float a)
{
    const float squared = a * a;

    return squared;
}
246 |
|
|
|
247 |
|
✗ |
/**
 * Mean squared deviation of the magnitude bins from a given mean.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq unused
 * @param mean     mean of the bins, as returned by spectral_mean()
 * @return sum of squared deviations divided by size
 */
static float spectral_variance(const float *const spectral, int size, int max_freq, float mean)
{
    float acc = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float dev = spectral[bin] - mean;

        acc += dev * dev;
    }

    return acc / size;
}
256 |
|
|
|
257 |
|
✗ |
/**
 * Magnitude-weighted mean frequency of the spectrum.
 *
 * Bin n is mapped to the frequency n * max_freq / size.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq frequency assigned to index `size`
 * @return weighted mean frequency, or 1 when the summed magnitude does not
 *         exceed FLT_EPSILON
 */
static float spectral_centroid(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float weighted = 0.f, total = 0.f;

    for (int bin = 0; bin < size; bin++) {
        weighted += spectral[bin] * bin * bin_width;
        total    += spectral[bin];
    }

    return total <= FLT_EPSILON ? 1.f : weighted / total;
}
271 |
|
|
|
272 |
|
✗ |
static float spectral_spread(const float *const spectral, int size, int max_freq, float centroid) |
273 |
|
|
{ |
274 |
|
✗ |
const float scale = max_freq / (float)size; |
275 |
|
✗ |
float num = 0.f, den = 0.f; |
276 |
|
|
|
277 |
|
✗ |
for (int n = 0; n < size; n++) { |
278 |
|
✗ |
num += spectral[n] * sqrf(n * scale - centroid); |
279 |
|
✗ |
den += spectral[n]; |
280 |
|
|
} |
281 |
|
|
|
282 |
|
✗ |
if (den <= FLT_EPSILON) |
283 |
|
✗ |
return 1.f; |
284 |
|
✗ |
return sqrtf(num / den); |
285 |
|
|
} |
286 |
|
|
|
287 |
|
✗ |
/** Return the cube of a. */
static float cbrf(float a)
{
    const float cubed = a * a * a;

    return cubed;
}
291 |
|
|
|
292 |
|
✗ |
/**
 * Skewness of the spectrum around its centroid.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq frequency assigned to index `size`
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return weighted third central moment divided by (summed magnitude *
 *         spread^3), or 1 when that divisor does not exceed FLT_EPSILON
 */
static float spectral_skewness(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f, norm = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float dist = bin * bin_width - centroid;

        moment += spectral[bin] * (dist * dist * dist);
        norm   += spectral[bin];
    }

    norm *= spread * spread * spread;

    return norm <= FLT_EPSILON ? 1.f : moment / norm;
}
307 |
|
|
|
308 |
|
✗ |
/**
 * Kurtosis of the spectrum around its centroid.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq frequency assigned to index `size`
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return weighted fourth central moment divided by (summed magnitude *
 *         spread^4), or 1 when that divisor does not exceed FLT_EPSILON
 */
static float spectral_kurtosis(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    const float spread_sq = spread * spread;
    float moment = 0.f, norm = 0.f;

    for (int bin = 0; bin < size; bin++) {
        const float dist    = bin * bin_width - centroid;
        const float dist_sq = dist * dist;

        moment += spectral[bin] * (dist_sq * dist_sq);
        norm   += spectral[bin];
    }

    norm *= spread_sq * spread_sq;

    return norm <= FLT_EPSILON ? 1.f : moment / norm;
}
323 |
|
|
|
324 |
|
✗ |
static float spectral_entropy(const float *const spectral, int size, int max_freq) |
325 |
|
|
{ |
326 |
|
✗ |
float num = 0.f, den = 0.f; |
327 |
|
|
|
328 |
|
✗ |
for (int n = 0; n < size; n++) { |
329 |
|
✗ |
num += spectral[n] * logf(spectral[n] + FLT_EPSILON); |
330 |
|
|
} |
331 |
|
|
|
332 |
|
✗ |
den = logf(size); |
333 |
|
✗ |
if (den <= FLT_EPSILON) |
334 |
|
✗ |
return 1.f; |
335 |
|
✗ |
return -num / den; |
336 |
|
|
} |
337 |
|
|
|
338 |
|
✗ |
static float spectral_flatness(const float *const spectral, int size, int max_freq) |
339 |
|
|
{ |
340 |
|
✗ |
float num = 0.f, den = 0.f; |
341 |
|
|
|
342 |
|
✗ |
for (int n = 0; n < size; n++) { |
343 |
|
✗ |
float v = FLT_EPSILON + spectral[n]; |
344 |
|
✗ |
num += logf(v); |
345 |
|
✗ |
den += v; |
346 |
|
|
} |
347 |
|
|
|
348 |
|
✗ |
num /= size; |
349 |
|
✗ |
den /= size; |
350 |
|
✗ |
num = expf(num); |
351 |
|
✗ |
if (den <= FLT_EPSILON) |
352 |
|
✗ |
return 0.f; |
353 |
|
✗ |
return num / den; |
354 |
|
|
} |
355 |
|
|
|
356 |
|
✗ |
static float spectral_crest(const float *const spectral, int size, int max_freq) |
357 |
|
|
{ |
358 |
|
✗ |
float max = 0.f, mean = 0.f; |
359 |
|
|
|
360 |
|
✗ |
for (int n = 0; n < size; n++) { |
361 |
|
✗ |
max = fmaxf(max, spectral[n]); |
362 |
|
✗ |
mean += spectral[n]; |
363 |
|
|
} |
364 |
|
|
|
365 |
|
✗ |
mean /= size; |
366 |
|
✗ |
if (mean <= FLT_EPSILON) |
367 |
|
✗ |
return 0.f; |
368 |
|
✗ |
return max / mean; |
369 |
|
|
} |
370 |
|
|
|
371 |
|
✗ |
static float spectral_flux(const float *const spectral, const float *const prev_spectral, |
372 |
|
|
int size, int max_freq) |
373 |
|
|
{ |
374 |
|
✗ |
float sum = 0.f; |
375 |
|
|
|
376 |
|
✗ |
for (int n = 0; n < size; n++) |
377 |
|
✗ |
sum += sqrf(spectral[n] - prev_spectral[n]); |
378 |
|
|
|
379 |
|
✗ |
return sqrtf(sum); |
380 |
|
|
} |
381 |
|
|
|
382 |
|
✗ |
/**
 * Spectral slope: least-squares slope of magnitude over the bin index.
 *
 * The bin index is centred and scaled as (n - size/2) / (size/2).
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq unused
 * @return the slope, or 0 when the summed squared index does not exceed
 *         FLT_EPSILON in magnitude
 */
static float spectral_slope(const float *const spectral, int size, int max_freq)
{
    const float half = size * 0.5f;
    float avg = 0.f, cov = 0.f, var = 0.f;

    for (int bin = 0; bin < size; bin++)
        avg += spectral[bin];
    avg /= size;

    for (int bin = 0; bin < size; bin++) {
        const float x = (bin - half) / half;

        cov += x * (spectral[bin] - avg);
        var += x * x;
    }

    if (fabsf(var) <= FLT_EPSILON)
        return 0.f;
    return cov / var;
}
400 |
|
|
|
401 |
|
✗ |
/**
 * Spectral decrease: index-weighted average difference to bin 0.
 *
 * Computes sum((m[n] - m[0]) / n) / sum(m[n]) over n = 1 .. size-1.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq unused
 * @return the ratio, or 0 when the divisor does not exceed FLT_EPSILON
 */
static float spectral_decrease(const float *const spectral, int size, int max_freq)
{
    float weighted = 0.f, total = 0.f;
    int bin = 1;

    while (bin < size) {
        weighted += (spectral[bin] - spectral[0]) / bin;
        total    += spectral[bin];
        bin++;
    }

    return total <= FLT_EPSILON ? 0.f : weighted / total;
}
414 |
|
|
|
415 |
|
✗ |
/**
 * Spectral rolloff: frequency of the first bin at which the running sum of
 * magnitudes reaches 85% of the total.
 *
 * @param spectral magnitude spectrum
 * @param size     number of bins
 * @param max_freq frequency assigned to index `size`
 * @return bin index * max_freq / size; index 0 is used when the threshold
 *         is never reached
 */
static float spectral_rolloff(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float threshold = 0.f, running = 0.f;

    for (int bin = 0; bin < size; bin++)
        threshold += spectral[bin];
    threshold *= 0.85f;

    for (int bin = 0; bin < size; bin++) {
        running += spectral[bin];
        if (running >= threshold)
            return bin * bin_width;
    }

    /* Threshold never reached: same result as index 0. */
    return 0 * bin_width;
}
435 |
|
|
|
436 |
|
✗ |
/**
 * Slice-threading worker: update the statistics of a range of channels.
 *
 * For each channel of this job the sliding window is advanced by hop_size
 * samples, the new samples are appended (zero-padded up to hop_size), the
 * window is weighted and transformed, and the enabled statistics are
 * computed on the first win_size / 2 magnitude bins.
 *
 * @param ctx     filter context
 * @param arg     input AVFrame carrying the new samples
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs; channels are split evenly across them
 * @return always 0
 */
static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioSpectralStatsContext *s = ctx->priv;
    AVFrame *in = arg;
    const float *lut     = s->window_func_lut;
    const int nb_ch      = s->nb_channels;
    const int first      = (nb_ch * jobnr) / nb_jobs;
    const int last       = (nb_ch * (jobnr+1)) / nb_jobs;
    const int win_size   = s->win_size;
    const int hop        = s->hop_size;
    const int kept       = win_size - hop;       /* samples retained from the previous window */
    const int bins       = win_size / 2;         /* number of bins the statistics are computed on */
    const int max_freq   = in->sample_rate / 2;  /* frequency scale handed to the helpers */
    const unsigned flags = s->measure;
    const float norm     = 1.f / win_size;

    for (int ch = first; ch < last; ch++) {
        float *window                = (float *)s->window->extended_data[ch];
        ChannelSpectralStats *stats  = &s->stats[ch];
        AVComplexFloat *fft_out      = s->fft_out[ch];
        AVComplexFloat *fft_in       = s->fft_in[ch];
        float *magnitude             = s->magnitude[ch];
        float *prev_magnitude        = s->prev_magnitude[ch];

        /* Slide the window, append the new samples, zero-fill a short frame. */
        memmove(window, &window[hop], kept * sizeof(float));
        memcpy(&window[kept], in->extended_data[ch], in->nb_samples * sizeof(float));
        memset(&window[kept + in->nb_samples], 0, (hop - in->nb_samples) * sizeof(float));

        for (int n = 0; n < win_size; n++) {
            fft_in[n].re = window[n] * lut[n];
            fft_in[n].im = 0;
        }

        s->tx_fn(s->fft[ch], fft_out, fft_in, sizeof(*fft_in));

        /* Normalize the lower half of the output and take its magnitude. */
        for (int n = 0; n < bins; n++) {
            fft_out[n].re *= norm;
            fft_out[n].im *= norm;
            magnitude[n] = hypotf(fft_out[n].re, fft_out[n].im);
        }

        /* Mean, centroid and spread are also inputs to later statistics. */
        if (flags & (MEASURE_MEAN | MEASURE_VARIANCE))
            stats->mean = spectral_mean(magnitude, bins, max_freq);
        if (flags & MEASURE_VARIANCE)
            stats->variance = spectral_variance(magnitude, bins, max_freq, stats->mean);
        if (flags & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS | MEASURE_CENTROID))
            stats->centroid = spectral_centroid(magnitude, bins, max_freq);
        if (flags & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS))
            stats->spread = spectral_spread(magnitude, bins, max_freq, stats->centroid);
        if (flags & MEASURE_SKEWNESS)
            stats->skewness = spectral_skewness(magnitude, bins, max_freq, stats->centroid, stats->spread);
        if (flags & MEASURE_KURTOSIS)
            stats->kurtosis = spectral_kurtosis(magnitude, bins, max_freq, stats->centroid, stats->spread);
        if (flags & MEASURE_ENTROPY)
            stats->entropy = spectral_entropy(magnitude, bins, max_freq);
        if (flags & MEASURE_FLATNESS)
            stats->flatness = spectral_flatness(magnitude, bins, max_freq);
        if (flags & MEASURE_CREST)
            stats->crest = spectral_crest(magnitude, bins, max_freq);
        if (flags & MEASURE_FLUX)
            stats->flux = spectral_flux(magnitude, prev_magnitude, bins, max_freq);
        if (flags & MEASURE_SLOPE)
            stats->slope = spectral_slope(magnitude, bins, max_freq);
        if (flags & MEASURE_DECREASE)
            stats->decrease = spectral_decrease(magnitude, bins, max_freq);
        if (flags & MEASURE_ROLLOFF)
            stats->rolloff = spectral_rolloff(magnitude, bins, max_freq);

        /* Keep this spectrum as the reference for the next flux value. */
        memcpy(prev_magnitude, magnitude, win_size * sizeof(float));
    }

    return 0;
}
506 |
|
|
|
507 |
|
✗ |
/**
 * Analyse one input frame and forward it with the statistics as metadata.
 *
 * A writable input frame is passed through directly; otherwise its
 * properties and data are copied into a newly allocated frame, which is
 * sent instead.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; ownership is taken
 * @return result of ff_filter_frame(), AVERROR(ENOMEM) when the output
 *         frame cannot be allocated, or the error from copying the frame
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AudioSpectralStatsContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out = in;
    int ret;

    if (!av_frame_is_writable(in)) {
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        ret = av_frame_copy_props(out, in);
        if (ret < 0)
            goto fail;

        ret = av_frame_copy(out, in);
        if (ret < 0)
            goto fail;
    }

    /* Statistics are computed from `in`; the metadata goes to `out`. */
    ff_filter_execute(ctx, filter_channel, in, NULL,
                      FFMIN(inlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));

    set_metadata(s, &out->metadata);

    if (out != in)
        av_frame_free(&in);
    return ff_filter_frame(outlink, out);

fail:
    av_frame_free(&in);
    av_frame_free(&out);
    return ret;
}
546 |
|
|
|
547 |
|
✗ |
/**
 * Activation callback: request hop_size samples at a time from the input
 * and run filter_frame() on each chunk obtained.
 *
 * @param ctx filter context
 * @return 0 when more queued input can be processed right away, a negative
 *         error code on failure, FFERROR_NOT_READY when nothing was left
 *         to do, or whatever the FF_FILTER_FORWARD_* macros return
 */
static int activate(AVFilterContext *ctx)
{
    AudioSpectralStatsContext *s = ctx->priv;
    AVFilterLink *inlink  = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *in;
    int ret;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = ff_inlink_consume_samples(inlink, s->hop_size, s->hop_size, &in);
    if (ret < 0)
        return ret;
    if (ret > 0) {
        ret = filter_frame(inlink, in);
        if (ret < 0)
            return ret;
    }

    /* Another full hop is already queued: ask to be scheduled again. */
    if (ff_inlink_queued_samples(inlink) >= s->hop_size) {
        ff_filter_set_ready(ctx, 10);
        return 0;
    }

    FF_FILTER_FORWARD_STATUS(inlink, outlink);
    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}
575 |
|
|
|
576 |
|
✗ |
/**
 * Release everything allocated by config_output().
 *
 * Every per-channel table is checked for NULL before its elements are
 * touched, so this also works after a partially failed configuration.
 *
 * @param ctx filter context
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioSpectralStatsContext *s = ctx->priv;
    const int nb_ch = s->nb_channels;

    for (int ch = 0; ch < nb_ch; ch++) {
        if (s->fft)
            av_tx_uninit(&s->fft[ch]);
        if (s->fft_in)
            av_freep(&s->fft_in[ch]);
        if (s->fft_out)
            av_freep(&s->fft_out[ch]);
        if (s->magnitude)
            av_freep(&s->magnitude[ch]);
        if (s->prev_magnitude)
            av_freep(&s->prev_magnitude[ch]);
    }

    /* Per-channel tables. */
    av_freep(&s->fft);
    av_freep(&s->magnitude);
    av_freep(&s->prev_magnitude);
    av_freep(&s->fft_in);
    av_freep(&s->fft_out);
    av_freep(&s->stats);

    /* Shared buffers. */
    av_freep(&s->window_func_lut);
    av_frame_free(&s->window);
}
603 |
|
|
|
604 |
|
|
static const AVFilterPad aspectralstats_outputs[] = { |
605 |
|
|
{ |
606 |
|
|
.name = "default", |
607 |
|
|
.type = AVMEDIA_TYPE_AUDIO, |
608 |
|
|
.config_props = config_output, |
609 |
|
|
}, |
610 |
|
|
}; |
611 |
|
|
|
612 |
|
|
const AVFilter ff_af_aspectralstats = { |
613 |
|
|
.name = "aspectralstats", |
614 |
|
|
.description = NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."), |
615 |
|
|
.priv_size = sizeof(AudioSpectralStatsContext), |
616 |
|
|
.priv_class = &aspectralstats_class, |
617 |
|
|
.uninit = uninit, |
618 |
|
|
.activate = activate, |
619 |
|
|
FILTER_INPUTS(ff_audio_default_filterpad), |
620 |
|
|
FILTER_OUTPUTS(aspectralstats_outputs), |
621 |
|
|
FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP), |
622 |
|
|
.flags = AVFILTER_FLAG_SLICE_THREADS, |
623 |
|
|
}; |
624 |
|
|
|