Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2021 Paul B Mahol |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include <float.h> |
22 |
|
|
#include <math.h> |
23 |
|
|
|
24 |
|
|
#include "libavutil/mem.h" |
25 |
|
|
#include "libavutil/opt.h" |
26 |
|
|
#include "libavutil/tx.h" |
27 |
|
|
#include "audio.h" |
28 |
|
|
#include "avfilter.h" |
29 |
|
|
#include "filters.h" |
30 |
|
|
#include "window_func.h" |
31 |
|
|
|
32 |
|
|
/*
 * Bit flags selecting which statistics are computed and exported.
 * They are combined in AudioSpectralStatsContext.measure through the
 * "measure" option; MEASURE_ALL sets every bit, MEASURE_NONE clears them all.
 */
#define MEASURE_ALL       UINT_MAX
#define MEASURE_NONE      0
#define MEASURE_MEAN      (1 <<  0)
#define MEASURE_VARIANCE  (1 <<  1)
#define MEASURE_CENTROID  (1 <<  2)
#define MEASURE_SPREAD    (1 <<  3)
#define MEASURE_SKEWNESS  (1 <<  4)
#define MEASURE_KURTOSIS  (1 <<  5)
#define MEASURE_ENTROPY   (1 <<  6)
#define MEASURE_FLATNESS  (1 <<  7)
#define MEASURE_CREST     (1 <<  8)
#define MEASURE_FLUX      (1 <<  9)
#define MEASURE_SLOPE     (1 << 10)
#define MEASURE_DECREASE  (1 << 11)
#define MEASURE_ROLLOFF   (1 << 12)
47 |
|
|
|
48 |
|
|
/**
 * Most recent spectral statistics of a single channel.
 *
 * Each field is written by filter_channel() when the matching MEASURE_* flag
 * requires it, and exported as frame metadata by set_metadata().
 */
typedef struct ChannelSpectralStats {
    float mean;      /* result of spectral_mean()     */
    float variance;  /* result of spectral_variance() */
    float centroid;  /* result of spectral_centroid() */
    float spread;    /* result of spectral_spread()   */
    float skewness;  /* result of spectral_skewness() */
    float kurtosis;  /* result of spectral_kurtosis() */
    float entropy;   /* result of spectral_entropy()  */
    float flatness;  /* result of spectral_flatness() */
    float crest;     /* result of spectral_crest()    */
    float flux;      /* result of spectral_flux()     */
    float slope;     /* result of spectral_slope()    */
    float decrease;  /* result of spectral_decrease() */
    float rolloff;   /* result of spectral_rolloff()  */
} ChannelSpectralStats;
63 |
|
|
|
64 |
|
|
typedef struct AudioSpectralStatsContext { |
65 |
|
|
const AVClass *class; |
66 |
|
|
unsigned measure; |
67 |
|
|
int win_size; |
68 |
|
|
int win_func; |
69 |
|
|
float overlap; |
70 |
|
|
int nb_channels; |
71 |
|
|
int hop_size; |
72 |
|
|
ChannelSpectralStats *stats; |
73 |
|
|
float *window_func_lut; |
74 |
|
|
av_tx_fn tx_fn; |
75 |
|
|
AVTXContext **fft; |
76 |
|
|
AVComplexFloat **fft_in; |
77 |
|
|
AVComplexFloat **fft_out; |
78 |
|
|
float **prev_magnitude; |
79 |
|
|
float **magnitude; |
80 |
|
|
AVFrame *window; |
81 |
|
|
} AudioSpectralStatsContext; |
82 |
|
|
|
83 |
|
|
#define OFFSET(x) offsetof(AudioSpectralStatsContext, x) |
84 |
|
|
#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
85 |
|
|
|
86 |
|
|
static const AVOption aspectralstats_options[] = { |
87 |
|
|
{ "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64=2048}, 32, 65536, A }, |
88 |
|
|
WIN_FUNC_OPTION("win_func", OFFSET(win_func), A, WFUNC_HANNING), |
89 |
|
|
{ "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.5}, 0, 1, A }, |
90 |
|
|
{ "measure", "select the parameters which are measured", OFFSET(measure), AV_OPT_TYPE_FLAGS, {.i64=MEASURE_ALL}, 0, UINT_MAX, A, .unit = "measure" }, |
91 |
|
|
{ "none", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_NONE }, 0, 0, A, .unit = "measure" }, |
92 |
|
|
{ "all", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ALL }, 0, 0, A, .unit = "measure" }, |
93 |
|
|
{ "mean", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_MEAN }, 0, 0, A, .unit = "measure" }, |
94 |
|
|
{ "variance", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_VARIANCE}, 0, 0, A, .unit = "measure" }, |
95 |
|
|
{ "centroid", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CENTROID}, 0, 0, A, .unit = "measure" }, |
96 |
|
|
{ "spread", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SPREAD }, 0, 0, A, .unit = "measure" }, |
97 |
|
|
{ "skewness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SKEWNESS}, 0, 0, A, .unit = "measure" }, |
98 |
|
|
{ "kurtosis", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_KURTOSIS}, 0, 0, A, .unit = "measure" }, |
99 |
|
|
{ "entropy", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ENTROPY }, 0, 0, A, .unit = "measure" }, |
100 |
|
|
{ "flatness", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLATNESS}, 0, 0, A, .unit = "measure" }, |
101 |
|
|
{ "crest", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_CREST }, 0, 0, A, .unit = "measure" }, |
102 |
|
|
{ "flux", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_FLUX }, 0, 0, A, .unit = "measure" }, |
103 |
|
|
{ "slope", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_SLOPE }, 0, 0, A, .unit = "measure" }, |
104 |
|
|
{ "decrease", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_DECREASE}, 0, 0, A, .unit = "measure" }, |
105 |
|
|
{ "rolloff", "", 0, AV_OPT_TYPE_CONST, {.i64=MEASURE_ROLLOFF }, 0, 0, A, .unit = "measure" }, |
106 |
|
|
{ NULL } |
107 |
|
|
}; |
108 |
|
|
|
109 |
|
|
AVFILTER_DEFINE_CLASS(aspectralstats); |
110 |
|
|
|
111 |
|
✗ |
/**
 * Configure the output link: build the window function table, derive the hop
 * size and allocate all per-channel analysis state.
 *
 * Anything allocated before a failure is left in the context; uninit()
 * releases it.
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(ENOMEM) when an allocation fails,
 *         AVERROR(EINVAL) when the resulting hop size is not positive, or the
 *         negative value returned by av_tx_init()
 */
static int config_output(AVFilterLink *outlink)
{
    AudioSpectralStatsContext *s = outlink->src->priv;
    float default_overlap;
    float tx_scale = 1.f;
    int err;

    s->nb_channels = outlink->ch_layout.nb_channels;

    s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
                                      sizeof(*s->window_func_lut));
    if (s->window_func_lut == NULL)
        return AVERROR(ENOMEM);
    generate_window_func(s->window_func_lut, s->win_size, s->win_func, &default_overlap);

    /* An overlap option of exactly 1 selects the value reported by
     * generate_window_func() for the chosen window. */
    if (s->overlap == 1.f)
        s->overlap = default_overlap;

    s->hop_size = s->win_size * (1.f - s->overlap);
    if (s->hop_size <= 0)
        return AVERROR(EINVAL);

    /* Per-channel tables. */
    s->stats = av_calloc(s->nb_channels, sizeof(*s->stats));
    if (s->stats == NULL)
        return AVERROR(ENOMEM);

    s->fft = av_calloc(s->nb_channels, sizeof(*s->fft));
    if (s->fft == NULL)
        return AVERROR(ENOMEM);

    s->magnitude = av_calloc(s->nb_channels, sizeof(*s->magnitude));
    if (s->magnitude == NULL)
        return AVERROR(ENOMEM);

    s->prev_magnitude = av_calloc(s->nb_channels, sizeof(*s->prev_magnitude));
    if (s->prev_magnitude == NULL)
        return AVERROR(ENOMEM);

    s->fft_in = av_calloc(s->nb_channels, sizeof(*s->fft_in));
    if (s->fft_in == NULL)
        return AVERROR(ENOMEM);

    s->fft_out = av_calloc(s->nb_channels, sizeof(*s->fft_out));
    if (s->fft_out == NULL)
        return AVERROR(ENOMEM);

    /* Per-channel transform context and win_size-element work buffers. */
    for (int ch = 0; ch < s->nb_channels; ch++) {
        err = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->win_size, &tx_scale, 0);
        if (err < 0)
            return err;

        s->fft_in[ch] = av_calloc(s->win_size, sizeof(**s->fft_in));
        if (s->fft_in[ch] == NULL)
            return AVERROR(ENOMEM);

        s->fft_out[ch] = av_calloc(s->win_size, sizeof(**s->fft_out));
        if (s->fft_out[ch] == NULL)
            return AVERROR(ENOMEM);

        s->magnitude[ch] = av_calloc(s->win_size, sizeof(**s->magnitude));
        if (s->magnitude[ch] == NULL)
            return AVERROR(ENOMEM);

        s->prev_magnitude[ch] = av_calloc(s->win_size, sizeof(**s->prev_magnitude));
        if (s->prev_magnitude[ch] == NULL)
            return AVERROR(ENOMEM);
    }

    /* Frame holding the win_size most recent samples of every channel. */
    s->window = ff_get_audio_buffer(outlink, s->win_size);
    if (s->window == NULL)
        return AVERROR(ENOMEM);

    return 0;
}
182 |
|
|
|
183 |
|
✗ |
/**
 * Format one statistic and store it in a metadata dictionary.
 *
 * The entry key is "lavfi.aspectralstats.<chan>.<key>" when chan is non-zero
 * and "lavfi.aspectralstats.<key>" otherwise.
 *
 * @param metadata dictionary receiving the entry
 * @param chan     channel number embedded in the key, or 0 for none
 * @param key      statistic name appended to the key prefix
 * @param fmt      printf-style format applied to val
 * @param val      value to format
 */
static void set_meta(AVDictionary **metadata, int chan, const char *key,
                     const char *fmt, float val)
{
    /* snprintf() and av_dict_set() work on char strings; plain char buffers
     * avoid the pointer-signedness mismatch that uint8_t buffers cause. */
    char value[128];
    char key2[128];

    snprintf(value, sizeof(value), fmt, val);
    if (chan)
        snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%d.%s", chan, key);
    else
        snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%s", key);
    av_dict_set(metadata, key2, value, 0);
}
196 |
|
|
|
197 |
|
✗ |
/**
 * Export the selected statistics of every channel as metadata entries.
 *
 * Channels are numbered from 1 in the generated keys. Only statistics whose
 * MEASURE_* flag is set in s->measure are written, each formatted with "%g".
 *
 * @param s        filter context providing the flag mask and per-channel stats
 * @param metadata dictionary receiving the entries
 */
static void set_metadata(AudioSpectralStatsContext *s, AVDictionary **metadata)
{
    /* Flag, key name and field location of every exportable statistic, listed
     * in the order the entries are written. */
    static const struct {
        unsigned    flag;
        const char *name;
        size_t      offset;
    } exports[] = {
        { MEASURE_MEAN,     "mean",     offsetof(ChannelSpectralStats, mean)     },
        { MEASURE_VARIANCE, "variance", offsetof(ChannelSpectralStats, variance) },
        { MEASURE_CENTROID, "centroid", offsetof(ChannelSpectralStats, centroid) },
        { MEASURE_SPREAD,   "spread",   offsetof(ChannelSpectralStats, spread)   },
        { MEASURE_SKEWNESS, "skewness", offsetof(ChannelSpectralStats, skewness) },
        { MEASURE_KURTOSIS, "kurtosis", offsetof(ChannelSpectralStats, kurtosis) },
        { MEASURE_ENTROPY,  "entropy",  offsetof(ChannelSpectralStats, entropy)  },
        { MEASURE_FLATNESS, "flatness", offsetof(ChannelSpectralStats, flatness) },
        { MEASURE_CREST,    "crest",    offsetof(ChannelSpectralStats, crest)    },
        { MEASURE_FLUX,     "flux",     offsetof(ChannelSpectralStats, flux)     },
        { MEASURE_SLOPE,    "slope",    offsetof(ChannelSpectralStats, slope)    },
        { MEASURE_DECREASE, "decrease", offsetof(ChannelSpectralStats, decrease) },
        { MEASURE_ROLLOFF,  "rolloff",  offsetof(ChannelSpectralStats, rolloff)  },
    };
    const size_t nb_exports = sizeof(exports) / sizeof(exports[0]);

    for (int ch = 0; ch < s->nb_channels; ch++) {
        const uint8_t *base = (const uint8_t *)&s->stats[ch];

        for (size_t i = 0; i < nb_exports; i++) {
            if (!(s->measure & exports[i].flag))
                continue;
            set_meta(metadata, ch + 1, exports[i].name, "%g",
                     *(const float *)(base + exports[i].offset));
        }
    }
}
230 |
|
|
|
231 |
|
✗ |
/**
 * Arithmetic mean of the magnitude bins.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return sum of all bins divided by size
 */
static float spectral_mean(const float *const spectral, int size, int max_freq)
{
    float total = 0.f;
    int n = 0;

    while (n < size)
        total += spectral[n++];

    return total / size;
}
240 |
|
|
|
241 |
|
✗ |
/** Return the square of a. */
static float sqrf(float a)
{
    const float squared = a * a;

    return squared;
}
245 |
|
|
|
246 |
|
✗ |
/**
 * Variance of the magnitude bins around a given mean.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @param mean     mean of the bins, as returned by spectral_mean()
 * @return average squared deviation from mean
 */
static float spectral_variance(const float *const spectral, int size, int max_freq, float mean)
{
    float acc = 0.f;

    for (int n = 0; n < size; n++) {
        const float dev = spectral[n] - mean;

        acc += dev * dev;
    }

    return acc / size;
}
255 |
|
|
|
256 |
|
✗ |
/**
 * Magnitude-weighted mean frequency of the spectrum.
 *
 * Bin n is assigned the frequency n * max_freq / size.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency span covered by the bins
 * @return weighted mean frequency, or 1 when the magnitude sum does not
 *         exceed FLT_EPSILON
 */
static float spectral_centroid(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float weighted = 0.f;
    float total = 0.f;

    for (int n = 0; n < size; n++) {
        const float mag = spectral[n];

        weighted += mag * n * bin_width;
        total    += mag;
    }

    return total <= FLT_EPSILON ? 1.f : weighted / total;
}
270 |
|
|
|
271 |
|
✗ |
static float spectral_spread(const float *const spectral, int size, int max_freq, float centroid) |
272 |
|
|
{ |
273 |
|
✗ |
const float scale = max_freq / (float)size; |
274 |
|
✗ |
float num = 0.f, den = 0.f; |
275 |
|
|
|
276 |
|
✗ |
for (int n = 0; n < size; n++) { |
277 |
|
✗ |
num += spectral[n] * sqrf(n * scale - centroid); |
278 |
|
✗ |
den += spectral[n]; |
279 |
|
|
} |
280 |
|
|
|
281 |
|
✗ |
if (den <= FLT_EPSILON) |
282 |
|
✗ |
return 1.f; |
283 |
|
✗ |
return sqrtf(num / den); |
284 |
|
|
} |
285 |
|
|
|
286 |
|
✗ |
/** Return the cube of a. */
static float cbrf(float a)
{
    const float squared = a * a;

    return squared * a;
}
290 |
|
|
|
291 |
|
✗ |
/**
 * Skewness of the spectrum: the magnitude-weighted third central moment of
 * frequency divided by the magnitude sum times spread cubed.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency span covered by the bins
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return the skewness, or 1 when the denominator does not exceed FLT_EPSILON
 */
static float spectral_skewness(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f;
    float norm = 0.f;

    for (int n = 0; n < size; n++) {
        const float mag = spectral[n];

        moment += mag * cbrf(n * bin_width - centroid);
        norm   += mag;
    }
    norm *= cbrf(spread);

    return norm <= FLT_EPSILON ? 1.f : moment / norm;
}
306 |
|
|
|
307 |
|
✗ |
/**
 * Kurtosis of the spectrum: the magnitude-weighted fourth central moment of
 * frequency divided by the magnitude sum times spread to the fourth power.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency span covered by the bins
 * @param centroid value returned by spectral_centroid()
 * @param spread   value returned by spectral_spread()
 * @return the kurtosis, or 1 when the denominator does not exceed FLT_EPSILON
 */
static float spectral_kurtosis(const float *const spectral, int size, int max_freq, float centroid, float spread)
{
    const float bin_width = max_freq / (float)size;
    float moment = 0.f;
    float norm = 0.f;

    for (int n = 0; n < size; n++) {
        const float mag = spectral[n];

        moment += mag * sqrf(sqrf(n * bin_width - centroid));
        norm   += mag;
    }
    norm *= sqrf(sqrf(spread));

    return norm <= FLT_EPSILON ? 1.f : moment / norm;
}
322 |
|
|
|
323 |
|
✗ |
static float spectral_entropy(const float *const spectral, int size, int max_freq) |
324 |
|
|
{ |
325 |
|
✗ |
float num = 0.f, den = 0.f; |
326 |
|
|
|
327 |
|
✗ |
for (int n = 0; n < size; n++) { |
328 |
|
✗ |
num += spectral[n] * logf(spectral[n] + FLT_EPSILON); |
329 |
|
|
} |
330 |
|
|
|
331 |
|
✗ |
den = logf(size); |
332 |
|
✗ |
if (den <= FLT_EPSILON) |
333 |
|
✗ |
return 1.f; |
334 |
|
✗ |
return -num / den; |
335 |
|
|
} |
336 |
|
|
|
337 |
|
✗ |
static float spectral_flatness(const float *const spectral, int size, int max_freq) |
338 |
|
|
{ |
339 |
|
✗ |
float num = 0.f, den = 0.f; |
340 |
|
|
|
341 |
|
✗ |
for (int n = 0; n < size; n++) { |
342 |
|
✗ |
float v = FLT_EPSILON + spectral[n]; |
343 |
|
✗ |
num += logf(v); |
344 |
|
✗ |
den += v; |
345 |
|
|
} |
346 |
|
|
|
347 |
|
✗ |
num /= size; |
348 |
|
✗ |
den /= size; |
349 |
|
✗ |
num = expf(num); |
350 |
|
✗ |
if (den <= FLT_EPSILON) |
351 |
|
✗ |
return 0.f; |
352 |
|
✗ |
return num / den; |
353 |
|
|
} |
354 |
|
|
|
355 |
|
✗ |
static float spectral_crest(const float *const spectral, int size, int max_freq) |
356 |
|
|
{ |
357 |
|
✗ |
float max = 0.f, mean = 0.f; |
358 |
|
|
|
359 |
|
✗ |
for (int n = 0; n < size; n++) { |
360 |
|
✗ |
max = fmaxf(max, spectral[n]); |
361 |
|
✗ |
mean += spectral[n]; |
362 |
|
|
} |
363 |
|
|
|
364 |
|
✗ |
mean /= size; |
365 |
|
✗ |
if (mean <= FLT_EPSILON) |
366 |
|
✗ |
return 0.f; |
367 |
|
✗ |
return max / mean; |
368 |
|
|
} |
369 |
|
|
|
370 |
|
✗ |
static float spectral_flux(const float *const spectral, const float *const prev_spectral, |
371 |
|
|
int size, int max_freq) |
372 |
|
|
{ |
373 |
|
✗ |
float sum = 0.f; |
374 |
|
|
|
375 |
|
✗ |
for (int n = 0; n < size; n++) |
376 |
|
✗ |
sum += sqrf(spectral[n] - prev_spectral[n]); |
377 |
|
|
|
378 |
|
✗ |
return sqrtf(sum); |
379 |
|
|
} |
380 |
|
|
|
381 |
|
✗ |
/**
 * Spectral slope: least-squares style ratio between the covariance of bin
 * position with magnitude and the variance of bin position.
 *
 * Bin positions are expressed as (n - size / 2) / (size / 2).
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return the slope, or 0 when the position term does not exceed FLT_EPSILON
 *         in absolute value
 */
static float spectral_slope(const float *const spectral, int size, int max_freq)
{
    const float half = size * 0.5f;
    float average = 0.f;
    float cov = 0.f;
    float var = 0.f;

    for (int n = 0; n < size; n++)
        average += spectral[n];
    average /= size;

    for (int n = 0; n < size; n++) {
        const float pos = (n - half) / half;

        cov += pos * (spectral[n] - average);
        var += sqrf(pos);
    }

    return fabsf(var) <= FLT_EPSILON ? 0.f : cov / var;
}
399 |
|
|
|
400 |
|
✗ |
/**
 * Spectral decrease: sum over n >= 1 of (spectral[n] - spectral[0]) / n,
 * divided by the sum of the bins from index 1 on.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq unused
 * @return the ratio, or 0 when the denominator does not exceed FLT_EPSILON
 */
static float spectral_decrease(const float *const spectral, int size, int max_freq)
{
    float weighted = 0.f;
    float total = 0.f;

    for (int n = 1; n < size; n++) {
        const float mag = spectral[n];

        weighted += (mag - spectral[0]) / n;
        total    += mag;
    }

    return total <= FLT_EPSILON ? 0.f : weighted / total;
}
413 |
|
|
|
414 |
|
✗ |
/**
 * Spectral rolloff: frequency of the first bin at which the running magnitude
 * sum reaches 85% of the total magnitude sum.
 *
 * @param spectral magnitude bins
 * @param size     number of bins
 * @param max_freq frequency span covered by the bins
 * @return bin index scaled by max_freq / size; bin 0 is used when the
 *         threshold is never reached
 */
static float spectral_rolloff(const float *const spectral, int size, int max_freq)
{
    const float bin_width = max_freq / (float)size;
    float threshold = 0.f;
    float running = 0.f;
    int n;

    for (n = 0; n < size; n++)
        threshold += spectral[n];
    threshold *= 0.85f;

    for (n = 0; n < size; n++) {
        running += spectral[n];
        if (running >= threshold)
            break;
    }
    if (n >= size)
        n = 0;

    return n * bin_width;
}
434 |
|
|
|
435 |
|
✗ |
/**
 * Worker run through ff_filter_execute(): update the statistics of one slice
 * of channels.
 *
 * Channels are split evenly between the jobs. For every channel of this job
 * the stored window is shifted by one hop, the new samples are appended
 * (zero-padded when the frame is shorter than a hop), the window function is
 * applied and the transform is run. The statistics selected in s->measure are
 * then computed from the magnitudes of the first win_size / 2 bins.
 *
 * @param ctx     filter context
 * @param arg     input AVFrame carrying the new samples
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs
 * @return always 0
 */
static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioSpectralStatsContext *s = ctx->priv;
    AVFrame *in = arg;
    const float *lut = s->window_func_lut;
    const unsigned measure = s->measure;
    const int win_size = s->win_size;
    const int hop_size = s->hop_size;
    const int nb_bins  = win_size / 2;
    const int max_freq = in->sample_rate / 2;
    const int kept     = win_size - hop_size;
    const float norm   = 1.f / win_size;
    const int first    = (s->nb_channels * jobnr) / nb_jobs;
    const int last     = (s->nb_channels * (jobnr + 1)) / nb_jobs;

    for (int ch = first; ch < last; ch++) {
        ChannelSpectralStats *st = &s->stats[ch];
        AVComplexFloat *tx_out = s->fft_out[ch];
        AVComplexFloat *tx_in  = s->fft_in[ch];
        float *win      = (float *)s->window->extended_data[ch];
        float *mag      = s->magnitude[ch];
        float *prev_mag = s->prev_magnitude[ch];

        /* Slide the window: drop the oldest hop, append the new samples and
         * clear whatever the frame did not fill. */
        memmove(win, win + hop_size, kept * sizeof(*win));
        memcpy(win + kept, in->extended_data[ch], in->nb_samples * sizeof(*win));
        memset(win + kept + in->nb_samples, 0, (hop_size - in->nb_samples) * sizeof(*win));

        for (int n = 0; n < win_size; n++) {
            tx_in[n].re = win[n] * lut[n];
            tx_in[n].im = 0;
        }

        s->tx_fn(s->fft[ch], tx_out, tx_in, sizeof(*tx_in));

        /* Normalise the lower half of the spectrum and take its magnitude. */
        for (int n = 0; n < nb_bins; n++) {
            tx_out[n].re *= norm;
            tx_out[n].im *= norm;
            mag[n] = hypotf(tx_out[n].re, tx_out[n].im);
        }

        /* Mean, centroid and spread are also computed when only a statistic
         * depending on them was requested. */
        if (measure & (MEASURE_MEAN | MEASURE_VARIANCE))
            st->mean = spectral_mean(mag, nb_bins, max_freq);
        if (measure & MEASURE_VARIANCE)
            st->variance = spectral_variance(mag, nb_bins, max_freq, st->mean);
        if (measure & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS | MEASURE_CENTROID))
            st->centroid = spectral_centroid(mag, nb_bins, max_freq);
        if (measure & (MEASURE_SPREAD | MEASURE_KURTOSIS | MEASURE_SKEWNESS))
            st->spread = spectral_spread(mag, nb_bins, max_freq, st->centroid);
        if (measure & MEASURE_SKEWNESS)
            st->skewness = spectral_skewness(mag, nb_bins, max_freq, st->centroid, st->spread);
        if (measure & MEASURE_KURTOSIS)
            st->kurtosis = spectral_kurtosis(mag, nb_bins, max_freq, st->centroid, st->spread);
        if (measure & MEASURE_ENTROPY)
            st->entropy = spectral_entropy(mag, nb_bins, max_freq);
        if (measure & MEASURE_FLATNESS)
            st->flatness = spectral_flatness(mag, nb_bins, max_freq);
        if (measure & MEASURE_CREST)
            st->crest = spectral_crest(mag, nb_bins, max_freq);
        if (measure & MEASURE_FLUX)
            st->flux = spectral_flux(mag, prev_mag, nb_bins, max_freq);
        if (measure & MEASURE_SLOPE)
            st->slope = spectral_slope(mag, nb_bins, max_freq);
        if (measure & MEASURE_DECREASE)
            st->decrease = spectral_decrease(mag, nb_bins, max_freq);
        if (measure & MEASURE_ROLLOFF)
            st->rolloff = spectral_rolloff(mag, nb_bins, max_freq);

        /* Remember this spectrum for the next flux computation; the whole
         * win_size-element buffer is copied. */
        memcpy(prev_mag, mag, win_size * sizeof(*mag));
    }

    return 0;
}
505 |
|
|
|
506 |
|
✗ |
/**
 * Analyse one input frame, attach the statistics as metadata and pass the
 * audio on unchanged.
 *
 * A writable input frame is reused as output; otherwise a new buffer is
 * allocated and the input's properties and samples are copied into it.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; ownership is taken
 * @return value of ff_filter_frame() on success, AVERROR(ENOMEM) when the
 *         output buffer cannot be allocated, or the negative value returned
 *         by av_frame_copy_props() / av_frame_copy()
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AudioSpectralStatsContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out = in;
    int ret;

    if (!av_frame_is_writable(in)) {
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (out == NULL) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }

        ret = av_frame_copy_props(out, in);
        if (ret >= 0)
            ret = av_frame_copy(out, in);
        if (ret < 0)
            goto fail;
    }

    /* One job per channel at most; the workers read their samples from in. */
    ff_filter_execute(ctx, filter_channel, in, NULL,
                      FFMIN(inlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));

    set_metadata(s, &out->metadata);

    if (in != out)
        av_frame_free(&in);
    return ff_filter_frame(outlink, out);

fail:
    av_frame_free(&in);
    av_frame_free(&out);
    return ret;
}
545 |
|
|
|
546 |
|
✗ |
/**
 * Activation callback: feed the analysis one hop of samples at a time.
 *
 * Requests exactly hop_size samples from the input (minimum and maximum are
 * both hop_size) and hands any frame obtained to filter_frame(). When at least
 * another hop is still queued the filter marks itself ready again; otherwise
 * status and frame requests are forwarded between the links.
 *
 * @param ctx filter context
 * @return 0 when rescheduled, a negative error code from
 *         ff_inlink_consume_samples() or filter_frame(), FFERROR_NOT_READY
 *         when nothing could be done, or whatever the FF_FILTER_FORWARD_*
 *         macros return (NOTE(review): they are expected to return from this
 *         function when they forward something - confirm in filters.h)
 */
static int activate(AVFilterContext *ctx)
{
    AudioSpectralStatsContext *s = ctx->priv;
    AVFilterLink *inlink  = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *frame;
    int ret;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = ff_inlink_consume_samples(inlink, s->hop_size, s->hop_size, &frame);
    if (ret < 0)
        return ret;
    if (ret > 0) {
        ret = filter_frame(inlink, frame);
        if (ret < 0)
            return ret;
    }

    /* Enough input left for another hop: ask to be run again. */
    if (ff_inlink_queued_samples(inlink) >= s->hop_size) {
        ff_filter_set_ready(ctx, 10);
        return 0;
    }

    FF_FILTER_FORWARD_STATUS(inlink, outlink);
    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}
574 |
|
|
|
575 |
|
✗ |
/**
 * Release everything allocated by config_output().
 *
 * Every per-channel table is checked before its entries are freed, since a
 * table can still be NULL when config_output() returned early.
 *
 * @param ctx filter context
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioSpectralStatsContext *s = ctx->priv;
    const int nb_channels = s->nb_channels;

    /* Per-channel entries first ... */
    if (s->fft)
        for (int ch = 0; ch < nb_channels; ch++)
            av_tx_uninit(&s->fft[ch]);
    if (s->fft_in)
        for (int ch = 0; ch < nb_channels; ch++)
            av_freep(&s->fft_in[ch]);
    if (s->fft_out)
        for (int ch = 0; ch < nb_channels; ch++)
            av_freep(&s->fft_out[ch]);
    if (s->magnitude)
        for (int ch = 0; ch < nb_channels; ch++)
            av_freep(&s->magnitude[ch]);
    if (s->prev_magnitude)
        for (int ch = 0; ch < nb_channels; ch++)
            av_freep(&s->prev_magnitude[ch]);

    /* ... then the tables themselves. */
    av_freep(&s->fft);
    av_freep(&s->magnitude);
    av_freep(&s->prev_magnitude);
    av_freep(&s->fft_in);
    av_freep(&s->fft_out);
    av_freep(&s->stats);

    av_freep(&s->window_func_lut);
    av_frame_free(&s->window);
}
602 |
|
|
|
603 |
|
|
static const AVFilterPad aspectralstats_outputs[] = { |
604 |
|
|
{ |
605 |
|
|
.name = "default", |
606 |
|
|
.type = AVMEDIA_TYPE_AUDIO, |
607 |
|
|
.config_props = config_output, |
608 |
|
|
}, |
609 |
|
|
}; |
610 |
|
|
|
611 |
|
|
const AVFilter ff_af_aspectralstats = { |
612 |
|
|
.name = "aspectralstats", |
613 |
|
|
.description = NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."), |
614 |
|
|
.priv_size = sizeof(AudioSpectralStatsContext), |
615 |
|
|
.priv_class = &aspectralstats_class, |
616 |
|
|
.uninit = uninit, |
617 |
|
|
.activate = activate, |
618 |
|
|
FILTER_INPUTS(ff_audio_default_filterpad), |
619 |
|
|
FILTER_OUTPUTS(aspectralstats_outputs), |
620 |
|
|
FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP), |
621 |
|
|
.flags = AVFILTER_FLAG_SLICE_THREADS, |
622 |
|
|
}; |
623 |
|
|
|