Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com> |
3 |
|
|
* Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com> |
4 |
|
|
* |
5 |
|
|
* This file is part of FFmpeg. |
6 |
|
|
* |
7 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
8 |
|
|
* modify it under the terms of the GNU Lesser General Public |
9 |
|
|
* License as published by the Free Software Foundation; either |
10 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 |
|
|
* Lesser General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU Lesser General Public |
18 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
/** |
23 |
|
|
* @file |
24 |
|
|
* Calculate VMAF Motion score. |
25 |
|
|
*/ |
26 |
|
|
|
27 |
|
|
#include "libavutil/file_open.h" |
28 |
|
|
#include "libavutil/mem.h" |
29 |
|
|
#include "libavutil/opt.h" |
30 |
|
|
#include "libavutil/pixdesc.h" |
31 |
|
|
#include "avfilter.h" |
32 |
|
|
#include "filters.h" |
33 |
|
|
#include "formats.h" |
34 |
|
|
#include "video.h" |
35 |
|
|
#include "vmaf_motion.h" |
36 |
|
|
|
37 |
|
|
#define BIT_SHIFT 15 |
38 |
|
|
|
39 |
|
|
/*
 * Symmetric 5-tap blur kernel (taps sum to approximately 1.0).
 * ff_vmafmotion_init() converts these to BIT_SHIFT-bit fixed point.
 */
static const float FILTER_5[5] = {
    0.054488685, 0.244201342, 0.402619947, 0.244201342, 0.054488685
};
46 |
|
|
|
47 |
|
|
typedef struct VMAFMotionContext { |
48 |
|
|
const AVClass *class; |
49 |
|
|
VMAFMotionData data; |
50 |
|
|
FILE *stats_file; |
51 |
|
|
char *stats_file_str; |
52 |
|
|
} VMAFMotionContext; |
53 |
|
|
|
54 |
|
|
#define OFFSET(x) offsetof(VMAFMotionContext, x) |
55 |
|
|
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
56 |
|
|
|
57 |
|
|
static const AVOption vmafmotion_options[] = { |
58 |
|
|
{"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, |
59 |
|
|
{ NULL } |
60 |
|
|
}; |
61 |
|
|
|
62 |
|
|
AVFILTER_DEFINE_CLASS(vmafmotion); |
63 |
|
|
|
64 |
|
✗ |
/**
 * Sum of absolute differences between two planes of 16-bit samples.
 *
 * @param img1         first plane
 * @param img2         second plane
 * @param w            number of samples compared in each row
 * @param h            number of rows compared
 * @param _img1_stride distance between consecutive rows of img1, in bytes
 * @param _img2_stride distance between consecutive rows of img2, in bytes
 * @return accumulated |img1 - img2| over the w x h region
 */
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    /* Strides arrive in bytes; convert them to element counts. */
    const ptrdiff_t step1 = _img1_stride / sizeof(*img1);
    const ptrdiff_t step2 = _img2_stride / sizeof(*img2);
    const uint16_t *row1 = img1;
    const uint16_t *row2 = img2;
    uint64_t total = 0;

    for (int y = 0; y < h; y++, row1 += step1, row2 += step2) {
        /* Samples are promoted to int, so the difference cannot wrap. */
        for (int x = 0; x < w; x++)
            total += abs(row1[x] - row2[x]);
    }

    return total;
}
82 |
|
|
|
83 |
|
✗ |
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src, |
84 |
|
|
uint16_t *dst, int w, int h, ptrdiff_t _src_stride, |
85 |
|
|
ptrdiff_t _dst_stride) |
86 |
|
|
{ |
87 |
|
✗ |
ptrdiff_t src_stride = _src_stride / sizeof(*src); |
88 |
|
✗ |
ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); |
89 |
|
✗ |
int radius = filt_w / 2; |
90 |
|
✗ |
int borders_left = radius; |
91 |
|
✗ |
int borders_right = w - (filt_w - radius); |
92 |
|
|
int i, j, k; |
93 |
|
|
|
94 |
|
✗ |
for (i = 0; i < h; i++) { |
95 |
|
✗ |
for (j = 0; j < borders_left; j++) { |
96 |
|
✗ |
int sum = 0; |
97 |
|
✗ |
for (k = 0; k < filt_w; k++) { |
98 |
|
✗ |
int j_tap = FFABS(j - radius + k); |
99 |
|
✗ |
if (j_tap >= w) { |
100 |
|
✗ |
j_tap = w - (j_tap - w + 1); |
101 |
|
|
} |
102 |
|
✗ |
sum += filter[k] * src[i * src_stride + j_tap]; |
103 |
|
|
} |
104 |
|
✗ |
dst[i * dst_stride + j] = sum >> BIT_SHIFT; |
105 |
|
|
} |
106 |
|
|
|
107 |
|
✗ |
for (j = borders_left; j < borders_right; j++) { |
108 |
|
✗ |
int sum = 0; |
109 |
|
✗ |
for (k = 0; k < filt_w; k++) { |
110 |
|
✗ |
sum += filter[k] * src[i * src_stride + j - radius + k]; |
111 |
|
|
} |
112 |
|
✗ |
dst[i * dst_stride + j] = sum >> BIT_SHIFT; |
113 |
|
|
} |
114 |
|
|
|
115 |
|
✗ |
for (j = borders_right; j < w; j++) { |
116 |
|
✗ |
int sum = 0; |
117 |
|
✗ |
for (k = 0; k < filt_w; k++) { |
118 |
|
✗ |
int j_tap = FFABS(j - radius + k); |
119 |
|
✗ |
if (j_tap >= w) { |
120 |
|
✗ |
j_tap = w - (j_tap - w + 1); |
121 |
|
|
} |
122 |
|
✗ |
sum += filter[k] * src[i * src_stride + j_tap]; |
123 |
|
|
} |
124 |
|
✗ |
dst[i * dst_stride + j] = sum >> BIT_SHIFT; |
125 |
|
|
} |
126 |
|
|
} |
127 |
|
✗ |
} |
128 |
|
|
|
129 |
|
|
/*
 * Generates convolution_y_<bits>bit(): the vertical pass of the separable
 * blur. Source samples are read as `type`, the destination is uint16_t, and
 * each filter sum is shifted right by `bits`. Rows near the top/bottom edges
 * use mirrored taps; interior rows read the source directly.
 * Strides are given in bytes.
 */
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    const ptrdiff_t src_step = _src_stride / sizeof(*src); \
    const ptrdiff_t dst_step = _dst_stride / sizeof(*dst); \
    const int half       = filt_w / 2; \
    const int top_end    = half; \
    const int bottom_beg = h - (filt_w - half); \
    int y, x, t; \
    \
    /* Top border rows: mirror taps that fall outside the plane. */ \
    for (y = 0; y < top_end; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) { \
                int row = FFABS(y - half + t); \
                if (row >= h) \
                    row = h - (row - h + 1); \
                acc += filter[t] * src[row * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    /* Interior rows: every tap is inside the plane. */ \
    for (y = top_end; y < bottom_beg; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) \
                acc += filter[t] * src[(y - half + t) * src_step + x]; \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    /* Bottom border rows: mirror taps that fall outside the plane. */ \
    for (y = bottom_beg; y < h; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) { \
                int row = FFABS(y - half + t); \
                if (row >= h) \
                    row = h - (row - h + 1); \
                acc += filter[t] * src[row * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)
183 |
|
|
|
184 |
|
✗ |
/**
 * Fill the DSP function table with the C implementations.
 *
 * @param dsp table to populate
 * @param bpp sample depth; 10 selects the 10-bit vertical pass, any other
 *            value selects the 8-bit one
 */
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
    dsp->sad           = image_sad;
    dsp->convolution_x = convolution_x;

    if (bpp == 10)
        dsp->convolution_y = convolution_y_10bit;
    else
        dsp->convolution_y = convolution_y_8bit;
}
189 |
|
|
|
190 |
|
✗ |
/**
 * Blur the luma plane of a frame and score its motion against the previous
 * frame.
 *
 * The blurred plane is written to blur_data[0] (vertical pass through
 * temp_data, then horizontal pass). For every frame after the first, the
 * score is the SAD between the current and previous blurred planes, averaged
 * per pixel and scaled back by (BIT_SHIFT - 8) bits. The two blur buffers are
 * then swapped so that blur_data[1] holds the plane just computed.
 *
 * @param s   motion state; nb_frames and motion_sum are updated
 * @param ref frame whose data[0]/linesize[0] plane is analysed
 * @return the score for this frame (0.0 for the first frame)
 */
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score = 0.0;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (s->nb_frames) {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        // The normaliser is computed in double: as an int expression,
        // width * height << (BIT_SHIFT - 8) overflows for large frames
        // (e.g. 7680x4320), which is undefined behaviour.
        double norm = (double) s->width * s->height * (1 << (BIT_SHIFT - 8));

        score = sad / norm;
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}
214 |
|
|
|
215 |
|
✗ |
/**
 * Store a value in a dictionary, formatted with two decimals.
 *
 * @param metadata dictionary to update
 * @param key      entry name
 * @param d        value to format with "%0.2f"
 */
static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char text[128];

    snprintf(text, sizeof(text), "%0.2f", d);
    av_dict_set(metadata, key, text, 0);
}
221 |
|
|
|
222 |
|
✗ |
/**
 * Score one frame, attach the score to its metadata and, when a stats file
 * is open, append a line for it.
 *
 * The score is passed to set_meta() as a float. The frame counter written to
 * the stats file has already been incremented by ff_vmafmotion_process(), so
 * the first frame is reported with n:1.
 */
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    const double score = ff_vmafmotion_process(&s->data, ref);

    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);

    if (!s->stats_file)
        return;

    fprintf(s->stats_file,
            "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
}
234 |
|
|
|
235 |
|
|
|
236 |
|
✗ |
/**
 * Prepare motion-score state for frames of the given size and pixel format.
 *
 * Allocates the two blur buffers and the temporary buffer (rows padded to a
 * multiple of 32 bytes of uint16_t samples), converts FILTER_5 to BIT_SHIFT-bit
 * fixed point and selects the DSP functions for the format's component depth.
 *
 * @param s   state to initialise
 * @param w   frame width, at least 3
 * @param h   frame height, at least 3
 * @param fmt pixel format of the frames to be processed
 * @return 0 on success, AVERROR(EINVAL) for an unknown pixel format or a
 *         frame smaller than 3x3, AVERROR(ENOMEM) if a buffer cannot be
 *         allocated (in which case all three buffer pointers are left NULL)
 */
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
    size_t data_sz;
    int i;

    /* desc is dereferenced below, so reject formats without a descriptor. */
    if (!desc || w < 3 || h < 3)
        return AVERROR(EINVAL);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;
    s->blur_data[0] = av_malloc(data_sz);
    s->blur_data[1] = av_malloc(data_sz);
    s->temp_data    = av_malloc(data_sz);
    if (!s->blur_data[0] || !s->blur_data[1] || !s->temp_data) {
        /* Release whatever was obtained and leave no stale pointers behind. */
        av_freep(&s->blur_data[0]);
        av_freep(&s->blur_data[1]);
        av_freep(&s->temp_data);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}
265 |
|
|
|
266 |
|
✗ |
static int query_formats(const AVFilterContext *ctx, |
267 |
|
|
AVFilterFormatsConfig **cfg_in, |
268 |
|
|
AVFilterFormatsConfig **cfg_out) |
269 |
|
|
{ |
270 |
|
✗ |
AVFilterFormats *fmts_list = NULL; |
271 |
|
|
int format, ret; |
272 |
|
|
|
273 |
|
✗ |
for (format = 0; av_pix_fmt_desc_get(format); format++) { |
274 |
|
✗ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); |
275 |
|
✗ |
if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) && |
276 |
|
✗ |
(desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) && |
277 |
|
✗ |
(!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) && |
278 |
|
✗ |
(desc->comp[0].depth == 8 || desc->comp[0].depth == 10) && |
279 |
|
✗ |
(ret = ff_add_format(&fmts_list, format)) < 0) |
280 |
|
✗ |
return ret; |
281 |
|
|
} |
282 |
|
|
|
283 |
|
✗ |
return ff_set_common_formats2(ctx, cfg_in, cfg_out, fmts_list); |
284 |
|
|
} |
285 |
|
|
|
286 |
|
✗ |
/**
 * Input configuration callback: initialise the motion state from the
 * dimensions and pixel format of the filter's first input.
 *
 * @return the result of ff_vmafmotion_init()
 */
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx   = inlink->dst;
    VMAFMotionContext *s   = ctx->priv;
    AVFilterLink *ref_link = ctx->inputs[0];

    return ff_vmafmotion_init(&s->data, ref_link->w, ref_link->h,
                              ref_link->format);
}
294 |
|
|
|
295 |
|
✗ |
/**
 * Release the buffers held by the motion state and report the average score.
 *
 * The buffer pointers are reset to NULL, so calling this again on the same
 * state does not free them twice.
 *
 * @param s motion state
 * @return motion_sum divided by nb_frames, or 0.0 if no frame was processed
 */
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    /* av_freep() frees and NULLs, so no dangling pointers stay in the state. */
    av_freep(&s->blur_data[0]);
    av_freep(&s->blur_data[1]);
    av_freep(&s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}
303 |
|
|
|
304 |
|
✗ |
/**
 * Per-frame callback: score the frame, then forward it to the first output.
 *
 * @return the result of ff_filter_frame()
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx  = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];

    do_vmafmotion(ctx, ref);

    return ff_filter_frame(outlink, ref);
}
310 |
|
|
|
311 |
|
✗ |
/**
 * Filter init callback: open the stats output if one was requested.
 *
 * A "stats_file" value of "-" selects stdout; any other value is opened for
 * writing.
 *
 * @return 0 on success (including when no stats file is configured), or the
 *         AVERROR code derived from errno if the file cannot be opened
 */
static av_cold int init(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    const char *path = s->stats_file_str;

    if (!path)
        return 0;

    if (strcmp(path, "-") == 0) {
        s->stats_file = stdout;
        return 0;
    }

    s->stats_file = avpriv_fopen_utf8(path, "w");
    if (!s->stats_file) {
        /* Capture errno before any other call can overwrite it. */
        int err = AVERROR(errno);

        av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
               path, av_err2str(err));
        return err;
    }

    return 0;
}
331 |
|
|
|
332 |
|
✗ |
/**
 * Filter uninit callback: release the motion state, log the average score if
 * at least one frame was processed, and close the stats file unless it is
 * stdout.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    FILE *stats = s->stats_file;
    const double avg_motion = ff_vmafmotion_uninit(&s->data);

    if (s->data.nb_frames > 0)
        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);

    if (stats != NULL && stats != stdout)
        fclose(stats);
}
344 |
|
|
|
345 |
|
|
static const AVFilterPad vmafmotion_inputs[] = { |
346 |
|
|
{ |
347 |
|
|
.name = "reference", |
348 |
|
|
.type = AVMEDIA_TYPE_VIDEO, |
349 |
|
|
.filter_frame = filter_frame, |
350 |
|
|
.config_props = config_input_ref, |
351 |
|
|
}, |
352 |
|
|
}; |
353 |
|
|
|
354 |
|
|
const AVFilter ff_vf_vmafmotion = { |
355 |
|
|
.name = "vmafmotion", |
356 |
|
|
.description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."), |
357 |
|
|
.init = init, |
358 |
|
|
.uninit = uninit, |
359 |
|
|
.priv_size = sizeof(VMAFMotionContext), |
360 |
|
|
.priv_class = &vmafmotion_class, |
361 |
|
|
.flags = AVFILTER_FLAG_METADATA_ONLY, |
362 |
|
|
FILTER_INPUTS(vmafmotion_inputs), |
363 |
|
|
FILTER_OUTPUTS(ff_video_default_filterpad), |
364 |
|
|
FILTER_QUERY_FUNC2(query_formats), |
365 |
|
|
}; |
366 |
|
|
|