FFmpeg coverage


Directory:  ../../../ffmpeg/
File:       src/libavfilter/vf_vmafmotion.c
Date:       2024-11-20 23:03:26

                 Exec   Total   Coverage
Lines:              0     130       0.0%
Functions:          0      15       0.0%
Branches:           0     116       0.0%

Source:
/*
 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Calculate VMAF Motion score.
 */

#include "libavutil/file_open.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "filters.h"
#include "formats.h"
#include "video.h"
#include "vmaf_motion.h"

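/* FILTER_5 below is a 5-tap blur kernel whose floating-point taps sum to
 * ~1.0; it is quantized to Q15 fixed point in ff_vmafmotion_init()
 * (BIT_SHIFT = 15). For example, lrint(0.402619947 * 32768) is roughly
 * 13193, and the five quantized taps add up to 32767, so a right shift by
 * BIT_SHIFT renormalizes each convolution result. */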
#define BIT_SHIFT 15

static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};

typedef struct VMAFMotionContext {
    const AVClass *class;
    VMAFMotionData data;
    FILE *stats_file;
    char *stats_file_str;
} VMAFMotionContext;

#define OFFSET(x) offsetof(VMAFMotionContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption vmafmotion_options[] = {
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(vmafmotion);

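/* Sum of absolute differences between two 16-bit planes; the byte strides
 * are converted to element strides before walking the rows. */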
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
    uint64_t sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            sum += abs(img1[j] - img2[j]);
        }
        img1 += img1_stride;
        img2 += img2_stride;
    }

    return sum;
}

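/* Horizontal 1-D convolution with a Q15 fixed-point kernel. Samples are
 * mirrored at the left/right borders, and each accumulated sum is brought
 * back to the input range with a right shift by BIT_SHIFT. */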
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
                          ptrdiff_t _dst_stride)
{
    ptrdiff_t src_stride = _src_stride / sizeof(*src);
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
    int radius = filt_w / 2;
    int borders_left = radius;
    int borders_right = w - (filt_w - radius);
    int i, j, k;

    for (i = 0; i < h; i++) {
        for (j = 0; j < borders_left; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_left; j < borders_right; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                sum += filter[k] * src[i * src_stride + j - radius + k];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_right; j < w; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }
    }
}

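/* Vertical 1-D convolution, templated over the input sample type. The
 * result is shifted right by the bit depth (8 or 10) rather than by
 * BIT_SHIFT, which maps both 8-bit and 10-bit input onto the same ~15-bit
 * intermediate range before the horizontal pass. */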
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
    int radius = filt_w / 2; \
    int borders_top = radius; \
    int borders_bottom = h - (filt_w - radius); \
    int i, j, k; \
    int sum = 0; \
\
    for (i = 0; i < borders_top; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_top; i < borders_bottom; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_bottom; i < h; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)

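/* Select the DSP routines for the configured bit depth (8 or 10 bit). */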
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
    dsp->convolution_x = convolution_x;
    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
    dsp->sad = image_sad;
}

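/* Per-frame motion score: blur the luma plane with the separable 5-tap
 * kernel (vertical then horizontal pass), take the SAD against the
 * previously blurred frame, and normalize so the score is on an 8-bit
 * scale regardless of input depth. The first frame has no predecessor and
 * scores 0; the blurred planes are swapped so the current one becomes the
 * reference for the next call. */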
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (!s->nb_frames) {
        score = 0.0;
    } else {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}

static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char value[128];
    snprintf(value, sizeof(value), "%0.2f", d);
    av_dict_set(metadata, key, value, 0);
}

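/* Score one frame, export it as the lavfi.vmafmotion.score metadata entry
 * and, if requested, append a line to the per-frame stats file. */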
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    double score;

    score = ff_vmafmotion_process(&s->data, ref);
    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
    if (s->stats_file) {
        fprintf(s->stats_file,
                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
    }
}

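/* Allocate the two blurred-frame buffers and the temporary plane (16-bit
 * samples, 32-byte-aligned stride), quantize FILTER_5 to Q15 fixed point
 * and pick the DSP routines for the input bit depth. Frames smaller than
 * 3x3 are rejected. */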
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    size_t data_sz;
    int i;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    if (w < 3 || h < 3)
        return AVERROR(EINVAL);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;
    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
        !(s->blur_data[1] = av_malloc(data_sz)) ||
        !(s->temp_data = av_malloc(data_sz))) {
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}

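/* Accept planar (or single-component) formats with 8- or 10-bit samples,
 * excluding RGB, hardware, bitstream and paletted formats; depths above
 * 8 bits must be in native endianness. */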
static int query_formats(const AVFilterContext *ctx,
                         AVFilterFormatsConfig **cfg_in,
                         AVFilterFormatsConfig **cfg_out)
{
    AVFilterFormats *fmts_list = NULL;
    int format, ret;

    for (format = 0; av_pix_fmt_desc_get(format); format++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
            (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
            (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
            (ret = ff_add_format(&fmts_list, format)) < 0)
            return ret;
    }

    return ff_set_common_formats2(ctx, cfg_in, cfg_out, fmts_list);
}

static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VMAFMotionContext *s = ctx->priv;

    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
                              ctx->inputs[0]->h, ctx->inputs[0]->format);
}

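/* Free the blur/temp buffers and return the average motion score over all
 * processed frames (0 if no frame was seen). */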
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    av_free(s->blur_data[0]);
    av_free(s->blur_data[1]);
    av_free(s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx = inlink->dst;
    do_vmafmotion(ctx, ref);
    return ff_filter_frame(ctx->outputs[0], ref);
}

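/* Open the optional stats file; "-" sends the per-frame log to stdout. */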
static av_cold int init(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;

    if (s->stats_file_str) {
        if (!strcmp(s->stats_file_str, "-")) {
            s->stats_file = stdout;
        } else {
            s->stats_file = avpriv_fopen_utf8(s->stats_file_str, "w");
            if (!s->stats_file) {
                int err = AVERROR(errno);
                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
                       s->stats_file_str, av_err2str(err));
                return err;
            }
        }
    }

    return 0;
}

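/* Log the average motion score and close the stats file (unless it is
 * stdout). */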
static av_cold void uninit(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    double avg_motion = ff_vmafmotion_uninit(&s->data);

    if (s->data.nb_frames > 0) {
        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
    }

    if (s->stats_file && s->stats_file != stdout)
        fclose(s->stats_file);
}

static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name = "reference",
        .type = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
};

const AVFilter ff_vf_vmafmotion = {
    .name = "vmafmotion",
    .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .init = init,
    .uninit = uninit,
    .priv_size = sizeof(VMAFMotionContext),
    .priv_class = &vmafmotion_class,
    .flags = AVFILTER_FLAG_METADATA_ONLY,
    FILTER_INPUTS(vmafmotion_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_QUERY_FUNC2(query_formats),
};
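/* For reference, a minimal sketch of exercising this filter from the
 * ffmpeg CLI (the input file name is only illustrative); per-frame scores
 * land in frame metadata and, here, in motion.log, and the average is
 * logged on exit:
 *
 *     ffmpeg -i input.mp4 -vf vmafmotion=stats_file=motion.log -f null -
 */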