| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com> | ||
| 3 | * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | /** | ||
| 23 | * @file | ||
| 24 | * Calculate VMAF Motion score. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include "libavutil/file_open.h" | ||
| 28 | #include "libavutil/mem.h" | ||
| 29 | #include "libavutil/opt.h" | ||
| 30 | #include "libavutil/pixdesc.h" | ||
| 31 | #include "avfilter.h" | ||
| 32 | #include "filters.h" | ||
| 33 | #include "formats.h" | ||
| 34 | #include "video.h" | ||
| 35 | #include "vmaf_motion.h" | ||
| 36 | |||
/* Number of fractional bits of the fixed-point filter taps: ff_vmafmotion_init
 * scales FILTER_5 by (1 << BIT_SHIFT) and convolution_x shifts its accumulator
 * right by the same amount. */
#define BIT_SHIFT 15

/* Symmetric 5-tap smoothing kernel used for both the vertical and the
 * horizontal blur pass. The taps add up to approximately 1.0. */
static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};
| 46 | |||
/* Private context of one vmafmotion filter instance. */
typedef struct VMAFMotionContext {
    const AVClass *class;   /* option class of this context */
    VMAFMotionData data;    /* state handed to the ff_vmafmotion_*() functions */
    FILE *stats_file;       /* stream opened in init(): a file, stdout, or NULL */
    char *stats_file_str;   /* value of the "stats_file" option; "-" selects stdout */
} VMAFMotionContext;
| 53 | |||
/* Helpers for the option table: byte offset of a context field, and the
 * flag set shared by every option. */
#define OFFSET(x) offsetof(VMAFMotionContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* Options of the filter. The only one is the (optional) path of the
 * per-frame statistics file, stored in VMAFMotionContext.stats_file_str. */
static const AVOption vmafmotion_options[] = {
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(vmafmotion);
| 63 | |||
/**
 * Sum of absolute differences between two planes of 16-bit samples.
 *
 * @param img1         first plane
 * @param img2         second plane
 * @param w            number of samples compared per row
 * @param h            number of rows
 * @param _img1_stride distance between rows of img1, in bytes
 * @param _img2_stride distance between rows of img2, in bytes
 * @return the accumulated |img1 - img2| over the w x h area
 */
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    /* byte strides converted to strides in samples */
    const ptrdiff_t step1 = _img1_stride / sizeof(*img1);
    const ptrdiff_t step2 = _img2_stride / sizeof(*img2);
    uint64_t total = 0;
    int row;

    for (row = 0; row < h; row++) {
        const uint16_t *a = img1 + row * step1;
        const uint16_t *b = img2 + row * step2;
        int col;

        for (col = 0; col < w; col++)
            total += abs(a[col] - b[col]);
    }

    return total;
}
| 82 | |||
| 83 | ✗ | static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src, | |
| 84 | uint16_t *dst, int w, int h, ptrdiff_t _src_stride, | ||
| 85 | ptrdiff_t _dst_stride) | ||
| 86 | { | ||
| 87 | ✗ | ptrdiff_t src_stride = _src_stride / sizeof(*src); | |
| 88 | ✗ | ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); | |
| 89 | ✗ | int radius = filt_w / 2; | |
| 90 | ✗ | int borders_left = radius; | |
| 91 | ✗ | int borders_right = w - (filt_w - radius); | |
| 92 | int i, j, k; | ||
| 93 | |||
| 94 | ✗ | for (i = 0; i < h; i++) { | |
| 95 | ✗ | for (j = 0; j < borders_left; j++) { | |
| 96 | ✗ | int sum = 0; | |
| 97 | ✗ | for (k = 0; k < filt_w; k++) { | |
| 98 | ✗ | int j_tap = FFABS(j - radius + k); | |
| 99 | ✗ | if (j_tap >= w) { | |
| 100 | ✗ | j_tap = w - (j_tap - w + 1); | |
| 101 | } | ||
| 102 | ✗ | sum += filter[k] * src[i * src_stride + j_tap]; | |
| 103 | } | ||
| 104 | ✗ | dst[i * dst_stride + j] = sum >> BIT_SHIFT; | |
| 105 | } | ||
| 106 | |||
| 107 | ✗ | for (j = borders_left; j < borders_right; j++) { | |
| 108 | ✗ | int sum = 0; | |
| 109 | ✗ | for (k = 0; k < filt_w; k++) { | |
| 110 | ✗ | sum += filter[k] * src[i * src_stride + j - radius + k]; | |
| 111 | } | ||
| 112 | ✗ | dst[i * dst_stride + j] = sum >> BIT_SHIFT; | |
| 113 | } | ||
| 114 | |||
| 115 | ✗ | for (j = borders_right; j < w; j++) { | |
| 116 | ✗ | int sum = 0; | |
| 117 | ✗ | for (k = 0; k < filt_w; k++) { | |
| 118 | ✗ | int j_tap = FFABS(j - radius + k); | |
| 119 | ✗ | if (j_tap >= w) { | |
| 120 | ✗ | j_tap = w - (j_tap - w + 1); | |
| 121 | } | ||
| 122 | ✗ | sum += filter[k] * src[i * src_stride + j_tap]; | |
| 123 | } | ||
| 124 | ✗ | dst[i * dst_stride + j] = sum >> BIT_SHIFT; | |
| 125 | } | ||
| 126 | } | ||
| 127 | ✗ | } | |
| 128 | |||
/*
 * conv_y_fn(type, bits) defines convolution_y_<bits>bit(): the vertical pass
 * of the separable filter.
 *
 * The generated function reads the input plane _src as samples of `type`
 * (strides are given in bytes), applies the filt_w taps along each column and
 * stores the accumulator shifted right by `bits` as uint16_t in dst.
 * Rows whose taps would fall outside the plane are handled in separate
 * top/bottom loops that mirror the row index back into it: a negative row n
 * is read at -n, and a row r >= h is read at h - (r - h + 1).
 */
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
    int radius = filt_w / 2; \
    int borders_top = radius; \
    int borders_bottom = h - (filt_w - radius); \
    int i, j, k; \
    int sum = 0; \
    \
    for (i = 0; i < borders_top; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_top; i < borders_bottom; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_bottom; i < h; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
}

/* 8-bit input samples (uint8_t) and 10-bit input samples (stored as uint16_t) */
conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)
| 183 | |||
| 184 | ✗ | static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) { | |
| 185 | ✗ | dsp->convolution_x = convolution_x; | |
| 186 | ✗ | dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit; | |
| 187 | ✗ | dsp->sad = image_sad; | |
| 188 | ✗ | } | |
| 189 | |||
| 190 | ✗ | double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref) | |
| 191 | { | ||
| 192 | double score; | ||
| 193 | |||
| 194 | ✗ | s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data, | |
| 195 | ✗ | s->width, s->height, ref->linesize[0], s->stride); | |
| 196 | ✗ | s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0], | |
| 197 | s->width, s->height, s->stride, s->stride); | ||
| 198 | |||
| 199 | ✗ | if (!s->nb_frames) { | |
| 200 | ✗ | score = 0.0; | |
| 201 | } else { | ||
| 202 | ✗ | uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0], | |
| 203 | s->width, s->height, s->stride, s->stride); | ||
| 204 | // the output score is always normalized to 8 bits | ||
| 205 | ✗ | score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8))); | |
| 206 | } | ||
| 207 | |||
| 208 | ✗ | FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]); | |
| 209 | ✗ | s->nb_frames++; | |
| 210 | ✗ | s->motion_sum += score; | |
| 211 | |||
| 212 | ✗ | return score; | |
| 213 | } | ||
| 214 | |||
| 215 | ✗ | static void set_meta(AVDictionary **metadata, const char *key, float d) | |
| 216 | { | ||
| 217 | char value[128]; | ||
| 218 | ✗ | snprintf(value, sizeof(value), "%0.2f", d); | |
| 219 | ✗ | av_dict_set(metadata, key, value, 0); | |
| 220 | ✗ | } | |
| 221 | |||
| 222 | ✗ | static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref) | |
| 223 | { | ||
| 224 | ✗ | VMAFMotionContext *s = ctx->priv; | |
| 225 | double score; | ||
| 226 | |||
| 227 | ✗ | score = ff_vmafmotion_process(&s->data, ref); | |
| 228 | ✗ | set_meta(&ref->metadata, "lavfi.vmafmotion.score", score); | |
| 229 | ✗ | if (s->stats_file) { | |
| 230 | ✗ | fprintf(s->stats_file, | |
| 231 | "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score); | ||
| 232 | } | ||
| 233 | ✗ | } | |
| 234 | |||
| 235 | |||
| 236 | ✗ | int ff_vmafmotion_init(VMAFMotionData *s, | |
| 237 | int w, int h, enum AVPixelFormat fmt) | ||
| 238 | { | ||
| 239 | size_t data_sz; | ||
| 240 | int i; | ||
| 241 | ✗ | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); | |
| 242 | |||
| 243 | ✗ | if (w < 3 || h < 3) | |
| 244 | ✗ | return AVERROR(EINVAL); | |
| 245 | |||
| 246 | ✗ | s->width = w; | |
| 247 | ✗ | s->height = h; | |
| 248 | ✗ | s->stride = FFALIGN(w * sizeof(uint16_t), 32); | |
| 249 | |||
| 250 | ✗ | data_sz = (size_t) s->stride * h; | |
| 251 | ✗ | if (!(s->blur_data[0] = av_malloc(data_sz)) || | |
| 252 | ✗ | !(s->blur_data[1] = av_malloc(data_sz)) || | |
| 253 | ✗ | !(s->temp_data = av_malloc(data_sz))) { | |
| 254 | ✗ | return AVERROR(ENOMEM); | |
| 255 | } | ||
| 256 | |||
| 257 | ✗ | for (i = 0; i < 5; i++) { | |
| 258 | ✗ | s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT)); | |
| 259 | } | ||
| 260 | |||
| 261 | ✗ | vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth); | |
| 262 | |||
| 263 | ✗ | return 0; | |
| 264 | } | ||
| 265 | |||
| 266 | ✗ | static int query_formats(const AVFilterContext *ctx, | |
| 267 | AVFilterFormatsConfig **cfg_in, | ||
| 268 | AVFilterFormatsConfig **cfg_out) | ||
| 269 | { | ||
| 270 | ✗ | AVFilterFormats *fmts_list = NULL; | |
| 271 | int format, ret; | ||
| 272 | |||
| 273 | ✗ | for (format = 0; av_pix_fmt_desc_get(format); format++) { | |
| 274 | ✗ | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); | |
| 275 | ✗ | if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) && | |
| 276 | ✗ | (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) && | |
| 277 | ✗ | (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) && | |
| 278 | ✗ | (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) && | |
| 279 | ✗ | (ret = ff_add_format(&fmts_list, format)) < 0) | |
| 280 | ✗ | return ret; | |
| 281 | } | ||
| 282 | |||
| 283 | ✗ | return ff_set_common_formats2(ctx, cfg_in, cfg_out, fmts_list); | |
| 284 | } | ||
| 285 | |||
| 286 | ✗ | static int config_input_ref(AVFilterLink *inlink) | |
| 287 | { | ||
| 288 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 289 | ✗ | VMAFMotionContext *s = ctx->priv; | |
| 290 | |||
| 291 | ✗ | return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w, | |
| 292 | ✗ | ctx->inputs[0]->h, ctx->inputs[0]->format); | |
| 293 | } | ||
| 294 | |||
| 295 | ✗ | double ff_vmafmotion_uninit(VMAFMotionData *s) | |
| 296 | { | ||
| 297 | ✗ | av_free(s->blur_data[0]); | |
| 298 | ✗ | av_free(s->blur_data[1]); | |
| 299 | ✗ | av_free(s->temp_data); | |
| 300 | |||
| 301 | ✗ | return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0; | |
| 302 | } | ||
| 303 | |||
| 304 | ✗ | static int filter_frame(AVFilterLink *inlink, AVFrame *ref) | |
| 305 | { | ||
| 306 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 307 | ✗ | do_vmafmotion(ctx, ref); | |
| 308 | ✗ | return ff_filter_frame(ctx->outputs[0], ref); | |
| 309 | } | ||
| 310 | |||
| 311 | ✗ | static av_cold int init(AVFilterContext *ctx) | |
| 312 | { | ||
| 313 | ✗ | VMAFMotionContext *s = ctx->priv; | |
| 314 | |||
| 315 | ✗ | if (s->stats_file_str) { | |
| 316 | ✗ | if (!strcmp(s->stats_file_str, "-")) { | |
| 317 | ✗ | s->stats_file = stdout; | |
| 318 | } else { | ||
| 319 | ✗ | s->stats_file = avpriv_fopen_utf8(s->stats_file_str, "w"); | |
| 320 | ✗ | if (!s->stats_file) { | |
| 321 | ✗ | int err = AVERROR(errno); | |
| 322 | ✗ | av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n", | |
| 323 | ✗ | s->stats_file_str, av_err2str(err)); | |
| 324 | ✗ | return err; | |
| 325 | } | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | ✗ | return 0; | |
| 330 | } | ||
| 331 | |||
| 332 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 333 | { | ||
| 334 | ✗ | VMAFMotionContext *s = ctx->priv; | |
| 335 | ✗ | double avg_motion = ff_vmafmotion_uninit(&s->data); | |
| 336 | |||
| 337 | ✗ | if (s->data.nb_frames > 0) { | |
| 338 | ✗ | av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion); | |
| 339 | } | ||
| 340 | |||
| 341 | ✗ | if (s->stats_file && s->stats_file != stdout) | |
| 342 | ✗ | fclose(s->stats_file); | |
| 343 | ✗ | } | |
| 344 | |||
/* The single video input pad, named "reference": config_input_ref() runs
 * when the link is configured and filter_frame() for every frame. */
static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
};
| 353 | |||
/* Filter descriptor for "vmafmotion". filter_frame() forwards the very frame
 * it received, after a metadata entry has been added to it; the filter is
 * flagged AVFILTER_FLAG_METADATA_ONLY. */
const FFFilter ff_vf_vmafmotion = {
    .p.name        = "vmafmotion",
    .p.description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .p.priv_class  = &vmafmotion_class,
    .p.flags       = AVFILTER_FLAG_METADATA_ONLY,
    .init          = init,
    .uninit        = uninit,
    .priv_size     = sizeof(VMAFMotionContext),
    FILTER_INPUTS(vmafmotion_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_QUERY_FUNC2(query_formats),
};
| 366 |