| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2021 Boris Baracaldo | ||
| 3 | * Copyright (c) 2022 Thilo Borgmann | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | /** | ||
| 23 | * @file | ||
| 24 | * Calculate Spatial Info (SI) and Temporal Info (TI) scores | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <math.h> | ||
| 28 | |||
| 29 | #include "libavutil/imgutils.h" | ||
| 30 | #include "libavutil/internal.h" | ||
| 31 | #include "libavutil/mem.h" | ||
| 32 | #include "libavutil/opt.h" | ||
| 33 | |||
| 34 | #include "avfilter.h" | ||
| 35 | #include "filters.h" | ||
| 36 | #include "video.h" | ||
| 37 | |||
/* 3x3 horizontal Sobel kernel, stored row by row (indexed as ky * 3 + kx). */
static const int X_FILTER[9] = {
    1, 0, -1,
    2, 0, -2,
    1, 0, -1
};
| 43 | |||
/* 3x3 vertical Sobel kernel, stored row by row (indexed as ky * 3 + kx). */
static const int Y_FILTER[9] = {
    1, 2, 1,
    0, 0, 0,
    -1, -2, -1
};
| 49 | |||
/* Private state of one siti filter instance. */
typedef struct SiTiContext {
    const AVClass *class;   // class of this context (see siti_class)
    int pixel_depth;        // plane-0 pixel step in bytes: 1 selects the 8-bit path, otherwise 16-bit samples
    int width, height;      // input link dimensions, captured in config_input()
    uint64_t nb_frames;     // frames seen so far; incremented at the top of filter_frame()
    uint8_t *prev_frame;    // previous frame's plane-0 samples after range conversion, packed width * height
    float max_si;           // largest per-frame SI seen so far
    float max_ti;           // largest per-frame TI seen so far
    float min_si;           // smallest per-frame SI seen so far (seeded by the first frame)
    float min_ti;           // smallest per-frame TI seen so far (seeded by the first frame)
    float sum_si;           // running sum of SI, used for the summary average
    float sum_ti;           // running sum of TI, used for the summary average
    float *gradient_matrix; // Sobel gradient magnitudes, (width - 2) x (height - 2)
    float *motion_matrix;   // per-sample difference to the previous frame, width x height
    int full_range;         // non-zero when the current frame is treated as full range
    int print_summary;      // "print_summary" option: log min/max/average at uninit
} SiTiContext;
| 67 | |||
/*
 * Pixel formats the filter accepts, terminated by AV_PIX_FMT_NONE.
 * Only plane 0 of the frame is read by the SI/TI computation.
 */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
    AV_PIX_FMT_NONE
};
| 74 | |||
| 75 | 2 | static av_cold int init(AVFilterContext *ctx) | |
| 76 | { | ||
| 77 | // User options but no input data | ||
| 78 | 2 | SiTiContext *s = ctx->priv; | |
| 79 | 2 | s->max_si = 0; | |
| 80 | 2 | s->max_ti = 0; | |
| 81 | 2 | return 0; | |
| 82 | } | ||
| 83 | |||
| 84 | 2 | static av_cold void uninit(AVFilterContext *ctx) | |
| 85 | { | ||
| 86 | 2 | SiTiContext *s = ctx->priv; | |
| 87 | |||
| 88 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (s->print_summary) { |
| 89 | ✗ | float avg_si = s->sum_si / s->nb_frames; | |
| 90 | ✗ | float avg_ti = s->sum_ti / s->nb_frames; | |
| 91 | ✗ | av_log(ctx, AV_LOG_INFO, | |
| 92 | "SITI Summary:\nTotal frames: %"PRId64"\n\n" | ||
| 93 | "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n" | ||
| 94 | "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n", | ||
| 95 | ✗ | s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti | |
| 96 | ); | ||
| 97 | } | ||
| 98 | |||
| 99 | 2 | av_freep(&s->prev_frame); | |
| 100 | 2 | av_freep(&s->gradient_matrix); | |
| 101 | 2 | av_freep(&s->motion_matrix); | |
| 102 | 2 | } | |
| 103 | |||
| 104 | 1 | static int config_input(AVFilterLink *inlink) | |
| 105 | { | ||
| 106 | // Video input data available | ||
| 107 | 1 | AVFilterContext *ctx = inlink->dst; | |
| 108 | 1 | SiTiContext *s = ctx->priv; | |
| 109 | int max_pixsteps[4]; | ||
| 110 | size_t pixel_sz; | ||
| 111 | size_t data_sz; | ||
| 112 | size_t gradient_sz; | ||
| 113 | size_t motion_sz; | ||
| 114 | |||
| 115 | 1 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); | |
| 116 | 1 | av_image_fill_max_pixsteps(max_pixsteps, NULL, desc); | |
| 117 | |||
| 118 | // free previous buffers in case they are allocated already | ||
| 119 | 1 | av_freep(&s->prev_frame); | |
| 120 | 1 | av_freep(&s->gradient_matrix); | |
| 121 | 1 | av_freep(&s->motion_matrix); | |
| 122 | |||
| 123 | 1 | s->pixel_depth = max_pixsteps[0]; | |
| 124 | 1 | s->width = inlink->w; | |
| 125 | 1 | s->height = inlink->h; | |
| 126 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t); |
| 127 | 1 | data_sz = s->width * pixel_sz * s->height; | |
| 128 | |||
| 129 | 1 | s->prev_frame = av_malloc(data_sz); | |
| 130 | |||
| 131 | 1 | gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2); | |
| 132 | 1 | s->gradient_matrix = av_malloc(gradient_sz); | |
| 133 | |||
| 134 | 1 | motion_sz = s->width * sizeof(float) * s->height; | |
| 135 | 1 | s->motion_matrix = av_malloc(motion_sz); | |
| 136 | |||
| 137 |
3/6✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
1 | if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) { |
| 138 | ✗ | return AVERROR(ENOMEM); | |
| 139 | } | ||
| 140 | |||
| 141 | 1 | return 0; | |
| 142 | } | ||
| 143 | |||
| 144 | // Determine whether the video is in full or limited range. If not defined, assume limited. | ||
| 145 | 5 | static int is_full_range(AVFrame* frame) | |
| 146 | { | ||
| 147 | // If color range not specified, fallback to pixel format | ||
| 148 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB) |
| 149 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
5 | return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P; |
| 150 | ✗ | return frame->color_range == AVCOL_RANGE_JPEG; | |
| 151 | } | ||
| 152 | |||
// Map one limited-range sample to full range.
//
// factor scales the 8-bit limits to the sample depth (1 for 8 bits, 4 for
// 10 bits): limited range spans [16 * factor, 235 * factor] and is
// stretched to [0, 256 * factor - 1]. Samples outside the limited range are
// clamped first. factor must be positive.
//
// The clamp is done in integer arithmetic; this runs once per sample read,
// so it avoids an int -> float -> int round trip in the hottest loop.
static uint16_t convert_full_range(int factor, uint16_t y)
{
    const int shift       = 16 * factor;
    const int limit_upper = 235 * factor - shift;
    const int full_upper  = 256 * factor - 1;
    int limit_y           = y - shift;

    if (limit_y < 0)
        limit_y = 0;
    else if (limit_y > limit_upper)
        limit_y = limit_upper;

    return full_upper * limit_y / limit_upper;
}
| 168 | |||
| 169 | // Applies sobel convolution | ||
| 170 | 5 | static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize) | |
| 171 | { | ||
| 172 | double x_conv_sum; | ||
| 173 | double y_conv_sum; | ||
| 174 | float gradient; | ||
| 175 | int ki; | ||
| 176 | int kj; | ||
| 177 | int index; | ||
| 178 | uint16_t data; | ||
| 179 | 5 | int filter_width = 3; | |
| 180 | 5 | int filter_size = filter_width * filter_width; | |
| 181 | 5 | int stride = linesize / s->pixel_depth; | |
| 182 | // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4 | ||
| 183 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | int factor = s->pixel_depth == 1 ? 1 : 4; |
| 184 | |||
| 185 | // Dst matrix is smaller than src since we ignore edges that can't be convolved | ||
| 186 | #define CONVOLVE(bps) \ | ||
| 187 | { \ | ||
| 188 | uint##bps##_t *vsrc = (uint##bps##_t*)src; \ | ||
| 189 | for (int j = 1; j < s->height - 1; j++) { \ | ||
| 190 | for (int i = 1; i < s->width - 1; i++) { \ | ||
| 191 | x_conv_sum = 0.0; \ | ||
| 192 | y_conv_sum = 0.0; \ | ||
| 193 | for (int k = 0; k < filter_size; k++) { \ | ||
| 194 | ki = k % filter_width - 1; \ | ||
| 195 | kj = floor(k / filter_width) - 1; \ | ||
| 196 | index = (j + kj) * stride + (i + ki); \ | ||
| 197 | data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \ | ||
| 198 | x_conv_sum += data * X_FILTER[k]; \ | ||
| 199 | y_conv_sum += data * Y_FILTER[k]; \ | ||
| 200 | } \ | ||
| 201 | gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \ | ||
| 202 | dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \ | ||
| 203 | } \ | ||
| 204 | } \ | ||
| 205 | } | ||
| 206 | |||
| 207 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (s->pixel_depth == 2) { |
| 208 | ✗ | CONVOLVE(16); | |
| 209 | } else { | ||
| 210 |
7/8✗ Branch 0 not taken.
✓ Branch 1 taken 2655180 times.
✓ Branch 3 taken 2655180 times.
✓ Branch 4 taken 295020 times.
✓ Branch 5 taken 295020 times.
✓ Branch 6 taken 990 times.
✓ Branch 7 taken 990 times.
✓ Branch 8 taken 5 times.
|
2951195 | CONVOLVE(8); |
| 211 | } | ||
| 212 | 5 | } | |
| 213 | |||
| 214 | // Calculate pixel difference between current and previous frame, and update previous | ||
| 215 | 5 | static void calculate_motion(SiTiContext *s, const uint8_t *curr, | |
| 216 | float *motion_matrix, int linesize) | ||
| 217 | { | ||
| 218 | 5 | int stride = linesize / s->pixel_depth; | |
| 219 | float motion; | ||
| 220 | int curr_index; | ||
| 221 | int prev_index; | ||
| 222 | uint16_t curr_data; | ||
| 223 | // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4 | ||
| 224 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | int factor = s->pixel_depth == 1 ? 1 : 4; |
| 225 | |||
| 226 | // Previous frame is already converted to full range | ||
| 227 | #define CALCULATE(bps) \ | ||
| 228 | { \ | ||
| 229 | uint##bps##_t *vsrc = (uint##bps##_t*)curr; \ | ||
| 230 | uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame; \ | ||
| 231 | for (int j = 0; j < s->height; j++) { \ | ||
| 232 | for (int i = 0; i < s->width; i++) { \ | ||
| 233 | motion = 0; \ | ||
| 234 | curr_index = j * stride + i; \ | ||
| 235 | prev_index = j * s->width + i; \ | ||
| 236 | curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \ | ||
| 237 | if (s->nb_frames > 1) \ | ||
| 238 | motion = curr_data - vdst[prev_index]; \ | ||
| 239 | vdst[prev_index] = curr_data; \ | ||
| 240 | motion_matrix[j * s->width + i] = motion; \ | ||
| 241 | } \ | ||
| 242 | } \ | ||
| 243 | } | ||
| 244 | |||
| 245 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (s->pixel_depth == 2) { |
| 246 | ✗ | CALCULATE(16); | |
| 247 | } else { | ||
| 248 |
7/8✗ Branch 0 not taken.
✓ Branch 1 taken 300000 times.
✓ Branch 3 taken 240000 times.
✓ Branch 4 taken 60000 times.
✓ Branch 5 taken 300000 times.
✓ Branch 6 taken 1000 times.
✓ Branch 7 taken 1000 times.
✓ Branch 8 taken 5 times.
|
301005 | CALCULATE(8); |
| 249 | } | ||
| 250 | 5 | } | |
| 251 | |||
| 252 | 10 | static float std_deviation(float *img_metrics, int width, int height) | |
| 253 | { | ||
| 254 | 10 | int size = height * width; | |
| 255 | 10 | double mean = 0.0; | |
| 256 | 10 | double sqr_diff = 0; | |
| 257 | |||
| 258 |
2/2✓ Branch 0 taken 1990 times.
✓ Branch 1 taken 10 times.
|
2000 | for (int j = 0; j < height; j++) |
| 259 |
2/2✓ Branch 0 taken 595020 times.
✓ Branch 1 taken 1990 times.
|
597010 | for (int i = 0; i < width; i++) |
| 260 | 595020 | mean += img_metrics[j * width + i]; | |
| 261 | |||
| 262 | 10 | mean /= size; | |
| 263 | |||
| 264 |
2/2✓ Branch 0 taken 1990 times.
✓ Branch 1 taken 10 times.
|
2000 | for (int j = 0; j < height; j++) { |
| 265 |
2/2✓ Branch 0 taken 595020 times.
✓ Branch 1 taken 1990 times.
|
597010 | for (int i = 0; i < width; i++) { |
| 266 | 595020 | float mean_diff = img_metrics[j * width + i] - mean; | |
| 267 | 595020 | sqr_diff += (mean_diff * mean_diff); | |
| 268 | } | ||
| 269 | } | ||
| 270 | 10 | sqr_diff = sqr_diff / size; | |
| 271 | 10 | return sqrt(sqr_diff); | |
| 272 | } | ||
| 273 | |||
| 274 | 10 | static void set_meta(AVDictionary **metadata, const char *key, float d) | |
| 275 | { | ||
| 276 | char value[128]; | ||
| 277 | 10 | snprintf(value, sizeof(value), "%0.2f", d); | |
| 278 | 10 | av_dict_set(metadata, key, value, 0); | |
| 279 | 10 | } | |
| 280 | |||
| 281 | 5 | static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |
| 282 | { | ||
| 283 | 5 | AVFilterContext *ctx = inlink->dst; | |
| 284 | 5 | SiTiContext *s = ctx->priv; | |
| 285 | float si; | ||
| 286 | float ti; | ||
| 287 | |||
| 288 | 5 | s->full_range = is_full_range(frame); | |
| 289 | 5 | s->nb_frames++; | |
| 290 | |||
| 291 | // Calculate si and ti | ||
| 292 | 5 | convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]); | |
| 293 | 5 | calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]); | |
| 294 | 5 | si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2); | |
| 295 | 5 | ti = std_deviation(s->motion_matrix, s->width, s->height); | |
| 296 | |||
| 297 | // Calculate statistics | ||
| 298 | 5 | s->max_si = fmaxf(si, s->max_si); | |
| 299 | 5 | s->max_ti = fmaxf(ti, s->max_ti); | |
| 300 | 5 | s->sum_si += si; | |
| 301 | 5 | s->sum_ti += ti; | |
| 302 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | s->min_si = s->nb_frames == 1 ? si : fminf(si, s->min_si); |
| 303 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | s->min_ti = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti); |
| 304 | |||
| 305 | // Set si ti information in frame metadata | ||
| 306 | 5 | set_meta(&frame->metadata, "lavfi.siti.si", si); | |
| 307 | 5 | set_meta(&frame->metadata, "lavfi.siti.ti", ti); | |
| 308 | |||
| 309 | 5 | return ff_filter_frame(inlink->dst->outputs[0], frame); | |
| 310 | } | ||
| 311 | |||
// Byte offset of an option's backing field inside the private context
#define OFFSET(x) offsetof(SiTiContext, x)
// Flags shared by every option of this filter
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

// User options; the { NULL } entry terminates the table
static const AVOption siti_options[] = {
    // Boolean, off by default; when set, uninit() logs min/max/average SI and TI
    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
    { NULL }
};

// Defines siti_class, referenced by the filter definition as its priv_class
AVFILTER_DEFINE_CLASS(siti);
| 321 | |||
// Single video input pad: config_input() sizes the work buffers for the
// link, filter_frame() scores each incoming frame.
static const AVFilterPad avfilter_vf_siti_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
};
| 330 | |||
// Filter definition for "siti". It is flagged AVFILTER_FLAG_METADATA_ONLY:
// filter_frame() only adds metadata entries and forwards the frame.
const FFFilter ff_vf_siti = {
    .p.name        = "siti",
    .p.description = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
    .p.priv_class  = &siti_class,
    .p.flags       = AVFILTER_FLAG_METADATA_ONLY,
    .priv_size     = sizeof(SiTiContext),
    .init          = init,
    .uninit        = uninit,
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    FILTER_INPUTS(avfilter_vf_siti_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
};
| 343 |