| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (C) 2010-2011 Kevin Stone | ||
| 3 | * Copyright (C) 2016 Paul B Mahol | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; either version 2 of the License, or | ||
| 10 | * (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License along | ||
| 18 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <float.h> | ||
| 23 | |||
| 24 | #include "libavutil/common.h" | ||
| 25 | #include "libavutil/file_open.h" | ||
| 26 | #include "libavutil/float_dsp.h" | ||
| 27 | #include "libavutil/imgutils.h" | ||
| 28 | #include "libavutil/mem.h" | ||
| 29 | #include "libavutil/mem_internal.h" | ||
| 30 | #include "libavutil/opt.h" | ||
| 31 | #include "libavutil/pixdesc.h" | ||
| 32 | #include "avfilter.h" | ||
| 33 | #include "filters.h" | ||
| 34 | #include "video.h" | ||
| 35 | |||
/* Size in bytes that the nnedi3 weights file is expected to have
 * (presumably validated when the file is loaded — loader not in view). */
static const size_t NNEDI_WEIGHTS_SIZE = 13574928;
/* Predictor window width/height for each of the 7 "nsize" settings. */
static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
/* Neuron count for each of the 5 "nns" settings. */
static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };
| 40 | |||
/* Weights and biases for the small prescreener networks.
 * Layer 0/1 are used by both prescreener variants; layer 2 is only
 * read by the "old" prescreener (see process_old()/process_new()).
 * Buffers are 32-byte aligned for the float DSP routines. */
typedef struct PrescreenerCoefficients {
    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
    DECLARE_ALIGNED(32, float, bias_l0)[4];

    DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
    DECLARE_ALIGNED(32, float, bias_l1)[4];

    DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
    DECLARE_ALIGNED(32, float, bias_l2)[4];
} PrescreenerCoefficients;
| 51 | |||
/* One predictor model: window geometry plus softmax/elliott weight and
 * bias vectors for quality pass 1 and the optional pass 2.
 * All q1/q2 pointers point into the single allocation held by `data`
 * (see allocate_model()); only `data` is freed. */
typedef struct PredictorCoefficients {
    int xdim, ydim, nns, nsize;   // nsize == xdim * ydim
    float *data;                  // backing allocation for all arrays below
    float *softmax_q1;
    float *elliott_q1;
    float *softmax_bias_q1;
    float *elliott_bias_q1;
    float *softmax_q2;
    float *elliott_q2;
    float *softmax_bias_q2;
    float *elliott_bias_q2;
} PredictorCoefficients;
| 64 | |||
/* Private context of the nnedi filter. */
typedef struct NNEDIContext {
    const AVClass *class;

    char *weights_file;     // path of the weights file ("weights" option)

    AVFrame *prev;          // buffered previous input frame (filter runs one frame behind)
    int eof;                // set after the EOF flush frame has been injected
    int64_t pts;            // pts (in output time base) for the next output frame

    AVFloatDSPContext *fdsp; // provides scalarproduct_float for dot_dsp()
    int depth;
    int nb_planes;
    int nb_threads;
    int linesize[4];
    int planewidth[4];
    int planeheight[4];
    int field_n;            // which field to synthesize next (toggled in double-rate modes)

    PrescreenerCoefficients prescreener[4];
    PredictorCoefficients coeffs[2][5][7]; // indexed [etype][nnsparam][nsize]

    float half;
    float in_scale;         // scale applied when reading pixels to float
    float out_scale;        // scale applied when writing float back to pixels

    // Parameters
    int deint;
    int field;
    int process_plane;
    int nsize;
    int nnsparam;
    int qual;
    int etype;
    int pscrn;

    int input_size;
    uint8_t **prescreen_buf; // per-job prescreen decision rows (indexed by jobnr)
    float **input_buf;       // per-job padded float input scanlines
    float **output_buf;      // per-job float output scanlines

    /* Pixel I/O and prescreening entry points, selected elsewhere
     * (presumably per bit depth and pscrn mode — selection not in view). */
    void (*read)(const uint8_t *src, float *dst,
                 int src_stride, int dst_stride,
                 int width, int height, float scale);
    void (*write)(const float *src, uint8_t *dst,
                  int src_stride, int dst_stride,
                  int width, int height, int depth, float scale);
    void (*prescreen[2])(AVFilterContext *ctx,
                         const void *src, ptrdiff_t src_stride,
                         uint8_t *prescreen, int N,
                         const PrescreenerCoefficients *const coeffs);
} NNEDIContext;
| 116 | |||
#define OFFSET(x) offsetof(NNEDIContext, x)
/* RFLAGS options may be changed at runtime via AVOption; FLAGS may not. */
#define RFLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/* User-visible options of the nnedi filter. */
static const AVOption nnedi_options[] = {
    {"weights", "set weights file", OFFSET(weights_file), AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
    {"deint", "set which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, .unit = "deint" },
    {"all",        "deinterlace all frames",                       0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "deint" },
    {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "deint" },
    {"field", "set mode of operation", OFFSET(field), AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, RFLAGS, .unit = "field" },
    {"af", "use frame flags, both fields",  0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, RFLAGS, .unit = "field" },
    {"a",  "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, RFLAGS, .unit = "field" },
    {"t",  "use top field only",            0, AV_OPT_TYPE_CONST, {.i64=0},  0, 0, RFLAGS, .unit = "field" },
    {"b",  "use bottom field only",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, RFLAGS, .unit = "field" },
    {"tf", "use both fields, top first",    0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, RFLAGS, .unit = "field" },
    {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, RFLAGS, .unit = "field" },
    {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 15, RFLAGS },
    {"nsize", "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, RFLAGS, .unit = "nsize" },
    {"s8x6",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s16x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s32x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s48x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s8x4",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s16x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, RFLAGS, .unit = "nsize" },
    {"s32x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, RFLAGS, .unit = "nsize" },
    {"nns", "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, RFLAGS, .unit = "nns" },
    {"n16",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "nns" },
    {"n32",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "nns" },
    {"n64",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, .unit = "nns" },
    {"n128", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, .unit = "nns" },
    {"n256", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, .unit = "nns" },
    {"qual", "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, RFLAGS, .unit = "qual" },
    {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "qual" },
    {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, .unit = "qual" },
    {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, .unit = "etype" },
    {"a",  "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "etype" },
    {"abs","weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "etype" },
    {"s",  "weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "etype" },
    {"mse","weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "etype" },
    {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 4, RFLAGS, .unit = "pscrn" },
    {"none",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, .unit = "pscrn" },
    {"original", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, .unit = "pscrn" },
    {"new",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, .unit = "pscrn" },
    {"new2",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, .unit = "pscrn" },
    {"new3",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, .unit = "pscrn" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(nnedi);
| 166 | |||
| 167 | ✗ | static int config_output(AVFilterLink *outlink) | |
| 168 | { | ||
| 169 | ✗ | AVFilterContext *ctx = outlink->src; | |
| 170 | ✗ | const NNEDIContext *const s = ctx->priv; | |
| 171 | |||
| 172 | ✗ | outlink->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2}); | |
| 173 | ✗ | outlink->w = ctx->inputs[0]->w; | |
| 174 | ✗ | outlink->h = ctx->inputs[0]->h; | |
| 175 | |||
| 176 | ✗ | if (s->field == -2 || s->field > 1) { | |
| 177 | ✗ | FilterLink *il = ff_filter_link(ctx->inputs[0]); | |
| 178 | ✗ | FilterLink *ol = ff_filter_link(outlink); | |
| 179 | ✗ | ol->frame_rate = av_mul_q(il->frame_rate, (AVRational){2, 1}); | |
| 180 | } | ||
| 181 | |||
| 182 | ✗ | return 0; | |
| 183 | } | ||
| 184 | |||
/* Planar 8..16-bit gray/YUV/GBR formats supported by the filter. */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_GRAY8,
    AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
    AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
    AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
    AV_PIX_FMT_YUVJ411P,
    AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
    AV_PIX_FMT_YUV440P10,
    AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
    AV_PIX_FMT_YUV440P12,
    AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
    AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
    AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
    AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16,
    AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
    AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
    AV_PIX_FMT_NONE
};
| 210 | |||
| 211 | ✗ | static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input, | |
| 212 | int n, float scale, float bias) | ||
| 213 | { | ||
| 214 | float sum, y; | ||
| 215 | |||
| 216 | ✗ | sum = s->fdsp->scalarproduct_float(kernel, input, n); | |
| 217 | |||
| 218 | ✗ | y = sum * scale + bias + 1e-20f; | |
| 219 | |||
| 220 | ✗ | return y; | |
| 221 | } | ||
| 222 | |||
/* Elliott activation: x / (1 + |x|), a cheap sigmoid-like squashing
 * function with range (-1, 1). */
static float elliott(float x)
{
    const float den = 1.0f + fabsf(x);

    return x / den;
}
| 227 | |||
/* Apply the Elliott activation to each of the first `size` elements
 * of `input`, in place. */
static void transform_elliott(float *input, int size)
{
    for (int i = 0; i < size; i++) {
        const float v = input[i];

        input[i] = elliott(v);
    }
}
| 233 | |||
/* "Original" prescreener: a three-layer network evaluated on a 12x4
 * float window around each of the N output pixels. Writes 255 to
 * prescreen[j] when the pixel is safe for cheap cubic interpolation,
 * 0 when the full predictor network must be run (see filter_slice()). */
static void process_old(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 5;

    for (int j = 0; j < N; j++) {
        LOCAL_ALIGNED_32(float, input, [48]);
        float state[12];

        // Gather the 12x4 window into a contiguous aligned buffer.
        for (int i = 0; i < 4; i++)
            memcpy(input + i * 12, window + i * src_stride + j, 12 * sizeof(float));

        // Layer 0.
        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 48, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state + 1, 3);

        // Layer 1.
        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
        transform_elliott(state + 4, 3);

        // Layer 2.
        for (int n = 0; n < 4; n++)
            state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);

        // Compare the two output pairs to decide interpolate vs predict.
        prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0;
    }
}
| 269 | |||
/* "New" prescreener: a two-layer network on a 16x4 float window,
 * evaluated for four output pixels per iteration. Writes 1 to
 * prescreen[j] for pixels safe to interpolate, 0 otherwise. */
static void process_new(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 6;

    for (int j = 0; j < N; j += 4) {
        LOCAL_ALIGNED_32(float, input, [64]);
        float state[8];

        for (int i = 0; i < 4; i++)
            memcpy(input + i * 16, window + i * src_stride + j, 16 * sizeof(float));

        // Layer 0 with Elliott activation.
        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 64, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state, 4);

        // Layer 1: one raw output per pixel in this group of four.
        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);

        for (int n = 0; n < 4; n++)
            prescreen[j + n] = state[n + 4] > 0.f;
    }
}
| 299 | |||
| 300 | ✗ | static int filter_offset(int nn, const PredictorCoefficients *const model) | |
| 301 | { | ||
| 302 | ✗ | return nn * model->nsize; | |
| 303 | } | ||
| 304 | |||
| 305 | ✗ | static const float *softmax_q1_filter(int nn, | |
| 306 | const PredictorCoefficients *const model) | ||
| 307 | { | ||
| 308 | ✗ | return model->softmax_q1 + filter_offset(nn, model); | |
| 309 | } | ||
| 310 | |||
| 311 | ✗ | static const float *elliott_q1_filter(int nn, | |
| 312 | const PredictorCoefficients *const model) | ||
| 313 | { | ||
| 314 | ✗ | return model->elliott_q1 + filter_offset(nn, model); | |
| 315 | } | ||
| 316 | |||
| 317 | ✗ | static const float *softmax_q2_filter(int nn, | |
| 318 | const PredictorCoefficients *const model) | ||
| 319 | { | ||
| 320 | ✗ | return model->softmax_q2 + filter_offset(nn, model); | |
| 321 | } | ||
| 322 | |||
| 323 | ✗ | static const float *elliott_q2_filter(int nn, | |
| 324 | const PredictorCoefficients *const model) | ||
| 325 | { | ||
| 326 | ✗ | return model->elliott_q2 + filter_offset(nn, model); | |
| 327 | } | ||
| 328 | |||
| 329 | ✗ | static void gather_input(const float *src, ptrdiff_t src_stride, | |
| 330 | float *buf, float mstd[4], | ||
| 331 | const PredictorCoefficients *const model) | ||
| 332 | { | ||
| 333 | ✗ | const float scale = 1.f / model->nsize; | |
| 334 | ✗ | float sum = 0.f; | |
| 335 | ✗ | float sum_sq = 0.f; | |
| 336 | float tmp; | ||
| 337 | |||
| 338 | ✗ | for (int i = 0; i < model->ydim; i++) { | |
| 339 | ✗ | memcpy(buf, src, model->xdim * sizeof(float)); | |
| 340 | |||
| 341 | ✗ | for (int j = 0; j < model->xdim; j++) { | |
| 342 | ✗ | const float val = src[j]; | |
| 343 | |||
| 344 | ✗ | sum += val; | |
| 345 | ✗ | sum_sq += val * val; | |
| 346 | } | ||
| 347 | |||
| 348 | ✗ | src += src_stride; | |
| 349 | ✗ | buf += model->xdim; | |
| 350 | } | ||
| 351 | |||
| 352 | ✗ | mstd[0] = sum * scale; | |
| 353 | ✗ | mstd[3] = 0.f; | |
| 354 | |||
| 355 | ✗ | tmp = sum_sq * scale - mstd[0] * mstd[0]; | |
| 356 | ✗ | if (tmp < FLT_EPSILON) { | |
| 357 | ✗ | mstd[1] = 0.0f; | |
| 358 | ✗ | mstd[2] = 0.0f; | |
| 359 | } else { | ||
| 360 | ✗ | mstd[1] = sqrtf(tmp); | |
| 361 | ✗ | mstd[2] = 1.0f / mstd[1]; | |
| 362 | } | ||
| 363 | ✗ | } | |
| 364 | |||
/* expf() with the argument clamped to [-80, 80] so the result stays
 * finite and non-zero in float precision. */
static float softmax_exp(float x)
{
    return expf(FFMIN(FFMAX(x, -80.f), 80.f));
}
| 369 | |||
/* Apply the clamped exponential to each of the first `size` elements
 * of `input`, in place (softmax numerator). */
static void transform_softmax_exp(float *input, int size)
{
    for (int i = 0; i < size; i++) {
        const float v = input[i];

        input[i] = softmax_exp(v);
    }
}
| 375 | |||
/* Weighted average of Elliott-activated values: add to mstd[3] the
 * de-normalized prediction mean + stddev * 5 * sum(w_i * elliott(el_i)) /
 * sum(w_i), falling back to the plain mean when the total weight is
 * negligible. */
static void wae5(const float *softmax, const float *el,
                 int n, float mstd[4])
{
    float vsum = 0.0f, wsum = 0.0f;

    for (int i = 0; i < n; i++) {
        const float w = softmax[i];

        vsum += w * elliott(el[i]);
        wsum += w;
    }

    if (wsum > 1e-10f)
        mstd[3] += (5.0f * vsum) / wsum * mstd[1] + mstd[0];
    else
        mstd[3] += mstd[0];
}
| 391 | |||
/* Predictor network: for each of the N output pixels not already
 * settled by the prescreener, gather the model's neighborhood, run the
 * pass-1 softmax/elliott layers and, in slow quality (use_q2), a second
 * pass, then store the (averaged) prediction into dst. */
static void predictor(AVFilterContext *ctx,
                      const void *src, ptrdiff_t src_stride, void *dst,
                      const uint8_t *prescreen, int N,
                      const PredictorCoefficients *const model, int use_q2)
{
    const NNEDIContext *const s = ctx->priv;
    const float *src_p = src;
    float *dst_p = dst;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
    const int filter_size = model->nsize;
    const int nns = model->nns;

    for (int i = 0; i < N; i++) {
        // Sized for the largest model: 48x6 window, 256 neurons x 2 layers.
        LOCAL_ALIGNED_32(float, input, [48 * 6]);
        float activation[256 * 2];
        float mstd[4];
        float scale;

        // Non-zero prescreen value: pixel is handled by interpolation().
        if (prescreen[i])
            continue;

        gather_input(window + i, src_stride, input, mstd, model);
        scale = mstd[2];   // 1/stddev normalizes the dot products

        for (int nn = 0; nn < nns; nn++)
            activation[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, filter_size, scale, model->softmax_bias_q1[nn]);

        for (int nn = 0; nn < nns; nn++)
            activation[nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]);

        transform_softmax_exp(activation, nns);
        wae5(activation, activation + nns, nns, mstd);

        if (use_q2) {
            // Second pass (qual=slow); result accumulates into mstd[3].
            for (int nn = 0; nn < nns; nn++)
                activation[nn] = dot_dsp(s, softmax_q2_filter(nn, model), input, filter_size, scale, model->softmax_bias_q2[nn]);

            for (int nn = 0; nn < nns; nn++)
                activation[nns + nn] = dot_dsp(s, elliott_q2_filter(nn, model), input, filter_size, scale, model->elliott_bias_q2[nn]);

            transform_softmax_exp(activation, nns);
            wae5(activation, activation + nns, nns, mstd);
        }

        // Two accumulated passes are averaged.
        dst_p[i] = mstd[3] * (use_q2 ? 0.5f : 1.f);
    }
}
| 441 | |||
| 442 | ✗ | static void read_bytes(const uint8_t *src, float *dst, | |
| 443 | int src_stride, int dst_stride, | ||
| 444 | int width, int height, float scale) | ||
| 445 | { | ||
| 446 | ✗ | for (int y = 0; y < height; y++) { | |
| 447 | ✗ | for (int x = 0; x < 32; x++) | |
| 448 | ✗ | dst[-x - 1] = src[x]; | |
| 449 | |||
| 450 | ✗ | for (int x = 0; x < width; x++) | |
| 451 | ✗ | dst[x] = src[x]; | |
| 452 | |||
| 453 | ✗ | for (int x = 0; x < 32; x++) | |
| 454 | ✗ | dst[width + x] = src[width - x - 1]; | |
| 455 | |||
| 456 | ✗ | dst += dst_stride; | |
| 457 | ✗ | src += src_stride; | |
| 458 | } | ||
| 459 | ✗ | } | |
| 460 | |||
| 461 | ✗ | static void read_words(const uint8_t *srcp, float *dst, | |
| 462 | int src_stride, int dst_stride, | ||
| 463 | int width, int height, float scale) | ||
| 464 | { | ||
| 465 | ✗ | const uint16_t *src = (const uint16_t *)srcp; | |
| 466 | |||
| 467 | ✗ | src_stride /= 2; | |
| 468 | |||
| 469 | ✗ | for (int y = 0; y < height; y++) { | |
| 470 | ✗ | for (int x = 0; x < 32; x++) | |
| 471 | ✗ | dst[-x - 1] = src[x] * scale; | |
| 472 | |||
| 473 | ✗ | for (int x = 0; x < width; x++) | |
| 474 | ✗ | dst[x] = src[x] * scale; | |
| 475 | |||
| 476 | ✗ | for (int x = 0; x < 32; x++) | |
| 477 | ✗ | dst[width + x] = src[width - x - 1] * scale; | |
| 478 | |||
| 479 | ✗ | dst += dst_stride; | |
| 480 | ✗ | src += src_stride; | |
| 481 | } | ||
| 482 | ✗ | } | |
| 483 | |||
| 484 | ✗ | static void write_bytes(const float *src, uint8_t *dst, | |
| 485 | int src_stride, int dst_stride, | ||
| 486 | int width, int height, int depth, | ||
| 487 | float scale) | ||
| 488 | { | ||
| 489 | ✗ | for (int y = 0; y < height; y++) { | |
| 490 | ✗ | for (int x = 0; x < width; x++) | |
| 491 | ✗ | dst[x] = av_clip_uint8(src[x]); | |
| 492 | |||
| 493 | ✗ | dst += dst_stride; | |
| 494 | ✗ | src += src_stride; | |
| 495 | } | ||
| 496 | ✗ | } | |
| 497 | |||
| 498 | ✗ | static void write_words(const float *src, uint8_t *dstp, | |
| 499 | int src_stride, int dst_stride, | ||
| 500 | int width, int height, int depth, | ||
| 501 | float scale) | ||
| 502 | { | ||
| 503 | ✗ | uint16_t *dst = (uint16_t *)dstp; | |
| 504 | |||
| 505 | ✗ | dst_stride /= 2; | |
| 506 | |||
| 507 | ✗ | for (int y = 0; y < height; y++) { | |
| 508 | ✗ | for (int x = 0; x < width; x++) | |
| 509 | ✗ | dst[x] = av_clip_uintp2_c(src[x] * scale, depth); | |
| 510 | |||
| 511 | ✗ | dst += dst_stride; | |
| 512 | ✗ | src += src_stride; | |
| 513 | } | ||
| 514 | ✗ | } | |
| 515 | |||
| 516 | ✗ | static void interpolation(const void *src, ptrdiff_t src_stride, | |
| 517 | void *dst, const uint8_t *prescreen, int n) | ||
| 518 | { | ||
| 519 | ✗ | const float *src_p = src; | |
| 520 | ✗ | float *dst_p = dst; | |
| 521 | ✗ | const float *window = src_p - 2 * src_stride; | |
| 522 | |||
| 523 | ✗ | for (int i = 0; i < n; i++) { | |
| 524 | ✗ | float accum = 0.0f; | |
| 525 | |||
| 526 | ✗ | if (!prescreen[i]) | |
| 527 | ✗ | continue; | |
| 528 | |||
| 529 | ✗ | accum += (-3.0f / 32.0f) * window[0 * src_stride + i]; | |
| 530 | ✗ | accum += (19.0f / 32.0f) * window[1 * src_stride + i]; | |
| 531 | ✗ | accum += (19.0f / 32.0f) * window[2 * src_stride + i]; | |
| 532 | ✗ | accum += (-3.0f / 32.0f) * window[3 * src_stride + i]; | |
| 533 | |||
| 534 | ✗ | dst_p[i] = accum; | |
| 535 | } | ||
| 536 | ✗ | } | |
| 537 | |||
/* Per-slice worker (run via ff_filter_execute): deinterlace a horizontal
 * band of every selected plane of s->prev into `arg` (the output frame).
 * Kept lines are copied through; missing lines are synthesized from a
 * padded float working buffer by the prescreener/predictor/interpolator.
 * Slices are aligned to pairs of lines so each job owns whole field pairs. */
static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    const NNEDIContext *const s = ctx->priv;
    AVFrame *out = arg;
    AVFrame *in = s->prev;
    const float in_scale = s->in_scale;
    const float out_scale = s->out_scale;
    const int depth = s->depth;
    const int interlaced = !!(in->flags & AV_FRAME_FLAG_INTERLACED);
    // Decide whether the field to keep for this output frame is the top one.
    const int tff = s->field_n == (s->field < 0 ? interlaced ? (in->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) : 1 :
                        (s->field & 1) ^ 1);


    for (int p = 0; p < s->nb_planes; p++) {
        const int height = s->planeheight[p];
        const int width = s->planewidth[p];
        // Slice bounds rounded to even line numbers (whole field pairs).
        const int slice_start = 2 * ((height / 2 * jobnr) / nb_jobs);
        const int slice_end = 2 * ((height / 2 * (jobnr+1)) / nb_jobs);
        const uint8_t *src_data = in->data[p];
        uint8_t *dst_data = out->data[p];
        uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
        const int src_linesize = in->linesize[p];
        const int dst_linesize = out->linesize[p];
        uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
        float *srcbuf = s->input_buf[jobnr];
        const int srcbuf_stride = width + 64;   // 32 mirrored floats each side
        float *dstbuf = s->output_buf[jobnr];
        const int dstbuf_stride = width;
        const int slice_height = (slice_end - slice_start) / 2;
        const int last_slice = slice_end == height;
        const uint8_t *in_line;
        uint8_t *out_line;
        int y_out;

        // Planes excluded by the "planes" option are copied verbatim.
        if (!(s->process_plane & (1 << p))) {
            av_image_copy_plane(dst, out->linesize[p],
                                in->data[p] + slice_start * in->linesize[p],
                                in->linesize[p],
                                s->linesize[p], slice_end - slice_start);
            continue;
        }

        // Copy through the lines of the field we keep.
        y_out = slice_start + (tff ^ (slice_start & 1));
        in_line = src_data + (y_out * src_linesize);
        out_line = dst_data + (y_out * dst_linesize);

        while (y_out < slice_end) {
            memcpy(out_line, in_line, s->linesize[p]);
            y_out += 2;
            in_line += src_linesize * 2;
            out_line += dst_linesize * 2;
        }

        // First line to synthesize (the opposite field).
        y_out = slice_start + ((!tff) ^ (slice_start & 1));

        // Fill the working buffer: three context lines above the slice,
        // clamped to the top of the kept field.
        s->read(src_data + FFMAX(y_out - 5, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 3, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 1, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        in_line = src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize;
        out_line = dst_data + (y_out * dst_linesize);

        // Bulk of the kept-field lines covering this slice.
        s->read(in_line, srcbuf + 32, src_linesize * 2, srcbuf_stride,
                width, slice_height - last_slice, in_scale);

        y_out += (slice_height - last_slice) * 2;

        // Three context lines below the slice, clamped to the bottom.
        s->read(src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 3, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 1 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 5, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 2 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        // Synthesize each missing line: optional prescreen, predictor for
        // pixels it rejects, cheap interpolation for pixels it accepts.
        for (int y = 0; y < slice_end - slice_start; y += 2) {
            if (s->pscrn > 0)
                s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
                                           srcbuf_stride, prescreen_buf, width,
                                           &s->prescreener[s->pscrn - 1]);

            predictor(ctx,
                      srcbuf + (y / 2) * srcbuf_stride + 32,
                      srcbuf_stride,
                      dstbuf + (y / 2) * dstbuf_stride,
                      prescreen_buf, width,
                      &s->coeffs[s->etype][s->nnsparam][s->nsize], s->qual == 2);

            if (s->pscrn > 0)
                interpolation(srcbuf + (y / 2) * srcbuf_stride + 32,
                              srcbuf_stride,
                              dstbuf + (y / 2) * dstbuf_stride,
                              prescreen_buf, width);
        }

        // Write the synthesized lines into the output plane.
        s->write(dstbuf, out_line, dstbuf_stride, dst_linesize * 2,
                 width, slice_height, depth, out_scale);
    }

    return 0;
}
| 660 | |||
/* Emit one deinterlaced output frame built from s->prev. `is_second`
 * distinguishes the two frames of double-rate modes (currently unused
 * in the body). Toggles field_n in double-rate modes. */
static int get_frame(AVFilterContext *ctx, int is_second)
{
    NNEDIContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *dst;

    dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!dst)
        return AVERROR(ENOMEM);
    av_frame_copy_props(dst, s->prev);
    // Output is progressive; pts was precomputed by the caller.
    dst->flags &= ~AV_FRAME_FLAG_INTERLACED;
    dst->pts = s->pts;

    // Slice count is bounded by field pairs of the smallest (chroma) plane.
    ff_filter_execute(ctx, filter_slice, dst, NULL,
                      FFMIN(s->planeheight[1] / 2, s->nb_threads));

    // Double-rate modes alternate the synthesized field per output frame.
    if (s->field == -2 || s->field > 1)
        s->field_n = !s->field_n;

    return ff_filter_frame(outlink, dst);
}
| 682 | |||
/* Consume one input frame. The filter runs one frame behind the input:
 * the frame actually processed is s->prev, while `in` becomes the new
 * buffered frame. The output time base is half the input one, hence the
 * doubled pts values. */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (!s->prev) {
        // Very first frame: just buffer it, emit nothing yet.
        s->prev = in;
        return 0;
    }

    if ((s->deint && !(s->prev->flags & AV_FRAME_FLAG_INTERLACED)) || ctx->is_disabled) {
        // Pass through untouched (deint=interlaced and frame is
        // progressive, or the filter is disabled via timeline).
        s->prev->pts *= 2;
        ret = ff_filter_frame(ctx->outputs[0], s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts * 2;
    ret = get_frame(ctx, 0);
    if (ret < 0 || (s->field > -2 && s->field < 2)) {
        // Single-rate modes (field in {-1,0,1}) emit one frame per input.
        av_frame_free(&s->prev);
        s->prev = in;
        return ret;
    }

    // Double-rate modes: the second frame's pts is the midpoint of the
    // two input pts values, expressed in the doubled time base.
    s->pts = s->prev->pts + in->pts;
    ret = get_frame(ctx, 1);
    av_frame_free(&s->prev);
    s->prev = in;
    return ret;
}
| 715 | |||
/* Forward the frame request upstream. On upstream EOF, feed a clone of
 * the buffered last frame back through filter_frame() once, so the
 * one-frame delay is flushed; s->eof then makes further requests
 * return AVERROR_EOF. */
static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (s->eof)
        return AVERROR_EOF;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF && s->prev) {
        AVFrame *next = av_frame_clone(s->prev);
        FilterLink *l = ff_filter_link(ctx->outputs[0]);

        if (!next)
            return AVERROR(ENOMEM);

        // Synthetic flush frame sits one output frame duration after the last.
        next->pts = s->prev->pts + av_rescale_q(1, av_inv_q(l->frame_rate),
                                                ctx->outputs[0]->time_base);
        s->eof = 1;

        ret = filter_frame(ctx->inputs[0], next);
    } else if (ret < 0) {
        return ret;
    }

    return ret;
}
| 745 | |||
/**
 * Copy @p n floats from the read cursor into @p dst and advance the
 * cursor past the consumed values.
 */
static void copy_weights(float *dst, int n, const float **data)
{
    const float *src = *data;

    memcpy(dst, src, n * sizeof(*dst));
    *data = src + n;
}
| 751 | |||
/**
 * Bump allocator over a pre-sized slab: hand out the current position
 * and advance the cursor by @p size floats.
 */
static float *allocate(float **ptr, int size)
{
    float *base = *ptr;

    *ptr = base + size;
    return base;
}
| 760 | |||
| 761 | ✗ | static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns) | |
| 762 | { | ||
| 763 | ✗ | int filter_size = nns * xdim * ydim; | |
| 764 | ✗ | int bias_size = nns; | |
| 765 | float *data; | ||
| 766 | |||
| 767 | ✗ | data = av_calloc(filter_size + bias_size, 4 * sizeof(float)); | |
| 768 | ✗ | if (!data) | |
| 769 | ✗ | return AVERROR(ENOMEM); | |
| 770 | |||
| 771 | ✗ | coeffs->data = data; | |
| 772 | ✗ | coeffs->xdim = xdim; | |
| 773 | ✗ | coeffs->ydim = ydim; | |
| 774 | ✗ | coeffs->nsize = xdim * ydim; | |
| 775 | ✗ | coeffs->nns = nns; | |
| 776 | |||
| 777 | ✗ | coeffs->softmax_q1 = allocate(&data, filter_size); | |
| 778 | ✗ | coeffs->elliott_q1 = allocate(&data, filter_size); | |
| 779 | ✗ | coeffs->softmax_bias_q1 = allocate(&data, bias_size); | |
| 780 | ✗ | coeffs->elliott_bias_q1 = allocate(&data, bias_size); | |
| 781 | |||
| 782 | ✗ | coeffs->softmax_q2 = allocate(&data, filter_size); | |
| 783 | ✗ | coeffs->elliott_q2 = allocate(&data, filter_size); | |
| 784 | ✗ | coeffs->softmax_bias_q2 = allocate(&data, bias_size); | |
| 785 | ✗ | coeffs->elliott_bias_q2 = allocate(&data, bias_size); | |
| 786 | |||
| 787 | ✗ | return 0; | |
| 788 | } | ||
| 789 | |||
/**
 * Parse the in-memory weights blob into the filter's coefficient tables.
 *
 * The on-disk layout is strictly sequential, so the copy_weights() calls
 * below must remain in exactly this order: four prescreeners first, then
 * the predictor models grouped by weight set, neuron count, and window
 * size.
 *
 * Returns 0 on success or a negative AVERROR code on allocation failure.
 */
static int read_weights(AVFilterContext *ctx, const float *bdata)
{
    NNEDIContext *s = ctx->priv;
    int ret;

    // Old-style prescreener: 4x48 layer 0 plus two small dense layers.
    copy_weights(&s->prescreener[0].kernel_l0[0][0], 4 * 48, &bdata);
    copy_weights(s->prescreener[0].bias_l0, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata);
    copy_weights(s->prescreener[0].bias_l1, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata);
    copy_weights(s->prescreener[0].bias_l2, 4, &bdata);

    // New-style prescreeners: the kernels are stored shuffled in the
    // file and are de-interleaved here into the layout the filter
    // kernels index directly.
    for (int i = 0; i < 3; i++) {
        PrescreenerCoefficients *data = &s->prescreener[i + 1];
        float kernel_l0_shuffled[4 * 64];
        float kernel_l1_shuffled[4 * 4];

        copy_weights(kernel_l0_shuffled, 4 * 64, &bdata);
        copy_weights(data->bias_l0, 4, &bdata);

        copy_weights(kernel_l1_shuffled, 4 * 4, &bdata);
        copy_weights(data->bias_l1, 4, &bdata);

        for (int n = 0; n < 4; n++) {
            for (int k = 0; k < 64; k++)
                data->kernel_l0[n][k] = kernel_l0_shuffled[(k / 8) * 32 + n * 8 + k % 8];
            for (int k = 0; k < 4; k++)
                data->kernel_l1[n][k] = kernel_l1_shuffled[k * 4 + n];
        }
    }

    // Two complete predictor weight sets follow.
    for (int m = 0; m < 2; m++) {
        // Grouping by neuron count.
        for (int i = 0; i < 5; i++) {
            const int nns = NNEDI_NNS[i];

            // Grouping by window size.
            for (int j = 0; j < 7; j++) {
                PredictorCoefficients *model = &s->coeffs[m][i][j];
                const int xdim = NNEDI_XDIM[j];
                const int ydim = NNEDI_YDIM[j];
                const int filter_size = xdim * ydim;

                ret = allocate_model(model, xdim, ydim, nns);
                if (ret < 0)
                    return ret;

                // Quality 1 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q1, nns * filter_size, &bdata);
                copy_weights(model->elliott_q1, nns * filter_size, &bdata);

                // Quality 1 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q1, nns, &bdata);
                copy_weights(model->elliott_bias_q1, nns, &bdata);

                // Quality 2 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q2, nns * filter_size, &bdata);
                copy_weights(model->elliott_q2, nns * filter_size, &bdata);

                // Quality 2 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q2, nns, &bdata);
                copy_weights(model->elliott_bias_q2, nns, &bdata);
            }
        }
    }

    return 0;
}
| 860 | |||
/**
 * Arithmetic mean of the first @p size elements of @p input.
 *
 * Returns 0 for a non-positive size instead of dividing by zero
 * (all current callers pass positive sizes).
 */
static float mean(const float *input, int size)
{
    float sum = 0.f;

    if (size <= 0)
        return 0.f;

    for (int i = 0; i < size; i++)
        sum += input[i];

    return sum / size;
}
| 870 | |||
/**
 * In place, subtract @p mean from each of the first @p size elements and
 * divide the result by @p half.
 */
static void transform(float *values, int size, float mean, float half)
{
    for (int n = 0; n < size; n++) {
        const float centered = values[n] - mean;

        values[n] = centered / half;
    }
}
| 876 | |||
| 877 | ✗ | static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half) | |
| 878 | { | ||
| 879 | ✗ | for (int n = 0; n < 4; n++) { | |
| 880 | ✗ | float m = mean(coeffs->kernel_l0[n], 48); | |
| 881 | |||
| 882 | ✗ | transform(coeffs->kernel_l0[n], 48, m, half); | |
| 883 | } | ||
| 884 | ✗ | } | |
| 885 | |||
| 886 | ✗ | static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half) | |
| 887 | { | ||
| 888 | ✗ | for (int n = 0; n < 4; n++) { | |
| 889 | ✗ | float m = mean(coeffs->kernel_l0[n], 64); | |
| 890 | |||
| 891 | ✗ | transform(coeffs->kernel_l0[n], 64, m, half); | |
| 892 | } | ||
| 893 | ✗ | } | |
| 894 | |||
| 895 | ✗ | static void subtract_mean_predictor(PredictorCoefficients *model) | |
| 896 | { | ||
| 897 | ✗ | const int filter_size = model->nsize; | |
| 898 | ✗ | const int nns = model->nns; | |
| 899 | ✗ | const float scale = 1.f / nns; | |
| 900 | |||
| 901 | double softmax_means[256]; // Average of individual softmax filters. | ||
| 902 | double elliott_means[256]; // Average of individual elliott filters. | ||
| 903 | ✗ | double mean_filter[48 * 6] = { 0 }; // Pointwise average of all softmax filters. | |
| 904 | double mean_bias; | ||
| 905 | |||
| 906 | // Quality 1. | ||
| 907 | ✗ | for (int nn = 0; nn < nns; nn++) { | |
| 908 | ✗ | softmax_means[nn] = mean(model->softmax_q1 + nn * filter_size, filter_size); | |
| 909 | ✗ | elliott_means[nn] = mean(model->elliott_q1 + nn * filter_size, filter_size); | |
| 910 | |||
| 911 | ✗ | for (int k = 0; k < filter_size; k++) | |
| 912 | ✗ | mean_filter[k] += model->softmax_q1[nn * filter_size + k] - softmax_means[nn]; | |
| 913 | } | ||
| 914 | |||
| 915 | ✗ | for (int k = 0; k < filter_size; k++) | |
| 916 | ✗ | mean_filter[k] *= scale; | |
| 917 | |||
| 918 | ✗ | mean_bias = mean(model->softmax_bias_q1, nns); | |
| 919 | |||
| 920 | ✗ | for (int nn = 0; nn < nns; nn++) { | |
| 921 | ✗ | for (int k = 0; k < filter_size; k++) { | |
| 922 | ✗ | model->softmax_q1[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k]; | |
| 923 | ✗ | model->elliott_q1[nn * filter_size + k] -= elliott_means[nn]; | |
| 924 | } | ||
| 925 | ✗ | model->softmax_bias_q1[nn] -= mean_bias; | |
| 926 | } | ||
| 927 | |||
| 928 | // Quality 2. | ||
| 929 | ✗ | memset(mean_filter, 0, sizeof(mean_filter)); | |
| 930 | |||
| 931 | ✗ | for (int nn = 0; nn < nns; nn++) { | |
| 932 | ✗ | softmax_means[nn] = mean(model->softmax_q2 + nn * filter_size, filter_size); | |
| 933 | ✗ | elliott_means[nn] = mean(model->elliott_q2 + nn * filter_size, filter_size); | |
| 934 | |||
| 935 | ✗ | for (int k = 0; k < filter_size; k++) { | |
| 936 | ✗ | mean_filter[k] += model->softmax_q2[nn * filter_size + k] - softmax_means[nn]; | |
| 937 | } | ||
| 938 | } | ||
| 939 | |||
| 940 | ✗ | for (int k = 0; k < filter_size; k++) | |
| 941 | ✗ | mean_filter[k] *= scale; | |
| 942 | |||
| 943 | ✗ | mean_bias = mean(model->softmax_bias_q2, nns); | |
| 944 | |||
| 945 | ✗ | for (int nn = 0; nn < nns; nn++) { | |
| 946 | ✗ | for (int k = 0; k < filter_size; k++) { | |
| 947 | ✗ | model->softmax_q2[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k]; | |
| 948 | ✗ | model->elliott_q2[nn * filter_size + k] -= elliott_means[nn]; | |
| 949 | } | ||
| 950 | |||
| 951 | ✗ | model->softmax_bias_q2[nn] -= mean_bias; | |
| 952 | } | ||
| 953 | ✗ | } | |
| 954 | |||
| 955 | ✗ | static av_cold int init(AVFilterContext *ctx) | |
| 956 | { | ||
| 957 | ✗ | NNEDIContext *s = ctx->priv; | |
| 958 | ✗ | FILE *weights_file = NULL; | |
| 959 | int64_t weights_size; | ||
| 960 | float *bdata; | ||
| 961 | size_t bytes_read; | ||
| 962 | ✗ | int ret = 0; | |
| 963 | |||
| 964 | ✗ | weights_file = avpriv_fopen_utf8(s->weights_file, "rb"); | |
| 965 | ✗ | if (!weights_file) { | |
| 966 | ✗ | av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n"); | |
| 967 | ✗ | return AVERROR(EINVAL); | |
| 968 | } | ||
| 969 | |||
| 970 | ✗ | if (fseek(weights_file, 0, SEEK_END)) { | |
| 971 | ✗ | av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n"); | |
| 972 | ✗ | fclose(weights_file); | |
| 973 | ✗ | return AVERROR(EINVAL); | |
| 974 | } | ||
| 975 | |||
| 976 | ✗ | weights_size = ftell(weights_file); | |
| 977 | |||
| 978 | ✗ | if (weights_size == -1) { | |
| 979 | ✗ | fclose(weights_file); | |
| 980 | ✗ | av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n"); | |
| 981 | ✗ | return AVERROR(EINVAL); | |
| 982 | ✗ | } else if (weights_size != NNEDI_WEIGHTS_SIZE) { | |
| 983 | ✗ | fclose(weights_file); | |
| 984 | ✗ | av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n"); | |
| 985 | ✗ | return AVERROR(EINVAL); | |
| 986 | } | ||
| 987 | |||
| 988 | ✗ | if (fseek(weights_file, 0, SEEK_SET)) { | |
| 989 | ✗ | fclose(weights_file); | |
| 990 | ✗ | av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n"); | |
| 991 | ✗ | return AVERROR(EINVAL); | |
| 992 | } | ||
| 993 | |||
| 994 | ✗ | bdata = av_malloc(NNEDI_WEIGHTS_SIZE); | |
| 995 | ✗ | if (!bdata) { | |
| 996 | ✗ | fclose(weights_file); | |
| 997 | ✗ | return AVERROR(ENOMEM); | |
| 998 | } | ||
| 999 | |||
| 1000 | ✗ | bytes_read = fread(bdata, 1, NNEDI_WEIGHTS_SIZE, weights_file); | |
| 1001 | ✗ | if (bytes_read != NNEDI_WEIGHTS_SIZE) { | |
| 1002 | ✗ | fclose(weights_file); | |
| 1003 | ✗ | ret = AVERROR_INVALIDDATA; | |
| 1004 | ✗ | av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n"); | |
| 1005 | ✗ | goto fail; | |
| 1006 | } | ||
| 1007 | |||
| 1008 | ✗ | fclose(weights_file); | |
| 1009 | |||
| 1010 | ✗ | s->fdsp = avpriv_float_dsp_alloc(0); | |
| 1011 | ✗ | if (!s->fdsp) { | |
| 1012 | ✗ | ret = AVERROR(ENOMEM); | |
| 1013 | ✗ | goto fail; | |
| 1014 | } | ||
| 1015 | |||
| 1016 | ✗ | ret = read_weights(ctx, bdata); | |
| 1017 | ✗ | if (ret < 0) | |
| 1018 | ✗ | goto fail; | |
| 1019 | |||
| 1020 | ✗ | fail: | |
| 1021 | ✗ | av_free(bdata); | |
| 1022 | ✗ | return ret; | |
| 1023 | } | ||
| 1024 | |||
| 1025 | ✗ | static int config_input(AVFilterLink *inlink) | |
| 1026 | { | ||
| 1027 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 1028 | ✗ | NNEDIContext *s = ctx->priv; | |
| 1029 | ✗ | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); | |
| 1030 | int ret; | ||
| 1031 | |||
| 1032 | ✗ | s->depth = desc->comp[0].depth; | |
| 1033 | ✗ | s->nb_threads = ff_filter_get_nb_threads(ctx); | |
| 1034 | ✗ | s->nb_planes = av_pix_fmt_count_planes(inlink->format); | |
| 1035 | ✗ | if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0) | |
| 1036 | ✗ | return ret; | |
| 1037 | |||
| 1038 | ✗ | s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); | |
| 1039 | ✗ | s->planewidth[0] = s->planewidth[3] = inlink->w; | |
| 1040 | ✗ | s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); | |
| 1041 | ✗ | s->planeheight[0] = s->planeheight[3] = inlink->h; | |
| 1042 | |||
| 1043 | ✗ | s->half = ((1 << 8) - 1) / 2.f; | |
| 1044 | ✗ | s->out_scale = 1 << (s->depth - 8); | |
| 1045 | ✗ | s->in_scale = 1.f / s->out_scale; | |
| 1046 | |||
| 1047 | ✗ | switch (s->depth) { | |
| 1048 | ✗ | case 8: | |
| 1049 | ✗ | s->read = read_bytes; | |
| 1050 | ✗ | s->write = write_bytes; | |
| 1051 | ✗ | break; | |
| 1052 | ✗ | default: | |
| 1053 | ✗ | s->read = read_words; | |
| 1054 | ✗ | s->write = write_words; | |
| 1055 | ✗ | break; | |
| 1056 | } | ||
| 1057 | |||
| 1058 | ✗ | subtract_mean_old(&s->prescreener[0], s->half); | |
| 1059 | ✗ | subtract_mean_new(&s->prescreener[1], s->half); | |
| 1060 | ✗ | subtract_mean_new(&s->prescreener[2], s->half); | |
| 1061 | ✗ | subtract_mean_new(&s->prescreener[3], s->half); | |
| 1062 | |||
| 1063 | ✗ | s->prescreen[0] = process_old; | |
| 1064 | ✗ | s->prescreen[1] = process_new; | |
| 1065 | |||
| 1066 | ✗ | for (int i = 0; i < 2; i++) { | |
| 1067 | ✗ | for (int j = 0; j < 5; j++) { | |
| 1068 | ✗ | for (int k = 0; k < 7; k++) | |
| 1069 | ✗ | subtract_mean_predictor(&s->coeffs[i][j][k]); | |
| 1070 | } | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | ✗ | s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6); | |
| 1074 | ✗ | s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf)); | |
| 1075 | ✗ | if (!s->input_buf) | |
| 1076 | ✗ | return AVERROR(ENOMEM); | |
| 1077 | |||
| 1078 | ✗ | for (int i = 0; i < s->nb_threads; i++) { | |
| 1079 | ✗ | s->input_buf[i] = av_calloc(s->input_size, sizeof(**s->input_buf)); | |
| 1080 | ✗ | if (!s->input_buf[i]) | |
| 1081 | ✗ | return AVERROR(ENOMEM); | |
| 1082 | } | ||
| 1083 | |||
| 1084 | ✗ | s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf)); | |
| 1085 | ✗ | if (!s->output_buf) | |
| 1086 | ✗ | return AVERROR(ENOMEM); | |
| 1087 | |||
| 1088 | ✗ | for (int i = 0; i < s->nb_threads; i++) { | |
| 1089 | ✗ | s->output_buf[i] = av_calloc(s->input_size, sizeof(**s->output_buf)); | |
| 1090 | ✗ | if (!s->output_buf[i]) | |
| 1091 | ✗ | return AVERROR(ENOMEM); | |
| 1092 | } | ||
| 1093 | |||
| 1094 | ✗ | s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf)); | |
| 1095 | ✗ | if (!s->prescreen_buf) | |
| 1096 | ✗ | return AVERROR(ENOMEM); | |
| 1097 | |||
| 1098 | ✗ | for (int i = 0; i < s->nb_threads; i++) { | |
| 1099 | ✗ | s->prescreen_buf[i] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf)); | |
| 1100 | ✗ | if (!s->prescreen_buf[i]) | |
| 1101 | ✗ | return AVERROR(ENOMEM); | |
| 1102 | } | ||
| 1103 | |||
| 1104 | ✗ | return 0; | |
| 1105 | } | ||
| 1106 | |||
| 1107 | ✗ | static av_cold void uninit(AVFilterContext *ctx) | |
| 1108 | { | ||
| 1109 | ✗ | NNEDIContext *s = ctx->priv; | |
| 1110 | |||
| 1111 | ✗ | for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++) | |
| 1112 | ✗ | av_freep(&s->prescreen_buf[i]); | |
| 1113 | |||
| 1114 | ✗ | av_freep(&s->prescreen_buf); | |
| 1115 | |||
| 1116 | ✗ | for (int i = 0; i < s->nb_threads && s->input_buf; i++) | |
| 1117 | ✗ | av_freep(&s->input_buf[i]); | |
| 1118 | |||
| 1119 | ✗ | av_freep(&s->input_buf); | |
| 1120 | |||
| 1121 | ✗ | for (int i = 0; i < s->nb_threads && s->output_buf; i++) | |
| 1122 | ✗ | av_freep(&s->output_buf[i]); | |
| 1123 | |||
| 1124 | ✗ | av_freep(&s->output_buf); | |
| 1125 | ✗ | av_freep(&s->fdsp); | |
| 1126 | |||
| 1127 | ✗ | for (int i = 0; i < 2; i++) { | |
| 1128 | ✗ | for (int j = 0; j < 5; j++) { | |
| 1129 | ✗ | for (int k = 0; k < 7; k++) { | |
| 1130 | ✗ | av_freep(&s->coeffs[i][j][k].data); | |
| 1131 | } | ||
| 1132 | } | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | ✗ | av_frame_free(&s->prev); | |
| 1136 | ✗ | } | |
| 1137 | |||
/* Single video input; filter_frame() keeps a one-frame delay line. */
static const AVFilterPad inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
};
| 1146 | |||
/* Single video output; request_frame() handles the EOF flush. */
static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
        .request_frame = request_frame,
    },
};
| 1155 | |||
/* Filter registration: slice-threaded; timeline support is handled
 * internally (filter_frame checks ctx->is_disabled); runtime option
 * changes go through the generic ff_filter_process_command. */
const FFFilter ff_vf_nnedi = {
    .p.name        = "nnedi",
    .p.description = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
    .p.priv_class  = &nnedi_class,
    .p.flags       = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
    .priv_size     = sizeof(NNEDIContext),
    .init          = init,
    .uninit        = uninit,
    FILTER_INPUTS(inputs),
    FILTER_OUTPUTS(outputs),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .process_command = ff_filter_process_command,
};
| 1169 |