GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavfilter/vf_dnn_processing.c Lines: 0 133 0.0 %
Date: 2020-11-28 20:53:16 Branches: 0 63 0.0 %

Line Branch Exec Source
1
/*
2
 * Copyright (c) 2019 Guo Yejun
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
/**
22
 * @file
23
 * Implements a generic image processing filter using deep learning networks.
24
 */
25
26
#include "libavformat/avio.h"
27
#include "libavutil/opt.h"
28
#include "libavutil/pixdesc.h"
29
#include "libavutil/avassert.h"
30
#include "libavutil/imgutils.h"
31
#include "avfilter.h"
32
#include "dnn_interface.h"
33
#include "formats.h"
34
#include "internal.h"
35
#include "libswscale/swscale.h"
36
37
/**
 * Private state of the dnn_processing filter: the user-supplied options,
 * the DNN backend module and loaded model, and the optional scaler used
 * for the chroma planes.
 */
typedef struct DnnProcessingContext {
38
    const AVClass *class;               // first member; this context is passed to av_log() in init()
39
40
    char *model_filename;               // "model" option
41
    DNNBackendType backend_type;        // "dnn_backend" option
42
    char *model_inputname;              // "input" option
43
    char *model_outputname;             // "output" option
44
    char *backend_options;              // "options" option, handed to load_model() in init()
45
46
    DNNModule *dnn_module;              // obtained from ff_get_dnn_module() in init(), freed in uninit()
47
    DNNModel *model;                    // result of dnn_module->load_model(), released via free_model()
48
49
    struct SwsContext *sws_uv_scale;    // created in prepare_uv_scale(); NULL when the chroma planes are copied as-is
50
    int sws_uv_height;                  // source chroma plane height passed to sws_scale()
51
} DnnProcessingContext;
52
53
/* Byte offset of field x inside DnnProcessingContext, as used by the option table. */
#define OFFSET(x) offsetof(DnnProcessingContext, x)
54
/*
 * Flags applied to every option below.
 * NOTE(review): the expansion is not parenthesized; it is only used as a
 * standalone initializer in this table.
 */
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
55
/*
 * User-visible options of the filter. "dnn_backend" is an integer option whose
 * named constants share the "backend" unit; the tensorflow and openvino
 * constants exist only when the matching CONFIG_* macro equals 1.
 * The four string options default to NULL; init() rejects a NULL model,
 * input or output name.
 */
static const AVOption dnn_processing_options[] = {
56
    { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = 0 },    INT_MIN, INT_MAX, FLAGS, "backend" },
57
    { "native",      "native backend flag",        0,                        AV_OPT_TYPE_CONST,     { .i64 = 0 },    0, 0, FLAGS, "backend" },
58
#if (CONFIG_LIBTENSORFLOW == 1)
59
    { "tensorflow",  "tensorflow backend flag",    0,                        AV_OPT_TYPE_CONST,     { .i64 = 1 },    0, 0, FLAGS, "backend" },
60
#endif
61
#if (CONFIG_LIBOPENVINO == 1)
62
    { "openvino",    "openvino backend flag",      0,                        AV_OPT_TYPE_CONST,     { .i64 = 2 },    0, 0, FLAGS, "backend" },
63
#endif
64
    { "model",       "path to model file",         OFFSET(model_filename),   AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
65
    { "input",       "input name of the model",    OFFSET(model_inputname),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
66
    { "output",      "output name of the model",   OFFSET(model_outputname), AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
67
    { "options",     "backend options",            OFFSET(backend_options),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
68
    { NULL }
69
};
70
71
/* Presumably defines the dnn_processing_class object that the filter definition references as .priv_class — confirm against the macro. */
AVFILTER_DEFINE_CLASS(dnn_processing);
72
73
/**
 * Validate the mandatory options, obtain the DNN backend module and load the
 * model with it.
 *
 * @param context filter context; its priv data is a DnnProcessingContext
 * @return 0 on success. AVERROR(EINVAL) when the model file, input name or
 *         output name is not set, when the module has no load_model callback,
 *         or when the model could not be loaded. AVERROR(ENOMEM) when no
 *         module could be obtained for the requested backend.
 */
static av_cold int init(AVFilterContext *context)
{
    DnnProcessingContext *s = context->priv;
    DNNModule *module;

    /* All three names are mandatory; report the first one that is missing. */
    if (s->model_filename == NULL) {
        av_log(s, AV_LOG_ERROR, "model file for network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (s->model_inputname == NULL) {
        av_log(s, AV_LOG_ERROR, "input name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (s->model_outputname == NULL) {
        av_log(s, AV_LOG_ERROR, "output name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }

    /* Store the module first so that uninit() can release it on later failures. */
    module = ff_get_dnn_module(s->backend_type);
    s->dnn_module = module;
    if (module == NULL) {
        av_log(s, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
        return AVERROR(ENOMEM);
    }
    if (module->load_model == NULL) {
        av_log(s, AV_LOG_ERROR, "load_model for network is not specified\n");
        return AVERROR(EINVAL);
    }

    s->model = module->load_model(s->model_filename, s->backend_options, s);
    if (s->model == NULL) {
        av_log(s, AV_LOG_ERROR, "could not load DNN model\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
108
109
/**
 * Advertise the pixel formats this filter offers for format negotiation.
 *
 * @param context filter context
 * @return the value returned by ff_set_common_formats()
 */
static int query_formats(AVFilterContext *context)
{
    static const enum AVPixelFormat supported_formats[] = {
        AV_PIX_FMT_RGB24,
        AV_PIX_FMT_BGR24,
        AV_PIX_FMT_GRAY8,
        AV_PIX_FMT_GRAYF32,
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV444P,
        AV_PIX_FMT_YUV410P,
        AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_NONE
    };

    return ff_set_common_formats(context, ff_make_format_list(supported_formats));
}
121
122
/*
 * Log that the link's pixel format and the model's channel count disagree.
 * Expects `ctx`, `fmt` and `model_input` to be in scope at the point of use.
 * The body is wrapped in do { } while (0) and carries no trailing semicolon,
 * so an invocation followed by ';' expands to exactly one statement.
 */
#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    do {                                                    \
        av_log(ctx, AV_LOG_ERROR,                           \
               "the frame's format %s does not match "      \
               "the model input channel %d\n",              \
               av_get_pix_fmt_name(fmt),                    \
               model_input->channels);                      \
    } while (0)
128
129
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
130
{
131
    AVFilterContext *ctx   = inlink->dst;
132
    enum AVPixelFormat fmt = inlink->format;
133
134
    // the design is to add explicit scale filter before this filter
135
    if (model_input->height != -1 && model_input->height != inlink->h) {
136
        av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
137
                                   model_input->height, inlink->h);
138
        return AVERROR(EIO);
139
    }
140
    if (model_input->width != -1 && model_input->width != inlink->w) {
141
        av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
142
                                   model_input->width, inlink->w);
143
        return AVERROR(EIO);
144
    }
145
    if (model_input->dt != DNN_FLOAT) {
146
        av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32.\n");
147
        return AVERROR(EIO);
148
    }
149
150
    switch (fmt) {
151
    case AV_PIX_FMT_RGB24:
152
    case AV_PIX_FMT_BGR24:
153
        if (model_input->channels != 3) {
154
            LOG_FORMAT_CHANNEL_MISMATCH();
155
            return AVERROR(EIO);
156
        }
157
        return 0;
158
    case AV_PIX_FMT_GRAYF32:
159
    case AV_PIX_FMT_YUV420P:
160
    case AV_PIX_FMT_YUV422P:
161
    case AV_PIX_FMT_YUV444P:
162
    case AV_PIX_FMT_YUV410P:
163
    case AV_PIX_FMT_YUV411P:
164
        if (model_input->channels != 1) {
165
            LOG_FORMAT_CHANNEL_MISMATCH();
166
            return AVERROR(EIO);
167
        }
168
        return 0;
169
    default:
170
        av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt));
171
        return AVERROR(EIO);
172
    }
173
174
    return 0;
175
}
176
177
/**
 * Input pad configuration: query the model for its input description and
 * verify that the input link matches it.
 *
 * @param inlink input link being configured
 * @return 0 on success, AVERROR(EIO) when the model input cannot be queried,
 *         otherwise the result of check_modelinput_inlink()
 */
static int config_input(AVFilterLink *inlink)
{
    DnnProcessingContext *ctx = inlink->dst->priv;
    DNNData model_input;

    if (ctx->model->get_input(ctx->model->model, &model_input,
                              ctx->model_inputname) != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return AVERROR(EIO);
    }

    return check_modelinput_inlink(&model_input, inlink);
}
198
199
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
200
{
201
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
202
    av_assert0(desc);
203
    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
204
}
205
206
/**
 * Create the scaler used to resize the chroma planes.
 *
 * A scaler is needed only when the input format is planar YUV and the model
 * changes the frame size. Each chroma plane is scaled on its own as a GRAY8
 * image, so a single context serves both planes.
 *
 * @param outlink output link whose w/h have already been set
 * @return 0 on success or when no scaler is needed, AVERROR(ENOMEM) when the
 *         scaler could not be created
 */
static int prepare_uv_scale(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink = context->inputs[0];
    enum AVPixelFormat fmt = inlink->format;
    const AVPixFmtDescriptor *desc;
    int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;

    /* Drop a scaler left over from an earlier configuration (no-op on NULL). */
    sws_freeContext(ctx->sws_uv_scale);
    ctx->sws_uv_scale = NULL;

    if (!isPlanarYUV(fmt))
        return 0;
    if (inlink->w == outlink->w && inlink->h == outlink->h)
        return 0;

    /* Chroma plane sizes follow from the format's subsampling shifts. */
    desc = av_pix_fmt_desc_get(fmt);
    sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
    sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);

    ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                       sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                       SWS_BICUBIC, NULL, NULL, NULL);
    if (!ctx->sws_uv_scale) {
        /*
         * Without this check a failed allocation would make copy_uv_planes()
         * take the plain-copy path and hit its equal-size assertion.
         */
        av_log(ctx, AV_LOG_ERROR, "could not create scale context for the chroma planes\n");
        return AVERROR(ENOMEM);
    }
    ctx->sws_uv_height = sws_src_h;

    return 0;
}

/**
 * Output pad configuration: ask the model for its output size given the input
 * size, store it in the output link, then prepare chroma scaling.
 *
 * @param outlink output link being configured; its w/h are written
 * @return 0 on success, AVERROR(EIO) when the model output cannot be queried,
 *         or the error returned by prepare_uv_scale()
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType result;
    AVFilterLink *inlink = context->inputs[0];

    // do a trial run in case the dnn model resizes the frame
    result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h,
                                    ctx->model_outputname, &outlink->w, &outlink->h);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
        return AVERROR(EIO);
    }

    return prepare_uv_scale(outlink);
}
249
250
/**
 * Fill planes 1 and 2 of the output frame from the input frame.
 *
 * With a scaler present, each of the two planes is rescaled separately.
 * Without one, both frames must have the same size (asserted) and the planes
 * are copied line by line.
 *
 * @param ctx filter private context
 * @param out destination frame
 * @param in  source frame
 * @return always 0
 */
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
{
    const AVPixFmtDescriptor *fmt_desc;
    int chroma_rows;

    if (ctx->sws_uv_scale) {
        for (int plane = 1; plane < 3; plane++) {
            sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + plane), in->linesize + plane,
                      0, ctx->sws_uv_height, out->data + plane, out->linesize + plane);
        }
        return 0;
    }

    av_assert0(in->height == out->height && in->width == out->width);
    fmt_desc = av_pix_fmt_desc_get(in->format);
    chroma_rows = AV_CEIL_RSHIFT(in->height, fmt_desc->log2_chroma_h);
    for (int plane = 1; plane < 3; plane++) {
        int line_bytes = av_image_get_linesize(in->format, in->width, plane);

        av_image_copy_plane(out->data[plane], out->linesize[plane],
                            in->data[plane], in->linesize[plane],
                            line_bytes, chroma_rows);
    }

    return 0;
}
274
275
/**
 * Process one input frame: allocate an output frame of the configured output
 * size, copy the frame properties, run the model, and for planar YUV input
 * fill the chroma planes from the input frame.
 *
 * The input frame is freed on every path.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame
 * @return the result of ff_filter_frame() on success, AVERROR(ENOMEM) when no
 *         output buffer is available, the error from av_frame_copy_props(),
 *         or AVERROR(EIO) when model execution fails
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *context  = inlink->dst;
    AVFilterLink *outlink = context->outputs[0];
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType dnn_result;
    AVFrame *out;
    int ret;

    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }

    /* Copying properties can fail; do not pass on a frame with partial metadata. */
    ret = av_frame_copy_props(out, in);
    if (ret < 0) {
        av_frame_free(&in);
        av_frame_free(&out);
        return ret;
    }

    dnn_result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in,
                                                  (const char **)&ctx->model_outputname, 1, out);
    if (dnn_result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
        av_frame_free(&in);
        av_frame_free(&out);
        return AVERROR(EIO);
    }

    /* For planar YUV the chroma planes are taken from the input frame. */
    if (isPlanarYUV(in->format))
        copy_uv_planes(ctx, out, in);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
305
306
/**
 * Release everything the filter owns: the chroma scaler, the loaded model
 * (through the module's free_model callback) and the module itself.
 *
 * @param ctx filter context
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    DnnProcessingContext *s = ctx->priv;

    sws_freeContext(s->sws_uv_scale);

    /* The model can only be released through the module that loaded it. */
    if (s->dnn_module != NULL) {
        s->dnn_module->free_model(&s->model);
    }

    av_freep(&s->dnn_module);
}
317
318
/*
 * Input pads: a single video pad. config_input() validates the link against
 * the model input and filter_frame() processes each incoming frame.
 * The list ends with a { NULL } entry.
 */
static const AVFilterPad dnn_processing_inputs[] = {
319
    {
320
        .name         = "default",
321
        .type         = AVMEDIA_TYPE_VIDEO,
322
        .config_props = config_input,
323
        .filter_frame = filter_frame,
324
    },
325
    { NULL }
326
};
327
328
/*
 * Output pads: a single video pad. config_output() sets the output link size
 * from the model and prepares chroma scaling.
 * The list ends with a { NULL } entry.
 */
static const AVFilterPad dnn_processing_outputs[] = {
329
    {
330
        .name = "default",
331
        .type = AVMEDIA_TYPE_VIDEO,
332
        .config_props  = config_output,
333
    },
334
    { NULL }
335
};
336
337
/*
 * Filter definition for "dnn_processing": ties together the private context
 * size, the init/uninit/query_formats callbacks, the pad tables and the
 * option class defined in this file.
 */
AVFilter ff_vf_dnn_processing = {
338
    .name          = "dnn_processing",
339
    .description   = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
340
    .priv_size     = sizeof(DnnProcessingContext),
341
    .init          = init,
342
    .uninit        = uninit,
343
    .query_formats = query_formats,
344
    .inputs        = dnn_processing_inputs,
345
    .outputs       = dnn_processing_outputs,
346
    .priv_class    = &dnn_processing_class,
347
};