FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_dnn_processing.c
Date: 2024-02-29 09:57:37
            Exec   Total   Coverage
Lines:         0     166       0.0%
Functions:     0      10       0.0%
Branches:      0      89       0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2019 Guo Yejun
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * Implements a generic image processing filter using deep learning networks.
24 */
25
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/imgutils.h"
30 #include "filters.h"
31 #include "dnn_filter_common.h"
32 #include "internal.h"
33 #include "video.h"
34 #include "libswscale/swscale.h"
35 #include "libavutil/time.h"
36
37 typedef struct DnnProcessingContext {
38 const AVClass *class;
39 DnnContext dnnctx;
40 struct SwsContext *sws_uv_scale; // chroma scaler, created only when the model resizes the frame
41 int sws_uv_height;               // source chroma height passed to sws_scale()
42 } DnnProcessingContext;
43
44 #define OFFSET(x) offsetof(DnnProcessingContext, dnnctx.x)
45 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
46 static const AVOption dnn_processing_options[] = {
47 { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = DNN_TF }, INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
48 #if (CONFIG_LIBTENSORFLOW == 1)
49 { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TF }, 0, 0, FLAGS, .unit = "backend" },
50 #endif
51 #if (CONFIG_LIBOPENVINO == 1)
52 { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
53 #endif
54 DNN_COMMON_OPTIONS
55 { NULL }
56 };
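
/*
 * Illustrative invocation (a sketch: the model file and tensor names below
 * are placeholders, not anything shipped with FFmpeg):
 *
 *   ffmpeg -i input.mp4 \
 *     -vf dnn_processing=dnn_backend=tensorflow:model=sr.pb:input=x:output=y \
 *     output.mp4
 *
 * "model", "input" and "output" are provided by DNN_COMMON_OPTIONS above.
 */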
57
58 AVFILTER_DEFINE_CLASS(dnn_processing);
59
60 static av_cold int init(AVFilterContext *context)
61 {
62 DnnProcessingContext *ctx = context->priv;
63 return ff_dnn_init(&ctx->dnnctx, DFT_PROCESS_FRAME, context);
64 }
65
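/*
 * For the planar YUV and NV12 formats below, only the Y plane is fed to the
 * network; the chroma planes are copied or rescaled separately in
 * copy_uv_planes().
 */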
66 static const enum AVPixelFormat pix_fmts[] = {
67 AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
68 AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
69 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
70 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
71 AV_PIX_FMT_NV12,
72 AV_PIX_FMT_NONE
73 };
74
75 #define LOG_FORMAT_CHANNEL_MISMATCH() \
76 av_log(ctx, AV_LOG_ERROR, \
77 "the frame's format %s does not match " \
78 "the model input channel %d\n", \
79 av_get_pix_fmt_name(fmt), \
80 model_input->dims[dnn_get_channel_idx_by_layout(model_input->layout)]);
81
82 static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
83 {
84 AVFilterContext *ctx = inlink->dst;
85 enum AVPixelFormat fmt = inlink->format;
86 int width_idx, height_idx;
87
88 width_idx = dnn_get_width_idx_by_layout(model_input->layout);
89 height_idx = dnn_get_height_idx_by_layout(model_input->layout);
90 // by design, an explicit scale filter must be inserted before this filter when the model expects a fixed frame size
91 if (model_input->dims[height_idx] != -1 &&
92 model_input->dims[height_idx] != inlink->h) {
93 av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
94 model_input->dims[height_idx],
95 inlink->h);
96 return AVERROR(EIO);
97 }
98 if (model_input->dims[width_idx] != -1 &&
99 model_input->dims[width_idx] != inlink->w) {
100 av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
101 model_input->dims[width_idx],
102 inlink->w);
103 return AVERROR(EIO);
104 }
105 if (model_input->dt != DNN_FLOAT) {
106 avpriv_report_missing_feature(ctx, "data types other than DNN_FLOAT");
107 return AVERROR(EIO);
108 }
109
110 switch (fmt) {
111 case AV_PIX_FMT_RGB24:
112 case AV_PIX_FMT_BGR24:
113 if (model_input->dims[dnn_get_channel_idx_by_layout(model_input->layout)] != 3) {
114 LOG_FORMAT_CHANNEL_MISMATCH();
115 return AVERROR(EIO);
116 }
117 return 0;
118 case AV_PIX_FMT_GRAY8:
119 case AV_PIX_FMT_GRAYF32:
120 case AV_PIX_FMT_YUV420P:
121 case AV_PIX_FMT_YUV422P:
122 case AV_PIX_FMT_YUV444P:
123 case AV_PIX_FMT_YUV410P:
124 case AV_PIX_FMT_YUV411P:
125 case AV_PIX_FMT_NV12:
126 if (model_input->dims[dnn_get_channel_idx_by_layout(model_input->layout)] != 1) {
127 LOG_FORMAT_CHANNEL_MISMATCH();
128 return AVERROR(EIO);
129 }
130 return 0;
131 default:
132 avpriv_report_missing_feature(ctx, "%s", av_get_pix_fmt_name(fmt));
133 return AVERROR(EIO);
134 }
137 }
138
139 static int config_input(AVFilterLink *inlink)
140 {
141 AVFilterContext *context = inlink->dst;
142 DnnProcessingContext *ctx = context->priv;
143 int result;
144 DNNData model_input;
145 int check;
146
147 result = ff_dnn_get_input(&ctx->dnnctx, &model_input);
148 if (result != 0) {
149 av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
150 return result;
151 }
152
153 check = check_modelinput_inlink(&model_input, inlink);
154 if (check != 0) {
155 return check;
156 }
157
158 return 0;
159 }
160
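/* Note: this also returns 1 for semi-planar NV12, which has three components
 * and no RGB flag; callers rely on that to route NV12 chroma handling. */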
161 static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
162 {
163 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
164 av_assert0(desc);
165 return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
166 }
167
168 static int prepare_uv_scale(AVFilterLink *outlink)
169 {
170 AVFilterContext *context = outlink->src;
171 DnnProcessingContext *ctx = context->priv;
172 AVFilterLink *inlink = context->inputs[0];
173 enum AVPixelFormat fmt = inlink->format;
174
175 if (isPlanarYUV(fmt)) {
176 if (inlink->w != outlink->w || inlink->h != outlink->h) {
177 if (fmt == AV_PIX_FMT_NV12) {
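/* NV12 keeps U and V interleaved in a single plane; scaling that plane as
 * a two-component gray+alpha (YA8) image moves both chroma samples in
 * lockstep with one sws_scale() call. */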
178 ctx->sws_uv_scale = sws_getContext(inlink->w >> 1, inlink->h >> 1, AV_PIX_FMT_YA8,
179 outlink->w >> 1, outlink->h >> 1, AV_PIX_FMT_YA8,
180 SWS_BICUBIC, NULL, NULL, NULL);
181 ctx->sws_uv_height = inlink->h >> 1;
182 } else {
183 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
184 int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
185 int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
186 int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
187 int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
188 ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
189 sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
190 SWS_BICUBIC, NULL, NULL, NULL);
191 ctx->sws_uv_height = sws_src_h;
192 }
193 }
194 }
195
196 return 0;
197 }
198
199 static int config_output(AVFilterLink *outlink)
200 {
201 AVFilterContext *context = outlink->src;
202 DnnProcessingContext *ctx = context->priv;
203 int result;
204 AVFilterLink *inlink = context->inputs[0];
205
206 // do a trial run first, since the dnn model may resize the frame
207 result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &outlink->w, &outlink->h);
208 if (result != 0) {
209 av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
210 return result;
211 }
212
213 prepare_uv_scale(outlink);
214
215 return 0;
216 }
217
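/* The network only produces the Y plane; restore chroma by copying it when
 * the frame size is unchanged, or by rescaling it when the model resized
 * the frame (sws_uv_scale was prepared for that case). */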
218 static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
219 {
220 const AVPixFmtDescriptor *desc;
221 int uv_height;
222
223 if (!ctx->sws_uv_scale) {
224 av_assert0(in->height == out->height && in->width == out->width);
225 desc = av_pix_fmt_desc_get(in->format);
226 uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);
227 for (int i = 1; i < 3; ++i) {
228 int bytewidth = av_image_get_linesize(in->format, in->width, i);
229 if (bytewidth < 0) {
230 return AVERROR(EINVAL);
231 }
232 av_image_copy_plane(out->data[i], out->linesize[i],
233 in->data[i], in->linesize[i],
234 bytewidth, uv_height);
235 }
236 } else if (in->format == AV_PIX_FMT_NV12) {
237 sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
238 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
239 } else {
240 sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
241 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
242 sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
243 0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
244 }
245
246 return 0;
247 }
248
249 static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
250 {
251 DnnProcessingContext *ctx = outlink->src->priv;
252 int ret;
253 DNNAsyncStatusType async_state;
254
255 ret = ff_dnn_flush(&ctx->dnnctx);
256 if (ret != 0) {
257 return ret; // propagate the real error instead of a bare -1
258 }
259
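/* Poll for the remaining results: DAST_NOT_READY and DAST_SUCCESS keep the
 * loop alive; DAST_FAIL or DAST_EMPTY_QUEUE ends it (see the while below). */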
260 do {
261 AVFrame *in_frame = NULL;
262 AVFrame *out_frame = NULL;
263 async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
264 if (out_frame) {
265 if (isPlanarYUV(in_frame->format))
266 copy_uv_planes(ctx, out_frame, in_frame);
267 av_frame_free(&in_frame);
268 if (out_pts)
269 *out_pts = out_frame->pts + pts; // read pts before handing the frame off
270 ret = ff_filter_frame(outlink, out_frame); // ownership of out_frame passes downstream
271 if (ret < 0)
272 return ret;
273 }
274 av_usleep(5000); // brief back-off while results are still pending
275 } while (async_state >= DAST_NOT_READY);
276
277 return 0;
278 }
279
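/*
 * activate() drives the filter: queue every frame available on the input for
 * asynchronous inference, drain whatever the backend has finished, then deal
 * with EOF and backpressure.
 */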
280 static int activate(AVFilterContext *filter_ctx)
281 {
282 AVFilterLink *inlink = filter_ctx->inputs[0];
283 AVFilterLink *outlink = filter_ctx->outputs[0];
284 DnnProcessingContext *ctx = filter_ctx->priv;
285 AVFrame *in = NULL, *out = NULL;
286 int64_t pts;
287 int ret, status;
288 int got_frame = 0;
289 DNNAsyncStatusType async_state;
290
291 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
292
293 do {
294 // drain all input frames
295 ret = ff_inlink_consume_frame(inlink, &in);
296 if (ret < 0)
297 return ret;
298 if (ret > 0) {
299 out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
300 if (!out) {
301 av_frame_free(&in);
302 return AVERROR(ENOMEM);
303 }
304 av_frame_copy_props(out, in);
305 if (ff_dnn_execute_model(&ctx->dnnctx, in, out) != 0) {
306 return AVERROR(EIO);
307 }
308 }
309 } while (ret > 0);
310
311 // drain all processed frames
312 do {
313 AVFrame *in_frame = NULL;
314 AVFrame *out_frame = NULL;
315 async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
316 if (out_frame) {
317 if (isPlanarYUV(in_frame->format))
318 copy_uv_planes(ctx, out_frame, in_frame);
319 av_frame_free(&in_frame);
320 ret = ff_filter_frame(outlink, out_frame);
321 if (ret < 0)
322 return ret;
323 got_frame = 1;
324 }
325 } while (async_state == DAST_SUCCESS);
326
327 // if a frame was sent downstream, finish this activation; we will be called again
328 if (got_frame)
329 return 0;
330
331 if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
332 if (status == AVERROR_EOF) {
333 int64_t out_pts = pts;
334 ret = flush_frame(outlink, pts, &out_pts);
335 ff_outlink_set_status(outlink, status, out_pts);
336 return ret;
337 }
338 }
339
340 FF_FILTER_FORWARD_WANTED(outlink, inlink);
341
342 return 0;
343 }
344
345 static av_cold void uninit(AVFilterContext *ctx)
346 {
347 DnnProcessingContext *context = ctx->priv;
348
349 sws_freeContext(context->sws_uv_scale);
350 ff_dnn_uninit(&context->dnnctx);
351 }
352
353 static const AVFilterPad dnn_processing_inputs[] = {
354 {
355 .name = "default",
356 .type = AVMEDIA_TYPE_VIDEO,
357 .config_props = config_input,
358 },
359 };
360
361 static const AVFilterPad dnn_processing_outputs[] = {
362 {
363 .name = "default",
364 .type = AVMEDIA_TYPE_VIDEO,
365 .config_props = config_output,
366 },
367 };
368
369 const AVFilter ff_vf_dnn_processing = {
370 .name = "dnn_processing",
371 .description = NULL_IF_CONFIG_SMALL("Apply a DNN processing filter to the input."),
372 .priv_size = sizeof(DnnProcessingContext),
373 .init = init,
374 .uninit = uninit,
375 FILTER_INPUTS(dnn_processing_inputs),
376 FILTER_OUTPUTS(dnn_processing_outputs),
377 FILTER_PIXFMTS_ARRAY(pix_fmts),
378 .priv_class = &dnn_processing_class,
379 .activate = activate,
380 };
381