FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/dnn/dnn_io_proc.c
Date: 2024-04-19 17:50:32
Exec Total Coverage
Lines: 0 265 0.0%
Functions: 0 6 0.0%
Branches: 0 108 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2020
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "dnn_io_proc.h"
22 #include "libavutil/imgutils.h"
23 #include "libavutil/mem.h"
24 #include "libswscale/swscale.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/detection_bbox.h"
27
28 static int get_datatype_size(DNNDataType dt)
29 {
30 switch (dt)
31 {
32 case DNN_FLOAT:
33 return sizeof(float);
34 case DNN_UINT8:
35 return sizeof(uint8_t);
36 default:
37 av_assert0(!"not supported yet.");
38 return 1;
39 }
40 }
41
42 int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
43 {
44 struct SwsContext *sws_ctx;
45 int ret = 0;
46 int linesize[4] = { 0 };
47 void **dst_data = NULL;
48 void *middle_data = NULL;
49 uint8_t *planar_data[4] = { 0 };
50 int plane_size = frame->width * frame->height * sizeof(uint8_t);
51 enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;
52 int src_datatype_size = get_datatype_size(output->dt);
53
54 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
55 if (bytewidth < 0) {
56 return AVERROR(EINVAL);
57 }
58 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
59 if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)
60 src_fmt = AV_PIX_FMT_GRAY8;
61 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
62 else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&
63 fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)
64 src_fmt = AV_PIX_FMT_GRAYF32;
65 else {
66 av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "
67 "scale: %f, mean: %f\n", output->scale, output->mean);
68 return AVERROR(ENOSYS);
69 }
70
71 dst_data = (void **)frame->data;
72 linesize[0] = frame->linesize[0];
73 if (output->layout == DL_NCHW) {
74 middle_data = av_malloc(plane_size * output->dims[1]);
75 if (!middle_data) {
76 ret = AVERROR(ENOMEM);
77 goto err;
78 }
79 dst_data = &middle_data;
80 linesize[0] = frame->width * 3;
81 }
82
83 switch (frame->format) {
84 case AV_PIX_FMT_RGB24:
85 case AV_PIX_FMT_BGR24:
86 sws_ctx = sws_getContext(frame->width * 3,
87 frame->height,
88 src_fmt,
89 frame->width * 3,
90 frame->height,
91 AV_PIX_FMT_GRAY8,
92 0, NULL, NULL, NULL);
93 if (!sws_ctx) {
94 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
95 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
96 av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,
97 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
98 ret = AVERROR(EINVAL);
99 goto err;
100 }
101 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
102 (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,
103 (uint8_t * const*)dst_data, linesize);
104 sws_freeContext(sws_ctx);
105 // convert data from planar to packed
106 if (output->layout == DL_NCHW) {
107 sws_ctx = sws_getContext(frame->width,
108 frame->height,
109 AV_PIX_FMT_GBRP,
110 frame->width,
111 frame->height,
112 frame->format,
113 0, NULL, NULL, NULL);
114 if (!sws_ctx) {
115 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
116 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
117 av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,
118 av_get_pix_fmt_name(frame->format),frame->width, frame->height);
119 ret = AVERROR(EINVAL);
120 goto err;
121 }
122 if (frame->format == AV_PIX_FMT_RGB24) {
123 planar_data[0] = (uint8_t *)middle_data + plane_size;
124 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
125 planar_data[2] = (uint8_t *)middle_data;
126 } else if (frame->format == AV_PIX_FMT_BGR24) {
127 planar_data[0] = (uint8_t *)middle_data + plane_size;
128 planar_data[1] = (uint8_t *)middle_data;
129 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
130 }
131 sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
132 (const int [4]){frame->width * sizeof(uint8_t),
133 frame->width * sizeof(uint8_t),
134 frame->width * sizeof(uint8_t), 0},
135 0, frame->height, frame->data, frame->linesize);
136 sws_freeContext(sws_ctx);
137 }
138 break;
139 case AV_PIX_FMT_GRAYF32:
140 av_image_copy_plane(frame->data[0], frame->linesize[0],
141 output->data, bytewidth,
142 bytewidth, frame->height);
143 break;
144 case AV_PIX_FMT_YUV420P:
145 case AV_PIX_FMT_YUV422P:
146 case AV_PIX_FMT_YUV444P:
147 case AV_PIX_FMT_YUV410P:
148 case AV_PIX_FMT_YUV411P:
149 case AV_PIX_FMT_GRAY8:
150 case AV_PIX_FMT_NV12:
151 sws_ctx = sws_getContext(frame->width,
152 frame->height,
153 AV_PIX_FMT_GRAYF32,
154 frame->width,
155 frame->height,
156 AV_PIX_FMT_GRAY8,
157 0, NULL, NULL, NULL);
158 if (!sws_ctx) {
159 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
160 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
161 av_get_pix_fmt_name(src_fmt), frame->width, frame->height,
162 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
163 ret = AVERROR(EINVAL);
164 goto err;
165 }
166 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
167 (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,
168 (uint8_t * const*)frame->data, frame->linesize);
169 sws_freeContext(sws_ctx);
170 break;
171 default:
172 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
173 ret = AVERROR(ENOSYS);
174 goto err;
175 }
176
177 err:
178 av_free(middle_data);
179 return ret;
180 }
181
182 int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
183 {
184 struct SwsContext *sws_ctx;
185 int ret = 0;
186 int linesize[4] = { 0 };
187 void **src_data = NULL;
188 void *middle_data = NULL;
189 uint8_t *planar_data[4] = { 0 };
190 int plane_size = frame->width * frame->height * sizeof(uint8_t);
191 enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
192 int dst_datatype_size = get_datatype_size(input->dt);
193 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
194 if (bytewidth < 0) {
195 return AVERROR(EINVAL);
196 }
197 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
198 if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)
199 dst_fmt = AV_PIX_FMT_GRAY8;
200 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
201 else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&
202 fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)
203 dst_fmt = AV_PIX_FMT_GRAYF32;
204 else {
205 av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "
206 "scale: %f, mean: %f\n", input->scale, input->mean);
207 return AVERROR(ENOSYS);
208 }
209
210 src_data = (void **)frame->data;
211 linesize[0] = frame->linesize[0];
212 if (input->layout == DL_NCHW) {
213 middle_data = av_malloc(plane_size * input->dims[1]);
214 if (!middle_data) {
215 ret = AVERROR(ENOMEM);
216 goto err;
217 }
218 src_data = &middle_data;
219 linesize[0] = frame->width * 3;
220 }
221
222 switch (frame->format) {
223 case AV_PIX_FMT_RGB24:
224 case AV_PIX_FMT_BGR24:
225 // convert data from planar to packed
226 if (input->layout == DL_NCHW) {
227 sws_ctx = sws_getContext(frame->width,
228 frame->height,
229 frame->format,
230 frame->width,
231 frame->height,
232 AV_PIX_FMT_GBRP,
233 0, NULL, NULL, NULL);
234 if (!sws_ctx) {
235 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
236 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
237 av_get_pix_fmt_name(frame->format), frame->width, frame->height,
238 av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);
239 ret = AVERROR(EINVAL);
240 goto err;
241 }
242 if (frame->format == AV_PIX_FMT_RGB24) {
243 planar_data[0] = (uint8_t *)middle_data + plane_size;
244 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
245 planar_data[2] = (uint8_t *)middle_data;
246 } else if (frame->format == AV_PIX_FMT_BGR24) {
247 planar_data[0] = (uint8_t *)middle_data + plane_size;
248 planar_data[1] = (uint8_t *)middle_data;
249 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
250 }
251 sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
252 frame->linesize, 0, frame->height, planar_data,
253 (const int [4]){frame->width * sizeof(uint8_t),
254 frame->width * sizeof(uint8_t),
255 frame->width * sizeof(uint8_t), 0});
256 sws_freeContext(sws_ctx);
257 }
258 sws_ctx = sws_getContext(frame->width * 3,
259 frame->height,
260 AV_PIX_FMT_GRAY8,
261 frame->width * 3,
262 frame->height,
263 dst_fmt,
264 0, NULL, NULL, NULL);
265 if (!sws_ctx) {
266 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
267 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
268 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
269 av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);
270 ret = AVERROR(EINVAL);
271 goto err;
272 }
273 sws_scale(sws_ctx, (const uint8_t **)src_data,
274 linesize, 0, frame->height,
275 (uint8_t * const [4]){input->data, 0, 0, 0},
276 (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
277 sws_freeContext(sws_ctx);
278 break;
279 case AV_PIX_FMT_GRAYF32:
280 av_image_copy_plane(input->data, bytewidth,
281 frame->data[0], frame->linesize[0],
282 bytewidth, frame->height);
283 break;
284 case AV_PIX_FMT_YUV420P:
285 case AV_PIX_FMT_YUV422P:
286 case AV_PIX_FMT_YUV444P:
287 case AV_PIX_FMT_YUV410P:
288 case AV_PIX_FMT_YUV411P:
289 case AV_PIX_FMT_GRAY8:
290 case AV_PIX_FMT_NV12:
291 sws_ctx = sws_getContext(frame->width,
292 frame->height,
293 AV_PIX_FMT_GRAY8,
294 frame->width,
295 frame->height,
296 dst_fmt,
297 0, NULL, NULL, NULL);
298 if (!sws_ctx) {
299 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
300 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
301 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,
302 av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);
303 ret = AVERROR(EINVAL);
304 goto err;
305 }
306 sws_scale(sws_ctx, (const uint8_t **)frame->data,
307 frame->linesize, 0, frame->height,
308 (uint8_t * const [4]){input->data, 0, 0, 0},
309 (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
310 sws_freeContext(sws_ctx);
311 break;
312 default:
313 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
314 ret = AVERROR(ENOSYS);
315 goto err;
316 }
317 err:
318 av_free(middle_data);
319 return ret;
320 }
321
322 static enum AVPixelFormat get_pixel_format(DNNData *data)
323 {
324 if (data->dt == DNN_UINT8) {
325 switch (data->order) {
326 case DCO_BGR:
327 return AV_PIX_FMT_BGR24;
328 case DCO_RGB:
329 return AV_PIX_FMT_RGB24;
330 default:
331 av_assert0(!"unsupported data pixel format.\n");
332 return AV_PIX_FMT_BGR24;
333 }
334 }
335
336 av_assert0(!"unsupported data type.\n");
337 return AV_PIX_FMT_BGR24;
338 }
339
340 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
341 {
342 const AVPixFmtDescriptor *desc;
343 int offsetx[4], offsety[4];
344 uint8_t *bbox_data[4];
345 struct SwsContext *sws_ctx;
346 int linesizes[4];
347 int ret = 0;
348 enum AVPixelFormat fmt;
349 int left, top, width, height;
350 int width_idx, height_idx;
351 const AVDetectionBBoxHeader *header;
352 const AVDetectionBBox *bbox;
353 AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
354 int max_step[4] = { 0 };
355 av_assert0(sd);
356
357 /* (scale != 1 and scale != 0) or mean != 0 */
358 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
359 fabsf(input->mean) > 1e-6f) {
360 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "
361 "scale: %f, mean: %f\n", input->scale, input->mean);
362 return AVERROR(ENOSYS);
363 }
364
365 if (input->layout == DL_NCHW) {
366 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
367 return AVERROR(ENOSYS);
368 }
369
370 width_idx = dnn_get_width_idx_by_layout(input->layout);
371 height_idx = dnn_get_height_idx_by_layout(input->layout);
372
373 header = (const AVDetectionBBoxHeader *)sd->data;
374 bbox = av_get_detection_bbox(header, bbox_index);
375
376 left = bbox->x;
377 width = bbox->w;
378 top = bbox->y;
379 height = bbox->h;
380
381 fmt = get_pixel_format(input);
382 sws_ctx = sws_getContext(width, height, frame->format,
383 input->dims[width_idx],
384 input->dims[height_idx], fmt,
385 SWS_FAST_BILINEAR, NULL, NULL, NULL);
386 if (!sws_ctx) {
387 av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "
388 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
389 av_get_pix_fmt_name(frame->format), width, height,
390 av_get_pix_fmt_name(fmt),
391 input->dims[width_idx],
392 input->dims[height_idx]);
393 return AVERROR(EINVAL);
394 }
395
396 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
397 if (ret < 0) {
398 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
399 sws_freeContext(sws_ctx);
400 return ret;
401 }
402
403 desc = av_pix_fmt_desc_get(frame->format);
404 offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);
405 offsetx[0] = offsetx[3] = left;
406
407 offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
408 offsety[0] = offsety[3] = top;
409
410 av_image_fill_max_pixsteps(max_step, NULL, desc);
411 for (int k = 0; frame->data[k]; k++)
412 bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k];
413
414 sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,
415 0, height,
416 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
417
418 sws_freeContext(sws_ctx);
419
420 return ret;
421 }
422
423 int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
424 {
425 struct SwsContext *sws_ctx;
426 int linesizes[4];
427 int ret = 0, width_idx, height_idx;
428 enum AVPixelFormat fmt = get_pixel_format(input);
429
430 /* (scale != 1 and scale != 0) or mean != 0 */
431 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
432 fabsf(input->mean) > 1e-6f) {
433 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "
434 "scale: %f, mean: %f\n", input->scale, input->mean);
435 return AVERROR(ENOSYS);
436 }
437
438 if (input->layout == DL_NCHW) {
439 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
440 return AVERROR(ENOSYS);
441 }
442
443 width_idx = dnn_get_width_idx_by_layout(input->layout);
444 height_idx = dnn_get_height_idx_by_layout(input->layout);
445
446 sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
447 input->dims[width_idx],
448 input->dims[height_idx], fmt,
449 SWS_FAST_BILINEAR, NULL, NULL, NULL);
450 if (!sws_ctx) {
451 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
452 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
453 av_get_pix_fmt_name(frame->format), frame->width, frame->height,
454 av_get_pix_fmt_name(fmt), input->dims[width_idx],
455 input->dims[height_idx]);
456 return AVERROR(EINVAL);
457 }
458
459 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
460 if (ret < 0) {
461 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
462 sws_freeContext(sws_ctx);
463 return ret;
464 }
465
466 sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
467 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
468
469 sws_freeContext(sws_ctx);
470 return ret;
471 }
472