FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/dnn/dnn_io_proc.c
Date: 2024-03-28 14:59:00
Exec Total Coverage
Lines: 0 263 0.0%
Functions: 0 6 0.0%
Branches: 0 108 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2020
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "dnn_io_proc.h"
22 #include "libavutil/imgutils.h"
23 #include "libswscale/swscale.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/detection_bbox.h"
26
27 static int get_datatype_size(DNNDataType dt)
28 {
29 switch (dt)
30 {
31 case DNN_FLOAT:
32 return sizeof(float);
33 case DNN_UINT8:
34 return sizeof(uint8_t);
35 default:
36 av_assert0(!"not supported yet.");
37 return 1;
38 }
39 }
40
41 int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
42 {
43 struct SwsContext *sws_ctx;
44 int ret = 0;
45 int linesize[4] = { 0 };
46 void **dst_data = NULL;
47 void *middle_data = NULL;
48 uint8_t *planar_data[4] = { 0 };
49 int plane_size = frame->width * frame->height * sizeof(uint8_t);
50 enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;
51 int src_datatype_size = get_datatype_size(output->dt);
52
53 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
54 if (bytewidth < 0) {
55 return AVERROR(EINVAL);
56 }
57 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
58 if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)
59 src_fmt = AV_PIX_FMT_GRAY8;
60 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
61 else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&
62 fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)
63 src_fmt = AV_PIX_FMT_GRAYF32;
64 else {
65 av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "
66 "scale: %f, mean: %f\n", output->scale, output->mean);
67 return AVERROR(ENOSYS);
68 }
69
70 dst_data = (void **)frame->data;
71 linesize[0] = frame->linesize[0];
72 if (output->layout == DL_NCHW) {
73 middle_data = av_malloc(plane_size * output->dims[1]);
74 if (!middle_data) {
75 ret = AVERROR(ENOMEM);
76 goto err;
77 }
78 dst_data = &middle_data;
79 linesize[0] = frame->width * 3;
80 }
81
82 switch (frame->format) {
83 case AV_PIX_FMT_RGB24:
84 case AV_PIX_FMT_BGR24:
85 sws_ctx = sws_getContext(frame->width * 3,
86 frame->height,
87 src_fmt,
88 frame->width * 3,
89 frame->height,
90 AV_PIX_FMT_GRAY8,
91 0, NULL, NULL, NULL);
92 if (!sws_ctx) {
93 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
94 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
95 av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,
96 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
97 ret = AVERROR(EINVAL);
98 goto err;
99 }
100 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
101 (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,
102 (uint8_t * const*)dst_data, linesize);
103 sws_freeContext(sws_ctx);
104 // convert data from planar to packed
105 if (output->layout == DL_NCHW) {
106 sws_ctx = sws_getContext(frame->width,
107 frame->height,
108 AV_PIX_FMT_GBRP,
109 frame->width,
110 frame->height,
111 frame->format,
112 0, NULL, NULL, NULL);
113 if (!sws_ctx) {
114 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
115 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
116 av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,
117 av_get_pix_fmt_name(frame->format),frame->width, frame->height);
118 ret = AVERROR(EINVAL);
119 goto err;
120 }
121 if (frame->format == AV_PIX_FMT_RGB24) {
122 planar_data[0] = (uint8_t *)middle_data + plane_size;
123 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
124 planar_data[2] = (uint8_t *)middle_data;
125 } else if (frame->format == AV_PIX_FMT_BGR24) {
126 planar_data[0] = (uint8_t *)middle_data + plane_size;
127 planar_data[1] = (uint8_t *)middle_data;
128 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
129 }
130 sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
131 (const int [4]){frame->width * sizeof(uint8_t),
132 frame->width * sizeof(uint8_t),
133 frame->width * sizeof(uint8_t), 0},
134 0, frame->height, frame->data, frame->linesize);
135 sws_freeContext(sws_ctx);
136 }
137 break;
138 case AV_PIX_FMT_GRAYF32:
139 av_image_copy_plane(frame->data[0], frame->linesize[0],
140 output->data, bytewidth,
141 bytewidth, frame->height);
142 break;
143 case AV_PIX_FMT_YUV420P:
144 case AV_PIX_FMT_YUV422P:
145 case AV_PIX_FMT_YUV444P:
146 case AV_PIX_FMT_YUV410P:
147 case AV_PIX_FMT_YUV411P:
148 case AV_PIX_FMT_GRAY8:
149 case AV_PIX_FMT_NV12:
150 sws_ctx = sws_getContext(frame->width,
151 frame->height,
152 AV_PIX_FMT_GRAYF32,
153 frame->width,
154 frame->height,
155 AV_PIX_FMT_GRAY8,
156 0, NULL, NULL, NULL);
157 if (!sws_ctx) {
158 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
159 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
160 av_get_pix_fmt_name(src_fmt), frame->width, frame->height,
161 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
162 ret = AVERROR(EINVAL);
163 goto err;
164 }
165 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
166 (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,
167 (uint8_t * const*)frame->data, frame->linesize);
168 sws_freeContext(sws_ctx);
169 break;
170 default:
171 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
172 ret = AVERROR(ENOSYS);
173 goto err;
174 }
175
176 err:
177 av_free(middle_data);
178 return ret;
179 }
180
181 int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
182 {
183 struct SwsContext *sws_ctx;
184 int ret = 0;
185 int linesize[4] = { 0 };
186 void **src_data = NULL;
187 void *middle_data = NULL;
188 uint8_t *planar_data[4] = { 0 };
189 int plane_size = frame->width * frame->height * sizeof(uint8_t);
190 enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
191 int dst_datatype_size = get_datatype_size(input->dt);
192 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
193 if (bytewidth < 0) {
194 return AVERROR(EINVAL);
195 }
196 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
197 if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)
198 dst_fmt = AV_PIX_FMT_GRAY8;
199 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
200 else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&
201 fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)
202 dst_fmt = AV_PIX_FMT_GRAYF32;
203 else {
204 av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "
205 "scale: %f, mean: %f\n", input->scale, input->mean);
206 return AVERROR(ENOSYS);
207 }
208
209 src_data = (void **)frame->data;
210 linesize[0] = frame->linesize[0];
211 if (input->layout == DL_NCHW) {
212 middle_data = av_malloc(plane_size * input->dims[1]);
213 if (!middle_data) {
214 ret = AVERROR(ENOMEM);
215 goto err;
216 }
217 src_data = &middle_data;
218 linesize[0] = frame->width * 3;
219 }
220
221 switch (frame->format) {
222 case AV_PIX_FMT_RGB24:
223 case AV_PIX_FMT_BGR24:
224 // convert data from planar to packed
225 if (input->layout == DL_NCHW) {
226 sws_ctx = sws_getContext(frame->width,
227 frame->height,
228 frame->format,
229 frame->width,
230 frame->height,
231 AV_PIX_FMT_GBRP,
232 0, NULL, NULL, NULL);
233 if (!sws_ctx) {
234 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
235 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
236 av_get_pix_fmt_name(frame->format), frame->width, frame->height,
237 av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);
238 ret = AVERROR(EINVAL);
239 goto err;
240 }
241 if (frame->format == AV_PIX_FMT_RGB24) {
242 planar_data[0] = (uint8_t *)middle_data + plane_size;
243 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
244 planar_data[2] = (uint8_t *)middle_data;
245 } else if (frame->format == AV_PIX_FMT_BGR24) {
246 planar_data[0] = (uint8_t *)middle_data + plane_size;
247 planar_data[1] = (uint8_t *)middle_data;
248 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
249 }
250 sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
251 frame->linesize, 0, frame->height, planar_data,
252 (const int [4]){frame->width * sizeof(uint8_t),
253 frame->width * sizeof(uint8_t),
254 frame->width * sizeof(uint8_t), 0});
255 sws_freeContext(sws_ctx);
256 }
257 sws_ctx = sws_getContext(frame->width * 3,
258 frame->height,
259 AV_PIX_FMT_GRAY8,
260 frame->width * 3,
261 frame->height,
262 dst_fmt,
263 0, NULL, NULL, NULL);
264 if (!sws_ctx) {
265 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
266 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
267 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
268 av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);
269 ret = AVERROR(EINVAL);
270 goto err;
271 }
272 sws_scale(sws_ctx, (const uint8_t **)src_data,
273 linesize, 0, frame->height,
274 (uint8_t * const [4]){input->data, 0, 0, 0},
275 (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
276 sws_freeContext(sws_ctx);
277 break;
278 case AV_PIX_FMT_GRAYF32:
279 av_image_copy_plane(input->data, bytewidth,
280 frame->data[0], frame->linesize[0],
281 bytewidth, frame->height);
282 break;
283 case AV_PIX_FMT_YUV420P:
284 case AV_PIX_FMT_YUV422P:
285 case AV_PIX_FMT_YUV444P:
286 case AV_PIX_FMT_YUV410P:
287 case AV_PIX_FMT_YUV411P:
288 case AV_PIX_FMT_GRAY8:
289 case AV_PIX_FMT_NV12:
290 sws_ctx = sws_getContext(frame->width,
291 frame->height,
292 AV_PIX_FMT_GRAY8,
293 frame->width,
294 frame->height,
295 dst_fmt,
296 0, NULL, NULL, NULL);
297 if (!sws_ctx) {
298 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
299 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
300 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,
301 av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);
302 ret = AVERROR(EINVAL);
303 goto err;
304 }
305 sws_scale(sws_ctx, (const uint8_t **)frame->data,
306 frame->linesize, 0, frame->height,
307 (uint8_t * const [4]){input->data, 0, 0, 0},
308 (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
309 sws_freeContext(sws_ctx);
310 break;
311 default:
312 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
313 ret = AVERROR(ENOSYS);
314 goto err;
315 }
316 err:
317 av_free(middle_data);
318 return ret;
319 }
320
321 static enum AVPixelFormat get_pixel_format(DNNData *data)
322 {
323 if (data->dt == DNN_UINT8) {
324 switch (data->order) {
325 case DCO_BGR:
326 return AV_PIX_FMT_BGR24;
327 case DCO_RGB:
328 return AV_PIX_FMT_RGB24;
329 default:
330 av_assert0(!"unsupported data pixel format.\n");
331 return AV_PIX_FMT_BGR24;
332 }
333 }
334
335 av_assert0(!"unsupported data type.\n");
336 return AV_PIX_FMT_BGR24;
337 }
338
339 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
340 {
341 const AVPixFmtDescriptor *desc;
342 int offsetx[4], offsety[4];
343 uint8_t *bbox_data[4];
344 struct SwsContext *sws_ctx;
345 int linesizes[4];
346 int ret = 0;
347 enum AVPixelFormat fmt;
348 int left, top, width, height;
349 int width_idx, height_idx;
350 const AVDetectionBBoxHeader *header;
351 const AVDetectionBBox *bbox;
352 AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
353 av_assert0(sd);
354
355 /* (scale != 1 and scale != 0) or mean != 0 */
356 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
357 fabsf(input->mean) > 1e-6f) {
358 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "
359 "scale: %f, mean: %f\n", input->scale, input->mean);
360 return AVERROR(ENOSYS);
361 }
362
363 if (input->layout == DL_NCHW) {
364 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
365 return AVERROR(ENOSYS);
366 }
367
368 width_idx = dnn_get_width_idx_by_layout(input->layout);
369 height_idx = dnn_get_height_idx_by_layout(input->layout);
370
371 header = (const AVDetectionBBoxHeader *)sd->data;
372 bbox = av_get_detection_bbox(header, bbox_index);
373
374 left = bbox->x;
375 width = bbox->w;
376 top = bbox->y;
377 height = bbox->h;
378
379 fmt = get_pixel_format(input);
380 sws_ctx = sws_getContext(width, height, frame->format,
381 input->dims[width_idx],
382 input->dims[height_idx], fmt,
383 SWS_FAST_BILINEAR, NULL, NULL, NULL);
384 if (!sws_ctx) {
385 av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "
386 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
387 av_get_pix_fmt_name(frame->format), width, height,
388 av_get_pix_fmt_name(fmt),
389 input->dims[width_idx],
390 input->dims[height_idx]);
391 return AVERROR(EINVAL);
392 }
393
394 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
395 if (ret < 0) {
396 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
397 sws_freeContext(sws_ctx);
398 return ret;
399 }
400
401 desc = av_pix_fmt_desc_get(frame->format);
402 offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);
403 offsetx[0] = offsetx[3] = left;
404
405 offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
406 offsety[0] = offsety[3] = top;
407
408 for (int k = 0; frame->data[k]; k++)
409 bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k];
410
411 sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,
412 0, height,
413 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
414
415 sws_freeContext(sws_ctx);
416
417 return ret;
418 }
419
420 int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
421 {
422 struct SwsContext *sws_ctx;
423 int linesizes[4];
424 int ret = 0, width_idx, height_idx;
425 enum AVPixelFormat fmt = get_pixel_format(input);
426
427 /* (scale != 1 and scale != 0) or mean != 0 */
428 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
429 fabsf(input->mean) > 1e-6f) {
430 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "
431 "scale: %f, mean: %f\n", input->scale, input->mean);
432 return AVERROR(ENOSYS);
433 }
434
435 if (input->layout == DL_NCHW) {
436 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
437 return AVERROR(ENOSYS);
438 }
439
440 width_idx = dnn_get_width_idx_by_layout(input->layout);
441 height_idx = dnn_get_height_idx_by_layout(input->layout);
442
443 sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
444 input->dims[width_idx],
445 input->dims[height_idx], fmt,
446 SWS_FAST_BILINEAR, NULL, NULL, NULL);
447 if (!sws_ctx) {
448 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
449 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
450 av_get_pix_fmt_name(frame->format), frame->width, frame->height,
451 av_get_pix_fmt_name(fmt), input->dims[width_idx],
452 input->dims[height_idx]);
453 return AVERROR(EINVAL);
454 }
455
456 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
457 if (ret < 0) {
458 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
459 sws_freeContext(sws_ctx);
460 return ret;
461 }
462
463 sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
464 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
465
466 sws_freeContext(sws_ctx);
467 return ret;
468 }
469