FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_removelogo.c
Date: 2026-05-03 13:33:45
Exec Total Coverage
Lines: 0 173 0.0%
Functions: 0 9 0.0%
Branches: 0 98 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
3 * Copyright (c) 2012 Stefano Sabatini
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * Advanced blur-based logo removing filter
25 *
26 * This filter loads an image mask file showing where a logo is and
27 * uses a blur transform to remove the logo.
28 *
29 * Based on the libmpcodecs remove-logo filter by Robert Edele.
30 */
31
32 /**
33 * This code implements a filter to remove annoying TV logos and other annoying
34 * images placed onto a video stream. It works by filling in the pixels that
35 * comprise the logo with neighboring pixels. The transform is very loosely
36 * based on a gaussian blur, but it is different enough to merit its own
37 * paragraph later on. It is a major improvement on the old delogo filter as it
38 * both uses a better blurring algorithm and uses a bitmap to use an arbitrary
39 * and generally much tighter fitting shape than a rectangle.
40 *
41 * The logo removal algorithm has two key points. The first is that it
42 * distinguishes between pixels in the logo and those not in the logo by using
43 * the passed-in bitmap. Pixels not in the logo are copied over directly without
44 * being modified and they also serve as source pixels for the logo
45 * fill-in. Pixels inside the logo have the mask applied.
46 *
47 * At init-time the bitmap is reprocessed internally, and the distance to the
48 * nearest edge of the logo (Manhattan distance), along with a little extra to
49 * remove rough edges, is stored in each pixel. This is done using an in-place
50 * erosion algorithm, and incrementing each pixel that survives any given
51 * erosion. Once every pixel is eroded, the maximum value is recorded, and a
52 * set of masks from size 0 to this size are generated. The masks are circular
53 * binary masks, where each pixel within a radius N (where N is the size of the
54 * mask) is a 1, and all other pixels are a 0. Although a gaussian mask would be
55 * more mathematically accurate, a binary mask works better in practice because
56 * we generally do not use the central pixels in the mask (because they are in
57 * the logo region), and thus a gaussian mask will cause too little blur and
58 * thus a very unstable image.
59 *
60 * The mask is applied in a special way. Namely, only pixels in the mask that
61 * line up to pixels outside the logo are used. The dynamic mask size means that
62 * the mask is just big enough so that the edges touch pixels outside the logo,
63 * so the blurring is kept to a minimum and at least the first boundary
64 * condition is met (that the image function itself is continuous), even if the
65 * second boundary condition (that the derivative of the image function is
66 * continuous) is not met. A masking algorithm that does preserve the second
67 * boundary condition (perhaps something based on a highly-modified bi-cubic
68 * algorithm) should offer even better results on paper, but the noise in a
69 * typical TV signal should make anything based on derivatives hopelessly noisy.
70 */
71
72 #include "libavutil/imgutils.h"
73 #include "libavutil/mem.h"
74 #include "libavutil/opt.h"
75 #include "avfilter.h"
76 #include "filters.h"
77 #include "video.h"
78 #include "bbox.h"
79 #include "lavfutils.h"
80 #include "lswsutils.h"
81
82 typedef struct RemovelogoContext {
83 const AVClass *class;
84 char *filename;
85 /* Stores our collection of masks. The first is for an array of
86 the second for the y axis, and the third for the x axis. */
87 int ***mask;
88 int max_mask_size;
89 int mask_w, mask_h;
90
91 uint8_t *full_mask_data;
92 FFBoundingBox full_mask_bbox;
93 uint8_t *half_mask_data;
94 FFBoundingBox half_mask_bbox;
95 } RemovelogoContext;
96
97 #define OFFSET(x) offsetof(RemovelogoContext, x)
98 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
99 static const AVOption removelogo_options[] = {
100 { "filename", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
101 { "f", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
102 { NULL }
103 };
104
105 AVFILTER_DEFINE_CLASS(removelogo);
106
/**
 * Choose a slightly larger mask size to improve performance.
 *
 * This macro maps the absolute minimum mask size needed to the mask
 * size we'll actually use. f(x) = x (the smallest that will work) would
 * produce the sharpest results, but would be quite jittery.
 * f(x) = 1.25x, computed in integer arithmetic as x + (x >> 2), is the
 * tradeoff used here. It is only evaluated at init-time, so a longer
 * expression could be used without affecting performance.
 */
#define apply_mask_fudge_factor(x) (((x) >> 2) + (x))

/**
 * Pre-process an image to give distance information.
 *
 * This function takes a bitmap image and converts it in place into a
 * distance image. A distance image is zero for pixels outside of the
 * logo and is the Manhattan distance (|dx| + |dy|) from the logo edge
 * for pixels inside of the logo. This will overestimate the distance,
 * but that is safe, and is far easier to implement than a proper
 * pythagorean distance since a modified erosion algorithm is used to
 * compute the distances.
 *
 * The distances are stored in 8-bit pixels, so both the erosion counter
 * and the fudged result saturate at UINT8_MAX instead of wrapping
 * around. Without saturation a pixel deep inside a very large logo
 * could wrap to a small value (even 0, i.e. "not part of the logo").
 *
 * @param data          image which will be converted from a greyscale
 *                      image into a distance image, in place
 * @param linesize      distance in bytes between two rows of data
 * @param w             width of the image
 * @param h             height of the image
 * @param min_val       pixels strictly greater than this value are
 *                      considered part of the logo
 * @param max_mask_size set to the fudged number of erosion passes plus
 *                      one, i.e. an upper bound of the stored values
 *                      whenever no saturation happened
 */
static void convert_mask_to_strength_mask(uint8_t *data, int linesize,
                                          int w, int h, int min_val,
                                          int *max_mask_size)
{
    int x, y;

    /* How many times we've gone through the loop. Used in the
       in-place erosion algorithm and to get us max_mask_size later on. */
    int current_pass = 0;

    /* Binarize: every pixel above the threshold becomes 1, the rest 0. */
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            data[y * linesize + x] = data[y * linesize + x] > min_val;

    /* For each pass, if a pixel is itself at least the value of the
       current pass, and its four neighbors are too, then it is
       incremented. The loop ends after the first pass in which no pixel
       was incremented. Edge pixels are never visited, so they count as
       always excluded (this should be true anyway for any sane mask,
       but if it isn't this ensures that we eventually exit). */
    while (1) {
        /* If this doesn't get set by the end of this pass, then we're done. */
        int has_anything_changed = 0;

        current_pass++;

        for (y = 1; y < h - 1; y++) {
            uint8_t *current_pixel = data + y * linesize + 1;

            for (x = 1; x < w - 1; x++, current_pixel++) {
                /* Apply the in-place erosion transform. It is based
                   on the following two premises:
                   1 - Any pixel that fails 1 erosion will fail all
                       future erosions.

                   2 - Only pixels having survived all erosions up to
                       the present will be >= to current_pass.
                       It doesn't matter if it survived the current pass,
                       failed it, or hasn't been tested yet. By using >=
                       instead of ==, we allow the algorithm to work in
                       place. */
                if (current_pixel[0]         >= current_pass &&
                    current_pixel[1]         >= current_pass &&
                    current_pixel[-1]        >= current_pass &&
                    current_pixel[linesize]  >= current_pass &&
                    current_pixel[-linesize] >= current_pass) {
                    /* The pixel has not been eroded yet. Saturate rather
                     * than wrap: a pixel already at UINT8_MAX stays
                     * there and does not count as a change, which also
                     * terminates the loop. */
                    if (*current_pixel < UINT8_MAX) {
                        (*current_pixel)++;
                        has_anything_changed = 1;
                    }
                }
            }
        }
        if (!has_anything_changed)
            break;
    }

    /* Apply the fudge factor, which will increase the size of the
     * mask a little to reduce jitter at the cost of more blur. The
     * result must still fit into a byte, so clamp it. */
    for (y = 1; y < h - 1; y++) {
        for (x = 1; x < w - 1; x++) {
            int strength = data[y * linesize + x];

            strength = apply_mask_fudge_factor(strength);
            data[y * linesize + x] = strength > UINT8_MAX ? UINT8_MAX : strength;
        }
    }

    /* As a side-effect, we now know the maximum mask size, which
     * we'll use to generate our masks. */
    /* Apply the fudge factor to this number too, since we must ensure
     * that enough masks are generated. */
    *max_mask_size = apply_mask_fudge_factor(current_pass + 1);
}
205
206 static int load_mask(uint8_t **mask, int *w, int *h,
207 const char *filename, void *log_ctx)
208 {
209 int ret;
210 uint8_t *gray_data[4];
211 AVFrame *src_frame;
212 int gray_linesize[4];
213
214 /* load image from file */
215 ret = ff_load_image(&src_frame, filename, log_ctx);
216 if (ret < 0)
217 return ret;
218 *w = src_frame->width;
219 *h = src_frame->height;
220
221 /* convert the image to GRAY8 */
222 ret = ff_scale_image(gray_data, gray_linesize, *w, *h, AV_PIX_FMT_GRAY8,
223 src_frame->data, src_frame->linesize, *w, *h,
224 src_frame->format, log_ctx);
225 if (ret < 0)
226 goto end;
227
228 /* copy mask to a newly allocated array */
229 *mask = av_malloc(*w * *h);
230 if (!*mask) {
231 ret = AVERROR(ENOMEM);
232 goto end;
233 }
234 av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h);
235
236 end:
237 av_freep(&gray_data[0]);
238 av_frame_free(&src_frame);
239 return ret;
240 }
241
/**
 * Generate a strength mask with half the width and height of the source.
 *
 * Each destination pixel covers a 2x2 block of source pixels and is set
 * to 1 if any pixel of that block is non-zero, else to 0. The resulting
 * binary image is then handed to convert_mask_to_strength_mask() (with a
 * threshold of 0), which turns it in place into a distance image of its
 * own. The purpose of this is to produce a chroma filter image out of a
 * luma filter image.
 *
 * @param src_data      source image
 * @param src_linesize  distance in bytes between two source rows
 * @param dst_data      destination image, (src_w/2) x (src_h/2) pixels
 * @param dst_linesize  distance in bytes between two destination rows
 * @param src_w         width of the source image
 * @param src_h         height of the source image
 * @param max_mask_size receives the maximum mask size reported by
 *                      convert_mask_to_strength_mask() for the
 *                      half-size image
 */
static void generate_half_size_image(const uint8_t *src_data, int src_linesize,
                                     uint8_t *dst_data, int dst_linesize,
                                     int src_w, int src_h,
                                     int *max_mask_size)
{
    const int half_w = src_w / 2;
    const int half_h = src_h / 2;
    int row, col;

    for (row = 0; row < half_h; row++) {
        /* The two source rows feeding this destination row. */
        const uint8_t *top    = src_data + (2 * row) * src_linesize;
        const uint8_t *bottom = top + src_linesize;
        uint8_t *out          = dst_data + row * dst_linesize;

        for (col = 0; col < half_w; col++) {
            const int sx = 2 * col;

            /* Logical OR of the 2x2 block; the result is already 0 or 1. */
            out[col] = top[sx]    || top[sx + 1] ||
                       bottom[sx] || bottom[sx + 1];
        }
    }

    convert_mask_to_strength_mask(dst_data, dst_linesize,
                                  half_w, half_h, 0, max_mask_size);
}
278
279 static av_cold int init(AVFilterContext *ctx)
280 {
281 RemovelogoContext *s = ctx->priv;
282 int ***mask;
283 int ret = 0;
284 int a, b, c, w, h;
285 int full_max_mask_size, half_max_mask_size;
286
287 if (!s->filename) {
288 av_log(ctx, AV_LOG_ERROR, "The bitmap file name is mandatory\n");
289 return AVERROR(EINVAL);
290 }
291
292 /* Load our mask image. */
293 if ((ret = load_mask(&s->full_mask_data, &w, &h, s->filename, ctx)) < 0)
294 return ret;
295 s->mask_w = w;
296 s->mask_h = h;
297
298 convert_mask_to_strength_mask(s->full_mask_data, w, w, h,
299 16, &full_max_mask_size);
300
301 /* Create the scaled down mask image for the chroma planes. */
302 if (!(s->half_mask_data = av_mallocz(w/2 * h/2)))
303 return AVERROR(ENOMEM);
304 generate_half_size_image(s->full_mask_data, w,
305 s->half_mask_data, w/2,
306 w, h, &half_max_mask_size);
307
308 s->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size);
309
310 /* Create a circular mask for each size up to max_mask_size. When
311 the filter is applied, the mask size is determined on a pixel
312 by pixel basis, with pixels nearer the edge of the logo getting
313 smaller mask sizes. */
314 mask = (int ***)av_malloc_array(s->max_mask_size + 1, sizeof(int **));
315 if (!mask)
316 return AVERROR(ENOMEM);
317
318 for (a = 0; a <= s->max_mask_size; a++) {
319 mask[a] = (int **)av_malloc_array((a * 2) + 1, sizeof(int *));
320 if (!mask[a]) {
321 av_free(mask);
322 return AVERROR(ENOMEM);
323 }
324 for (b = -a; b <= a; b++) {
325 mask[a][b + a] = (int *)av_malloc_array((a * 2) + 1, sizeof(int));
326 if (!mask[a][b + a]) {
327 av_free(mask);
328 return AVERROR(ENOMEM);
329 }
330 for (c = -a; c <= a; c++) {
331 if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */
332 mask[a][b + a][c + a] = 1;
333 else
334 mask[a][b + a][c + a] = 0;
335 }
336 }
337 }
338 s->mask = mask;
339
340 /* Calculate our bounding rectangles, which determine in what
341 * region the logo resides for faster processing. */
342 ff_calculate_bounding_box(&s->full_mask_bbox, s->full_mask_data, w, w, h, 0, 8);
343 ff_calculate_bounding_box(&s->half_mask_bbox, s->half_mask_data, w/2, w/2, h/2, 0, 8);
344
345 #define SHOW_LOGO_INFO(mask_type) \
346 av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \
347 s->mask_type##_mask_bbox.x1, s->mask_type##_mask_bbox.x2, \
348 s->mask_type##_mask_bbox.y1, s->mask_type##_mask_bbox.y2, \
349 mask_type##_max_mask_size);
350 SHOW_LOGO_INFO(full);
351 SHOW_LOGO_INFO(half);
352
353 return 0;
354 }
355
356 static int config_props_input(AVFilterLink *inlink)
357 {
358 AVFilterContext *ctx = inlink->dst;
359 RemovelogoContext *s = ctx->priv;
360
361 if (inlink->w != s->mask_w || inlink->h != s->mask_h) {
362 av_log(ctx, AV_LOG_INFO,
363 "Mask image size %dx%d does not match with the input video size %dx%d\n",
364 s->mask_w, s->mask_h, inlink->w, inlink->h);
365 return AVERROR(EINVAL);
366 }
367
368 return 0;
369 }
370
/**
 * Blur a single pixel.
 *
 * It takes a pixel that is inside the logo and computes its replacement
 * value: the rounded average of all image pixels that lie within the
 * circular mask centered on (x, y) and outside of the logo (i.e. whose
 * mask_data value is zero). The radius of the circle is the value stored
 * in mask_data at (x, y).
 *
 * The circular mask is always addressed relative to its center, also
 * when the window around the pixel is clipped at an image border, so
 * that the circle stays centered on (x, y).
 *
 * @param mask          collection of circular masks, indexed by mask
 *                      size, then y, then x; mask[n] must be a
 *                      (2n + 1) x (2n + 1) array
 * @param mask_data     the mask plane to use for averaging
 * @param mask_linesize distance in bytes between two rows of mask_data
 * @param image_data    the image plane to blur
 * @param image_linesize distance in bytes between two rows of image_data
 * @param w             width of the image
 * @param h             height of the image
 * @param x             x-coordinate of the pixel to blur
 * @param y             y-coordinate of the pixel to blur
 * @return the blurred pixel value, or 255 if no pixel outside of the
 *         logo lies within the circle
 */
static unsigned int blur_pixel(int ***mask,
                               const uint8_t *mask_data, int mask_linesize,
                               uint8_t *image_data, int image_linesize,
                               int w, int h, int x, int y)
{
    /* Mask size tells how large a circle to use. The radius is about
     * (slightly larger than) mask size. */
    const int mask_size = mask_data[y * mask_linesize + x];
    int **circle        = mask[mask_size];
    /* Image coordinates of the top-left corner of the unclipped circle;
     * entry [0][0] of the circular mask corresponds to this position. */
    const int origin_x  = x - mask_size;
    const int origin_y  = y - mask_size;
    /* Bounding rectangle of the circle, clipped to the image. */
    const int start_posx = origin_x > 0 ? origin_x : 0;
    const int start_posy = origin_y > 0 ? origin_y : 0;
    const int end_posx   = x + mask_size < w - 1 ? x + mask_size : w - 1;
    const int end_posy   = y + mask_size < h - 1 ? y + mask_size : h - 1;
    unsigned int accumulator = 0, divisor = 0;
    int i, j;

    for (j = start_posy; j <= end_posy; j++) {
        /* Row of the image we are reading pixel values from. */
        const uint8_t *image_row = image_data + image_linesize * j;
        /* Row of the filter (strength mask) image. */
        const uint8_t *mask_row  = mask_data + mask_linesize * j;
        /* Matching row of the circular blur mask. */
        const int *circle_row    = circle[j - origin_y];

        for (i = start_posx; i <= end_posx; i++) {
            /* Only use the pixel if it is not part of the logo and lies
             * inside the circle. */
            if (!mask_row[i] && circle_row[i - origin_x]) {
                accumulator += image_row[i];
                divisor++;
            }
        }
    }

    /* If divisor is 0, it means that not a single pixel is outside of
       the logo, so we have no data. Else we need to normalise the
       data using the divisor. */
    return divisor == 0 ? 255 :
        (accumulator + (divisor / 2)) / divisor; /* divide, taking into account average rounding error */
}
434
435 /**
436 * Blur image plane using a mask.
437 *
438 * @param source The image to have it's logo removed.
439 * @param destination Where the output image will be stored.
440 * @param source_stride How far apart (in memory) two consecutive lines are.
441 * @param destination Same as source_stride, but for the destination image.
442 * @param width Width of the image. This is the same for source and destination.
443 * @param height Height of the image. This is the same for source and destination.
444 * @param is_image_direct If the image is direct, then source and destination are
445 * the same and we can save a lot of time by not copying pixels that
446 * haven't changed.
447 * @param filter The image that stores the distance to the edge of the logo for
448 * each pixel.
449 * @param logo_start_x smallest x-coordinate that contains at least 1 logo pixel.
450 * @param logo_start_y smallest y-coordinate that contains at least 1 logo pixel.
451 * @param logo_end_x largest x-coordinate that contains at least 1 logo pixel.
452 * @param logo_end_y largest y-coordinate that contains at least 1 logo pixel.
453 *
454 * This function processes an entire plane. Pixels outside of the logo are copied
455 * to the output without change, and pixels inside the logo have the de-blurring
456 * function applied.
457 */
458 static void blur_image(int ***mask,
459 const uint8_t *src_data, int src_linesize,
460 uint8_t *dst_data, int dst_linesize,
461 const uint8_t *mask_data, int mask_linesize,
462 int w, int h, int direct,
463 FFBoundingBox *bbox)
464 {
465 int x, y;
466 uint8_t *dst_line;
467 const uint8_t *src_line;
468
469 if (!direct)
470 av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h);
471
472 for (y = bbox->y1; y <= bbox->y2; y++) {
473 src_line = src_data + src_linesize * y;
474 dst_line = dst_data + dst_linesize * y;
475
476 for (x = bbox->x1; x <= bbox->x2; x++) {
477 if (mask_data[y * mask_linesize + x]) {
478 /* Only process if we are in the mask. */
479 dst_line[x] = blur_pixel(mask,
480 mask_data, mask_linesize,
481 dst_data, dst_linesize,
482 w, h, x, y);
483 } else {
484 /* Else just copy the data. */
485 if (!direct)
486 dst_line[x] = src_line[x];
487 }
488 }
489 }
490 }
491
492 static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
493 {
494 RemovelogoContext *s = inlink->dst->priv;
495 AVFilterLink *outlink = inlink->dst->outputs[0];
496 AVFrame *outpicref;
497 int direct = 0;
498
499 if (av_frame_is_writable(inpicref)) {
500 direct = 1;
501 outpicref = inpicref;
502 } else {
503 outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
504 if (!outpicref) {
505 av_frame_free(&inpicref);
506 return AVERROR(ENOMEM);
507 }
508 av_frame_copy_props(outpicref, inpicref);
509 }
510
511 blur_image(s->mask,
512 inpicref ->data[0], inpicref ->linesize[0],
513 outpicref->data[0], outpicref->linesize[0],
514 s->full_mask_data, inlink->w,
515 inlink->w, inlink->h, direct, &s->full_mask_bbox);
516 blur_image(s->mask,
517 inpicref ->data[1], inpicref ->linesize[1],
518 outpicref->data[1], outpicref->linesize[1],
519 s->half_mask_data, inlink->w/2,
520 inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
521 blur_image(s->mask,
522 inpicref ->data[2], inpicref ->linesize[2],
523 outpicref->data[2], outpicref->linesize[2],
524 s->half_mask_data, inlink->w/2,
525 inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
526
527 if (!direct)
528 av_frame_free(&inpicref);
529
530 return ff_filter_frame(outlink, outpicref);
531 }
532
533 static av_cold void uninit(AVFilterContext *ctx)
534 {
535 RemovelogoContext *s = ctx->priv;
536 int a, b;
537
538 av_freep(&s->full_mask_data);
539 av_freep(&s->half_mask_data);
540
541 if (s->mask) {
542 /* Loop through each mask. */
543 for (a = 0; a <= s->max_mask_size; a++) {
544 /* Loop through each scanline in a mask. */
545 for (b = -a; b <= a; b++) {
546 av_freep(&s->mask[a][b + a]); /* Free a scanline. */
547 }
548 av_freep(&s->mask[a]);
549 }
550 /* Free the array of pointers pointing to the masks. */
551 av_freep(&s->mask);
552 }
553 }
554
555 static const AVFilterPad removelogo_inputs[] = {
556 {
557 .name = "default",
558 .type = AVMEDIA_TYPE_VIDEO,
559 .config_props = config_props_input,
560 .filter_frame = filter_frame,
561 },
562 };
563
564 const FFFilter ff_vf_removelogo = {
565 .p.name = "removelogo",
566 .p.description = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."),
567 .p.priv_class = &removelogo_class,
568 .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
569 .priv_size = sizeof(RemovelogoContext),
570 .init = init,
571 .uninit = uninit,
572 FILTER_INPUTS(removelogo_inputs),
573 FILTER_OUTPUTS(ff_video_default_filterpad),
574 FILTER_SINGLE_PIXFMT(AV_PIX_FMT_YUV420P),
575 };
576