FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vf_removelogo.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
3  * Copyright (c) 2012 Stefano Sabatini
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Advanced blur-based logo removing filter
25  *
26  * This filter loads an image mask file showing where a logo is and
27  * uses a blur transform to remove the logo.
28  *
29  * Based on the libmpcodecs remove-logo filter by Robert Edele.
30  */
31 
32 /**
33  * This code implements a filter to remove annoying TV logos and other annoying
34  * images placed onto a video stream. It works by filling in the pixels that
35  * comprise the logo with neighboring pixels. The transform is very loosely
36  * based on a gaussian blur, but it is different enough to merit its own
37  * paragraph later on. It is a major improvement on the old delogo filter as it
38  * both uses a better blurring algorithm and uses a bitmap to use an arbitrary
39  * and generally much tighter fitting shape than a rectangle.
40  *
41  * The logo removal algorithm has two key points. The first is that it
42  * distinguishes between pixels in the logo and those not in the logo by using
43  * the passed-in bitmap. Pixels not in the logo are copied over directly without
44  * being modified and they also serve as source pixels for the logo
45  * fill-in. Pixels inside the logo have the mask applied.
46  *
47  * At init-time the bitmap is reprocessed internally, and the distance to the
48  * nearest edge of the logo (Manhattan distance), along with a little extra to
49  * remove rough edges, is stored in each pixel. This is done using an in-place
50  * erosion algorithm, and incrementing each pixel that survives any given
51  * erosion. Once every pixel is eroded, the maximum value is recorded, and a
52  * set of masks from size 0 to this size are generated. The masks are circular
53  * binary masks, where each pixel within a radius N (where N is the size of the
54  * mask) is a 1, and all other pixels are a 0. Although a gaussian mask would be
55  * more mathematically accurate, a binary mask works better in practice because
56  * we generally do not use the central pixels in the mask (because they are in
57  * the logo region), and thus a gaussian mask will cause too little blur and
58  * thus a very unstable image.
59  *
60  * The mask is applied in a special way. Namely, only pixels in the mask that
61  * line up to pixels outside the logo are used. The dynamic mask size means that
62  * the mask is just big enough so that the edges touch pixels outside the logo,
63  * so the blurring is kept to a minimum and at least the first boundary
64  * condition is met (that the image function itself is continuous), even if the
65  * second boundary condition (that the derivative of the image function is
66  * continuous) is not met. A masking algorithm that does preserve the second
67  * boundary condition (perhaps something based on a highly-modified bi-cubic
68  * algorithm) should offer even better results on paper, but the noise in a
69  * typical TV signal should make anything based on derivatives hopelessly noisy.
70  */
71 
72 #include "libavutil/imgutils.h"
73 #include "libavutil/opt.h"
74 #include "avfilter.h"
75 #include "formats.h"
76 #include "internal.h"
77 #include "video.h"
78 #include "bbox.h"
79 #include "lavfutils.h"
80 #include "lswsutils.h"
81 
82 typedef struct {
83  const AVClass *class;
84  char *filename;
85  /* Stores our collection of masks. The first is for an array of
86  the second for the y axis, and the third for the x axis. */
87  int ***mask;
89  int mask_w, mask_h;
90 
96 
97 #define OFFSET(x) offsetof(RemovelogoContext, x)
98 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
99 static const AVOption removelogo_options[] = {
100  { "filename", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
101  { "f", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
102  { NULL }
103 };
104 
105 AVFILTER_DEFINE_CLASS(removelogo);
106 
/**
 * Choose a slightly larger mask size to improve performance.
 *
 * This macro maps the absolute minimum mask size needed to the
 * mask size we'll actually use. f(x) = x (the smallest that will
 * work) will produce the sharpest results, but will be quite
 * jittery. f(x) = 1.25x (what is used here, computed as x + x/4 in
 * integer arithmetic) is a good tradeoff. This is only evaluated at
 * init-time, so a long expression here would not affect performance.
 *
 * Both uses of the argument are parenthesized so that an expression
 * argument (e.g. "a & b" or "c ? a : b") expands correctly. The
 * argument is evaluated twice, so it must not have side effects.
 */
#define apply_mask_fudge_factor(x) (((x) >> 2) + (x))
118 
/**
 * Pre-process an image to give distance information.
 *
 * This function takes a bitmap image and converts it in place into a
 * distance image. A distance image is zero for pixels outside of the
 * logo and is the Manhattan distance (|dx| + |dy|) from the logo edge
 * for pixels inside of the logo. This will overestimate the distance,
 * but that is safe, and is far easier to implement than a proper
 * pythagorean distance since a modified erosion algorithm is used to
 * compute the distances.
 *
 * Distances are stored in 8-bit pixels, so they saturate: erosion stops
 * once a distance of 204 is reached, which is the largest value whose
 * fudged size (x + x/4 = 255) still fits in a uint8_t. Without this
 * limit the stored values would wrap around for very deep logos.
 *
 * @param data          image which will be converted in place from a
 *                      greyscale image into a distance image
 * @param linesize      distance in bytes between two rows of data
 * @param w             width of the image in pixels
 * @param h             height of the image in pixels
 * @param min_val       pixels strictly greater than this value are
 *                      considered part of the logo
 * @param max_mask_size receives the (fudged) maximum mask size, which
 *                      is at least as large as any value left in data
 */
static void convert_mask_to_strength_mask(uint8_t *data, int linesize,
                                          int w, int h, int min_val,
                                          int *max_mask_size)
{
    /* Largest distance whose fudged value still fits in a uint8_t:
     * 204 + (204 >> 2) == 255. */
    enum { MAX_STRENGTH = 204 };
    int x, y;

    /* How many times we've gone through the loop. Used in the
       in-place erosion algorithm and to get us max_mask_size later on. */
    int current_pass = 0;

    /* Binarize: pixels above min_val become 1 (logo), all others 0. */
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            data[y * linesize + x] = data[y * linesize + x] > min_val;

    /* For each pass, if a pixel and its four neighbors are all at least
       the current pass number, the pixel is incremented. The loop ends
       with the first pass that increments nothing. The outermost rows
       and columns are never incremented, which guarantees termination
       even for a mask that touches the image border. */
    while (1) {
        /* If this doesn't get set by the end of this pass, then we're done. */
        int has_anything_changed = 0;
        uint8_t *row = data + 1 + linesize;

        current_pass++;

        /* Saturate instead of letting the 8-bit distances wrap around. */
        if (current_pass >= MAX_STRENGTH)
            break;

        for (y = 1; y < h - 1; y++) {
            uint8_t *px = row;

            for (x = 1; x < w - 1; x++, px++) {
                /* In-place erosion. It relies on two premises:
                   1 - Any pixel that fails one erosion will fail all
                       future erosions.
                   2 - Only pixels having survived all erosions up to
                       the present will be >= current_pass, whether
                       they survived the current pass, failed it, or
                       have not been tested yet. Using >= instead of
                       == is what allows the algorithm to work in
                       place. */
                if (*px              >= current_pass &&
                    *(px + 1)        >= current_pass &&
                    *(px - 1)        >= current_pass &&
                    *(px + linesize) >= current_pass &&
                    *(px - linesize) >= current_pass) {
                    /* Still not eroded: bump its distance. */
                    (*px)++;
                    has_anything_changed = 1;
                }
            }
            row += linesize;
        }
        if (!has_anything_changed)
            break;
    }

    /* Apply the fudge factor, which will increase the size of the
     * mask a little to reduce jitter at the cost of more blur. */
    for (y = 1; y < h - 1; y++)
        for (x = 1; x < w - 1; x++)
            data[(y * linesize) + x] = apply_mask_fudge_factor(data[(y * linesize) + x]);

    /* As a side-effect, we now know the maximum mask size, which
     * the caller uses to generate its masks. Apply the fudge factor to
     * this number too, since enough masks must be generated. */
    *max_mask_size = apply_mask_fudge_factor(current_pass + 1);
}
205 
207 {
208  static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE };
210  return 0;
211 }
212 
213 static int load_mask(uint8_t **mask, int *w, int *h,
214  const char *filename, void *log_ctx)
215 {
216  int ret;
217  enum AVPixelFormat pix_fmt;
218  uint8_t *src_data[4], *gray_data[4];
219  int src_linesize[4], gray_linesize[4];
220 
221  /* load image from file */
222  if ((ret = ff_load_image(src_data, src_linesize, w, h, &pix_fmt, filename, log_ctx)) < 0)
223  return ret;
224 
225  /* convert the image to GRAY8 */
226  if ((ret = ff_scale_image(gray_data, gray_linesize, *w, *h, AV_PIX_FMT_GRAY8,
227  src_data, src_linesize, *w, *h, pix_fmt,
228  log_ctx)) < 0)
229  goto end;
230 
231  /* copy mask to a newly allocated array */
232  *mask = av_malloc(*w * *h);
233  if (!*mask)
234  ret = AVERROR(ENOMEM);
235  av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h);
236 
237 end:
238  av_freep(&src_data[0]);
239  av_freep(&gray_data[0]);
240  return ret;
241 }
242 
/**
 * Build the half-resolution strength mask from the full-size one.
 *
 * Every 2x2 group of source pixels is collapsed into a single
 * destination pixel that is 1 when any of the four source pixels is
 * non-zero and 0 otherwise. Erring towards "set" is deliberate: a
 * logo pixel that is missed would go unfiltered, whereas an extra one
 * only adds a little blur. The resulting binary image is then handed
 * to convert_mask_to_strength_mask(), which turns it in place into
 * distance values at the reduced resolution.
 *
 * @param src_data      full-size mask
 * @param src_linesize  row stride of src_data
 * @param dst_data      output buffer of at least (src_w/2) x (src_h/2) pixels
 * @param dst_linesize  row stride of dst_data
 * @param src_w         width of the full-size mask
 * @param src_h         height of the full-size mask
 * @param max_mask_size receives the maximum mask size of the half-size image
 */
static void generate_half_size_image(const uint8_t *src_data, int src_linesize,
                                     uint8_t *dst_data, int dst_linesize,
                                     int src_w, int src_h,
                                     int *max_mask_size)
{
    const int half_w = src_w / 2;
    const int half_h = src_h / 2;
    int row, col;

    for (row = 0; row < half_h; row++) {
        /* The two source rows that feed this destination row. */
        const uint8_t *upper = src_data + (2 * row) * src_linesize;
        const uint8_t *lower = upper + src_linesize;
        uint8_t *out = dst_data + row * dst_linesize;

        for (col = 0; col < half_w; col++) {
            const int sx = 2 * col;

            /* Logical OR of the 2x2 block: always exactly 0 or 1. */
            out[col] = upper[sx] || upper[sx + 1] ||
                       lower[sx] || lower[sx + 1];
        }
    }

    convert_mask_to_strength_mask(dst_data, dst_linesize,
                                  half_w, half_h, 0, max_mask_size);
}
279 
280 static av_cold int init(AVFilterContext *ctx)
281 {
282  RemovelogoContext *s = ctx->priv;
283  int ***mask;
284  int ret = 0;
285  int a, b, c, w, h;
286  int full_max_mask_size, half_max_mask_size;
287 
288  if (!s->filename) {
289  av_log(ctx, AV_LOG_ERROR, "The bitmap file name is mandatory\n");
290  return AVERROR(EINVAL);
291  }
292 
293  /* Load our mask image. */
294  if ((ret = load_mask(&s->full_mask_data, &w, &h, s->filename, ctx)) < 0)
295  return ret;
296  s->mask_w = w;
297  s->mask_h = h;
298 
300  16, &full_max_mask_size);
301 
302  /* Create the scaled down mask image for the chroma planes. */
303  if (!(s->half_mask_data = av_mallocz(w/2 * h/2)))
304  return AVERROR(ENOMEM);
306  s->half_mask_data, w/2,
307  w, h, &half_max_mask_size);
308 
309  s->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size);
310 
311  /* Create a circular mask for each size up to max_mask_size. When
312  the filter is applied, the mask size is determined on a pixel
313  by pixel basis, with pixels nearer the edge of the logo getting
314  smaller mask sizes. */
315  mask = (int ***)av_malloc(sizeof(int **) * (s->max_mask_size + 1));
316  if (!mask)
317  return AVERROR(ENOMEM);
318 
319  for (a = 0; a <= s->max_mask_size; a++) {
320  mask[a] = (int **)av_malloc(sizeof(int *) * ((a * 2) + 1));
321  if (!mask[a])
322  return AVERROR(ENOMEM);
323  for (b = -a; b <= a; b++) {
324  mask[a][b + a] = (int *)av_malloc(sizeof(int) * ((a * 2) + 1));
325  if (!mask[a][b + a])
326  return AVERROR(ENOMEM);
327  for (c = -a; c <= a; c++) {
328  if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */
329  mask[a][b + a][c + a] = 1;
330  else
331  mask[a][b + a][c + a] = 0;
332  }
333  }
334  }
335  s->mask = mask;
336 
337  /* Calculate our bounding rectangles, which determine in what
338  * region the logo resides for faster processing. */
340  ff_calculate_bounding_box(&s->half_mask_bbox, s->half_mask_data, w/2, w/2, h/2, 0);
341 
342 #define SHOW_LOGO_INFO(mask_type) \
343  av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \
344  s->mask_type##_mask_bbox.x1, s->mask_type##_mask_bbox.x2, \
345  s->mask_type##_mask_bbox.y1, s->mask_type##_mask_bbox.y2, \
346  mask_type##_max_mask_size);
347  SHOW_LOGO_INFO(full);
348  SHOW_LOGO_INFO(half);
349 
350  return 0;
351 }
352 
353 static int config_props_input(AVFilterLink *inlink)
354 {
355  AVFilterContext *ctx = inlink->dst;
356  RemovelogoContext *s = ctx->priv;
357 
358  if (inlink->w != s->mask_w || inlink->h != s->mask_h) {
359  av_log(ctx, AV_LOG_INFO,
360  "Mask image size %dx%d does not match with the input video size %dx%d\n",
361  s->mask_w, s->mask_h, inlink->w, inlink->h);
362  return AVERROR(EINVAL);
363  }
364 
365  return 0;
366 }
367 
/**
 * Blur one pixel.
 *
 * It takes a pixel that is inside the logo and computes its
 * replacement value: the rounded average of all image pixels that lie
 * within the circular mask centred on (x, y) and outside of the logo.
 * The radius of the circle is the strength stored in mask_data at
 * (x, y).
 *
 * @param mask           table of circular 0/1 masks, indexed by size
 * @param mask_data      the strength mask plane (0 means "not in the logo")
 * @param mask_linesize  row stride of mask_data
 * @param image_data     the image plane to read from
 * @param image_linesize row stride of image_data
 * @param w              width of the image
 * @param h              height of the image
 * @param x              x-coordinate of the pixel to blur
 * @param y              y-coordinate of the pixel to blur
 * @return the blurred value, or 255 when no pixel outside of the logo
 *         lies within the circle
 */
static unsigned int blur_pixel(int ***mask,
                               const uint8_t *mask_data, int mask_linesize,
                               uint8_t *image_data, int image_linesize,
                               int w, int h, int x, int y)
{
    /* Mask size tells how large a circle to use. The radius is about
     * (slightly larger than) mask size. */
    const int mask_size = mask_data[y * mask_linesize + x];
    /* Bounding rectangle of the circle, clipped to the image. */
    const int start_posx = x - mask_size > 0     ? x - mask_size : 0;
    const int start_posy = y - mask_size > 0     ? y - mask_size : 0;
    const int end_posx   = x + mask_size < w - 1 ? x + mask_size : w - 1;
    const int end_posy   = y + mask_size < h - 1 ? y + mask_size : h - 1;
    int **circle = mask[mask_size];
    unsigned int accumulator = 0, divisor = 0;
    int i, j;

    for (j = start_posy; j <= end_posy; j++) {
        /* Row of the image being averaged. */
        const uint8_t *image_row = image_data + image_linesize * j;
        /* Matching row of the strength mask. */
        const uint8_t *mask_row  = mask_data  + mask_linesize  * j;
        /* The circle is indexed relative to its centre (x, y), not to the
         * clipped rectangle, so that it stays centred on the pixel even
         * when the rectangle is cut off by the top or left image border.
         * The index is always within [0, 2 * mask_size]. */
        const int *circle_row = circle[j - y + mask_size];

        for (i = start_posx; i <= end_posx; i++) {
            /* Only use pixels that are outside of the logo and inside
             * of the circle. */
            if (!mask_row[i] && circle_row[i - x + mask_size]) {
                accumulator += image_row[i];
                divisor++;
            }
        }
    }

    /* If divisor is 0, it means that not a single pixel is outside of
       the logo, so we have no data. Else we need to normalise the
       data using the divisor. */
    if (divisor == 0)
        return 255;

    /* divide, taking into account average rounding error */
    return (accumulator + (divisor / 2)) / divisor;
}
431 
432 /**
433  * Blur image plane using a mask.
434  *
435  * @param source The image to have it's logo removed.
436  * @param destination Where the output image will be stored.
437  * @param source_stride How far apart (in memory) two consecutive lines are.
438  * @param destination Same as source_stride, but for the destination image.
439  * @param width Width of the image. This is the same for source and destination.
440  * @param height Height of the image. This is the same for source and destination.
441  * @param is_image_direct If the image is direct, then source and destination are
442  * the same and we can save a lot of time by not copying pixels that
443  * haven't changed.
444  * @param filter The image that stores the distance to the edge of the logo for
445  * each pixel.
446  * @param logo_start_x smallest x-coordinate that contains at least 1 logo pixel.
447  * @param logo_start_y smallest y-coordinate that contains at least 1 logo pixel.
448  * @param logo_end_x largest x-coordinate that contains at least 1 logo pixel.
449  * @param logo_end_y largest y-coordinate that contains at least 1 logo pixel.
450  *
451  * This function processes an entire plane. Pixels outside of the logo are copied
452  * to the output without change, and pixels inside the logo have the de-blurring
453  * function applied.
454  */
455 static void blur_image(int ***mask,
456  const uint8_t *src_data, int src_linesize,
457  uint8_t *dst_data, int dst_linesize,
458  const uint8_t *mask_data, int mask_linesize,
459  int w, int h, int direct,
460  FFBoundingBox *bbox)
461 {
462  int x, y;
463  uint8_t *dst_line;
464  const uint8_t *src_line;
465 
466  if (!direct)
467  av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h);
468 
469  for (y = bbox->y1; y <= bbox->y2; y++) {
470  src_line = src_data + src_linesize * y;
471  dst_line = dst_data + dst_linesize * y;
472 
473  for (x = bbox->x1; x <= bbox->x2; x++) {
474  if (mask_data[y * mask_linesize + x]) {
475  /* Only process if we are in the mask. */
476  dst_line[x] = blur_pixel(mask,
477  mask_data, mask_linesize,
478  dst_data, dst_linesize,
479  w, h, x, y);
480  } else {
481  /* Else just copy the data. */
482  if (!direct)
483  dst_line[x] = src_line[x];
484  }
485  }
486  }
487 }
488 
489 static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
490 {
491  RemovelogoContext *s = inlink->dst->priv;
492  AVFilterLink *outlink = inlink->dst->outputs[0];
493  AVFrame *outpicref;
494  int direct = 0;
495 
496  if (av_frame_is_writable(inpicref)) {
497  direct = 1;
498  outpicref = inpicref;
499  } else {
500  outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
501  if (!outpicref) {
502  av_frame_free(&inpicref);
503  return AVERROR(ENOMEM);
504  }
505  av_frame_copy_props(outpicref, inpicref);
506  }
507 
508  blur_image(s->mask,
509  inpicref ->data[0], inpicref ->linesize[0],
510  outpicref->data[0], outpicref->linesize[0],
511  s->full_mask_data, inlink->w,
512  inlink->w, inlink->h, direct, &s->full_mask_bbox);
513  blur_image(s->mask,
514  inpicref ->data[1], inpicref ->linesize[1],
515  outpicref->data[1], outpicref->linesize[1],
516  s->half_mask_data, inlink->w/2,
517  inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
518  blur_image(s->mask,
519  inpicref ->data[2], inpicref ->linesize[2],
520  outpicref->data[2], outpicref->linesize[2],
521  s->half_mask_data, inlink->w/2,
522  inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
523 
524  if (!direct)
525  av_frame_free(&inpicref);
526 
527  return ff_filter_frame(outlink, outpicref);
528 }
529 
530 static av_cold void uninit(AVFilterContext *ctx)
531 {
532  RemovelogoContext *s = ctx->priv;
533  int a, b;
534 
537 
538  if (s->mask) {
539  /* Loop through each mask. */
540  for (a = 0; a <= s->max_mask_size; a++) {
541  /* Loop through each scanline in a mask. */
542  for (b = -a; b <= a; b++) {
543  av_freep(&s->mask[a][b + a]); /* Free a scanline. */
544  }
545  av_freep(&s->mask[a]);
546  }
547  /* Free the array of pointers pointing to the masks. */
548  av_freep(&s->mask);
549  }
550 }
551 
552 static const AVFilterPad removelogo_inputs[] = {
553  {
554  .name = "default",
555  .type = AVMEDIA_TYPE_VIDEO,
556  .config_props = config_props_input,
557  .filter_frame = filter_frame,
558  },
559  { NULL }
560 };
561 
562 static const AVFilterPad removelogo_outputs[] = {
563  {
564  .name = "default",
565  .type = AVMEDIA_TYPE_VIDEO,
566  },
567  { NULL }
568 };
569 
571  .name = "removelogo",
572  .description = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."),
573  .priv_size = sizeof(RemovelogoContext),
574  .init = init,
575  .uninit = uninit,
577  .inputs = removelogo_inputs,
578  .outputs = removelogo_outputs,
579  .priv_class = &removelogo_class,
581 };