FFmpeg
vf_signature.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Gerion Entrup
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 /**
22  * @file
23  * MPEG-7 video signature calculation and lookup filter
24  * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
25  */
26 
27 #include <float.h>
28 #include "libavcodec/put_bits.h"
29 #include "libavformat/avformat.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/intreadwrite.h"
33 #include "libavutil/timestamp.h"
34 #include "avfilter.h"
35 #include "internal.h"
36 #include "signature.h"
37 #include "signature_lookup.c"
38 
/* Byte offset of a member of the filter's private SignatureContext,
 * as needed by the AVOption table below. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Flags shared by all options of this filter. The expansion is parenthesized
 * so the macro behaves as a single operand in any expression. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Scale factor used for the summed area table when the input is small enough
 * for exact arithmetic (sc->divide == 0); also used in the overflow test of
 * config_input. Parenthesized so the cast cannot bind to a neighbouring
 * operator at the point of use. */
#define BLOCK_LCM ((int64_t) 476985600)
42 
43 static const AVOption signature_options[] = {
44  { "detectmode", "set the detectmode",
45  OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
46  { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" },
47  { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
48  { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
49  { "nb_inputs", "number of inputs",
50  OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
51  { "filename", "filename for output files",
52  OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
53  { "format", "set output format",
54  OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" },
55  { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
56  { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" },
57  { "th_d", "threshold to detect one word as similar",
58  OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
59  { "th_dc", "threshold to detect all words as similar",
60  OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
61  { "th_xh", "threshold to detect frames as similar",
62  OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
63  { "th_di", "minimum length of matching sequence in frames",
64  OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
65  { "th_it", "threshold for relation of good to all frames",
66  OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
67  { NULL }
68 };
69 
71 
73 {
74  /* all formats with a separate gray value */
75  static const enum AVPixelFormat pix_fmts[] = {
85  };
86 
87  return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
88 }
89 
91 {
92  AVFilterContext *ctx = inlink->dst;
93  SignatureContext *sic = ctx->priv;
94  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
95 
96  sc->time_base = inlink->time_base;
97  /* test for overflow */
98  sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
99  if (sc->divide) {
100  av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
101  }
102  sc->w = inlink->w;
103  sc->h = inlink->h;
104  return 0;
105 }
106 
107 static int get_block_size(const Block *b)
108 {
109  return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
110 }
111 
112 static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
113 {
114  uint64_t sum = 0;
115 
116  int x0, y0, x1, y1;
117 
118  x0 = b->up.x;
119  y0 = b->up.y;
120  x1 = b->to.x;
121  y1 = b->to.y;
122 
123  if (x0-1 >= 0 && y0-1 >= 0) {
124  sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125  } else if (x0-1 >= 0) {
126  sum = intpic[y1][x1] - intpic[y1][x0-1];
127  } else if (y0-1 >= 0) {
128  sum = intpic[y1][x1] - intpic[y0-1][x1];
129  } else {
130  sum = intpic[y1][x1];
131  }
132  return sum;
133 }
134 
/**
 * qsort() comparator ordering uint64_t values ascending.
 *
 * @return -1, 0 or 1 if *x is smaller than, equal to or greater than *y
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t lhs = *(const uint64_t *) x;
    const uint64_t rhs = *(const uint64_t *) y;

    return (lhs > rhs) - (lhs < rhs);
}
140 
/**
 * Set one bit of a byte array to 1.
 *
 * Bits are numbered most significant first: pos 0 is the top bit of data[0],
 * pos 7 its lowest bit, pos 8 the top bit of data[1], and so on.
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_index = pos >> 3;
    const unsigned shift = 7 - (unsigned) (pos & 7);

    data[byte_index] |= (uint8_t) (1u << shift);
}
149 
151 {
152  AVFilterContext *ctx = inlink->dst;
153  SignatureContext *sic = ctx->priv;
154  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
155  FineSignature* fs;
156 
157  static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
158  /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
159  s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
160  */
161  static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
162  static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
163 
164  uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
165  uint64_t intpic[32][32];
166  uint64_t rowcount;
167  uint8_t *p = picref->data[0];
168  int inti, intj;
169  int *intjlut;
170 
171  uint64_t conflist[DIFFELEM_SIZE];
172  int f = 0, g = 0, w = 0;
173  int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
174  int64_t denom;
175  int i, j, k, ternary;
176  uint64_t blocksum;
177  int blocksize;
178  int64_t th; /* threshold */
179  int64_t sum;
180 
181  int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
182 
183  /* initialize fs */
184  if (sc->curfinesig) {
185  fs = av_mallocz(sizeof(FineSignature));
186  if (!fs)
187  return AVERROR(ENOMEM);
188  sc->curfinesig->next = fs;
189  fs->prev = sc->curfinesig;
190  sc->curfinesig = fs;
191  } else {
192  fs = sc->curfinesig = sc->finesiglist;
193  sc->curcoarsesig1->first = fs;
194  }
195 
196  fs->pts = picref->pts;
197  fs->index = sc->lastindex++;
198 
199  memset(intpic, 0, sizeof(uint64_t)*32*32);
200  intjlut = av_malloc_array(inlink->w, sizeof(int));
201  if (!intjlut)
202  return AVERROR(ENOMEM);
203  for (i = 0; i < inlink->w; i++) {
204  intjlut[i] = (i*32)/inlink->w;
205  }
206 
207  for (i = 0; i < inlink->h; i++) {
208  inti = (i*32)/inlink->h;
209  for (j = 0; j < inlink->w; j++) {
210  intj = intjlut[j];
211  intpic[inti][intj] += p[j];
212  }
213  p += picref->linesize[0];
214  }
215  av_freep(&intjlut);
216 
217  /* The following calculates a summed area table (intpic) and brings the numbers
218  * in intpic to the same denominator.
219  * So you only have to handle the numerator in the following sections.
220  */
221  dh1 = inlink->h / 32;
222  if (inlink->h % 32)
223  dh2 = dh1 + 1;
224  dw1 = inlink->w / 32;
225  if (inlink->w % 32)
226  dw2 = dw1 + 1;
227  denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1;
228 
229  for (i = 0; i < 32; i++) {
230  rowcount = 0;
231  a = 1;
232  if (dh2 > 1) {
233  a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
234  a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
235  a = (a == dh1)? dh2 : dh1;
236  }
237  for (j = 0; j < 32; j++) {
238  b = 1;
239  if (dw2 > 1) {
240  b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
241  b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
242  b = (b == dw1)? dw2 : dw1;
243  }
244  rowcount += intpic[i][j] * a * b * precfactor / denom;
245  if (i > 0) {
246  intpic[i][j] = intpic[i-1][j] + rowcount;
247  } else {
248  intpic[i][j] = rowcount;
249  }
250  }
251  }
252 
253  denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2;
254 
255  for (i = 0; i < ELEMENT_COUNT; i++) {
256  const ElemCat* elemcat = elements[i];
257  int64_t* elemsignature;
258  uint64_t* sortsignature;
259 
260  elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
261  if (!elemsignature)
262  return AVERROR(ENOMEM);
263  sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
264  if (!sortsignature) {
265  av_freep(&elemsignature);
266  return AVERROR(ENOMEM);
267  }
268 
269  for (j = 0; j < elemcat->elem_count; j++) {
270  blocksum = 0;
271  blocksize = 0;
272  for (k = 0; k < elemcat->left_count; k++) {
273  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
274  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
275  }
276  sum = blocksum / blocksize;
277  if (elemcat->av_elem) {
278  sum -= 128 * precfactor * denom;
279  } else {
280  blocksum = 0;
281  blocksize = 0;
282  for (; k < elemcat->block_count; k++) {
283  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
284  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
285  }
286  sum -= blocksum / blocksize;
287  conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
288  }
289 
290  elemsignature[j] = sum;
291  sortsignature[j] = FFABS(sum);
292  }
293 
294  /* get threshold */
295  qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);
296  th = sortsignature[(int) (elemcat->elem_count*0.333)];
297 
298  /* ternarize */
299  for (j = 0; j < elemcat->elem_count; j++) {
300  if (elemsignature[j] < -th) {
301  ternary = 0;
302  } else if (elemsignature[j] <= th) {
303  ternary = 1;
304  } else {
305  ternary = 2;
306  }
307  fs->framesig[f/5] += ternary * pot3[f%5];
308 
309  if (f == wordvec[w]) {
310  fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
311  if (w < 24)
312  w++;
313  }
314  f++;
315  }
316  av_freep(&elemsignature);
317  av_freep(&sortsignature);
318  }
319 
320  /* confidence */
321  qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);
322  fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
323 
324  /* coarsesignature */
325  if (sc->coarsecount == 0) {
326  if (sc->curcoarsesig2) {
328  if (!sc->curcoarsesig1)
329  return AVERROR(ENOMEM);
330  sc->curcoarsesig1->first = fs;
331  sc->curcoarsesig2->next = sc->curcoarsesig1;
332  sc->coarseend = sc->curcoarsesig1;
333  }
334  }
335  if (sc->coarsecount == 45) {
336  sc->midcoarse = 1;
338  if (!sc->curcoarsesig2)
339  return AVERROR(ENOMEM);
340  sc->curcoarsesig2->first = fs;
341  sc->curcoarsesig1->next = sc->curcoarsesig2;
342  sc->coarseend = sc->curcoarsesig2;
343  }
344  for (i = 0; i < 5; i++) {
345  set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
346  }
347  /* assuming the actual frame is the last */
348  sc->curcoarsesig1->last = fs;
349  if (sc->midcoarse) {
350  for (i = 0; i < 5; i++) {
351  set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
352  }
353  sc->curcoarsesig2->last = fs;
354  }
355 
356  sc->coarsecount = (sc->coarsecount+1)%90;
357 
358  /* debug printing finesignature */
359  if (av_log_get_level() == AV_LOG_DEBUG) {
360  av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
361 
362  av_log(ctx, AV_LOG_DEBUG, "words:");
363  for (i = 0; i < 5; i++) {
364  av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
365  av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
366  for (j = 1; j < 5; j++)
367  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
368  av_log(ctx, AV_LOG_DEBUG, ";");
369  }
370  av_log(ctx, AV_LOG_DEBUG, "\n");
371 
372  av_log(ctx, AV_LOG_DEBUG, "framesignature:");
373  for (i = 0; i < SIGELEM_SIZE/5; i++) {
374  av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
375  for (j = 1; j < 5; j++)
376  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
377  }
378  av_log(ctx, AV_LOG_DEBUG, "\n");
379  }
380 
381  if (FF_INLINK_IDX(inlink) == 0)
382  return ff_filter_frame(inlink->dst->outputs[0], picref);
383  return 1;
384 }
385 
386 static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
387 {
388  FineSignature* fs;
389  CoarseSignature* cs;
390  int i, j;
391  FILE* f;
392  unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
393 
394  f = fopen(filename, "w");
395  if (!f) {
396  int err = AVERROR(EINVAL);
397  char buf[128];
398  av_strerror(err, buf, sizeof(buf));
399  av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
400  return err;
401  }
402 
403  /* header */
404  fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
405  fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
406  fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
407  fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
408  fprintf(f, " <VideoSignatureRegion>\n");
409  fprintf(f, " <VideoSignatureSpatialRegion>\n");
410  fprintf(f, " <Pixel>0 0 </Pixel>\n");
411  fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
412  fprintf(f, " </VideoSignatureSpatialRegion>\n");
413  fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
414  /* hoping num is 1, other values are vague */
415  fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
416  fprintf(f, " <MediaTimeOfSpatialRegion>\n");
417  fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
418  fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
419  fprintf(f, " </MediaTimeOfSpatialRegion>\n");
420 
421  /* coarsesignatures */
422  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
423  fprintf(f, " <VSVideoSegment>\n");
424  fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
425  fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
426  fprintf(f, " <MediaTimeOfSegment>\n");
427  fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
428  fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
429  fprintf(f, " </MediaTimeOfSegment>\n");
430  for (i = 0; i < 5; i++) {
431  fprintf(f, " <BagOfWords>");
432  for (j = 0; j < 31; j++) {
433  uint8_t n = cs->data[i][j];
434  if (j < 30) {
435  fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
436  (n & 0x40) >> 6,
437  (n & 0x20) >> 5,
438  (n & 0x10) >> 4,
439  (n & 0x08) >> 3,
440  (n & 0x04) >> 2,
441  (n & 0x02) >> 1,
442  (n & 0x01));
443  } else {
444  /* print only 3 bit in last byte */
445  fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
446  (n & 0x40) >> 6,
447  (n & 0x20) >> 5);
448  }
449  }
450  fprintf(f, "</BagOfWords>\n");
451  }
452  fprintf(f, " </VSVideoSegment>\n");
453  }
454 
455  /* finesignatures */
456  for (fs = sc->finesiglist; fs; fs = fs->next) {
457  fprintf(f, " <VideoFrame>\n");
458  fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
459  /* confidence */
460  fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
461  /* words */
462  fprintf(f, " <Word>");
463  for (i = 0; i < 5; i++) {
464  fprintf(f, "%d ", fs->words[i]);
465  if (i < 4) {
466  fprintf(f, " ");
467  }
468  }
469  fprintf(f, "</Word>\n");
470  /* framesignature */
471  fprintf(f, " <FrameSignature>");
472  for (i = 0; i< SIGELEM_SIZE/5; i++) {
473  if (i > 0) {
474  fprintf(f, " ");
475  }
476  fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
477  for (j = 1; j < 5; j++)
478  fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
479  }
480  fprintf(f, "</FrameSignature>\n");
481  fprintf(f, " </VideoFrame>\n");
482  }
483  fprintf(f, " </VideoSignatureRegion>\n");
484  fprintf(f, " </Descriptor>\n");
485  fprintf(f, " </DescriptionUnit>\n");
486  fprintf(f, "</Mpeg7>\n");
487 
488  fclose(f);
489  return 0;
490 }
491 
492 static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
493 {
494  FILE* f;
495  FineSignature* fs;
496  CoarseSignature* cs;
497  uint32_t numofsegments = (sc->lastindex + 44)/45;
498  int i, j;
499  PutBitContext buf;
500  /* buffer + header + coarsesignatures + finesignature */
501  int len = (512 + 6 * 32 + 3*16 + 2 +
502  numofsegments * (4*32 + 1 + 5*243) +
503  sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
504  uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
505  if (!buffer)
506  return AVERROR(ENOMEM);
507 
508  f = fopen(filename, "wb");
509  if (!f) {
510  int err = AVERROR(EINVAL);
511  char buf[128];
512  av_strerror(err, buf, sizeof(buf));
513  av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
514  av_freep(&buffer);
515  return err;
516  }
517  init_put_bits(&buf, buffer, len);
518 
519  put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
520  put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
521  put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
522  put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
523  put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
524  put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
525  put_bits32(&buf, sc->lastindex); /* NumOfFrames */
526  /* hoping num is 1, other values are vague */
527  /* den/num might be greater than 16 bit, so cutting it */
528  put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
529  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
530  put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
531  put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
532  put_bits32(&buf, numofsegments); /* NumOfSegments */
533  /* coarsesignatures */
534  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
535  put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
536  put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
537  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
538  put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
539  put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
540  for (i = 0; i < 5; i++) {
541  /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
542  for (j = 0; j < 30; j++) {
543  put_bits(&buf, 8, cs->data[i][j]);
544  }
545  put_bits(&buf, 3, cs->data[i][30] >> 5);
546  }
547  }
548  /* finesignatures */
549  put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
550  for (fs = sc->finesiglist; fs; fs = fs->next) {
551  put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
552  put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
553  put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
554  for (i = 0; i < 5; i++) {
555  put_bits(&buf, 8, fs->words[i]); /* Words */
556  }
557  /* framesignature */
558  for (i = 0; i < SIGELEM_SIZE/5; i++) {
559  put_bits(&buf, 8, fs->framesig[i]);
560  }
561  }
562 
563  flush_put_bits(&buf);
564  fwrite(buffer, 1, put_bits_count(&buf)/8, f);
565  fclose(f);
566  av_freep(&buffer);
567  return 0;
568 }
569 
571 {
572  SignatureContext* sic = ctx->priv;
573  char filename[1024];
574 
575  if (sic->nb_inputs > 1) {
576  /* error already handled */
577  av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
578  } else {
579  if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
580  return AVERROR(EINVAL);
581  }
582  if (sic->format == FORMAT_XML) {
583  return xml_export(ctx, sc, filename);
584  } else {
585  return binary_export(ctx, sc, filename);
586  }
587 }
588 
589 static int request_frame(AVFilterLink *outlink)
590 {
591  AVFilterContext *ctx = outlink->src;
592  SignatureContext *sic = ctx->priv;
593  StreamContext *sc, *sc2;
594  MatchingInfo match;
595  int i, j, ret;
596  int lookup = 1; /* indicates wheather EOF of all files is reached */
597 
598  /* process all inputs */
599  for (i = 0; i < sic->nb_inputs; i++){
600  sc = &(sic->streamcontexts[i]);
601 
602  ret = ff_request_frame(ctx->inputs[i]);
603 
604  /* return if unexpected error occurs in input stream */
605  if (ret < 0 && ret != AVERROR_EOF)
606  return ret;
607 
608  /* export signature at EOF */
609  if (ret == AVERROR_EOF && !sc->exported) {
610  /* export if wanted */
611  if (strlen(sic->filename) > 0) {
612  if (export(ctx, sc, i) < 0)
613  return ret;
614  }
615  sc->exported = 1;
616  }
617  lookup &= sc->exported;
618  }
619 
620  /* signature lookup */
621  if (lookup && sic->mode != MODE_OFF) {
622  /* iterate over every pair */
623  for (i = 0; i < sic->nb_inputs; i++) {
624  sc = &(sic->streamcontexts[i]);
625  for (j = i+1; j < sic->nb_inputs; j++) {
626  sc2 = &(sic->streamcontexts[j]);
627  match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
628  if (match.score != 0) {
629  av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
630  i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
631  j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
632  match.matchframes);
633  if (match.whole)
634  av_log(ctx, AV_LOG_INFO, "whole video matching\n");
635  } else {
636  av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
637  }
638  }
639  }
640  }
641 
642  return ret;
643 }
644 
646 {
647 
648  SignatureContext *sic = ctx->priv;
649  StreamContext *sc;
650  int i, ret;
651  char tmp[1024];
652 
653  sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
654  if (!sic->streamcontexts)
655  return AVERROR(ENOMEM);
656 
657  for (i = 0; i < sic->nb_inputs; i++) {
658  AVFilterPad pad = {
660  .name = av_asprintf("in%d", i),
661  .config_props = config_input,
662  .filter_frame = filter_frame,
663  };
664 
665  if (!pad.name)
666  return AVERROR(ENOMEM);
667  if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
668  av_freep(&pad.name);
669  return ret;
670  }
671 
672  sc = &(sic->streamcontexts[i]);
673 
674  sc->lastindex = 0;
675  sc->finesiglist = av_mallocz(sizeof(FineSignature));
676  if (!sc->finesiglist)
677  return AVERROR(ENOMEM);
678  sc->curfinesig = NULL;
679 
681  if (!sc->coarsesiglist)
682  return AVERROR(ENOMEM);
683  sc->curcoarsesig1 = sc->coarsesiglist;
684  sc->coarseend = sc->coarsesiglist;
685  sc->coarsecount = 0;
686  sc->midcoarse = 0;
687  }
688 
689  /* check filename */
690  if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
691  av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
692  return AVERROR(EINVAL);
693  }
694 
695  return 0;
696 }
697 
698 
699 
701 {
702  SignatureContext *sic = ctx->priv;
703  StreamContext *sc;
704  void* tmp;
705  FineSignature* finsig;
706  CoarseSignature* cousig;
707  int i;
708 
709 
710  /* free the lists */
711  if (sic->streamcontexts != NULL) {
712  for (i = 0; i < sic->nb_inputs; i++) {
713  sc = &(sic->streamcontexts[i]);
714  finsig = sc->finesiglist;
715  cousig = sc->coarsesiglist;
716 
717  while (finsig) {
718  tmp = finsig;
719  finsig = finsig->next;
720  av_freep(&tmp);
721  }
722  sc->finesiglist = NULL;
723 
724  while (cousig) {
725  tmp = cousig;
726  cousig = cousig->next;
727  av_freep(&tmp);
728  }
729  sc->coarsesiglist = NULL;
730  }
731  av_freep(&sic->streamcontexts);
732  }
733  for (unsigned i = 0; i < ctx->nb_inputs; i++)
734  av_freep(&ctx->input_pads[i].name);
735 }
736 
737 static int config_output(AVFilterLink *outlink)
738 {
739  AVFilterContext *ctx = outlink->src;
740  AVFilterLink *inlink = ctx->inputs[0];
741 
742  outlink->time_base = inlink->time_base;
743  outlink->frame_rate = inlink->frame_rate;
744  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
745  outlink->w = inlink->w;
746  outlink->h = inlink->h;
747 
748  return 0;
749 }
750 
751 static const AVFilterPad signature_outputs[] = {
752  {
753  .name = "default",
754  .type = AVMEDIA_TYPE_VIDEO,
755  .request_frame = request_frame,
756  .config_props = config_output,
757  },
758  { NULL }
759 };
760 
762  .name = "signature",
763  .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
764  .priv_size = sizeof(SignatureContext),
765  .priv_class = &signature_class,
766  .init = init,
767  .uninit = uninit,
769  .outputs = signature_outputs,
770  .inputs = NULL,
772 };
static void av_unused put_bits32(PutBitContext *s, uint32_t value)
Write exactly 32 bits into a bitstream.
Definition: put_bits.h:268
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:308
int av_elem
Definition: signature.h:65
static const AVFilterPad signature_outputs[]
Definition: vf_signature.c:751
AVOption.
Definition: opt.h:248
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:100
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:218
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
uint8_t y
Definition: signature.h:56
Main libavfilter public API header.
const char * g
Definition: vf_curves.c:115
#define OFFSET(x)
Definition: vf_signature.c:39
#define AVFILTER_FLAG_DYNAMIC_INPUTS
The number of the filter inputs is not determined just by AVFilter.inputs.
Definition: avfilter.h:106
int num
Numerator.
Definition: rational.h:59
const Block * blocks
Definition: signature.h:69
static void set_bit(uint8_t *data, size_t pos)
sets the bit at position pos to 1 in data
Definition: vf_signature.c:144
struct FineSignature * next
Definition: signature.h:73
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
enum AVMediaType type
AVFilterPad type.
Definition: internal.h:65
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
Definition: vf_signature.c:150
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
StreamContext * streamcontexts
Definition: signature.h:142
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
uint32_t index
Definition: signature.h:76
static const AVOption signature_options[]
Definition: vf_signature.c:43
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:287
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:349
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static const char signature[]
Definition: ipmovie.c:615
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1091
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
Definition: vf_signature.c:570
uint8_t confidence
Definition: signature.h:77
uint8_t
#define av_cold
Definition: attributes.h:88
#define fs(width, name, subs,...)
Definition: cbs_vp9.c:259
AVOptions.
timestamp utils, mostly useful for debugging/logging purposes
CoarseSignature * curcoarsesig1
Definition: signature.h:117
#define f(width, name)
Definition: cbs_vp9.c:255
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
Definition: vf_signature.c:112
struct FineSignature * first
Definition: signature.h:84
#define ELEMENT_COUNT
Definition: signature.h:36
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:401
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range...
Definition: pixfmt.h:100
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define av_log(a,...)
uint32_t lastindex
Definition: signature.h:122
struct FineSignature * last
Definition: signature.h:85
static int get_block_size(const Block *b)
Definition: vf_signature.c:107
A filter pad used for either input or output.
Definition: internal.h:54
static int cmp(const void *x, const void *y)
Definition: vf_signature.c:135
AVFilterPad * input_pads
array of input pads
Definition: avfilter.h:348
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:588
static int config_output(AVFilterLink *outlink)
Definition: vf_signature.c:737
AVFILTER_DEFINE_CLASS(signature)
static const uint16_t mask[17]
Definition: lzw.c:38
FineSignature * curfinesig
Definition: signature.h:112
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:153
void * priv
private data for use by the filter
Definition: avfilter.h:356
unsigned int pos
Definition: spdifenc.c:410
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:215
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
uint8_t x
Definition: signature.h:55
int av_log_get_level(void)
Get the current log level.
Definition: log.c:435
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
Definition: avstring.c:83
static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:81
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
char * av_asprintf(const char *fmt,...)
Definition: avstring.c:113
#define BLOCK_LCM
Definition: vf_signature.c:41
short elem_count
Definition: signature.h:68
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
#define th
Definition: regdef.h:75
#define b
Definition: input.c:41
unsigned nb_inputs
number of input pads
Definition: avfilter.h:350
int matchframes
Definition: signature.h:95
struct FineSignature * second
Definition: signature.h:98
#define FFMIN(a, b)
Definition: common.h:96
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
AVRational time_base
Definition: signature.h:103
static const ElemCat * elements[ELEMENT_COUNT]
Definition: signature.h:566
uint8_t w
Definition: llviddspenc.c:38
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
MPEG-7 video signature calculation and lookup filter.
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
short block_count
Definition: signature.h:67
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
int av_get_frame_filename(char *buf, int buf_size, const char *path, int number)
Definition: utils.c:4717
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
#define DIFFELEM_SIZE
Definition: signature.h:38
static int request_frame(AVFilterLink *outlink)
Definition: vf_signature.c:589
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:339
short left_count
Definition: signature.h:66
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_signature.c:700
these buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
Filter definition.
Definition: avfilter.h:145
static int config_input(AVFilterLink *inlink)
Definition: vf_signature.c:90
const char * name
Filter name.
Definition: avfilter.h:149
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:353
struct CoarseSignature * next
Definition: signature.h:86
MPEG-7 video signature calculation and lookup filter.
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:300
CoarseSignature * coarsesiglist
Definition: signature.h:114
#define flags(name, subs,...)
Definition: cbs_av1.c:560
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:322
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
Definition: error.c:105
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
#define FF_INLINK_IDX(link)
Find the index of a link.
Definition: internal.h:302
struct FineSignature * first
Definition: signature.h:97
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:386
CoarseSignature * curcoarsesig2
Definition: signature.h:118
Main libavformat public API header.
int
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
Y , 8bpp.
Definition: pixfmt.h:74
static int query_formats(AVFilterContext *ctx)
Definition: vf_signature.c:72
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:115
#define SIGELEM_SIZE
Definition: signature.h:37
AVFilter ff_vf_signature
Definition: vf_signature.c:761
uint64_t pts
Definition: signature.h:75
struct FineSignature * prev
Definition: signature.h:74
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
int den
Denominator.
Definition: rational.h:60
CoarseSignature * coarseend
Definition: signature.h:115
uint8_t data[5][31]
Definition: signature.h:83
int len
static av_cold int init(AVFilterContext *ctx)
Definition: vf_signature.c:645
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
Definition: pixfmt.h:258
An instance of a filter.
Definition: avfilter.h:341
#define FLAGS
Definition: vf_signature.c:40
Point to
Definition: signature.h:61
#define av_freep(p)
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
uint8_t words[5]
Definition: signature.h:78
#define av_malloc_array(a, b)
int lookup
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:407
internal API functions
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video, that means pixel format. For audio, that means channel, sample; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
Point up
Definition: signature.h:60
mode
Use these values in ebur128_init (or'ed).
Definition: ebur128.h:83
for(j=16;j >0;--j)
int i
Definition: input.c:407
GLuint buffer
Definition: opengl_enc.c:101
uint8_t framesig[SIGELEM_SIZE/5]
Definition: signature.h:79
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:492
FineSignature * finesiglist
Definition: signature.h:111
static int ff_insert_inpad(AVFilterContext *f, unsigned index, AVFilterPad *p)
Insert a new input pad for the filter.
Definition: internal.h:240
static uint8_t tmp[11]
Definition: aes_ctr.c:26
bitstream writer API