FFmpeg
vf_signature.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Gerion Entrup
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 /**
22  * @file
23  * MPEG-7 video signature calculation and lookup filter
24  * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
25  */
26 
27 #include <float.h>
28 #include "libavcodec/put_bits.h"
29 #include "libavformat/avformat.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/intreadwrite.h"
33 #include "libavutil/timestamp.h"
34 #include "avfilter.h"
35 #include "internal.h"
36 #include "signature.h"
37 #include "signature_lookup.c"
38 
39 #define OFFSET(x) offsetof(SignatureContext, x)
40 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
41 #define BLOCK_LCM (int64_t) 476985600
42 
43 static const AVOption signature_options[] = {
44  { "detectmode", "set the detectmode",
45  OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
46  { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" },
47  { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
48  { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
49  { "nb_inputs", "number of inputs",
50  OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
51  { "filename", "filename for output files",
52  OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
53  { "format", "set output format",
54  OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" },
55  { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
56  { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" },
57  { "th_d", "threshold to detect one word as similar",
58  OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
59  { "th_dc", "threshold to detect all words as similar",
60  OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
61  { "th_xh", "threshold to detect frames as similar",
62  OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
63  { "th_di", "minimum length of matching sequence in frames",
64  OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
65  { "th_it", "threshold for relation of good to all frames",
66  OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
67  { NULL }
68 };
69 
71 
73 {
74  /* all formats with a separate gray value */
75  static const enum AVPixelFormat pix_fmts[] = {
85  };
86 
87  return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
88 }
89 
91 {
92  AVFilterContext *ctx = inlink->dst;
93  SignatureContext *sic = ctx->priv;
94  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
95 
96  sc->time_base = inlink->time_base;
97  /* test for overflow */
98  sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
99  if (sc->divide) {
100  av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
101  }
102  sc->w = inlink->w;
103  sc->h = inlink->h;
104  return 0;
105 }
106 
107 static int get_block_size(const Block *b)
108 {
109  return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
110 }
111 
112 static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
113 {
114  uint64_t sum = 0;
115 
116  int x0, y0, x1, y1;
117 
118  x0 = b->up.x;
119  y0 = b->up.y;
120  x1 = b->to.x;
121  y1 = b->to.y;
122 
123  if (x0-1 >= 0 && y0-1 >= 0) {
124  sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125  } else if (x0-1 >= 0) {
126  sum = intpic[y1][x1] - intpic[y1][x0-1];
127  } else if (y0-1 >= 0) {
128  sum = intpic[y1][x1] - intpic[y0-1][x1];
129  } else {
130  sum = intpic[y1][x1];
131  }
132  return sum;
133 }
134 
/**
 * qsort() comparison callback ordering uint64_t elements ascending.
 *
 * Takes const void * so that its type matches what qsort() calls through;
 * both arguments must point to uint64_t values.
 *
 * @return -1, 0 or 1 if *a is smaller than, equal to or greater than *b
 */
static int cmp(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *) a;
    const uint64_t rhs = *(const uint64_t *) b;

    return lhs < rhs ? -1 : ( lhs > rhs ? 1 : 0 );
}
139 
/**
 * sets the bit at position pos to 1 in data
 *
 * Bits are numbered from the most significant bit of data[0] onwards.
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_index = pos >> 3;
    const unsigned shift    = (unsigned) (pos & 7);

    data[byte_index] |= (uint8_t) (0x80u >> shift);
}
148 
150 {
151  AVFilterContext *ctx = inlink->dst;
152  SignatureContext *sic = ctx->priv;
153  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
154  FineSignature* fs;
155 
156  static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
157  /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
158  s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
159  */
160  static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
161  static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
162 
163  uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
164  uint64_t intpic[32][32];
165  uint64_t rowcount;
166  uint8_t *p = picref->data[0];
167  int inti, intj;
168  int *intjlut;
169 
170  uint64_t conflist[DIFFELEM_SIZE];
171  int f = 0, g = 0, w = 0;
172  int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
173  int64_t denom;
174  int i, j, k, ternary;
175  uint64_t blocksum;
176  int blocksize;
177  int64_t th; /* threshold */
178  int64_t sum;
179 
180  int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
181 
182  /* initialize fs */
183  if (sc->curfinesig) {
184  fs = av_mallocz(sizeof(FineSignature));
185  if (!fs)
186  return AVERROR(ENOMEM);
187  sc->curfinesig->next = fs;
188  fs->prev = sc->curfinesig;
189  sc->curfinesig = fs;
190  } else {
191  fs = sc->curfinesig = sc->finesiglist;
192  sc->curcoarsesig1->first = fs;
193  }
194 
195  fs->pts = picref->pts;
196  fs->index = sc->lastindex++;
197 
198  memset(intpic, 0, sizeof(uint64_t)*32*32);
199  intjlut = av_malloc_array(inlink->w, sizeof(int));
200  if (!intjlut)
201  return AVERROR(ENOMEM);
202  for (i = 0; i < inlink->w; i++) {
203  intjlut[i] = (i*32)/inlink->w;
204  }
205 
206  for (i = 0; i < inlink->h; i++) {
207  inti = (i*32)/inlink->h;
208  for (j = 0; j < inlink->w; j++) {
209  intj = intjlut[j];
210  intpic[inti][intj] += p[j];
211  }
212  p += picref->linesize[0];
213  }
214  av_freep(&intjlut);
215 
/* The following calculates a summed area table (intpic) and brings the numbers
 * in intpic to the same denominator.
 * So you only have to handle the numerator in the following sections.
 */
220  dh1 = inlink->h / 32;
221  if (inlink->h % 32)
222  dh2 = dh1 + 1;
223  dw1 = inlink->w / 32;
224  if (inlink->w % 32)
225  dw2 = dw1 + 1;
226  denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1;
227 
228  for (i = 0; i < 32; i++) {
229  rowcount = 0;
230  a = 1;
231  if (dh2 > 1) {
232  a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
233  a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
234  a = (a == dh1)? dh2 : dh1;
235  }
236  for (j = 0; j < 32; j++) {
237  b = 1;
238  if (dw2 > 1) {
239  b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
240  b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
241  b = (b == dw1)? dw2 : dw1;
242  }
243  rowcount += intpic[i][j] * a * b * precfactor / denom;
244  if (i > 0) {
245  intpic[i][j] = intpic[i-1][j] + rowcount;
246  } else {
247  intpic[i][j] = rowcount;
248  }
249  }
250  }
251 
252  denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2;
253 
254  for (i = 0; i < ELEMENT_COUNT; i++) {
255  const ElemCat* elemcat = elements[i];
256  int64_t* elemsignature;
257  uint64_t* sortsignature;
258 
259  elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
260  if (!elemsignature)
261  return AVERROR(ENOMEM);
262  sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
263  if (!sortsignature) {
264  av_freep(&elemsignature);
265  return AVERROR(ENOMEM);
266  }
267 
268  for (j = 0; j < elemcat->elem_count; j++) {
269  blocksum = 0;
270  blocksize = 0;
271  for (k = 0; k < elemcat->left_count; k++) {
272  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
273  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
274  }
275  sum = blocksum / blocksize;
276  if (elemcat->av_elem) {
277  sum -= 128 * precfactor * denom;
278  } else {
279  blocksum = 0;
280  blocksize = 0;
281  for (; k < elemcat->block_count; k++) {
282  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
283  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
284  }
285  sum -= blocksum / blocksize;
286  conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
287  }
288 
289  elemsignature[j] = sum;
290  sortsignature[j] = FFABS(sum);
291  }
292 
293  /* get threshold */
294  qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), (void*) cmp);
295  th = sortsignature[(int) (elemcat->elem_count*0.333)];
296 
297  /* ternarize */
298  for (j = 0; j < elemcat->elem_count; j++) {
299  if (elemsignature[j] < -th) {
300  ternary = 0;
301  } else if (elemsignature[j] <= th) {
302  ternary = 1;
303  } else {
304  ternary = 2;
305  }
306  fs->framesig[f/5] += ternary * pot3[f%5];
307 
308  if (f == wordvec[w]) {
309  fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
310  if (w < 24)
311  w++;
312  }
313  f++;
314  }
315  av_freep(&elemsignature);
316  av_freep(&sortsignature);
317  }
318 
319  /* confidence */
320  qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), (void*) cmp);
321  fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
322 
323  /* coarsesignature */
324  if (sc->coarsecount == 0) {
325  if (sc->curcoarsesig2) {
327  if (!sc->curcoarsesig1)
328  return AVERROR(ENOMEM);
329  sc->curcoarsesig1->first = fs;
330  sc->curcoarsesig2->next = sc->curcoarsesig1;
331  sc->coarseend = sc->curcoarsesig1;
332  }
333  }
334  if (sc->coarsecount == 45) {
335  sc->midcoarse = 1;
337  if (!sc->curcoarsesig2)
338  return AVERROR(ENOMEM);
339  sc->curcoarsesig2->first = fs;
340  sc->curcoarsesig1->next = sc->curcoarsesig2;
341  sc->coarseend = sc->curcoarsesig2;
342  }
343  for (i = 0; i < 5; i++) {
344  set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
345  }
346  /* assuming the actual frame is the last */
347  sc->curcoarsesig1->last = fs;
348  if (sc->midcoarse) {
349  for (i = 0; i < 5; i++) {
350  set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
351  }
352  sc->curcoarsesig2->last = fs;
353  }
354 
355  sc->coarsecount = (sc->coarsecount+1)%90;
356 
357  /* debug printing finesignature */
358  if (av_log_get_level() == AV_LOG_DEBUG) {
359  av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
360 
361  av_log(ctx, AV_LOG_DEBUG, "words:");
362  for (i = 0; i < 5; i++) {
363  av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
364  av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
365  for (j = 1; j < 5; j++)
366  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
367  av_log(ctx, AV_LOG_DEBUG, ";");
368  }
369  av_log(ctx, AV_LOG_DEBUG, "\n");
370 
371  av_log(ctx, AV_LOG_DEBUG, "framesignature:");
372  for (i = 0; i < SIGELEM_SIZE/5; i++) {
373  av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
374  for (j = 1; j < 5; j++)
375  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
376  }
377  av_log(ctx, AV_LOG_DEBUG, "\n");
378  }
379 
380  if (FF_INLINK_IDX(inlink) == 0)
381  return ff_filter_frame(inlink->dst->outputs[0], picref);
382  return 1;
383 }
384 
385 static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
386 {
387  FineSignature* fs;
388  CoarseSignature* cs;
389  int i, j;
390  FILE* f;
391  unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
392 
393  f = fopen(filename, "w");
394  if (!f) {
395  int err = AVERROR(EINVAL);
396  char buf[128];
397  av_strerror(err, buf, sizeof(buf));
398  av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
399  return err;
400  }
401 
402  /* header */
403  fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
404  fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
405  fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
406  fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
407  fprintf(f, " <VideoSignatureRegion>\n");
408  fprintf(f, " <VideoSignatureSpatialRegion>\n");
409  fprintf(f, " <Pixel>0 0 </Pixel>\n");
410  fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
411  fprintf(f, " </VideoSignatureSpatialRegion>\n");
412  fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
413  /* hoping num is 1, other values are vague */
414  fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
415  fprintf(f, " <MediaTimeOfSpatialRegion>\n");
416  fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
417  fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
418  fprintf(f, " </MediaTimeOfSpatialRegion>\n");
419 
420  /* coarsesignatures */
421  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
422  fprintf(f, " <VSVideoSegment>\n");
423  fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
424  fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
425  fprintf(f, " <MediaTimeOfSegment>\n");
426  fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
427  fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
428  fprintf(f, " </MediaTimeOfSegment>\n");
429  for (i = 0; i < 5; i++) {
430  fprintf(f, " <BagOfWords>");
431  for (j = 0; j < 31; j++) {
432  uint8_t n = cs->data[i][j];
433  if (j < 30) {
434  fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
435  (n & 0x40) >> 6,
436  (n & 0x20) >> 5,
437  (n & 0x10) >> 4,
438  (n & 0x08) >> 3,
439  (n & 0x04) >> 2,
440  (n & 0x02) >> 1,
441  (n & 0x01));
442  } else {
443  /* print only 3 bit in last byte */
444  fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
445  (n & 0x40) >> 6,
446  (n & 0x20) >> 5);
447  }
448  }
449  fprintf(f, "</BagOfWords>\n");
450  }
451  fprintf(f, " </VSVideoSegment>\n");
452  }
453 
454  /* finesignatures */
455  for (fs = sc->finesiglist; fs; fs = fs->next) {
456  fprintf(f, " <VideoFrame>\n");
457  fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
458  /* confidence */
459  fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
460  /* words */
461  fprintf(f, " <Word>");
462  for (i = 0; i < 5; i++) {
463  fprintf(f, "%d ", fs->words[i]);
464  if (i < 4) {
465  fprintf(f, " ");
466  }
467  }
468  fprintf(f, "</Word>\n");
469  /* framesignature */
470  fprintf(f, " <FrameSignature>");
471  for (i = 0; i< SIGELEM_SIZE/5; i++) {
472  if (i > 0) {
473  fprintf(f, " ");
474  }
475  fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
476  for (j = 1; j < 5; j++)
477  fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
478  }
479  fprintf(f, "</FrameSignature>\n");
480  fprintf(f, " </VideoFrame>\n");
481  }
482  fprintf(f, " </VideoSignatureRegion>\n");
483  fprintf(f, " </Descriptor>\n");
484  fprintf(f, " </DescriptionUnit>\n");
485  fprintf(f, "</Mpeg7>\n");
486 
487  fclose(f);
488  return 0;
489 }
490 
491 static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
492 {
493  FILE* f;
494  FineSignature* fs;
495  CoarseSignature* cs;
496  uint32_t numofsegments = (sc->lastindex + 44)/45;
497  int i, j;
499  /* buffer + header + coarsesignatures + finesignature */
500  int len = (512 + 6 * 32 + 3*16 + 2 +
501  numofsegments * (4*32 + 1 + 5*243) +
502  sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
503  uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
504  if (!buffer)
505  return AVERROR(ENOMEM);
506 
507  f = fopen(filename, "wb");
508  if (!f) {
509  int err = AVERROR(EINVAL);
510  char buf[128];
511  av_strerror(err, buf, sizeof(buf));
512  av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
513  av_freep(&buffer);
514  return err;
515  }
516  init_put_bits(&buf, buffer, len);
517 
518  put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
519  put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
520  put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
521  put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
522  put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
523  put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
524  put_bits32(&buf, sc->lastindex); /* NumOfFrames */
525  /* hoping num is 1, other values are vague */
526  /* den/num might be greater than 16 bit, so cutting it */
527  put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
528  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
529  put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
530  put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
531  put_bits32(&buf, numofsegments); /* NumOfSegments */
532  /* coarsesignatures */
533  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
534  put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
535  put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
536  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
537  put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
538  put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
539  for (i = 0; i < 5; i++) {
540  /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
541  for (j = 0; j < 30; j++) {
542  put_bits(&buf, 8, cs->data[i][j]);
543  }
544  put_bits(&buf, 3, cs->data[i][30] >> 5);
545  }
546  }
547  /* finesignatures */
548  put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
549  for (fs = sc->finesiglist; fs; fs = fs->next) {
550  put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
551  put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
552  put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
553  for (i = 0; i < 5; i++) {
554  put_bits(&buf, 8, fs->words[i]); /* Words */
555  }
556  /* framesignature */
557  for (i = 0; i < SIGELEM_SIZE/5; i++) {
558  put_bits(&buf, 8, fs->framesig[i]);
559  }
560  }
561 
562  avpriv_align_put_bits(&buf);
563  flush_put_bits(&buf);
564  fwrite(buffer, 1, put_bits_count(&buf)/8, f);
565  fclose(f);
566  av_freep(&buffer);
567  return 0;
568 }
569 
571 {
572  SignatureContext* sic = ctx->priv;
573  char filename[1024];
574 
575  if (sic->nb_inputs > 1) {
576  /* error already handled */
577  av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
578  } else {
579  if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
580  return AVERROR(EINVAL);
581  }
582  if (sic->format == FORMAT_XML) {
583  return xml_export(ctx, sc, filename);
584  } else {
585  return binary_export(ctx, sc, filename);
586  }
587 }
588 
589 static int request_frame(AVFilterLink *outlink)
590 {
591  AVFilterContext *ctx = outlink->src;
592  SignatureContext *sic = ctx->priv;
593  StreamContext *sc, *sc2;
594  MatchingInfo match;
595  int i, j, ret;
596  int lookup = 1; /* indicates wheather EOF of all files is reached */
597 
598  /* process all inputs */
599  for (i = 0; i < sic->nb_inputs; i++){
600  sc = &(sic->streamcontexts[i]);
601 
602  ret = ff_request_frame(ctx->inputs[i]);
603 
604  /* return if unexpected error occurs in input stream */
605  if (ret < 0 && ret != AVERROR_EOF)
606  return ret;
607 
608  /* export signature at EOF */
609  if (ret == AVERROR_EOF && !sc->exported) {
610  /* export if wanted */
611  if (strlen(sic->filename) > 0) {
612  if (export(ctx, sc, i) < 0)
613  return ret;
614  }
615  sc->exported = 1;
616  }
617  lookup &= sc->exported;
618  }
619 
620  /* signature lookup */
621  if (lookup && sic->mode != MODE_OFF) {
622  /* iterate over every pair */
623  for (i = 0; i < sic->nb_inputs; i++) {
624  sc = &(sic->streamcontexts[i]);
625  for (j = i+1; j < sic->nb_inputs; j++) {
626  sc2 = &(sic->streamcontexts[j]);
627  match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
628  if (match.score != 0) {
629  av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
630  i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
631  j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
632  match.matchframes);
633  if (match.whole)
634  av_log(ctx, AV_LOG_INFO, "whole video matching\n");
635  } else {
636  av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
637  }
638  }
639  }
640  }
641 
642  return ret;
643 }
644 
646 {
647 
648  SignatureContext *sic = ctx->priv;
649  StreamContext *sc;
650  int i, ret;
651  char tmp[1024];
652 
653  sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
654  if (!sic->streamcontexts)
655  return AVERROR(ENOMEM);
656 
657  for (i = 0; i < sic->nb_inputs; i++) {
658  AVFilterPad pad = {
660  .name = av_asprintf("in%d", i),
661  .config_props = config_input,
662  .filter_frame = filter_frame,
663  };
664 
665  if (!pad.name)
666  return AVERROR(ENOMEM);
667 
668  sc = &(sic->streamcontexts[i]);
669 
670  sc->lastindex = 0;
671  sc->finesiglist = av_mallocz(sizeof(FineSignature));
672  if (!sc->finesiglist)
673  return AVERROR(ENOMEM);
674  sc->curfinesig = NULL;
675 
677  if (!sc->coarsesiglist)
678  return AVERROR(ENOMEM);
679  sc->curcoarsesig1 = sc->coarsesiglist;
680  sc->coarseend = sc->coarsesiglist;
681  sc->coarsecount = 0;
682  sc->midcoarse = 0;
683 
684  if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
685  av_freep(&pad.name);
686  return ret;
687  }
688  }
689 
690  /* check filename */
691  if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
692  av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
693  return AVERROR(EINVAL);
694  }
695 
696  return 0;
697 }
698 
699 
700 
702 {
703  SignatureContext *sic = ctx->priv;
704  StreamContext *sc;
705  void* tmp;
706  FineSignature* finsig;
707  CoarseSignature* cousig;
708  int i;
709 
710 
711  /* free the lists */
712  if (sic->streamcontexts != NULL) {
713  for (i = 0; i < sic->nb_inputs; i++) {
714  sc = &(sic->streamcontexts[i]);
715  finsig = sc->finesiglist;
716  cousig = sc->coarsesiglist;
717 
718  while (finsig) {
719  tmp = finsig;
720  finsig = finsig->next;
721  av_freep(&tmp);
722  }
723  sc->finesiglist = NULL;
724 
725  while (cousig) {
726  tmp = cousig;
727  cousig = cousig->next;
728  av_freep(&tmp);
729  }
730  sc->coarsesiglist = NULL;
731  }
732  av_freep(&sic->streamcontexts);
733  }
734 }
735 
736 static int config_output(AVFilterLink *outlink)
737 {
738  AVFilterContext *ctx = outlink->src;
739  AVFilterLink *inlink = ctx->inputs[0];
740 
741  outlink->time_base = inlink->time_base;
742  outlink->frame_rate = inlink->frame_rate;
743  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
744  outlink->w = inlink->w;
745  outlink->h = inlink->h;
746 
747  return 0;
748 }
749 
750 static const AVFilterPad signature_outputs[] = {
751  {
752  .name = "default",
753  .type = AVMEDIA_TYPE_VIDEO,
754  .request_frame = request_frame,
755  .config_props = config_output,
756  },
757  { NULL }
758 };
759 
761  .name = "signature",
762  .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
763  .priv_size = sizeof(SignatureContext),
764  .priv_class = &signature_class,
765  .init = init,
766  .uninit = uninit,
768  .outputs = signature_outputs,
769  .inputs = NULL,
771 };
static void av_unused put_bits32(PutBitContext *s, uint32_t value)
Write exactly 32 bits into a bitstream.
Definition: put_bits.h:250
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
int av_elem
Definition: signature.h:65
static const AVFilterPad signature_outputs[]
Definition: vf_signature.c:750
AVOption.
Definition: opt.h:246
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:100
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:208
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:182
uint8_t y
Definition: signature.h:56
Main libavfilter public API header.
const char * g
Definition: vf_curves.c:115
#define OFFSET(x)
Definition: vf_signature.c:39
static int cmp(const uint64_t *a, const uint64_t *b)
Definition: vf_signature.c:135
#define AVFILTER_FLAG_DYNAMIC_INPUTS
The number of the filter inputs is not determined just by AVFilter.inputs.
Definition: avfilter.h:105
int num
Numerator.
Definition: rational.h:59
const Block * blocks
Definition: signature.h:69
static void set_bit(uint8_t *data, size_t pos)
sets the bit at position pos to 1 in data
Definition: vf_signature.c:143
struct FineSignature * next
Definition: signature.h:73
void avpriv_align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: bitstream.c:48
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
enum AVMediaType type
AVFilterPad type.
Definition: internal.h:65
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
Definition: vf_signature.c:149
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
StreamContext * streamcontexts
Definition: signature.h:142
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
uint32_t index
Definition: signature.h:76
static const AVOption signature_options[]
Definition: vf_signature.c:43
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static const char signature[]
Definition: ipmovie.c:615
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1093
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
Definition: vf_signature.c:570
uint8_t confidence
Definition: signature.h:77
uint8_t
#define av_cold
Definition: attributes.h:82
#define fs(width, name, subs,...)
Definition: cbs_vp9.c:259
AVOptions.
timestamp utils, mostly useful for debugging/logging purposes
CoarseSignature * curcoarsesig1
Definition: signature.h:117
#define f(width, name)
Definition: cbs_vp9.c:255
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
Definition: vf_signature.c:112
struct FineSignature * first
Definition: signature.h:84
#define ELEMENT_COUNT
Definition: signature.h:36
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:388
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range...
Definition: pixfmt.h:100
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define av_log(a,...)
uint32_t lastindex
Definition: signature.h:122
struct FineSignature * last
Definition: signature.h:85
static int get_block_size(const Block *b)
Definition: vf_signature.c:107
A filter pad used for either input or output.
Definition: internal.h:54
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:569
static int config_output(AVFilterLink *outlink)
Definition: vf_signature.c:736
AVFILTER_DEFINE_CLASS(signature)
static const uint16_t mask[17]
Definition: lzw.c:38
FineSignature * curfinesig
Definition: signature.h:112
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
uint8_t x
Definition: signature.h:55
int av_log_get_level(void)
Get the current log level.
Definition: log.c:380
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
Definition: avstring.c:83
static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:67
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
char * av_asprintf(const char *fmt,...)
Definition: avstring.c:113
#define BLOCK_LCM
Definition: vf_signature.c:41
short elem_count
Definition: signature.h:68
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
#define th
Definition: regdef.h:75
#define b
Definition: input.c:41
int matchframes
Definition: signature.h:95
struct FineSignature * second
Definition: signature.h:98
#define FFMIN(a, b)
Definition: common.h:96
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
AVRational time_base
Definition: signature.h:103
static const ElemCat * elements[ELEMENT_COUNT]
Definition: signature.h:566
uint8_t w
Definition: llviddspenc.c:38
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
MPEG-7 video signature calculation and lookup filter.
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
int n
Definition: avisynth_c.h:760
short block_count
Definition: signature.h:67
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
int av_get_frame_filename(char *buf, int buf_size, const char *path, int number)
Definition: utils.c:4770
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
#define DIFFELEM_SIZE
Definition: signature.h:38
static int request_frame(AVFilterLink *outlink)
Definition: vf_signature.c:589
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:326
short left_count
Definition: signature.h:66
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_signature.c:701
void * buf
Definition: avisynth_c.h:766
these buffered frames must be flushed immediately if a new input produces new output. If the input is not enough to produce output, the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
Filter definition.
Definition: avfilter.h:144
static int config_input(AVFilterLink *inlink)
Definition: vf_signature.c:90
const char * name
Filter name.
Definition: avfilter.h:148
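and forward the result (frame or status change) to the corresponding input. If nothing is possible, test the status of outputs and forward it to the corresponding input(s). If still not possible, return FFERROR_NOT_READY. If the filter stores internally one or a few frames for some input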
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
struct CoarseSignature * next
Definition: signature.h:86
MPEG-7 video signature calculation and lookup filter.
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
CoarseSignature * coarsesiglist
Definition: signature.h:114
#define flags(name, subs,...)
Definition: cbs_av1.c:561
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
Definition: error.c:105
The exact code depends on how similar the /* use it */ blocks are and how related they are to the /* flush */ block, and needs to apply these operations to the correct inlink or outlink if there are several. Macros are available to factor that when no extra processing is needed, e.g. FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
#define FF_INLINK_IDX(link)
Find the index of a link.
Definition: internal.h:348
struct FineSignature * first
Definition: signature.h:97
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:385
CoarseSignature * curcoarsesig2
Definition: signature.h:118
Main libavformat public API header.
int
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
Y, 8bpp.
Definition: pixfmt.h:74
static int query_formats(AVFilterContext *ctx)
Definition: vf_signature.c:72
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:101
#define SIGELEM_SIZE
Definition: signature.h:37
AVFilter ff_vf_signature
Definition: vf_signature.c:760
uint64_t pts
Definition: signature.h:75
struct FineSignature * prev
Definition: signature.h:74
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:48
int den
Denominator.
Definition: rational.h:60
CoarseSignature * coarseend
Definition: signature.h:115
uint8_t data[5][31]
Definition: signature.h:83
int len
static av_cold int init(AVFilterContext *ctx)
Definition: vf_signature.c:645
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
Definition: pixfmt.h:258
An instance of a filter.
Definition: avfilter.h:338
#define FLAGS
Definition: vf_signature.c:40
Point to
Definition: signature.h:61
#define av_freep(p)
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
uint8_t words[5]
Definition: signature.h:78
#define av_malloc_array(a, b)
int lookup
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:407
internal API functions
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: the query_formats method should set, for each input and each output link, the list of supported formats. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
Point up
Definition: signature.h:60
mode
Use these values in ebur128_init (or'ed).
Definition: ebur128.h:83
for(j=16;j >0;--j)
GLuint buffer
Definition: opengl_enc.c:101
uint8_t framesig[SIGELEM_SIZE/5]
Definition: signature.h:79
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:491
FineSignature * finesiglist
Definition: signature.h:111
static int ff_insert_inpad(AVFilterContext *f, unsigned index, AVFilterPad *p)
Insert a new input pad for the filter.
Definition: internal.h:277
static uint8_t tmp[11]
Definition: aes_ctr.c:26
bitstream writer API