39 #define OFFSET(x) offsetof(SignatureContext, x) 40 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM 41 #define BLOCK_LCM (int64_t) 476985600 44 {
"detectmode",
"set the detectmode",
49 {
"nb_inputs",
"number of inputs",
51 {
"filename",
"filename for output files",
53 {
"format",
"set output format",
57 {
"th_d",
"threshold to detect one word as similar",
59 {
"th_dc",
"threshold to detect all words as similar",
61 {
"th_xh",
"threshold to detect frames as similar",
63 {
"th_di",
"minimum length of matching sequence in frames",
65 {
"th_it",
"threshold for relation of good to all frames",
98 sc->
divide = (((uint64_t) inlink->
w/32) * (inlink->
w/32 + 1) * (inlink->
h/32 * inlink->
h/32 + 1) > INT64_MAX / (
BLOCK_LCM * 255));
100 av_log(ctx,
AV_LOG_WARNING,
"Input dimension too high for precise calculation, numbers will be rounded.\n");
109 return (b->
to.
y - b->
up.
y + 1) * (b->
to.
x - b->
up.
x + 1);
123 if (x0-1 >= 0 && y0-1 >= 0) {
124 sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125 }
else if (x0-1 >= 0) {
126 sum = intpic[y1][x1] - intpic[y1][x0-1];
127 }
else if (y0-1 >= 0) {
128 sum = intpic[y1][x1] - intpic[y0-1][x1];
130 sum = intpic[y1][x1];
/**
 * qsort() comparison callback: orders uint64_t values ascending.
 *
 * Uses explicit comparisons rather than subtraction, since the
 * difference of two uint64_t values cannot be safely narrowed to int.
 *
 * @param x pointer to the first uint64_t
 * @param y pointer to the second uint64_t
 * @return -1, 0 or 1 as *x is less than, equal to or greater than *y
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t *a = x, *b = y;
    return *a < *b ? -1 : ( *a > *b ? 1 : 0 );
}
157 static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
161 static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
162 static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
164 uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 };
165 uint64_t intpic[32][32];
172 int f = 0,
g = 0,
w = 0;
173 int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1,
a,
b;
175 int i, j, k, ternary;
199 memset(intpic, 0,
sizeof(uint64_t)*32*32);
203 for (i = 0; i < inlink->
w; i++) {
204 intjlut[
i] = (i*32)/inlink->
w;
207 for (i = 0; i < inlink->
h; i++) {
208 inti = (i*32)/inlink->
h;
209 for (j = 0; j < inlink->
w; j++) {
211 intpic[inti][intj] += p[j];
221 dh1 = inlink->
h / 32;
224 dw1 = inlink->
w / 32;
227 denom = (sc->
divide) ? dh1 * dh2 * dw1 * dw2 : 1;
229 for (i = 0; i < 32; i++) {
233 a = ((inlink->
h*(i+1))%32 == 0) ? (inlink->
h*(i+1))/32 - 1 : (inlink->
h*(i+1))/32;
234 a -= ((inlink->
h*
i)%32 == 0) ? (inlink->
h*
i)/32 - 1 : (inlink->
h*i)/32;
235 a = (
a == dh1)? dh2 : dh1;
237 for (j = 0; j < 32; j++) {
240 b = ((inlink->
w*(j+1))%32 == 0) ? (inlink->
w*(j+1))/32 - 1 : (inlink->
w*(j+1))/32;
241 b -= ((inlink->
w*j)%32 == 0) ? (inlink->
w*j)/32 - 1 : (inlink->
w*j)/32;
242 b = (b == dw1)? dw2 : dw1;
244 rowcount += intpic[
i][j] *
a * b * precfactor / denom;
246 intpic[
i][j] = intpic[i-1][j] + rowcount;
248 intpic[
i][j] = rowcount;
253 denom = (sc->
divide) ? 1 : dh1 * dh2 * dw1 * dw2;
257 int64_t* elemsignature;
258 uint64_t* sortsignature;
264 if (!sortsignature) {
276 sum = blocksum / blocksize;
278 sum -= 128 * precfactor * denom;
286 sum -= blocksum / blocksize;
287 conflist[
g++] =
FFABS(sum * 8 / (precfactor * denom));
290 elemsignature[j] = sum;
291 sortsignature[j] =
FFABS(sum);
295 qsort(sortsignature, elemcat->
elem_count,
sizeof(uint64_t),
cmp);
300 if (elemsignature[j] < -th) {
302 }
else if (elemsignature[j] <= th) {
307 fs->
framesig[f/5] += ternary * pot3[f%5];
309 if (f == wordvec[
w]) {
310 fs->
words[s2usw[
w]/5] += ternary * pot3[wordt2b[s2usw[
w]/5]++];
344 for (i = 0; i < 5; i++) {
350 for (i = 0; i < 5; i++) {
363 for (i = 0; i < 5; i++) {
366 for (j = 1; j < 5; j++)
375 for (j = 1; j < 5; j++)
392 unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
394 f = fopen(filename,
"w");
404 fprintf(f,
"<?xml version='1.0' encoding='ASCII' ?>\n");
405 fprintf(f,
"<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
406 fprintf(f,
" <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
407 fprintf(f,
" <Descriptor xsi:type=\"VideoSignatureType\">\n");
408 fprintf(f,
" <VideoSignatureRegion>\n");
409 fprintf(f,
" <VideoSignatureSpatialRegion>\n");
410 fprintf(f,
" <Pixel>0 0 </Pixel>\n");
411 fprintf(f,
" <Pixel>%d %d </Pixel>\n", sc->
w - 1, sc->
h - 1);
412 fprintf(f,
" </VideoSignatureSpatialRegion>\n");
413 fprintf(f,
" <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
416 fprintf(f,
" <MediaTimeOfSpatialRegion>\n");
417 fprintf(f,
" <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
418 fprintf(f,
" <EndMediaTimeOfSpatialRegion>%" PRIu64
"</EndMediaTimeOfSpatialRegion>\n", sc->
coarseend->
last->
pts);
419 fprintf(f,
" </MediaTimeOfSpatialRegion>\n");
423 fprintf(f,
" <VSVideoSegment>\n");
424 fprintf(f,
" <StartFrameOfSegment>%" PRIu32
"</StartFrameOfSegment>\n", cs->
first->
index);
425 fprintf(f,
" <EndFrameOfSegment>%" PRIu32
"</EndFrameOfSegment>\n", cs->
last->
index);
426 fprintf(f,
" <MediaTimeOfSegment>\n");
427 fprintf(f,
" <StartMediaTimeOfSegment>%" PRIu64
"</StartMediaTimeOfSegment>\n", cs->
first->
pts);
428 fprintf(f,
" <EndMediaTimeOfSegment>%" PRIu64
"</EndMediaTimeOfSegment>\n", cs->
last->
pts);
429 fprintf(f,
" </MediaTimeOfSegment>\n");
430 for (i = 0; i < 5; i++) {
431 fprintf(f,
" <BagOfWords>");
432 for (j = 0; j < 31; j++) {
435 fprintf(f,
"%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
445 fprintf(f,
"%d %d %d ", (n & 0x80) >> 7,
450 fprintf(f,
"</BagOfWords>\n");
452 fprintf(f,
" </VSVideoSegment>\n");
457 fprintf(f,
" <VideoFrame>\n");
458 fprintf(f,
" <MediaTimeOfFrame>%" PRIu64
"</MediaTimeOfFrame>\n", fs->
pts);
460 fprintf(f,
" <FrameConfidence>%d</FrameConfidence>\n", fs->
confidence);
462 fprintf(f,
" <Word>");
463 for (i = 0; i < 5; i++) {
464 fprintf(f,
"%d ", fs->
words[i]);
469 fprintf(f,
"</Word>\n");
471 fprintf(f,
" <FrameSignature>");
476 fprintf(f,
"%d ", fs->
framesig[i] / pot3[0]);
477 for (j = 1; j < 5; j++)
478 fprintf(f,
" %d ", fs->
framesig[i] % pot3[j-1] / pot3[j] );
480 fprintf(f,
"</FrameSignature>\n");
481 fprintf(f,
" </VideoFrame>\n");
483 fprintf(f,
" </VideoSignatureRegion>\n");
484 fprintf(f,
" </Descriptor>\n");
485 fprintf(f,
" </DescriptionUnit>\n");
486 fprintf(f,
"</Mpeg7>\n");
497 uint32_t numofsegments = (sc->
lastindex + 44)/45;
501 int len = (512 + 6 * 32 + 3*16 + 2 +
502 numofsegments * (4*32 + 1 + 5*243) +
503 sc->
lastindex * (2 + 32 + 6*8 + 608)) / 8;
508 f = fopen(filename,
"wb");
540 for (i = 0; i < 5; i++) {
542 for (j = 0; j < 30; j++) {
554 for (i = 0; i < 5; i++) {
612 if (
export(ctx, sc, i) < 0)
628 if (match.
score != 0) {
629 av_log(ctx,
AV_LOG_INFO,
"matching of video %d at %f and %d at %f, %d frames matching\n",
691 av_log(ctx,
AV_LOG_ERROR,
"The filename must contain %%d or %%0nd, if you have more than one input.\n");
719 finsig = finsig->
next;
726 cousig = cousig->
next;
733 for (
unsigned i = 0; i < ctx->
nb_inputs; i++)
745 outlink->
w = inlink->
w;
746 outlink->
h = inlink->
h;
765 .priv_class = &signature_class,
static void av_unused put_bits32(PutBitContext *s, uint32_t value)
Write exactly 32 bits into a bitstream.
This structure describes decoded (raw) audio or video data.
static const AVFilterPad signature_outputs[]
ptrdiff_t const GLvoid * data
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
#define AV_LOG_WARNING
Something somehow does not look correct.
Main libavfilter public API header.
int h
agreed upon image height
#define AVFILTER_FLAG_DYNAMIC_INPUTS
The number of the filter inputs is not determined just by AVFilter.inputs.
static void set_bit(uint8_t *data, size_t pos)
sets the bit at position pos to 1 in data
struct FineSignature * next
The reader does not expect b to be semantically signed here, and if the code is changed by maybe adding a cast, a division or other operation, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type but it holds a signed int (to use the same example: SUINT a)
enum AVMediaType type
AVFilterPad type.
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
StreamContext * streamcontexts
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
static const AVOption signature_options[]
const char * name
Pad name.
AVFilterLink ** inputs
array of pointers to input links
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static const char signature[]
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
#define fs(width, name, subs,...)
timestamp utils, mostly useful for debugging/logging purposes
CoarseSignature * curcoarsesig1
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
struct FineSignature * first
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range...
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
#define AVERROR_EOF
End of file.
struct FineSignature * last
static int get_block_size(const Block *b)
A filter pad used for either input or output.
A link between two filters.
static int cmp(const void *x, const void *y)
AVFilterPad * input_pads
array of input pads
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
static int config_output(AVFilterLink *outlink)
AVRational frame_rate
Frame rate of the stream on the link, or 1/0 if unknown or variable; if left to 0/0, will be automatically copied from the first input of the source filter if it exists.
AVFILTER_DEFINE_CLASS(signature)
static const uint16_t mask[17]
FineSignature * curfinesig
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
AVRational time_base
Define the time base used by the PTS of the frames/samples which will pass through this link...
int av_log_get_level(void)
Get the current log level.
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
static int put_bits_count(PutBitContext *s)
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
int w
agreed upon image width
char * av_asprintf(const char *fmt,...)
as above, but U and V bytes are swapped
unsigned nb_inputs
number of input pads
struct FineSignature * second
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
static const ElemCat * elements[ELEMENT_COUNT]
MPEG-7 video signature calculation and lookup filter.
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
AVFilterContext * src
source filter
static const AVFilterPad outputs[]
int av_get_frame_filename(char *buf, int buf_size, const char *path, int number)
#define AV_LOG_INFO
Standard information.
static int request_frame(AVFilterLink *outlink)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
static av_cold void uninit(AVFilterContext *ctx)
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
static int config_input(AVFilterLink *inlink)
const char * name
Filter name.
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
AVRational sample_aspect_ratio
agreed upon sample aspect ratio
AVFilterLink ** outputs
array of pointers to output links
struct CoarseSignature * next
MPEG-7 video signature calculation and lookup filter.
static enum AVPixelFormat pix_fmts[]
CoarseSignature * coarsesiglist
#define flags(name, subs,...)
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
#define FF_INLINK_IDX(link)
Find the index of a link.
struct FineSignature * first
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
CoarseSignature * curcoarsesig2
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
static int query_formats(AVFilterContext *ctx)
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
struct FineSignature * prev
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
CoarseSignature * coarseend
AVFilterContext * dst
dest filter
static av_cold int init(AVFilterContext *ctx)
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
#define av_malloc_array(a, b)
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
AVPixelFormat
Pixel format.
mode
Use these values in ebur128_init (or'ed).
uint8_t framesig[SIGELEM_SIZE/5]
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
FineSignature * finesiglist
static int ff_insert_inpad(AVFilterContext *f, unsigned index, AVFilterPad *p)
Insert a new input pad for the filter.