vf_dnn_detect.c
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Object detection filter using deep learning networks.
 */
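
/*
 * Illustrative invocation (the model and label files here are placeholders,
 * not shipped with FFmpeg):
 *
 *   ffmpeg -i input.mp4 \
 *       -vf dnn_detect=dnn_backend=openvino:model=detect.xml:confidence=0.6:labels=labels.txt,showinfo \
 *       -f null -
 *
 * Detections are attached to each frame as AV_FRAME_DATA_DETECTION_BBOXES
 * side data, which downstream filters such as showinfo can report.
 */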

#include "libavutil/file_open.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "filters.h"
#include "dnn_filter_common.h"
#include "video.h"
#include "libavutil/time.h"
#include "libavutil/avstring.h"
#include "libavutil/detection_bbox.h"
#include "libavutil/fifo.h"
#include <float.h>

typedef enum {
    DDMT_SSD,
    DDMT_YOLOV1V2,
    DDMT_YOLOV3,
    DDMT_YOLOV4,
} DNNDetectionModelType;

typedef struct DnnDetectContext {
    const AVClass *class;
    DnnContext dnnctx;
    float confidence;
    char *labels_filename;
    char **labels;
    int label_count;
    DNNDetectionModelType model_type;
    int cell_w;
    int cell_h;
    int nb_classes;
    AVFifo *bboxes_fifo;
    int scale_width;
    int scale_height;
    float *anchors;
    int nb_anchor;
} DnnDetectContext;

static const AVOptionArrayDef anchor_array_def = { .sep = '&' };

#define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)
#define OFFSET2(x) offsetof(DnnDetectContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption dnn_detect_options[] = {
    { "dnn_backend", "DNN backend",              OFFSET(backend_type),     AV_OPT_TYPE_INT,    { .i64 = DNN_OV },   INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
#if (CONFIG_LIBTENSORFLOW == 1)
    { "tensorflow",  "tensorflow backend flag",  0,                        AV_OPT_TYPE_CONST,  { .i64 = DNN_TF },   0, 0, FLAGS, .unit = "backend" },
#endif
#if (CONFIG_LIBOPENVINO == 1)
    { "openvino",    "openvino backend flag",    0,                        AV_OPT_TYPE_CONST,  { .i64 = DNN_OV },   0, 0, FLAGS, .unit = "backend" },
#endif
    { "confidence",  "confidence threshold",     OFFSET2(confidence),      AV_OPT_TYPE_FLOAT,  { .dbl = 0.5 },      0, 1, FLAGS },
    { "labels",      "path to labels file",      OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL },     0, 0, FLAGS },
    { "model_type",  "DNN detection model type", OFFSET2(model_type),      AV_OPT_TYPE_INT,    { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
    { "ssd",         "output shape [1, 1, N, 7]",              0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },      0, 0, FLAGS, .unit = "model_type" },
    { "yolo",        "output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 }, 0, 0, FLAGS, .unit = "model_type" },
    { "yolov3",      "output shape [1, N*D, Cx, Cy]",          0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 },   0, 0, FLAGS, .unit = "model_type" },
    { "yolov4",      "output shape [1, N*D, Cx, Cy]",          0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 },   0, 0, FLAGS, .unit = "model_type" },
    { "cell_w",      "cell width",                OFFSET2(cell_w),     AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
    { "cell_h",      "cell height",               OFFSET2(cell_h),     AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
    { "nb_classes",  "number of classes",         OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
    { "anchors",     "anchors, separated by '&'", OFFSET2(anchors),    AV_OPT_TYPE_FLOAT | AV_OPT_TYPE_FLAG_ARRAY, { .arr = &anchor_array_def }, FLT_MIN, FLT_MAX, FLAGS },
    { NULL }
};
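
/*
 * For the YOLO model types, 'anchors' holds one (w, h) pair per detection box
 * for each output scale, flattened into a single '&'-separated list, e.g.
 * anchors=81&82&135&169&344&319 (illustrative values; the correct list comes
 * from the model's training configuration).
 */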

AVFILTER_DNN_DEFINE_CLASS(dnn_detect, DNN_TF | DNN_OV);

static inline float sigmoid(float x) {
    return 1.f / (1.f + exp(-x));
}

static inline float linear(float x) {
    return x;
}

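/*
 * Return the class index with the highest score. 'cell_size' is the stride
 * between consecutive class scores: cell_w * cell_h for planar (NCHW-style)
 * YOLO outputs, 1 for interleaved (NHWC-style) outputs.
 */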
static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data)
{
    float max_prob = 0;
    int label_id = 0;
    for (int i = 0; i < nb_classes; i++) {
        if (label_data[i * cell_size] > max_prob) {
            max_prob = label_data[i * cell_size];
            label_id = i;
        }
    }
    return label_id;
}

/* Calculate Intersection Over Union */
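/*
 * Worked example: bbox1 = {x=0, y=0, w=10, h=10} and bbox2 = {x=5, y=5,
 * w=10, h=10} overlap in a 5x5 region, so IOU = 25 / (100 + 100 - 25) ~= 0.14.
 */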
static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)
{
    float overlapping_width  = FFMIN(bbox1->x + bbox1->w, bbox2->x + bbox2->w) - FFMAX(bbox1->x, bbox2->x);
    float overlapping_height = FFMIN(bbox1->y + bbox1->h, bbox2->y + bbox2->h) - FFMAX(bbox1->y, bbox2->y);
    float intersection_area =
        (overlapping_width < 0 || overlapping_height < 0) ? 0 : overlapping_height * overlapping_width;
    float union_area = bbox1->w * bbox1->h + bbox2->w * bbox2->h - intersection_area;
    return intersection_area / union_area;
}

static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int output_index,
                                        AVFilterContext *filter_ctx)
{
    DnnDetectContext *ctx = filter_ctx->priv;
    float conf_threshold = ctx->confidence;
    int detection_boxes, box_size;
    int cell_w = 0, cell_h = 0, scale_w = 0, scale_h = 0;
    int nb_classes = ctx->nb_classes;
    float *output_data = output[output_index].data;
    float *anchors = ctx->anchors;
    AVDetectionBBox *bbox;
    float (*post_process_raw_data)(float x) = linear;
    int is_NHWC = 0;

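    /*
     * For YOLOv3/v4 the grid size is taken from the output tensor itself.
     * Heuristic: on a non-square grid, dims[1] == dims[2] suggests an NHWC
     * layout (dims = [B, Cy, Cx, C]); otherwise NCHW is assumed.
     */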
    if (ctx->model_type == DDMT_YOLOV1V2) {
        cell_w = ctx->cell_w;
        cell_h = ctx->cell_h;
        scale_w = cell_w;
        scale_h = cell_h;
    } else {
        if (output[output_index].dims[2] != output[output_index].dims[3] &&
            output[output_index].dims[2] == output[output_index].dims[1]) {
            is_NHWC = 1;
            cell_w = output[output_index].dims[2];
            cell_h = output[output_index].dims[1];
        } else {
            cell_w = output[output_index].dims[3];
            cell_h = output[output_index].dims[2];
        }
        scale_w = ctx->scale_width;
        scale_h = ctx->scale_height;
    }
    box_size = nb_classes + 5;

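    /* YOLOv4 models emit raw logits, so scores are squashed through a
     * sigmoid below; earlier YOLO model outputs are used as-is. */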
    switch (ctx->model_type) {
    case DDMT_YOLOV1V2:
    case DDMT_YOLOV3:
        post_process_raw_data = linear;
        break;
    case DDMT_YOLOV4:
        post_process_raw_data = sigmoid;
        break;
    }

    if (!cell_h || !cell_w) {
        av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are not set\n");
        return AVERROR(EINVAL);
    }

    if (!nb_classes) {
        av_log(filter_ctx, AV_LOG_ERROR, "nb_classes is not set\n");
        return AVERROR(EINVAL);
    }

    if (output[output_index].dims[1] * output[output_index].dims[2] *
        output[output_index].dims[3] % (box_size * cell_w * cell_h)) {
        av_log(filter_ctx, AV_LOG_ERROR, "wrong cell_w, cell_h or nb_classes\n");
        return AVERROR(EINVAL);
    }
    detection_boxes = output[output_index].dims[1] *
                      output[output_index].dims[2] *
                      output[output_index].dims[3] / box_size / cell_w / cell_h;

    anchors = anchors + (detection_boxes * output_index * 2);
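    /* Each output scale consumes detection_boxes (w, h) anchor pairs; the
     * offset above skips the pairs belonging to previous output scales. */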
    /**
     * find all candidate bboxes
     * yolo output can be reshaped to [B, N*D, Cx, Cy]
     * Detection box 'D' has format [`x`, `y`, `w`, `h`, `box_score`, `class_no_1`, ...]
     **/
    for (int box_id = 0; box_id < detection_boxes; box_id++) {
        for (int cx = 0; cx < cell_w; cx++)
            for (int cy = 0; cy < cell_h; cy++) {
                float x, y, w, h, conf;
                float *detection_boxes_data;
                int label_id;

                if (is_NHWC) {
                    detection_boxes_data = output_data +
                        ((cy * cell_w + cx) * detection_boxes + box_id) * box_size;
                    conf = post_process_raw_data(detection_boxes_data[4]);
                } else {
                    detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h;
                    conf = post_process_raw_data(
                        detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]);
                }

                if (is_NHWC) {
                    x = post_process_raw_data(detection_boxes_data[0]);
                    y = post_process_raw_data(detection_boxes_data[1]);
                    w = detection_boxes_data[2];
                    h = detection_boxes_data[3];
                    label_id = dnn_detect_get_label_id(ctx->nb_classes, 1, detection_boxes_data + 5);
                    conf = conf * post_process_raw_data(detection_boxes_data[label_id + 5]);
                } else {
                    x = post_process_raw_data(detection_boxes_data[cy * cell_w + cx]);
                    y = post_process_raw_data(detection_boxes_data[cy * cell_w + cx + cell_w * cell_h]);
                    w = detection_boxes_data[cy * cell_w + cx + 2 * cell_w * cell_h];
                    h = detection_boxes_data[cy * cell_w + cx + 3 * cell_w * cell_h];
                    label_id = dnn_detect_get_label_id(ctx->nb_classes, cell_w * cell_h,
                                                       detection_boxes_data + cy * cell_w + cx + 5 * cell_w * cell_h);
                    conf = conf * post_process_raw_data(
                        detection_boxes_data[cy * cell_w + cx + (label_id + 5) * cell_w * cell_h]);
                }
                if (conf < conf_threshold) {
                    continue;
                }

                bbox = av_mallocz(sizeof(*bbox));
                if (!bbox)
                    return AVERROR(ENOMEM);

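                /* Decode to pixel units: the anchor scales exp(w) and exp(h),
                 * (cx + x, cy + y) maps the grid cell to frame coordinates,
                 * and the box center is converted to a top-left corner. */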
                bbox->w = exp(w) * anchors[box_id * 2] * frame->width / scale_w;
                bbox->h = exp(h) * anchors[box_id * 2 + 1] * frame->height / scale_h;
                bbox->x = (cx + x) / cell_w * frame->width - bbox->w / 2;
                bbox->y = (cy + y) / cell_h * frame->height - bbox->h / 2;
                bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);
                if (ctx->labels && label_id < ctx->label_count) {
                    av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));
                } else {
                    snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);
                }

                if (av_fifo_write(ctx->bboxes_fifo, &bbox, 1) < 0) {
                    av_freep(&bbox);
                    return AVERROR(ENOMEM);
                }
                bbox = NULL;
            }
    }
    return 0;
}

static int dnn_detect_fill_side_data(AVFrame *frame, AVFilterContext *filter_ctx)
{
    DnnDetectContext *ctx = filter_ctx->priv;
    float conf_threshold = ctx->confidence;
    AVDetectionBBox *bbox;
    int nb_bboxes = 0;
    AVDetectionBBoxHeader *header;

    if (av_fifo_can_read(ctx->bboxes_fifo) == 0) {
        av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
        return 0;
    }

    /* Remove overlapping bboxes (a simplified non-maximum suppression). */
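    /* Note: the 'confidence' option doubles as the IOU threshold here; a box
     * is dropped when a same-label box with higher confidence overlaps it by
     * at least that much. */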
    for (int i = 0; i < av_fifo_can_read(ctx->bboxes_fifo); i++) {
        av_fifo_peek(ctx->bboxes_fifo, &bbox, 1, i);
        for (int j = 0; j < av_fifo_can_read(ctx->bboxes_fifo); j++) {
            AVDetectionBBox *overlap_bbox;
            av_fifo_peek(ctx->bboxes_fifo, &overlap_bbox, 1, j);
            if (!strcmp(bbox->detect_label, overlap_bbox->detect_label) &&
                av_cmp_q(bbox->detect_confidence, overlap_bbox->detect_confidence) < 0 &&
                dnn_detect_IOU(bbox, overlap_bbox) >= conf_threshold) {
                bbox->classify_count = -1; // bad result
                nb_bboxes++;
                break;
            }
        }
    }
    nb_bboxes = av_fifo_can_read(ctx->bboxes_fifo) - nb_bboxes;
    header = av_detection_bbox_create_side_data(frame, nb_bboxes);
    if (!header) {
        av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
        return -1;
    }
    av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

    while (av_fifo_can_read(ctx->bboxes_fifo)) {
        AVDetectionBBox *candidate_bbox;
        av_fifo_read(ctx->bboxes_fifo, &candidate_bbox, 1);

        if (nb_bboxes > 0 && candidate_bbox->classify_count != -1) {
            bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);
            memcpy(bbox, candidate_bbox, sizeof(*bbox));
            nb_bboxes--;
        }
        av_freep(&candidate_bbox);
    }
    return 0;
}

static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
{
    int ret = 0;
    ret = dnn_detect_parse_yolo_output(frame, output, 0, filter_ctx);
    if (ret < 0)
        return ret;
    ret = dnn_detect_fill_side_data(frame, filter_ctx);
    if (ret < 0)
        return ret;
    return 0;
}

static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output,
                                       AVFilterContext *filter_ctx, int nb_outputs)
{
    int ret = 0;
    for (int i = 0; i < nb_outputs; i++) {
        ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx);
        if (ret < 0)
            return ret;
    }
    ret = dnn_detect_fill_side_data(frame, filter_ctx);
    if (ret < 0)
        return ret;
    return 0;
}
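
/*
 * SSD post-processing. Two output layouts are accepted: a single
 * [1, 1, N, 7] tensor whose rows are [image_id, label, conf, x0, y0, x1, y1],
 * or a pair of tensors where one holds N x 5 boxes ([x0, y0, x1, y1, conf])
 * and the other holds the label ids.
 */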
static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs,
                                    AVFilterContext *filter_ctx)
{
    DnnDetectContext *ctx = filter_ctx->priv;
    float conf_threshold = ctx->confidence;
    int proposal_count = 0;
    int detect_size = 0;
    float *detections = NULL, *labels = NULL;
    int nb_bboxes = 0;
    AVDetectionBBoxHeader *header;
    AVDetectionBBox *bbox;
    int scale_w = ctx->scale_width;
    int scale_h = ctx->scale_height;

    if (nb_outputs == 1 && output->dims[3] == 7) {
        proposal_count = output->dims[2];
        detect_size = output->dims[3];
        detections = output->data;
    } else if (nb_outputs == 2 && output[0].dims[3] == 5) {
        proposal_count = output[0].dims[2];
        detect_size = output[0].dims[3];
        detections = output[0].data;
        labels = output[1].data;
    } else if (nb_outputs == 2 && output[1].dims[3] == 5) {
        proposal_count = output[1].dims[2];
        detect_size = output[1].dims[3];
        detections = output[1].data;
        labels = output[0].data;
    } else {
        av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd requirement.\n");
        return AVERROR(EINVAL);
    }

    if (proposal_count == 0)
        return 0;

    for (int i = 0; i < proposal_count; ++i) {
        float conf;
        if (nb_outputs == 1)
            conf = detections[i * detect_size + 2];
        else
            conf = detections[i * detect_size + 4];
        if (conf < conf_threshold) {
            continue;
        }
        nb_bboxes++;
    }

    if (nb_bboxes == 0) {
        av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
        return 0;
    }

    header = av_detection_bbox_create_side_data(frame, nb_bboxes);
    if (!header) {
        av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
        return -1;
    }

    av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

    for (int i = 0; i < proposal_count; ++i) {
        int av_unused image_id = (int)detections[i * detect_size + 0];
        int label_id;
        float conf, x0, y0, x1, y1;

        if (nb_outputs == 1) {
            label_id = (int)detections[i * detect_size + 1];
            conf = detections[i * detect_size + 2];
            x0   = detections[i * detect_size + 3];
            y0   = detections[i * detect_size + 4];
            x1   = detections[i * detect_size + 5];
            y1   = detections[i * detect_size + 6];
        } else {
            label_id = (int)labels[i];
            x0   = detections[i * detect_size]     / scale_w;
            y0   = detections[i * detect_size + 1] / scale_h;
            x1   = detections[i * detect_size + 2] / scale_w;
            y1   = detections[i * detect_size + 3] / scale_h;
            conf = detections[i * detect_size + 4];
        }

        if (conf < conf_threshold) {
            continue;
        }

        bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);
        bbox->x = (int)(x0 * frame->width);
        bbox->w = (int)(x1 * frame->width) - bbox->x;
        bbox->y = (int)(y0 * frame->height);
        bbox->h = (int)(y1 * frame->height) - bbox->y;

        bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);
        bbox->classify_count = 0;

        if (ctx->labels && label_id < ctx->label_count) {
            av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));
        } else {
            snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);
        }

        nb_bboxes--;
        if (nb_bboxes == 0) {
            break;
        }
    }
    return 0;
}

static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outputs,
                                   AVFilterContext *filter_ctx)
{
    AVFrameSideData *sd;
    DnnDetectContext *ctx = filter_ctx->priv;
    int ret = 0;

    sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
    if (sd) {
        av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side data.\n");
        return -1;
    }

    switch (ctx->model_type) {
    case DDMT_SSD:
        ret = dnn_detect_post_proc_ssd(frame, output, nb_outputs, filter_ctx);
        if (ret < 0)
            return ret;
        break;
    case DDMT_YOLOV1V2:
        ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx);
        if (ret < 0)
            return ret;
        break;
    case DDMT_YOLOV3:
    case DDMT_YOLOV4:
        ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, nb_outputs);
        if (ret < 0)
            return ret;
        break;
    }
    return 0;
}

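/*
 * TensorFlow post-processing; assumes the conventional TF object-detection
 * output set: output[0] = num_detections, output[1] = detection_scores,
 * output[2] = detection_classes, output[3] = detection_boxes with
 * normalized [y0, x0, y1, x1] coordinates.
 */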
static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
{
    DnnDetectContext *ctx = filter_ctx->priv;
    int proposal_count;
    float conf_threshold = ctx->confidence;
    float *conf, *position, *label_id, x0, y0, x1, y1;
    int nb_bboxes = 0;
    AVFrameSideData *sd;
    AVDetectionBBox *bbox;
    AVDetectionBBoxHeader *header;

    proposal_count = *(float *)(output[0].data);
    conf = output[1].data;
    position = output[3].data;
    label_id = output[2].data;

    sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
    if (sd) {
        av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n");
        return -1;
    }

    for (int i = 0; i < proposal_count; ++i) {
        if (conf[i] < conf_threshold)
            continue;
        nb_bboxes++;
    }

    if (nb_bboxes == 0) {
        av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
        return 0;
    }

    header = av_detection_bbox_create_side_data(frame, nb_bboxes);
    if (!header) {
        av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
        return -1;
    }

    av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

    for (int i = 0; i < proposal_count; ++i) {
        if (conf[i] < conf_threshold) {
            continue;
        }

        y0 = position[i * 4];
        x0 = position[i * 4 + 1];
        y1 = position[i * 4 + 2];
        x1 = position[i * 4 + 3];

        bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);

        bbox->x = (int)(x0 * frame->width);
        bbox->w = (int)(x1 * frame->width) - bbox->x;
        bbox->y = (int)(y0 * frame->height);
        bbox->h = (int)(y1 * frame->height) - bbox->y;

        bbox->detect_confidence = av_make_q((int)(conf[i] * 10000), 10000);
        bbox->classify_count = 0;

        if (ctx->labels && label_id[i] < ctx->label_count) {
            av_strlcpy(bbox->detect_label, ctx->labels[(int)label_id[i]], sizeof(bbox->detect_label));
        } else {
            snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", (int)label_id[i]);
        }

        nb_bboxes--;
        if (nb_bboxes == 0) {
            break;
        }
    }
    return 0;
}

static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
{
    DnnDetectContext *ctx = filter_ctx->priv;
    DnnContext *dnn_ctx = &ctx->dnnctx;
    switch (dnn_ctx->backend_type) {
    case DNN_OV:
        return dnn_detect_post_proc_ov(frame, output, nb, filter_ctx);
    case DNN_TF:
        return dnn_detect_post_proc_tf(frame, output, filter_ctx);
    default:
        avpriv_report_missing_feature(filter_ctx, "Current dnn backend does not support detect filter\n");
        return AVERROR(EINVAL);
    }
}

static void free_detect_labels(DnnDetectContext *ctx)
{
    for (int i = 0; i < ctx->label_count; i++) {
        av_freep(&ctx->labels[i]);
    }
    ctx->label_count = 0;
    av_freep(&ctx->labels);
}
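
/*
 * The labels file holds one label per line; trailing '\n', '\r' and ' '
 * characters are stripped and empty lines are skipped. A COCO-style file
 * would contain lines such as "person" and "bicycle" (illustrative example,
 * not shipped with FFmpeg).
 */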
static int read_detect_label_file(AVFilterContext *context)
{
    int line_len;
    FILE *file;
    DnnDetectContext *ctx = context->priv;

    file = avpriv_fopen_utf8(ctx->labels_filename, "r");
    if (!file) {
        av_log(context, AV_LOG_ERROR, "failed to open file %s\n", ctx->labels_filename);
        return AVERROR(EINVAL);
    }

    while (!feof(file)) {
        char *label;
        char buf[256];
        if (!fgets(buf, 256, file)) {
            break;
        }

        line_len = strlen(buf);
        while (line_len) {
            int i = line_len - 1;
            if (buf[i] == '\n' || buf[i] == '\r' || buf[i] == ' ') {
                buf[i] = '\0';
                line_len--;
            } else {
                break;
            }
        }

        if (line_len == 0) // empty line
            continue;

        if (line_len >= AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
            av_log(context, AV_LOG_ERROR, "label %s too long\n", buf);
            fclose(file);
            return AVERROR(EINVAL);
        }

        label = av_strdup(buf);
        if (!label) {
            av_log(context, AV_LOG_ERROR, "failed to allocate memory for label %s\n", buf);
            fclose(file);
            return AVERROR(ENOMEM);
        }

        if (av_dynarray_add_nofree(&ctx->labels, &ctx->label_count, label) < 0) {
            av_log(context, AV_LOG_ERROR, "failed to do av_dynarray_add\n");
            fclose(file);
            av_freep(&label);
            return AVERROR(ENOMEM);
        }
    }

    fclose(file);
    return 0;
}

static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
{
    switch (backend_type) {
    case DNN_TF:
        if (output_nb != 4) {
            av_log(ctx, AV_LOG_ERROR, "Only TensorFlow detect models with 4 outputs are supported, "
                                      "but got %d instead\n", output_nb);
            return AVERROR(EINVAL);
        }
        return 0;
    case DNN_OV:
        return 0;
    default:
        avpriv_report_missing_feature(ctx, "Dnn detect filter does not support current backend\n");
        return AVERROR(EINVAL);
    }
    return 0;
}

static av_cold int dnn_detect_init(AVFilterContext *context)
{
    DnnDetectContext *ctx = context->priv;
    DnnContext *dnn_ctx = &ctx->dnnctx;
    int ret;
    int using_yolo = (ctx->model_type == DDMT_YOLOV3 ||
                      ctx->model_type == DDMT_YOLOV4 ||
                      ctx->model_type == DDMT_YOLOV1V2);

    if (using_yolo && !ctx->anchors) {
        av_log(ctx, AV_LOG_ERROR, "anchors are required for YOLO models but are not set\n");
        return AVERROR(EINVAL);
    }

    ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);
    if (ret < 0)
        return ret;
    ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs);
    if (ret < 0)
        return ret;
    ctx->bboxes_fifo = av_fifo_alloc2(1, sizeof(AVDetectionBBox *), AV_FIFO_FLAG_AUTO_GROW);
    if (!ctx->bboxes_fifo)
        return AVERROR(ENOMEM);
    ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc);

    if (ctx->labels_filename) {
        ret = read_detect_label_file(context);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_GRAY8,
    AV_PIX_FMT_GRAYF32,
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_NONE
};

static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
{
    DnnDetectContext *ctx = outlink->src->priv;
    int ret;
    DNNAsyncStatusType async_state;

    ret = ff_dnn_flush(&ctx->dnnctx);
    if (ret != 0) {
        return -1;
    }

    do {
        AVFrame *in_frame = NULL;
        AVFrame *out_frame = NULL;
        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (async_state == DAST_SUCCESS) {
            ret = ff_filter_frame(outlink, in_frame);
            if (ret < 0)
                return ret;
            if (out_pts)
                *out_pts = in_frame->pts + pts;
        }
        av_usleep(5000);
    } while (async_state >= DAST_NOT_READY);

    return 0;
}
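
/*
 * Filter scheduling callback: feed every pending input frame to the DNN
 * backend, forward every finished inference result downstream, then handle
 * EOF and frame requests.
 */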
static int dnn_detect_activate(AVFilterContext *filter_ctx)
{
    AVFilterLink *inlink = filter_ctx->inputs[0];
    AVFilterLink *outlink = filter_ctx->outputs[0];
    DnnDetectContext *ctx = filter_ctx->priv;
    AVFrame *in = NULL;
    int64_t pts;
    int ret, status;
    int got_frame = 0;
    int async_state;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    do {
        // drain all input frames
        ret = ff_inlink_consume_frame(inlink, &in);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            if (ff_dnn_execute_model(&ctx->dnnctx, in, NULL) != 0) {
                return AVERROR(EIO);
            }
        }
    } while (ret > 0);

    // drain all processed frames
    do {
        AVFrame *in_frame = NULL;
        AVFrame *out_frame = NULL;
        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (async_state == DAST_SUCCESS) {
            ret = ff_filter_frame(outlink, in_frame);
            if (ret < 0)
                return ret;
            got_frame = 1;
        }
    } while (async_state == DAST_SUCCESS);

    // if frame got, schedule to next filter
    if (got_frame)
        return 0;

    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF) {
            int64_t out_pts = pts;
            ret = dnn_detect_flush_frame(outlink, pts, &out_pts);
            ff_outlink_set_status(outlink, status, out_pts);
            return ret;
        }
    }

    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return 0;
}

static av_cold void dnn_detect_uninit(AVFilterContext *context)
{
    DnnDetectContext *ctx = context->priv;
    AVDetectionBBox *bbox;
    ff_dnn_uninit(&ctx->dnnctx);
    if (ctx->bboxes_fifo) {
        while (av_fifo_can_read(ctx->bboxes_fifo)) {
            av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
            av_freep(&bbox);
        }
        av_fifo_freep2(&ctx->bboxes_fifo);
    }
    av_freep(&ctx->anchors);
    free_detect_labels(ctx);
}

static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *context = inlink->dst;
    DnnDetectContext *ctx = context->priv;
    DNNData model_input;
    int ret, width_idx, height_idx;

    ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);
    if (ret != 0) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return ret;
    }
    width_idx = dnn_get_width_idx_by_layout(model_input.layout);
    height_idx = dnn_get_height_idx_by_layout(model_input.layout);
    ctx->scale_width = model_input.dims[width_idx] == -1 ? inlink->w :
                       model_input.dims[width_idx];
    ctx->scale_height = model_input.dims[height_idx] == -1 ? inlink->h :
                        model_input.dims[height_idx];

    return 0;
}

static const AVFilterPad dnn_detect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
    },
};

const FFFilter ff_vf_dnn_detect = {
    .p.name        = "dnn_detect",
    .p.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
    .p.priv_class  = &dnn_detect_class,
    .priv_size     = sizeof(DnnDetectContext),
    .preinit       = ff_dnn_filter_init_child_class,
    .init          = dnn_detect_init,
    .uninit        = dnn_detect_uninit,
    FILTER_INPUTS(dnn_detect_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .activate      = dnn_detect_activate,
};