FFmpeg
vf_dnn_classify.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 /**
20  * @file
21  * implementing a classification filter using deep learning networks.
22  */
23 
24 #include "libavutil/file_open.h"
25 #include "libavutil/mem.h"
26 #include "libavutil/opt.h"
27 #include "filters.h"
28 #include "dnn_filter_common.h"
29 #include "video.h"
30 #include "libavutil/time.h"
31 #include "libavutil/avstring.h"
33 
34 typedef struct DnnClassifyContext {
35  const AVClass *class;
37  float confidence;
39  char *target;
40  char **labels;
43 
/* OFFSET(x): byte offset of member x of the dnnctx field embedded in DnnClassifyContext. */
44 #define OFFSET(x) offsetof(DnnClassifyContext, dnnctx.x)
/* OFFSET2(x): byte offset of member x of DnnClassifyContext itself. */
45 #define OFFSET2(x) offsetof(DnnClassifyContext, x)
/* NOTE(review): expands to an unparenthesized '|' expression; in this listing it is only used as a whole initializer element. */
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
/*
 * Option table of the dnn_classify filter, terminated by a { NULL } entry.
 * The entries below cover: backend selection (default DNN_OV), the
 * confidence threshold (float, default 0.5, range 0..1), the path of the
 * labels file and the "target" string.
 */
47 static const AVOption dnn_classify_options[] = {
48  { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = DNN_OV }, INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
/* The named constant "openvino" is only listed when CONFIG_LIBOPENVINO is 1. */
49 #if (CONFIG_LIBOPENVINO == 1)
50  { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
51 #endif
52  { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
53  { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
54  { "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
55  { NULL }
56 };
57 
58 AVFILTER_DNN_DEFINE_CLASS(dnn_classify, DNN_OV);
59 
61 {
/*
 * Post-processing of one classification result: find the highest score in
 * output->data and, when it reaches the configured confidence threshold,
 * append that score and its label to the bounding box selected by
 * bbox_index. Returns 0 when done (including when the score is below the
 * threshold) and -1 on an invalid output size or missing side data.
 */
63  float conf_threshold = ctx->confidence;
65  AVDetectionBBox *bbox;
66  float *classifications;
67  uint32_t label_id;
68  float confidence;
69  AVFrameSideData *sd;
/* Number of scores to scan: product of dims[1..3] of the output. */
70  int output_size = output->dims[3] * output->dims[2] * output->dims[1];
71  if (output_size <= 0) {
72  return -1;
73  }
74 
76  if (!sd) {
77  av_log(filter_ctx, AV_LOG_ERROR, "Cannot get side data in dnn_classify_post_proc\n");
78  return -1;
79  }
81 
/* For the first box only, append ", <model filename>" to header->source. */
82  if (bbox_index == 0) {
83  av_strlcat(header->source, ", ", sizeof(header->source));
84  av_strlcat(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
85  }
86 
/* Arg-max scan; on equal scores the lowest index is kept (strict '>'). */
87  classifications = output->data;
88  label_id = 0;
89  confidence= classifications[0];
90  for (int i = 1; i < output_size; i++) {
91  if (classifications[i] > confidence) {
92  label_id = i;
93  confidence= classifications[i];
94  }
95  }
96 
/* A best score below the threshold is not an error: nothing is recorded. */
97  if (confidence < conf_threshold) {
98  return 0;
99  }
100 
/*
 * Store the confidence as a rational with denominator 10000.
 * NOTE(review): classify_count is used as an index without a bounds check
 * in this function; presumably the caller guarantees a free slot - confirm.
 */
101  bbox = av_get_detection_bbox(header, bbox_index);
102  bbox->classify_confidences[bbox->classify_count] = av_make_q((int)(confidence * 10000), 10000);
103 
/* Use the label text when a loaded label exists for this id, else the numeric id. */
104  if (ctx->labels && label_id < ctx->label_count) {
105  av_strlcpy(bbox->classify_labels[bbox->classify_count], ctx->labels[label_id], sizeof(bbox->classify_labels[bbox->classify_count]));
106  } else {
/* NOTE(review): label_id is uint32_t but is printed with "%d". */
107  snprintf(bbox->classify_labels[bbox->classify_count], sizeof(bbox->classify_labels[bbox->classify_count]), "%d", label_id);
108  }
109 
110  bbox->classify_count++;
111 
112  return 0;
113 }
114 
116 {
/*
 * Release every label string, then the label array itself, and reset the
 * count. av_freep() also sets the freed pointers to NULL.
 */
117  for (int i = 0; i < ctx->label_count; i++) {
118  av_freep(&ctx->labels[i]);
119  }
120  ctx->label_count = 0;
121  av_freep(&ctx->labels);
122 }
123 
125 {
/*
 * Load the labels file named by ctx->labels_filename, one label per line,
 * appending each non-empty line to ctx->labels / ctx->label_count.
 * Returns 0 on success, AVERROR(EINVAL) if the file cannot be opened or a
 * label is too long, AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): labels appended before a failure stay in ctx->labels on the
 * error paths below; presumably they are released later by the filter's
 * cleanup - confirm.
 */
126  int line_len;
127  FILE *file;
128  DnnClassifyContext *ctx = context->priv;
129 
130  file = avpriv_fopen_utf8(ctx->labels_filename, "r");
131  if (!file){
132  av_log(context, AV_LOG_ERROR, "failed to open file %s\n", ctx->labels_filename);
133  return AVERROR(EINVAL);
134  }
135 
136  while (!feof(file)) {
137  char *label;
138  char buf[256];
/*
 * fgets() stores at most 255 characters per call.
 * NOTE(review): a longer physical line would therefore be seen as several
 * chunks - confirm whether that is intended.
 */
139  if (!fgets(buf, 256, file)) {
140  break;
141  }
142 
/* Strip trailing newline, carriage return and space characters. */
143  line_len = strlen(buf);
144  while (line_len) {
145  int i = line_len - 1;
146  if (buf[i] == '\n' || buf[i] == '\r' || buf[i] == ' ') {
147  buf[i] = '\0';
148  line_len--;
149  } else {
150  break;
151  }
152  }
153 
154  if (line_len == 0) // empty line
155  continue;
156 
/* Reject labels whose length reaches AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE. */
157  if (line_len >= AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
158  av_log(context, AV_LOG_ERROR, "label %s too long\n", buf);
159  fclose(file);
160  return AVERROR(EINVAL);
161  }
162 
163  label = av_strdup(buf);
164  if (!label) {
165  av_log(context, AV_LOG_ERROR, "failed to allocate memory for label %s\n", buf);
166  fclose(file);
167  return AVERROR(ENOMEM);
168  }
169 
/* If the append fails, the copy is freed here before returning. */
170  if (av_dynarray_add_nofree(&ctx->labels, &ctx->label_count, label) < 0) {
171  av_log(context, AV_LOG_ERROR, "failed to do av_dynarray_add\n");
172  fclose(file);
173  av_freep(&label);
174  return AVERROR(ENOMEM);
175  }
176  }
177 
178  fclose(file);
179  return 0;
180 }
181 
183 {
184  DnnClassifyContext *ctx = context->priv;
186  if (ret < 0)
187  return ret;
189 
190  if (ctx->labels_filename) {
192  }
193  return 0;
194 }
195 
196 static const enum AVPixelFormat pix_fmts[] = {
203 };
204 
205 static int dnn_classify_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
206 {
207  DnnClassifyContext *ctx = outlink->src->priv;
208  int ret;
209  DNNAsyncStatusType async_state;
210 
211  ret = ff_dnn_flush(&ctx->dnnctx);
212  if (ret != 0) {
213  return -1;
214  }
215 
216  do {
217  AVFrame *in_frame = NULL;
218  AVFrame *out_frame = NULL;
219  async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
220  if (async_state == DAST_SUCCESS) {
221  ret = ff_filter_frame(outlink, in_frame);
222  if (ret < 0)
223  return ret;
224  if (out_pts)
225  *out_pts = in_frame->pts + pts;
226  }
227  av_usleep(5000);
228  } while (async_state >= DAST_NOT_READY);
229 
230  return 0;
231 }
232 
234 {
235  AVFilterLink *inlink = filter_ctx->inputs[0];
236  AVFilterLink *outlink = filter_ctx->outputs[0];
238  AVFrame *in = NULL;
239  int64_t pts;
240  int ret, status;
241  int got_frame = 0;
242  int async_state;
243 
245 
246  do {
247  // drain all input frames
249  if (ret < 0)
250  return ret;
251  if (ret > 0) {
252  if (ff_dnn_execute_model_classification(&ctx->dnnctx, in, NULL, ctx->target) != 0) {
253  return AVERROR(EIO);
254  }
255  }
256  } while (ret > 0);
257 
258  // drain all processed frames
259  do {
260  AVFrame *in_frame = NULL;
261  AVFrame *out_frame = NULL;
262  async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
263  if (async_state == DAST_SUCCESS) {
264  ret = ff_filter_frame(outlink, in_frame);
265  if (ret < 0)
266  return ret;
267  got_frame = 1;
268  }
269  } while (async_state == DAST_SUCCESS);
270 
271  // if frame got, schedule to next filter
272  if (got_frame)
273  return 0;
274 
276  if (status == AVERROR_EOF) {
277  int64_t out_pts = pts;
278  ret = dnn_classify_flush_frame(outlink, pts, &out_pts);
279  ff_outlink_set_status(outlink, status, out_pts);
280  return ret;
281  }
282  }
283 
285 
286  return 0;
287 }
288 
290 {
291  DnnClassifyContext *ctx = context->priv;
292  ff_dnn_uninit(&ctx->dnnctx);
294 }
295 
297  .name = "dnn_classify",
298  .description = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."),
299  .priv_size = sizeof(DnnClassifyContext),
306  .priv_class = &dnn_classify_class,
307  .activate = dnn_classify_activate,
308 };
dnn_classify_flush_frame
static int dnn_classify_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
Definition: vf_dnn_classify.c:205
free_classify_labels
static void free_classify_labels(DnnClassifyContext *ctx)
Definition: vf_dnn_classify.c:115
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: filters.h:242
av_frame_get_side_data
AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)
Definition: frame.c:963
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1061
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:501
AVOption
AVOption.
Definition: opt.h:429
DnnClassifyContext::confidence
float confidence
Definition: vf_dnn_classify.c:37
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
preinit
static av_cold int preinit(AVFilterContext *ctx)
Definition: af_aresample.c:48
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
dnn_filter_common.h
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:434
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1490
AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
Definition: detection_bbox.h:36
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:472
dnn_classify_post_proc
static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx)
Definition: vf_dnn_classify.c:60
DnnContext
Definition: dnn_interface.h:143
filter_ctx
static FilteringContext * filter_ctx
Definition: transcode.c:52
ff_dnn_filter_init_child_class
int ff_dnn_filter_init_child_class(AVFilterContext *filter)
Definition: dnn_filter_common.c:61
pts
static int64_t pts
Definition: transcode_aac.c:644
av_get_detection_bbox
static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)
Definition: detection_bbox.h:84
AVFILTER_DNN_DEFINE_CLASS
AVFILTER_DNN_DEFINE_CLASS(dnn_classify, DNN_OV)
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
dnn_classify_options
static const AVOption dnn_classify_options[]
Definition: vf_dnn_classify.c:47
av_cold
#define av_cold
Definition: attributes.h:90
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:424
AVDetectionBBox::classify_confidences
AVRational classify_confidences[AV_NUM_DETECTION_BBOX_CLASSIFY]
Definition: detection_bbox.h:53
DNNData
Definition: dnn_interface.h:69
filters.h
ff_dnn_get_result
DNNAsyncStatusType ff_dnn_get_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame)
Definition: dnn_filter_common.c:198
DnnClassifyContext
Definition: vf_dnn_classify.c:34
ctx
AVFormatContext * ctx
Definition: movenc.c:49
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
av_usleep
int av_usleep(unsigned usec)
Sleep for a period of time.
Definition: time.c:84
OFFSET2
#define OFFSET2(x)
Definition: vf_dnn_classify.c:45
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:535
file_open.h
DNN_OV
@ DNN_OV
Definition: dnn_interface.h:37
if
if(ret)
Definition: filter_design.txt:179
context
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are in without and describe what they for example set the foo of the bar offset is the offset of the field in your context
Definition: writing_filters.txt:91
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:75
NULL
#define NULL
Definition: coverity.c:32
AVDetectionBBox::classify_labels
char classify_labels[AV_NUM_DETECTION_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]
Definition: detection_bbox.h:52
AVDetectionBBoxHeader
Definition: detection_bbox.h:56
ff_dnn_execute_model_classification
int ff_dnn_execute_model_classification(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame, const char *target)
Definition: dnn_filter_common.c:183
dnn_classify_init
static av_cold int dnn_classify_init(AVFilterContext *context)
Definition: vf_dnn_classify.c:182
time.h
DnnClassifyContext::target
char * target
Definition: vf_dnn_classify.c:39
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
ff_dnn_flush
int ff_dnn_flush(DnnContext *ctx)
Definition: dnn_filter_common.c:203
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1437
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
DAST_SUCCESS
@ DAST_SUCCESS
Definition: dnn_interface.h:53
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
AVFrameSideData::data
uint8_t * data
Definition: frame.h:267
DnnClassifyContext::label_count
int label_count
Definition: vf_dnn_classify.c:41
header
static const uint8_t header[24]
Definition: sdr2.c:68
AVDetectionBBox::classify_count
uint32_t classify_count
Definition: detection_bbox.h:51
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
DnnClassifyContext::labels
char ** labels
Definition: vf_dnn_classify.c:40
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Underlying C type is float.
Definition: opt.h:271
ff_vf_dnn_classify
const AVFilter ff_vf_dnn_classify
Definition: vf_dnn_classify.c:296
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
dnn_classify_uninit
static av_cold void dnn_classify_uninit(AVFilterContext *context)
Definition: vf_dnn_classify.c:289
avpriv_fopen_utf8
FILE * avpriv_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
Definition: file_open.c:161
AVFilter
Filter definition.
Definition: avfilter.h:201
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
av_strlcat
size_t av_strlcat(char *dst, const char *src, size_t size)
Append the string src to the string dst, but to a total length of no more than size - 1 bytes,...
Definition: avstring.c:95
OFFSET
#define OFFSET(x)
Definition: vf_dnn_classify.c:44
av_dynarray_add_nofree
int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem)
Add an element to a dynamic array.
Definition: mem.c:315
DFT_ANALYTICS_CLASSIFY
@ DFT_ANALYTICS_CLASSIFY
Definition: dnn_interface.h:60
status
ov_status_e status
Definition: dnn_backend_openvino.c:100
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_dnn_classify.c:196
av_strdup
char * av_strdup(const char *s)
Duplicate a string.
Definition: mem.c:272
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
AVFrameSideData
Structure to hold side data for an AVFrame.
Definition: frame.h:265
FLAGS
#define FLAGS
Definition: vf_dnn_classify.c:46
ff_dnn_init
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
Definition: dnn_filter_common.c:73
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
dnn_classify_activate
static int dnn_classify_activate(AVFilterContext *filter_ctx)
Definition: vf_dnn_classify.c:233
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
av_strlcpy
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
Definition: avstring.c:85
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
ff_dnn_uninit
void ff_dnn_uninit(DnnContext *ctx)
Definition: dnn_filter_common.c:208
AVDetectionBBox
Definition: detection_bbox.h:26
read_classify_label_file
static int read_classify_label_file(AVFilterContext *context)
Definition: vf_dnn_classify.c:124
avstring.h
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Underlying C type is a uint8_t* that is either NULL or points to a C string allocated with the av_mal...
Definition: opt.h:276
DAST_NOT_READY
@ DAST_NOT_READY
Definition: dnn_interface.h:52
DNNAsyncStatusType
DNNAsyncStatusType
Definition: dnn_interface.h:49
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
snprintf
#define snprintf
Definition: snprintf.h:34
detection_bbox.h
ff_dnn_set_classify_post_proc
int ff_dnn_set_classify_post_proc(DnnContext *ctx, ClassifyPostProc post_proc)
Definition: dnn_filter_common.c:152
AV_FRAME_DATA_DETECTION_BBOXES
@ AV_FRAME_DATA_DETECTION_BBOXES
Bounding boxes for object detection and classification, as described by AVDetectionBBoxHeader.
Definition: frame.h:194
DnnClassifyContext::labels_filename
char * labels_filename
Definition: vf_dnn_classify.c:38
DnnClassifyContext::dnnctx
DnnContext dnnctx
Definition: vf_dnn_classify.c:36