FFmpeg
detection_bbox.h
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #ifndef AVUTIL_DETECTION_BBOX_H
20 #define AVUTIL_DETECTION_BBOX_H
21 
22 #include "rational.h"
23 #include "avassert.h"
24 #include "frame.h"
25 
26 typedef struct AVDetectionBBox {
27  /**
28  * Distance in pixels from the left/top edge of the frame,
29  * together with width and height, defining the bounding box.
30  */
31  int x;
32  int y;
33  int w;
34  int h;
35 
36 #define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE 64
37 
38  /**
39  * Detection result (label) together with its confidence.
40  */
43 
44  /**
45  * At most 4 classifications based on the detected bounding box.
46  * For example, up to 4 different attributes can be obtained by running
47  * 4 different DNN models on one bounding box.
48  * classify_count is zero if there is no classification.
49  */
50 #define AV_NUM_DETECTION_BBOX_CLASSIFY 4
51  uint32_t classify_count;
55 
56 typedef struct AVDetectionBBoxHeader {
57  /**
58  * Information about how the bounding box is generated.
59  * For example, the DNN model name.
60  */
61  char source[256];
62 
63  /**
64  * Number of bounding boxes in the array.
65  */
66  uint32_t nb_bboxes;
67 
68  /**
69  * Offset in bytes from the beginning of this structure at which
70  * the array of bounding boxes starts.
71  */
72  size_t bboxes_offset;
73 
74  /**
75  * Size of each bounding box in bytes.
76  */
77  size_t bbox_size;
79 
80 /**
81  * Get the bounding box at the specified {@code idx}. {@code idx} must satisfy 0 <= idx < nb_bboxes.
82  */
85 {
86  av_assert0(idx < header->nb_bboxes);
87  return (AVDetectionBBox *)((uint8_t *)header + header->bboxes_offset +
88  idx * header->bbox_size);
89 }
90 
91 /**
92  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes}
93  * AVDetectionBBox, and initializes the variables.
94  * Can be freed with a normal av_free() call.
95  *
96  * @param nb_bboxes number of AVDetectionBBox structures to allocate
97  * @param out_size if non-NULL, the size in bytes of the resulting data array is
98  * written here.
99  */
100 AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size);
101 
102 /**
103  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes}
104  * AVDetectionBBox, in the given AVFrame {@code frame} as AVFrameSideData of type
105  * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
106  */
108 #endif
AVDetectionBBoxHeader::bboxes_offset
size_t bboxes_offset
Offset in bytes from the beginning of this structure at which the array of bounding boxes starts.
Definition: detection_bbox.h:72
rational.h
out_size
int out_size
Definition: movenc.c:55
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:330
av_detection_bbox_alloc
AVDetectionBBoxHeader * av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)
Allocates memory for AVDetectionBBoxHeader, plus an array of nb_bboxes AVDetectionBBox, and initializes the variables.
Definition: detection_bbox.c:21
AVDetectionBBoxHeader::nb_bboxes
uint32_t nb_bboxes
Number of bounding boxes in the array.
Definition: detection_bbox.h:66
av_detection_bbox_create_side_data
AVDetectionBBoxHeader * av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes)
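Allocates memory for AVDetectionBBoxHeader, plus an array of nb_bboxes AVDetectionBBox, in the given AVFrame frame as AVFrameSideData of type AV_FRAME_DATA_DETECTION_BBOXES, and initializes the variables.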
Definition: detection_bbox.c:51
AVDetectionBBox::y
int y
Definition: detection_bbox.h:32
AVDetectionBBoxHeader::source
char source[256]
Information about how the bounding box is generated.
Definition: detection_bbox.h:61
AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
Definition: detection_bbox.h:36
AVDetectionBBox::detect_label
char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]
Detection result (label) together with its confidence.
Definition: detection_bbox.h:41
av_get_detection_bbox
static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)
Definition: detection_bbox.h:84
avassert.h
AVDetectionBBox::classify_confidences
AVRational classify_confidences[AV_NUM_DETECTION_BBOX_CLASSIFY]
Definition: detection_bbox.h:53
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
AVDetectionBBox::classify_labels
char classify_labels[AV_NUM_DETECTION_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]
Definition: detection_bbox.h:52
AVDetectionBBoxHeader
Definition: detection_bbox.h:56
AVDetectionBBox::w
int w
Definition: detection_bbox.h:33
frame.h
header
static const uint8_t header[24]
Definition: sdr2.c:67
AVDetectionBBox::classify_count
uint32_t classify_count
Definition: detection_bbox.h:51
av_always_inline
#define av_always_inline
Definition: attributes.h:49
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
AVDetectionBBox::h
int h
Definition: detection_bbox.h:34
AVDetectionBBox::detect_confidence
AVRational detect_confidence
Definition: detection_bbox.h:42
AVDetectionBBox::x
int x
Distance in pixels from the left/top edge of the frame, together with width and height, defining the bounding box.
Definition: detection_bbox.h:31
AV_NUM_DETECTION_BBOX_CLASSIFY
#define AV_NUM_DETECTION_BBOX_CLASSIFY
At most 4 classifications based on the detected bounding box.
Definition: detection_bbox.h:50
AVDetectionBBoxHeader::bbox_size
size_t bbox_size
Size of each bounding box in bytes.
Definition: detection_bbox.h:77
AVDetectionBBox
Definition: detection_bbox.h:26