#include "libavutil/file_open.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "filters.h"
#include "dnn_filter_common.h"
#include "video.h"
#include "libavutil/time.h"
#include "libavutil/avstring.h"
#include "libavutil/detection_bbox.h"
#include "libavutil/fifo.h"
#include <float.h>

Data Structures
struct	DnnDetectContext

Macros
#define	OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)

#define	OFFSET2(x) offsetof(DnnDetectContext, x)

#define	FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM

Enumerations
enum	DNNDetectionModelType { DDMT_SSD, DDMT_YOLOV1V2, DDMT_YOLOV3, DDMT_YOLOV4 }

Functions
	AVFILTER_DNN_DEFINE_CLASS (dnn_detect, DNN_TF|DNN_OV)

static float	sigmoid (float x)

static float	linear (float x)

static int	dnn_detect_get_label_id (int nb_classes, int cell_size, float *label_data)

static float	dnn_detect_IOU (AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)

static int	dnn_detect_parse_yolo_output (AVFrame *frame, DNNData *output, int output_index, AVFilterContext *filter_ctx)

static int	dnn_detect_fill_side_data (AVFrame *frame, AVFilterContext *filter_ctx)

static int	dnn_detect_post_proc_yolo (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

static int	dnn_detect_post_proc_yolov3 (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx, int nb_outputs)

static int	dnn_detect_post_proc_ssd (AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)

static int	dnn_detect_post_proc_ov (AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)

static int	dnn_detect_post_proc_tf (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

static int	dnn_detect_post_proc (AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)

static void	free_detect_labels (DnnDetectContext *ctx)

static int	read_detect_label_file (AVFilterContext *context)

static int	check_output_nb (DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)

static av_cold int	dnn_detect_init (AVFilterContext *context)

static int	dnn_detect_flush_frame (AVFilterLink *outlink, int64_t pts, int64_t *out_pts)

static int	dnn_detect_activate (AVFilterContext *filter_ctx)

static av_cold void	dnn_detect_uninit (AVFilterContext *context)

static int	config_input (AVFilterLink *inlink)

Variables
static const AVOptionArrayDef	anchor_array_def = { .sep = '&' }

static const AVOption	dnn_detect_options []

static enum AVPixelFormat	pix_fmts []

static const AVFilterPad	dnn_detect_inputs []

const FFFilter	ff_vf_dnn_detect

Detailed Description

Implements an object detection filter using deep learning networks.

Definition in file vf_dnn_detect.c.

Macro Definition Documentation

◆ OFFSET

#define OFFSET ( x ) offsetof(DnnDetectContext, dnnctx.x)

Definition at line 63 of file vf_dnn_detect.c.

◆ OFFSET2

#define OFFSET2 ( x ) offsetof(DnnDetectContext, x)

Definition at line 64 of file vf_dnn_detect.c.

◆ FLAGS

#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM

Definition at line 65 of file vf_dnn_detect.c.

Enumeration Type Documentation

◆ DNNDetectionModelType

enum DNNDetectionModelType

Enumerator
DDMT_SSD
DDMT_YOLOV1V2
DDMT_YOLOV3
DDMT_YOLOV4

Definition at line 36 of file vf_dnn_detect.c.

Function Documentation

◆ AVFILTER_DNN_DEFINE_CLASS()

AVFILTER_DNN_DEFINE_CLASS	(	dnn_detect	,
		DNN_TF |	DNN_OV
	)

◆ sigmoid()

static float sigmoid ( float x )

inline static

Definition at line 90 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ linear()

static float linear ( float x )

inline static

Definition at line 94 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ dnn_detect_get_label_id()

static int dnn_detect_get_label_id	(	int	nb_classes,
		int	cell_size,
		float *	label_data
	)

static

Definition at line 98 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ dnn_detect_IOU()

static float dnn_detect_IOU	(	AVDetectionBBox *	bbox1,
		AVDetectionBBox *	bbox2
	)

static

Definition at line 112 of file vf_dnn_detect.c.

Referenced by dnn_detect_fill_side_data().

◆ dnn_detect_parse_yolo_output()

static int dnn_detect_parse_yolo_output	(	AVFrame *	frame,
		DNNData *	output,
		int	output_index,
		AVFilterContext *	filter_ctx
	)

static

Find all candidate bounding boxes. The YOLO output can be reshaped to [B, N*D, Cx, Cy]. Detection box 'D' has the format [x, y, h, w, box_score, class_no_1, ...].

Definition at line 122 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_yolo(), and dnn_detect_post_proc_yolov3().

◆ dnn_detect_fill_side_data()

static int dnn_detect_fill_side_data	(	AVFrame *	frame,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 254 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_yolo(), and dnn_detect_post_proc_yolov3().

◆ dnn_detect_post_proc_yolo()

static int dnn_detect_post_proc_yolo	(	AVFrame *	frame,
		DNNData *	output,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 303 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_yolov3()

static int dnn_detect_post_proc_yolov3	(	AVFrame *	frame,
		DNNData *	output,
		AVFilterContext *	filter_ctx,
		int	nb_outputs
	)

static

Definition at line 315 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_ssd()

static int dnn_detect_post_proc_ssd	(	AVFrame *	frame,
		DNNData *	output,
		int	nb_outputs,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 330 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_ov()

static int dnn_detect_post_proc_ov	(	AVFrame *	frame,
		DNNData *	output,
		int	nb_outputs,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 439 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc().

◆ dnn_detect_post_proc_tf()

static int dnn_detect_post_proc_tf	(	AVFrame *	frame,
		DNNData *	output,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 473 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc().

◆ dnn_detect_post_proc()

static int dnn_detect_post_proc	(	AVFrame *	frame,
		DNNData *	output,
		uint32_t	nb,
		AVFilterContext *	filter_ctx
	)

static

Definition at line 548 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ free_detect_labels()

static void free_detect_labels ( DnnDetectContext * ctx )

static

Definition at line 563 of file vf_dnn_detect.c.

Referenced by dnn_detect_uninit().

◆ read_detect_label_file()

static int read_detect_label_file ( AVFilterContext * context )

static

Definition at line 572 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ check_output_nb()

static int check_output_nb	(	DnnDetectContext *	ctx,
		DNNBackendType	backend_type,
		int	output_nb
	)

static

Definition at line 630 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ dnn_detect_init()

static av_cold int dnn_detect_init ( AVFilterContext * context )

static

Definition at line 649 of file vf_dnn_detect.c.

◆ dnn_detect_flush_frame()

static int dnn_detect_flush_frame	(	AVFilterLink *	outlink,
		int64_t	pts,
		int64_t *	out_pts
	)

static

Definition at line 693 of file vf_dnn_detect.c.

Referenced by dnn_detect_activate().

◆ dnn_detect_activate()

static int dnn_detect_activate ( AVFilterContext * filter_ctx )

static

Definition at line 721 of file vf_dnn_detect.c.

◆ dnn_detect_uninit()

static av_cold void dnn_detect_uninit ( AVFilterContext * context )

static

Definition at line 777 of file vf_dnn_detect.c.

◆ config_input()

static int config_input ( AVFilterLink * inlink )

static

Definition at line 793 of file vf_dnn_detect.c.

Variable Documentation

◆ anchor_array_def

const AVOptionArrayDef anchor_array_def = { .sep = '&' }

static

Definition at line 61 of file vf_dnn_detect.c.

◆ dnn_detect_options

const AVOption dnn_detect_options[]

static

Initial value:

= {
    { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = DNN_OV },    INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
 
 
 
 
 
 
    { "confidence",  "threshold of confidence",    OFFSET2(confidence),      AV_OPT_TYPE_FLOAT,     { .dbl = 0.5 },  0, 1, FLAGS},
    { "labels",      "path to labels file",        OFFSET2(labels_filename), AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
    { "model_type",  "DNN detection model type",   OFFSET2(model_type),      AV_OPT_TYPE_INT,       { .i64 = DDMT_SSD },    INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
        { "ssd",     "output shape [1, 1, N, 7]",  0,                        AV_OPT_TYPE_CONST,       { .i64 = DDMT_SSD },    0, 0, FLAGS, .unit = "model_type" },
        { "yolo",    "output shape [1, N*Cx*Cy*DetectionBox]",  0,           AV_OPT_TYPE_CONST,       { .i64 = DDMT_YOLOV1V2 },    0, 0, FLAGS, .unit = "model_type" },
        { "yolov3",  "outputs shape [1, N*D, Cx, Cy]",  0,                   AV_OPT_TYPE_CONST,       { .i64 = DDMT_YOLOV3 },      0, 0, FLAGS, .unit = "model_type" },
        { "yolov4",  "outputs shape [1, N*D, Cx, Cy]",  0,                   AV_OPT_TYPE_CONST,       { .i64 = DDMT_YOLOV4 },    0, 0, FLAGS, .unit = "model_type" },
    { "cell_w",      "cell width",                 OFFSET2(cell_w),          AV_OPT_TYPE_INT,       { .i64 = 0 },    0, INTMAX_MAX, FLAGS },
    { "cell_h",      "cell height",                OFFSET2(cell_h),          AV_OPT_TYPE_INT,       { .i64 = 0 },    0, INTMAX_MAX, FLAGS },
    { "nb_classes",  "The number of class",        OFFSET2(nb_classes),      AV_OPT_TYPE_INT,       { .i64 = 0 },    0, INTMAX_MAX, FLAGS },
    { "anchors",     "anchors, splited by '&'",    OFFSET2(anchors),         AV_OPT_TYPE_FLOAT | AV_OPT_TYPE_FLAG_ARRAY,    { .arr = &anchor_array_def }, FLT_MIN, FLT_MAX, FLAGS },
    { NULL }
}

Definition at line 66 of file vf_dnn_detect.c.

◆ pix_fmts

enum AVPixelFormat pix_fmts[]

static

Initial value:

= {
    AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_NONE
}

Definition at line 684 of file vf_dnn_detect.c.

◆ dnn_detect_inputs

const AVFilterPad dnn_detect_inputs[]

static

Initial value:

= {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
    },
}

Definition at line 815 of file vf_dnn_detect.c.

◆ ff_vf_dnn_detect

const FFFilter ff_vf_dnn_detect

Initial value:

= {
    .p.name        = "dnn_detect",
    .p.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
    .p.priv_class  = &dnn_detect_class,
    .priv_size     = sizeof(DnnDetectContext),
    .preinit       = ff_dnn_filter_init_child_class,
    .init          = dnn_detect_init,
    .uninit        = dnn_detect_uninit,
    FILTER_INPUTS(dnn_detect_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .activate      = dnn_detect_activate,
}

Definition at line 823 of file vf_dnn_detect.c.

Data Structures

Macros

Enumerations

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ OFFSET

◆ OFFSET2

◆ FLAGS

Enumeration Type Documentation

◆ DNNDetectionModelType

Function Documentation

◆ AVFILTER_DNN_DEFINE_CLASS()

◆ sigmoid()

◆ linear()

◆ dnn_detect_get_label_id()

◆ dnn_detect_IOU()

◆ dnn_detect_parse_yolo_output()

◆ dnn_detect_fill_side_data()

◆ dnn_detect_post_proc_yolo()

◆ dnn_detect_post_proc_yolov3()

◆ dnn_detect_post_proc_ssd()

◆ dnn_detect_post_proc_ov()

◆ dnn_detect_post_proc_tf()

◆ dnn_detect_post_proc()

◆ free_detect_labels()

◆ read_detect_label_file()

◆ check_output_nb()

◆ dnn_detect_init()

◆ dnn_detect_flush_frame()

◆ dnn_detect_activate()

◆ dnn_detect_uninit()

◆ config_input()

Variable Documentation

◆ anchor_array_def

◆ dnn_detect_options

◆ pix_fmts

◆ dnn_detect_inputs

◆ ff_vf_dnn_detect