Go to the documentation of this file.
32 #include "../filters.h"
37 #include <onnxruntime_c_api.h>
69 #define OFFSET(x) offsetof(ONNXOptions, x)
70 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
72 {
"threads_per_operation",
"number of CPU threads per ORT operator (device=cpu only)",
84 g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
87 #define ORT_ABORT_ON_ERROR(expr) \
89 OrtStatus *status = (expr); \
90 if (status != NULL) { \
91 const char *msg = g_ort->GetErrorMessage(status); \
92 av_log(ctx, AV_LOG_ERROR, "ONNX Runtime error: %s\n", msg); \
93 g_ort->ReleaseStatus(status); \
150 if (!model || !*model)
190 OrtTypeInfo *type_info =
NULL;
191 const OrtTensorTypeAndShapeInfo *tensor_info =
NULL;
193 size_t input_count = 0;
194 size_t input_index = 0;
197 ONNXTensorElementDataType tensor_type;
200 if (!input_name || !*input_name) {
218 for (
size_t i = 0;
i < input_count;
i++) {
226 if (!strcmp(
name, input_name)) {
250 status =
g_ort->CastTypeInfoToTensorInfo(type_info, &tensor_info);
252 g_ort->ReleaseTypeInfo(type_info);
257 status =
g_ort->GetDimensionsCount(tensor_info, &num_dims);
259 g_ort->ReleaseTypeInfo(type_info);
266 g_ort->ReleaseTypeInfo(type_info);
272 g_ort->ReleaseTypeInfo(type_info);
276 g_ort->GetDimensions(tensor_info, dims, num_dims);
277 g_ort->GetTensorElementType(tensor_info, &tensor_type);
281 "ONNX model has fixed batch size %"PRId64
", but the backend "
282 "only supports a batch size of 1\n", dims[0]);
284 g_ort->ReleaseTypeInfo(type_info);
293 input->dims[0] = dims[0] > 0 ? dims[0] : 1;
294 input->dims[1] = dims[1] > 0 ? dims[1] : 3;
295 input->dims[2] = dims[2] > 0 ? dims[2] : -1;
296 input->dims[3] = dims[3] > 0 ? dims[3] : -1;
298 if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
303 g_ort->ReleaseTypeInfo(type_info);
315 g_ort->ReleaseTypeInfo(type_info);
329 int ret, width_idx, height_idx, channel_idx;
331 size_t input_tensor_size;
332 OrtMemoryInfo *memory_info;
356 input_shape[0] =
input.dims[0];
357 input_shape[1] =
input.dims[channel_idx];
358 input_shape[2] =
input.dims[height_idx];
359 input_shape[3] =
input.dims[width_idx];
361 input_tensor_size = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3];
362 input_tensor_size *=
sizeof(
float);
391 status =
g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info);
398 memory_info,
input.data, input_tensor_size,
399 input_shape, 4, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
402 g_ort->ReleaseMemoryInfo(memory_info);
428 const char *input_names[1];
429 const char *output_names[1];
444 "Multiple output tensors (%u) for ONNX backend", task->
nb_output);
450 "ONNX backend: input/output tensor name was not resolved at load time\n");
460 size_t output_count = 0;
461 int found_output = 0;
471 for (
size_t i = 0;
i < output_count;
i++) {
488 "Output name '%s' not found in ONNX model\n",
500 input_names, (
const OrtValue *
const *)&infer_request->
input_tensor, 1,
522 OrtTensorTypeAndShapeInfo *tensor_info;
523 ONNXTensorElementDataType tensor_type;
541 g_ort->GetDimensionsCount(tensor_info, &num_dims);
545 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
548 g_ort->GetDimensions(tensor_info, dims, num_dims);
554 g_ort->GetTensorElementType(tensor_info, &tensor_type);
555 if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
560 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
572 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
581 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
604 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
609 g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
634 if (lltask ==
NULL) {
670 const char *output_name,
int *output_width,
int *output_height)
679 .output_names = &output_name,
742 model = &onnx_model->
model;
745 status =
g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING,
"FFmpeg", &onnx_model->
env);
757 if (
options->num_threads > 0 &&
765 if (
g_ort->SessionOptionsAppendExecutionProvider_CUDA) {
766 OrtCUDAProviderOptions cuda_options;
767 memset(&cuda_options, 0,
sizeof(cuda_options));
768 cuda_options.device_id =
ctx->device_id;
770 status =
g_ort->SessionOptionsAppendExecutionProvider_CUDA(
775 ctx->device_id, msg);
781 av_log(
ctx,
AV_LOG_WARNING,
"CUDA provider function not available in this ONNX Runtime API version. Falling back to CPU\n");
785 const char* dml_options_keys[] = {
"device_id"};
786 const char* dml_options_values[] = {
NULL};
787 char device_id_str[32];
788 snprintf(device_id_str,
sizeof(device_id_str),
"%d",
ctx->device_id);
789 dml_options_values[0] = device_id_str;
800 if (
g_ort->SessionOptionsAppendExecutionProvider) {
801 status =
g_ort->SessionOptionsAppendExecutionProvider(
803 dml_options_keys, dml_options_values, 1);
807 ctx->device_id, msg);
813 av_log(
ctx,
AV_LOG_WARNING,
"DirectML provider function not available in this ONNX Runtime API version. Falling back to CPU\n");
819 if (
g_ort->SessionOptionsAppendExecutionProvider) {
820 status =
g_ort->SessionOptionsAppendExecutionProvider(
826 "Failed to enable VitisAI EP: %s. Falling back to CPU\n", msg);
833 "VitisAI provider function not available in this ONNX Runtime API version. Falling back to CPU.\n");
838 "Unknown device '%s'. Supported: cpu, cuda, dml, vitisai. Using CPU\n",
842 "Unknown device '%s'. Supported: cpu, cuda, vitisai. Using CPU\n",
852 wchar_t *wfilename =
NULL;
853 if (utf8towchar(
ctx->model_filename, &wfilename)) {
888 size_t input_count = 0;
896 if (input_count == 0) {
900 if (input_count > 1) {
902 "ONNX model exposes %zu input tensors; the ONNX backend "
903 "supports single-input models only.\n",
910 if (!
ctx->model_inputname || !*
ctx->model_inputname) {
923 if (!
ctx->model_inputname)
926 ctx->model_inputname);
930 if (!
ctx->model_outputnames) {
931 size_t output_count = 0;
940 if (output_count == 0) {
952 ctx->model_outputnames =
av_calloc(1,
sizeof(*
ctx->model_outputnames));
953 if (!
ctx->model_outputnames) {
959 if (!
ctx->model_outputnames[0]) {
964 if (output_count == 1) {
966 ctx->model_outputnames[0]);
969 "ONNX model exposes %zu output tensors; auto-using index 0 ('%s'). "
970 "Specify output=NAME to choose a different one.\n",
971 output_count,
ctx->model_outputnames[0]);
#define AV_LOG_WARNING
Something somehow does not look correct.
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it be. vf: default, minimum, maximum, flags. name is the option name
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video, that means pixel format. For audio, that means channel layout and sample format. The lists are not just lists; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
void * ff_safe_queue_pop_front(SafeQueue *sq)
Remove and free first element from the queue in SafeQueue.
static void dnn_free_model_onnx(DNNModel **model)
Common Async Execution Mechanism for the DNN Backends.
void * ff_queue_pop_front(Queue *q)
Remove and free first element from the Queue.
static AVOnce g_ort_init_once
int av_strcasecmp(const char *a, const char *b)
Locale-independent case-insensitive compare.
int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params)
size_t ff_queue_size(Queue *q)
Return the length of the Queue.
#define DNN_GENERIC_ERROR
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
static int onnx_start_inference(void *args)
This structure describes decoded (raw) audio or video data.
static int fill_model_input_onnx(ONNXModel *onnx_model, ONNXRequestItem *request)
Double-ended queue with mutex locks ensuring data consistency while multithreading.
FramePrePostProc frame_pre_proc
static int output_data(MLPDecodeContext *m, unsigned int substr, AVFrame *frame, int *got_frame_ptr)
Write the audio data into the output buffer.
static void destroy_request_item(ONNXRequestItem **arg)
void(* callback)(void *args)
Completion Callback for the backend.
AVFILTER_DEFINE_CLASS(dnn_onnx)
AVFilterContext * filter_ctx
Queue * ff_queue_create(void)
Create a Queue instance.
static int dnn_flush_onnx(const DNNModel *model)
OrtSessionOptions * session_options
static int dnn_get_width_idx_by_layout(DNNLayout layout)
static void onnx_free_request(ONNXInferRequest *request)
static FilteringContext * filter_ctx
static int execute_model_onnx(ONNXRequestItem *request, Queue *lltask_queue)
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
Linear double-ended data structure.
int ff_queue_push_back(Queue *q, void *v)
Add data to the tail of the queue.
static int ff_thread_once(char *control, void(*routine)(void))
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
static void infer_completion_callback(void *args)
const DNNModule ff_dnn_backend_onnx
static int dnn_execute_model_onnx(const DNNModel *model, DNNExecBaseParams *exec_params)
void ff_queue_destroy(Queue *q)
Destroy the Queue instance.
int ff_dnn_fill_gettingoutput_task(TaskItem *task, DNNExecBaseParams *exec_params, void *backend_model, int input_height, int input_width, void *ctx)
Allocate input and output frames and fill the Task with execution parameters.
int(* get_output)(struct DNNModel *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
static AVFormatContext * ctx
static const OrtApi * g_ort
DNNAsyncExecModule exec_module
size_t ff_safe_queue_size(SafeQueue *sq)
Return the length of the SafeQueue.
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
SafeQueue * ff_safe_queue_create(void)
Create and initialize a SafeQueue instance.
static int get_input_onnx(DNNModel *model, DNNData *input, const char *input_name)
FramePrePostProc frame_post_proc
int ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
Join the Async Execution thread and set module pointers to NULL.
static DNNModel * dnn_load_model_onnx(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
ONNXInferRequest * infer_request
static const AVOption dnn_onnx_options[]
#define i(width, name, range_min, range_max)
DNNFunctionType func_type
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
void ff_safe_queue_destroy(SafeQueue *sq)
Destroy the SafeQueue instance.
int ff_dnn_fill_task(TaskItem *task, DNNExecBaseParams *exec_params, void *backend_model, int async, int do_ioproc)
Fill the Task for Backend Execution.
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY. If the filter stores internally one or a few frames for some input
#define AV_LOG_INFO
Standard information.
#define DNN_DEFINE_CLASS(fname)
int ff_safe_queue_push_back(SafeQueue *sq, void *v)
Add data to the tail of queue in the SafeQueue after locking mutex.
static int get_output_onnx(DNNModel *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
int(* start_inference)(void *request)
Synchronous inference function for the backend with corresponding request item as the argument.
void * args
Argument for the execution functions.
const char ** output_names
static ONNXInferRequest * onnx_create_inference_request(void)
void * av_calloc(size_t nmemb, size_t size)
static const AVFilterPad outputs[]
SafeQueue * request_queue
LastLevelTaskItem * lltask
@ AV_OPT_TYPE_INT
Underlying C type is int.
DNNAsyncStatusType ff_dnn_get_result_common(Queue *task_queue, AVFrame **in, AVFrame **out)
Extract input and output frame from the Task Queue after asynchronous inference.
void * ff_queue_peek_front(Queue *q)
Return a pointer to the data at the head of the queue.
static void init_ort_api(void)
static int dnn_get_height_idx_by_layout(DNNLayout layout)
static int dnn_get_channel_idx_by_layout(DNNLayout layout)
int(* get_input)(struct DNNModel *model, DNNData *input, const char *input_name)
static DNNAsyncStatusType dnn_get_result_onnx(const DNNModel *model, AVFrame **in, AVFrame **out)
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)