FFmpeg
dnn_backend_openvino.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN OpenVINO backend implementation.
24  */
25 
26 #include "dnn_backend_openvino.h"
27 #include "dnn_io_proc.h"
28 #include "libavformat/avio.h"
29 #include "libavutil/avassert.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "../internal.h"
33 #include <c_api/ie_c_api.h>
34 
/**
 * User-settable options of the OpenVINO backend.
 */
typedef struct OVOptions {
    char *device_type; ///< value of the "device" option (defaults to "CPU")
} OVOptions;
38 
39 typedef struct OVContext {
40  const AVClass *class;
42 } OVContext;
43 
44 typedef struct OVModel{
47  ie_core_t *core;
48  ie_network_t *network;
49  ie_executable_network_t *exe_network;
50  ie_infer_request_t *infer_request;
51 } OVModel;
52 
/*
 * Append iterate_string to the heap-allocated, space-separated list held in
 * generated_string (which may be NULL for an empty list).
 *
 * The previous list is released once the longer one has been built, so the
 * caller only has to av_free() the final value. If the allocation fails the
 * previous list is kept unchanged.
 *
 * The expansion keeps its trailing semicolon because existing call sites
 * invoke the macro without one.
 */
#define APPEND_STRING(generated_string, iterate_string)                          \
    do {                                                                         \
        char *appended_string_ = (generated_string) ?                            \
            av_asprintf("%s %s", (generated_string), (iterate_string)) :         \
            av_asprintf("%s", (iterate_string));                                 \
        if (appended_string_) {                                                  \
            av_free(generated_string);                                           \
            (generated_string) = appended_string_;                               \
        }                                                                        \
    } while (0);
56 
57 #define OFFSET(x) offsetof(OVContext, x)
58 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
59 static const AVOption dnn_openvino_options[] = {
60  { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
61  { NULL }
62 };
63 
64 AVFILTER_DEFINE_CLASS(dnn_openvino);
65 
66 static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
67  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
68  int do_ioproc);
69 
70 static DNNDataType precision_to_datatype(precision_e precision)
71 {
72  switch (precision)
73  {
74  case FP32:
75  return DNN_FLOAT;
76  default:
77  av_assert0(!"not supported yet.");
78  return DNN_FLOAT;
79  }
80 }
81 
82 static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input_name)
83 {
84  OVModel *ov_model = (OVModel *)model;
85  OVContext *ctx = &ov_model->ctx;
86  char *model_input_name = NULL;
87  char *all_input_names = NULL;
88  IEStatusCode status;
89  size_t model_input_count = 0;
90  dimensions_t dims;
91  precision_e precision;
92 
93  status = ie_network_get_inputs_number(ov_model->network, &model_input_count);
94  if (status != OK) {
95  av_log(ctx, AV_LOG_ERROR, "Failed to get input count\n");
96  return DNN_ERROR;
97  }
98 
99  for (size_t i = 0; i < model_input_count; i++) {
100  status = ie_network_get_input_name(ov_model->network, i, &model_input_name);
101  if (status != OK) {
102  av_log(ctx, AV_LOG_ERROR, "Failed to get No.%d input's name\n", (int)i);
103  return DNN_ERROR;
104  }
105  if (strcmp(model_input_name, input_name) == 0) {
106  ie_network_name_free(&model_input_name);
107  status |= ie_network_get_input_dims(ov_model->network, input_name, &dims);
108  status |= ie_network_get_input_precision(ov_model->network, input_name, &precision);
109  if (status != OK) {
110  av_log(ctx, AV_LOG_ERROR, "Failed to get No.%d input's dims or precision\n", (int)i);
111  return DNN_ERROR;
112  }
113 
114  // The order of dims in the openvino is fixed and it is always NCHW for 4-D data.
115  // while we pass NHWC data from FFmpeg to openvino
116  status = ie_network_set_input_layout(ov_model->network, input_name, NHWC);
117  if (status != OK) {
118  av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name);
119  return DNN_ERROR;
120  }
121 
122  input->channels = dims.dims[1];
123  input->height = dims.dims[2];
124  input->width = dims.dims[3];
125  input->dt = precision_to_datatype(precision);
126  return DNN_SUCCESS;
127  } else {
128  //incorrect input name
129  APPEND_STRING(all_input_names, model_input_name)
130  }
131 
132  ie_network_name_free(&model_input_name);
133  }
134 
135  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model, all input(s) are: \"%s\"\n", input_name, all_input_names);
136  return DNN_ERROR;
137 }
138 
139 static DNNReturnType get_output_ov(void *model, const char *input_name, int input_width, int input_height,
140  const char *output_name, int *output_width, int *output_height)
141 {
143  OVModel *ov_model = (OVModel *)model;
144  OVContext *ctx = &ov_model->ctx;
145  AVFrame *in_frame = av_frame_alloc();
146  AVFrame *out_frame = NULL;
147 
148  if (!in_frame) {
149  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n");
150  return DNN_ERROR;
151  }
152  out_frame = av_frame_alloc();
153  if (!out_frame) {
154  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output frame\n");
155  av_frame_free(&in_frame);
156  return DNN_ERROR;
157  }
158  in_frame->width = input_width;
159  in_frame->height = input_height;
160 
161  ret = execute_model_ov(ov_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
162  *output_width = out_frame->width;
163  *output_height = out_frame->height;
164 
165  av_frame_free(&out_frame);
166  av_frame_free(&in_frame);
167  return ret;
168 }
169 
170 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata)
171 {
172  char *all_dev_names = NULL;
173  DNNModel *model = NULL;
174  OVModel *ov_model = NULL;
175  OVContext *ctx = NULL;
176  IEStatusCode status;
177  ie_config_t config = {NULL, NULL, NULL};
178  ie_available_devices_t a_dev;
179 
180  model = av_mallocz(sizeof(DNNModel));
181  if (!model){
182  return NULL;
183  }
184 
185  ov_model = av_mallocz(sizeof(OVModel));
186  if (!ov_model)
187  goto err;
188  ov_model->model = model;
189  ov_model->ctx.class = &dnn_openvino_class;
190  ctx = &ov_model->ctx;
191 
192  //parse options
193  av_opt_set_defaults(ctx);
194  if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
195  av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
196  goto err;
197  }
198 
199  status = ie_core_create("", &ov_model->core);
200  if (status != OK)
201  goto err;
202 
203  status = ie_core_read_network(ov_model->core, model_filename, NULL, &ov_model->network);
204  if (status != OK)
205  goto err;
206 
207  status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
208  if (status != OK) {
209  av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n");
210  status = ie_core_get_available_devices(ov_model->core, &a_dev);
211  if (status != OK) {
212  av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
213  goto err;
214  }
215  for (int i = 0; i < a_dev.num_devices; i++) {
216  APPEND_STRING(all_dev_names, a_dev.devices[i])
217  }
218  av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n",
219  ctx->options.device_type, all_dev_names);
220  goto err;
221  }
222 
223  status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request);
224  if (status != OK)
225  goto err;
226 
227  model->model = (void *)ov_model;
228  model->get_input = &get_input_ov;
229  model->get_output = &get_output_ov;
230  model->options = options;
231  model->userdata = userdata;
232 
233  return model;
234 
235 err:
236  if (model)
237  av_freep(&model);
238  if (ov_model) {
239  if (ov_model->infer_request)
240  ie_infer_request_free(&ov_model->infer_request);
241  if (ov_model->exe_network)
242  ie_exec_network_free(&ov_model->exe_network);
243  if (ov_model->network)
244  ie_network_free(&ov_model->network);
245  if (ov_model->core)
246  ie_core_free(&ov_model->core);
247  av_freep(&ov_model);
248  }
249  return NULL;
250 }
251 
252 static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
253  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
254  int do_ioproc)
255 {
256  char *model_output_name = NULL;
257  char *all_output_names = NULL;
258  dimensions_t dims;
259  precision_e precision;
260  ie_blob_buffer_t blob_buffer;
261  OVModel *ov_model = (OVModel *)model->model;
262  OVContext *ctx = &ov_model->ctx;
263  IEStatusCode status;
264  size_t model_output_count = 0;
266  ie_blob_t *input_blob = NULL;
267 
268  status = ie_infer_request_get_blob(ov_model->infer_request, input_name, &input_blob);
269  if (status != OK) {
270  av_log(ctx, AV_LOG_ERROR, "Failed to get input blob\n");
271  return DNN_ERROR;
272  }
273 
274  status |= ie_blob_get_dims(input_blob, &dims);
275  status |= ie_blob_get_precision(input_blob, &precision);
276  if (status != OK) {
277  av_log(ctx, AV_LOG_ERROR, "Failed to get input blob dims/precision\n");
278  return DNN_ERROR;
279  }
280 
281  status = ie_blob_get_buffer(input_blob, &blob_buffer);
282  if (status != OK) {
283  av_log(ctx, AV_LOG_ERROR, "Failed to get input blob buffer\n");
284  return DNN_ERROR;
285  }
286 
287  input.height = dims.dims[2];
288  input.width = dims.dims[3];
289  input.channels = dims.dims[1];
290  input.data = blob_buffer.buffer;
291  input.dt = precision_to_datatype(precision);
292  if (do_ioproc) {
293  if (ov_model->model->pre_proc != NULL) {
294  ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata);
295  } else {
296  proc_from_frame_to_dnn(in_frame, &input, ctx);
297  }
298  }
299  ie_blob_free(&input_blob);
300 
301  if (nb_output != 1) {
302  // currently, the filter does not need multiple outputs,
303  // so we just pending the support until we really need it.
304  av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n");
305  return DNN_ERROR;
306  }
307 
308  status = ie_infer_request_infer(ov_model->infer_request);
309  if (status != OK) {
310  av_log(ctx, AV_LOG_ERROR, "Failed to start synchronous model inference\n");
311  return DNN_ERROR;
312  }
313 
314  for (uint32_t i = 0; i < nb_output; ++i) {
315  const char *output_name = output_names[i];
316  ie_blob_t *output_blob = NULL;
317  status = ie_infer_request_get_blob(ov_model->infer_request, output_name, &output_blob);
318  if (status != OK) {
319  //incorrect output name
320  av_log(ctx, AV_LOG_ERROR, "Failed to get model output data\n");
321  status = ie_network_get_outputs_number(ov_model->network, &model_output_count);
322  for (size_t i = 0; i < model_output_count; i++) {
323  status = ie_network_get_output_name(ov_model->network, i, &model_output_name);
324  APPEND_STRING(all_output_names, model_output_name)
325  }
327  "output \"%s\" may not correct, all output(s) are: \"%s\"\n",
328  output_name, all_output_names);
329  return DNN_ERROR;
330  }
331 
332  status = ie_blob_get_buffer(output_blob, &blob_buffer);
333  if (status != OK) {
334  av_log(ctx, AV_LOG_ERROR, "Failed to access output memory\n");
335  return DNN_ERROR;
336  }
337 
338  status |= ie_blob_get_dims(output_blob, &dims);
339  status |= ie_blob_get_precision(output_blob, &precision);
340  if (status != OK) {
341  av_log(ctx, AV_LOG_ERROR, "Failed to get dims or precision of output\n");
342  return DNN_ERROR;
343  }
344 
345  output.channels = dims.dims[1];
346  output.height = dims.dims[2];
347  output.width = dims.dims[3];
348  output.dt = precision_to_datatype(precision);
349  output.data = blob_buffer.buffer;
350  if (do_ioproc) {
351  if (ov_model->model->post_proc != NULL) {
352  ov_model->model->post_proc(out_frame, &output, ov_model->model->userdata);
353  } else {
354  proc_from_dnn_to_frame(out_frame, &output, ctx);
355  }
356  } else {
357  out_frame->width = output.width;
358  out_frame->height = output.height;
359  }
360  ie_blob_free(&output_blob);
361  }
362 
363  return DNN_SUCCESS;
364 }
365 
366 DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
367  const char **output_names, uint32_t nb_output, AVFrame *out_frame)
368 {
369  OVModel *ov_model = (OVModel *)model->model;
370  OVContext *ctx = &ov_model->ctx;
371 
372  if (!in_frame) {
373  av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
374  return DNN_ERROR;
375  }
376 
377  if (!out_frame) {
378  av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
379  return DNN_ERROR;
380  }
381 
382  return execute_model_ov(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
383 }
384 
386 {
387  if (*model){
388  OVModel *ov_model = (OVModel *)(*model)->model;
389  if (ov_model->infer_request)
390  ie_infer_request_free(&ov_model->infer_request);
391  if (ov_model->exe_network)
392  ie_exec_network_free(&ov_model->exe_network);
393  if (ov_model->network)
394  ie_network_free(&ov_model->network);
395  if (ov_model->core)
396  ie_core_free(&ov_model->core);
397  av_freep(&ov_model);
398  av_freep(model);
399  }
400 }
void * model
Definition: dnn_interface.h:46
#define NULL
Definition: coverity.c:32
Buffered I/O operations.
This structure describes decoded (raw) audio or video data.
Definition: frame.h:308
AVOption.
Definition: opt.h:248
void av_opt_set_defaults(void *s)
Set the values of all AVOption fields to their default values.
Definition: opt.c:1357
int channels
Definition: dnn_interface.h:41
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
const char * options
Definition: dnn_interface.h:48
DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:25
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
Definition: opt.c:1558
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
AVOptions.
static const AVOption dnn_openvino_options[]
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output
static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input_name)
DNNModel * model
ie_infer_request_t * infer_request
static DNNReturnType get_output_ov(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
#define av_log(a,...)
DNNDataType
Definition: dnn_interface.h:36
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:53
int width
Definition: frame.h:366
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
int height
Definition: dnn_interface.h:41
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
void * data
Definition: dnn_interface.h:39
simple assert() macros that are a bit more flexible than ISO C assert().
ie_core_t * core
#define FLAGS
OVOptions options
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data)
Definition: dnn_interface.h:59
static DNNDataType precision_to_datatype(precision_e precision)
DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
const AVClass * class
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, void *user_data)
Definition: dnn_interface.h:62
OVContext ctx
AVFormatContext * ctx
Definition: movenc.c:48
#define OFFSET(x)
DNNReturnType
Definition: dnn_interface.h:32
AVFILTER_DEFINE_CLASS(dnn_openvino)
if(ret)
ie_network_t * network
DNN input&output process between AVFrame and DNNData.
void * userdata
Definition: dnn_interface.h:50
Describe the class of an AVClass context structure.
Definition: log.h:67
they must not be accessed directly. The fifo field contains the frames that are queued in the input for processing by the filter. The status_in and status_out fields contain the queued status (EOF or error) of the link
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
void ff_dnn_free_model_ov(DNNModel **model)
const OptionDef options[]
Definition: ffmpeg_opt.c:3400
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
Definition: dnn_interface.h:55
DNN inference functions interface for OpenVINO backend.
DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:80
DNNModel * ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata)
#define APPEND_STRING(generated_string, iterate_string)
static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
int height
Definition: frame.h:366
#define av_freep(p)
DNNDataType dt
Definition: dnn_interface.h:40
ie_executable_network_t * exe_network
int i
Definition: input.c:407