FFmpeg
dnn_backend_native.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25 
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
30 #include "dnn_io_proc.h"
31 
32 #define OFFSET(x) offsetof(NativeContext, x)
33 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
34 static const AVOption dnn_native_options[] = {
35  { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
36  { NULL },
37 };
38 
40  .class_name = "dnn_native",
41  .item_name = av_default_item_name,
42  .option = dnn_native_options,
43  .version = LIBAVUTIL_VERSION_INT,
44  .category = AV_CLASS_CATEGORY_FILTER,
45 };
46 
47 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
48  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
49  int do_ioproc);
50 
51 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
52 {
53  NativeModel *native_model = (NativeModel *)model;
54  NativeContext *ctx = &native_model->ctx;
55 
56  for (int i = 0; i < native_model->operands_num; ++i) {
57  DnnOperand *oprd = &native_model->operands[i];
58  if (strcmp(oprd->name, input_name) == 0) {
59  if (oprd->type != DOT_INPUT) {
60  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
61  return DNN_ERROR;
62  }
63  input->dt = oprd->data_type;
64  av_assert0(oprd->dims[0] == 1);
65  input->height = oprd->dims[1];
66  input->width = oprd->dims[2];
67  input->channels = oprd->dims[3];
68  return DNN_SUCCESS;
69  }
70  }
71 
72  // do not find the input operand
73  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
74  return DNN_ERROR;
75 }
76 
77 static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height,
78  const char *output_name, int *output_width, int *output_height)
79 {
81  NativeModel *native_model = (NativeModel *)model;
82  NativeContext *ctx = &native_model->ctx;
83  AVFrame *in_frame = av_frame_alloc();
84  AVFrame *out_frame = NULL;
85 
86  if (!in_frame) {
87  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for input frame\n");
88  return DNN_ERROR;
89  }
90 
91  out_frame = av_frame_alloc();
92 
93  if (!out_frame) {
94  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for output frame\n");
95  av_frame_free(&in_frame);
96  return DNN_ERROR;
97  }
98 
99  in_frame->width = input_width;
100  in_frame->height = input_height;
101 
102  ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
103  *output_width = out_frame->width;
104  *output_height = out_frame->height;
105 
106  av_frame_free(&out_frame);
107  av_frame_free(&in_frame);
108  return ret;
109 }
110 
111 // Loads model and its parameters that are stored in a binary file with following structure:
112 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
113 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
114 // For DEPTH_TO_SPACE layer: block_size
115 DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata)
116 {
117  DNNModel *model = NULL;
118  char header_expected[] = "FFMPEGDNNNATIVE";
119  char *buf;
120  size_t size;
121  int version, header_size, major_version_expected = 1;
122  NativeModel *native_model = NULL;
123  AVIOContext *model_file_context;
124  int file_size, dnn_size, parsed_size;
125  int32_t layer;
126  DNNLayerType layer_type;
127 
128  if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
129  return NULL;
130  }
131  file_size = avio_size(model_file_context);
132 
133  model = av_mallocz(sizeof(DNNModel));
134  if (!model){
135  goto fail;
136  }
137 
138  /**
139  * check file header with string and version
140  */
141  size = sizeof(header_expected);
142  buf = av_malloc(size);
143  if (!buf) {
144  goto fail;
145  }
146 
147  // size - 1 to skip the ending '\0' which is not saved in file
148  avio_get_str(model_file_context, size - 1, buf, size);
149  dnn_size = size - 1;
150  if (strncmp(buf, header_expected, size) != 0) {
151  av_freep(&buf);
152  goto fail;
153  }
154  av_freep(&buf);
155 
156  version = (int32_t)avio_rl32(model_file_context);
157  dnn_size += 4;
158  if (version != major_version_expected) {
159  goto fail;
160  }
161 
162  // currently no need to check minor version
163  version = (int32_t)avio_rl32(model_file_context);
164  dnn_size += 4;
165  header_size = dnn_size;
166 
167  native_model = av_mallocz(sizeof(NativeModel));
168  if (!native_model){
169  goto fail;
170  }
171 
172  native_model->ctx.class = &dnn_native_class;
173  model->options = options;
174  if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
175  goto fail;
176  model->model = (void *)native_model;
177  native_model->model = model;
178 
179 #if !HAVE_PTHREAD_CANCEL
180  if (native_model->ctx.options.conv2d_threads > 1){
181  av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
182  "on this build (pthread support is required)\n");
183  }
184 #endif
185 
186  avio_seek(model_file_context, file_size - 8, SEEK_SET);
187  native_model->layers_num = (int32_t)avio_rl32(model_file_context);
188  native_model->operands_num = (int32_t)avio_rl32(model_file_context);
189  dnn_size += 8;
190  avio_seek(model_file_context, header_size, SEEK_SET);
191 
192  native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
193  if (!native_model->layers){
194  goto fail;
195  }
196 
197  native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
198  if (!native_model->operands){
199  goto fail;
200  }
201 
202  for (layer = 0; layer < native_model->layers_num; ++layer){
203  layer_type = (int32_t)avio_rl32(model_file_context);
204  dnn_size += 4;
205 
206  if (layer_type >= DLT_COUNT) {
207  goto fail;
208  }
209 
210  native_model->layers[layer].type = layer_type;
211  parsed_size = layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
212  if (!parsed_size) {
213  goto fail;
214  }
215  dnn_size += parsed_size;
216  }
217 
218  for (int32_t i = 0; i < native_model->operands_num; ++i){
219  DnnOperand *oprd;
220  int32_t name_len;
221  int32_t operand_index = (int32_t)avio_rl32(model_file_context);
222  dnn_size += 4;
223 
224  if (operand_index >= native_model->operands_num) {
225  goto fail;
226  }
227 
228  oprd = &native_model->operands[operand_index];
229  name_len = (int32_t)avio_rl32(model_file_context);
230  dnn_size += 4;
231 
232  avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
233  dnn_size += name_len;
234 
235  oprd->type = (int32_t)avio_rl32(model_file_context);
236  dnn_size += 4;
237 
238  oprd->data_type = (int32_t)avio_rl32(model_file_context);
239  dnn_size += 4;
240 
241  for (int32_t dim = 0; dim < 4; ++dim) {
242  oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
243  dnn_size += 4;
244  }
245 
246  oprd->isNHWC = 1;
247  }
248 
249  avio_closep(&model_file_context);
250 
251  if (dnn_size != file_size){
252  ff_dnn_free_model_native(&model);
253  return NULL;
254  }
255 
256  model->get_input = &get_input_native;
257  model->get_output = &get_output_native;
258  model->userdata = userdata;
259 
260  return model;
261 
262 fail:
263  ff_dnn_free_model_native(&model);
264  avio_closep(&model_file_context);
265  return NULL;
266 }
267 
268 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
269  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
270  int do_ioproc)
271 {
272  NativeModel *native_model = (NativeModel *)model->model;
273  NativeContext *ctx = &native_model->ctx;
274  int32_t layer;
276  DnnOperand *oprd = NULL;
277 
278  if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
279  av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
280  return DNN_ERROR;
281  }
282 
283  for (int i = 0; i < native_model->operands_num; ++i) {
284  oprd = &native_model->operands[i];
285  if (strcmp(oprd->name, input_name) == 0) {
286  if (oprd->type != DOT_INPUT) {
287  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
288  return DNN_ERROR;
289  }
290  break;
291  }
292  oprd = NULL;
293  }
294  if (!oprd) {
295  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
296  return DNN_ERROR;
297  }
298 
299  oprd->dims[1] = in_frame->height;
300  oprd->dims[2] = in_frame->width;
301 
302  av_freep(&oprd->data);
303  oprd->length = calculate_operand_data_length(oprd);
304  if (oprd->length <= 0) {
305  av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
306  return DNN_ERROR;
307  }
308  oprd->data = av_malloc(oprd->length);
309  if (!oprd->data) {
310  av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
311  return DNN_ERROR;
312  }
313 
314  input.height = oprd->dims[1];
315  input.width = oprd->dims[2];
316  input.channels = oprd->dims[3];
317  input.data = oprd->data;
318  input.dt = oprd->data_type;
319  if (do_ioproc) {
320  if (native_model->model->pre_proc != NULL) {
321  native_model->model->pre_proc(in_frame, &input, native_model->model->userdata);
322  } else {
323  proc_from_frame_to_dnn(in_frame, &input, ctx);
324  }
325  }
326 
327  if (nb_output != 1) {
328  // currently, the filter does not need multiple outputs,
329  // so we just pending the support until we really need it.
330  av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n");
331  return DNN_ERROR;
332  }
333 
334  for (layer = 0; layer < native_model->layers_num; ++layer){
335  DNNLayerType layer_type = native_model->layers[layer].type;
336  if (layer_funcs[layer_type].pf_exec(native_model->operands,
337  native_model->layers[layer].input_operand_indexes,
338  native_model->layers[layer].output_operand_index,
339  native_model->layers[layer].params,
340  &native_model->ctx) == DNN_ERROR) {
341  av_log(ctx, AV_LOG_ERROR, "Failed to execuet model\n");
342  return DNN_ERROR;
343  }
344  }
345 
346  for (uint32_t i = 0; i < nb_output; ++i) {
347  DnnOperand *oprd = NULL;
348  const char *output_name = output_names[i];
349  for (int j = 0; j < native_model->operands_num; ++j) {
350  if (strcmp(native_model->operands[j].name, output_name) == 0) {
351  oprd = &native_model->operands[j];
352  break;
353  }
354  }
355 
356  if (oprd == NULL) {
357  av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
358  return DNN_ERROR;
359  }
360 
361  output.data = oprd->data;
362  output.height = oprd->dims[1];
363  output.width = oprd->dims[2];
364  output.channels = oprd->dims[3];
365  output.dt = oprd->data_type;
366 
367  if (do_ioproc) {
368  if (native_model->model->post_proc != NULL) {
369  native_model->model->post_proc(out_frame, &output, native_model->model->userdata);
370  } else {
371  proc_from_dnn_to_frame(out_frame, &output, ctx);
372  }
373  } else {
374  out_frame->width = output.width;
375  out_frame->height = output.height;
376  }
377  }
378 
379  return DNN_SUCCESS;
380 }
381 
382 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
383  const char **output_names, uint32_t nb_output, AVFrame *out_frame)
384 {
385  NativeModel *native_model = (NativeModel *)model->model;
386  NativeContext *ctx = &native_model->ctx;
387 
388  if (!in_frame) {
389  av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
390  return DNN_ERROR;
391  }
392 
393  if (!out_frame) {
394  av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
395  return DNN_ERROR;
396  }
397 
398  return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
399 }
400 
402 {
403  int32_t result = 1;
404  for (int i = 0; i < 4; ++i)
405  result *= oprd->dims[i];
406 
407  return result;
408 }
409 
411 {
412  // currently, we just support DNN_FLOAT
413  uint64_t len = sizeof(float);
414  for (int i = 0; i < 4; i++) {
415  len *= oprd->dims[i];
416  if (len > INT32_MAX)
417  return 0;
418  }
419  return len;
420 }
421 
423 {
424  NativeModel *native_model;
425  ConvolutionalParams *conv_params;
426  int32_t layer;
427 
428  if (*model)
429  {
430  if ((*model)->model) {
431  native_model = (NativeModel *)(*model)->model;
432  if (native_model->layers) {
433  for (layer = 0; layer < native_model->layers_num; ++layer){
434  if (native_model->layers[layer].type == DLT_CONV2D){
435  conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
436  av_freep(&conv_params->kernel);
437  av_freep(&conv_params->biases);
438  }
439  av_freep(&native_model->layers[layer].params);
440  }
441  av_freep(&native_model->layers);
442  }
443 
444  if (native_model->operands) {
445  for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
446  av_freep(&native_model->operands[operand].data);
447  av_freep(&native_model->operands);
448  }
449 
450  av_freep(&native_model);
451  }
452  av_freep(model);
453  }
454 }
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize a AVIOContext for accessing the resource indicated by url.
Definition: aviobuf.c:1139
void * model
Definition: dnn_interface.h:46
#define NULL
Definition: coverity.c:32
Bytestream IO Context.
Definition: avio.h:161
int8_t isNHWC
NHWC if 1, otherwise NCHW.
int64_t avio_size(AVIOContext *s)
Get the filesize.
Definition: aviobuf.c:346
version
Definition: libkvazaar.c:317
This structure describes decoded (raw) audio or video data.
Definition: frame.h:308
AVOption.
Definition: opt.h:248
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int channels
Definition: dnn_interface.h:41
DNN inference functions interface for native backend.
int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
fseek() equivalent for AVIOContext.
Definition: aviobuf.c:253
#define AVIO_FLAG_READ
read-only
Definition: avio.h:674
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
int32_t calculate_operand_dims_count(const DnnOperand *oprd)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
DNNOperandType type
input/output/intermediate operand of the network
const char * options
Definition: dnn_interface.h:48
DnnOperand * operands
DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:25
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
LayerFunc layer_funcs[DLT_COUNT]
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
Definition: opt.c:1558
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
#define av_malloc(s)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
const AVClass * class
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now...
ptrdiff_t size
Definition: opengl_enc.c:100
NativeContext ctx
#define av_log(a,...)
DNNModel * model
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:53
int width
Definition: frame.h:366
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
int height
Definition: dnn_interface.h:41
unsigned int avio_rl32(AVIOContext *s)
Definition: aviobuf.c:752
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
void * data
data pointer with data length in bytes.
simple assert() macros that are a bit more flexible than ISO C assert().
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
#define fail()
Definition: checkasm.h:123
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data)
Definition: dnn_interface.h:59
static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
char name[128]
to avoid possible memory leak, do not use char *name
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, void *user_data)
Definition: dnn_interface.h:62
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
#define OFFSET(x)
DNNReturnType
Definition: dnn_interface.h:32
void ff_dnn_free_model_native(DNNModel **model)
if(ret)
const AVClass dnn_native_class
static const AVOption dnn_native_options[]
static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
DNN input&output process between AVFrame and DNNData.
void * userdata
Definition: dnn_interface.h:50
Describe the class of an AVClass context structure.
Definition: log.h:67
int dim
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
DNNLayerType type
DNNLayerType
the enum value of DNNLayerType should not be changed, the same values are used in convert_from_tensor...
#define FLAGS
const OptionDef options[]
Definition: ffmpeg_opt.c:3393
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
Definition: dnn_interface.h:55
DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:80
NativeOptions options
int32_t calculate_operand_data_length(const DnnOperand *oprd)
int len
void * params
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
and forward the result(frame or status change) to the corresponding input.If nothing is possible
int height
Definition: frame.h:366
#define av_freep(p)
int avio_get_str(AVIOContext *pb, int maxlen, char *buf, int buflen)
Read a string from pb into buf.
Definition: aviobuf.c:862
DNNDataType dt
Definition: dnn_interface.h:40
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL...
Definition: aviobuf.c:1194
DNNModel * ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata)
int i
Definition: input.c:407
int32_t output_operand_index
LAYER_LOAD_FUNC pf_load