FFmpeg
dnn_backend_tf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN tensorflow backend implementation.
24  */
25 
26 #include "dnn_backend_tf.h"
27 #include "dnn_backend_native.h"
30 #include "libavformat/avio.h"
31 #include "libavutil/avassert.h"
32 #include "../internal.h"
35 #include "dnn_io_proc.h"
36 
37 #include <tensorflow/c/c_api.h>
38 
/**
 * User-settable options of the TensorFlow backend.
 */
typedef struct TFOptions {
    /** Value of the "sess_config" option, or NULL when it was not given. */
    char *sess_config;
} TFOptions;
42 
43 typedef struct TFContext {
44  const AVClass *class;
46 } TFContext;
47 
48 typedef struct TFModel{
51  TF_Graph *graph;
52  TF_Session *session;
53  TF_Status *status;
54 } TFModel;
55 
56 #define OFFSET(x) offsetof(TFContext, x)
57 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
58 static const AVOption dnn_tensorflow_options[] = {
59  { "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
60  { NULL }
61 };
62 
63 AVFILTER_DEFINE_CLASS(dnn_tensorflow);
64 
65 static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
66  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
67  int do_ioproc);
68 
/**
 * Deallocator installed on the TF_Buffer built by read_graph().
 *
 * @param data   buffer to release
 * @param length unused
 */
static void free_buffer(void *data, size_t length)
{
    av_free(data);
}
73 
74 static TF_Buffer *read_graph(const char *model_filename)
75 {
76  TF_Buffer *graph_buf;
77  unsigned char *graph_data = NULL;
78  AVIOContext *model_file_context;
79  long size, bytes_read;
80 
81  if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
82  return NULL;
83  }
84 
85  size = avio_size(model_file_context);
86 
87  graph_data = av_malloc(size);
88  if (!graph_data){
89  avio_closep(&model_file_context);
90  return NULL;
91  }
92  bytes_read = avio_read(model_file_context, graph_data, size);
93  avio_closep(&model_file_context);
94  if (bytes_read != size){
95  av_freep(&graph_data);
96  return NULL;
97  }
98 
99  graph_buf = TF_NewBuffer();
100  graph_buf->data = (void *)graph_data;
101  graph_buf->length = size;
102  graph_buf->data_deallocator = free_buffer;
103 
104  return graph_buf;
105 }
106 
107 static TF_Tensor *allocate_input_tensor(const DNNData *input)
108 {
109  TF_DataType dt;
110  size_t size;
111  int64_t input_dims[] = {1, input->height, input->width, input->channels};
112  switch (input->dt) {
113  case DNN_FLOAT:
114  dt = TF_FLOAT;
115  size = sizeof(float);
116  break;
117  case DNN_UINT8:
118  dt = TF_UINT8;
119  size = 1;
120  break;
121  default:
122  av_assert0(!"should not reach here");
123  }
124 
125  return TF_AllocateTensor(dt, input_dims, 4,
126  input_dims[1] * input_dims[2] * input_dims[3] * size);
127 }
128 
129 static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
130 {
131  TFModel *tf_model = (TFModel *)model;
132  TFContext *ctx = &tf_model->ctx;
133  TF_Status *status;
134  int64_t dims[4];
135 
136  TF_Output tf_output;
137  tf_output.oper = TF_GraphOperationByName(tf_model->graph, input_name);
138  if (!tf_output.oper) {
139  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
140  return DNN_ERROR;
141  }
142 
143  tf_output.index = 0;
144  input->dt = TF_OperationOutputType(tf_output);
145 
146  status = TF_NewStatus();
147  TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status);
148  if (TF_GetCode(status) != TF_OK){
149  TF_DeleteStatus(status);
150  av_log(ctx, AV_LOG_ERROR, "Failed to get input tensor shape: number of dimension incorrect\n");
151  return DNN_ERROR;
152  }
153  TF_DeleteStatus(status);
154 
155  // currently only NHWC is supported
156  av_assert0(dims[0] == 1);
157  input->height = dims[1];
158  input->width = dims[2];
159  input->channels = dims[3];
160 
161  return DNN_SUCCESS;
162 }
163 
164 static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height,
165  const char *output_name, int *output_width, int *output_height)
166 {
168  TFModel *tf_model = (TFModel *)model;
169  TFContext *ctx = &tf_model->ctx;
170  AVFrame *in_frame = av_frame_alloc();
171  AVFrame *out_frame = NULL;
172 
173  if (!in_frame) {
174  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n");
175  return DNN_ERROR;
176  }
177 
178  out_frame = av_frame_alloc();
179  if (!out_frame) {
180  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output frame\n");
181  av_frame_free(&in_frame);
182  return DNN_ERROR;
183  }
184 
185  in_frame->width = input_width;
186  in_frame->height = input_height;
187 
188  ret = execute_model_tf(tf_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
189  *output_width = out_frame->width;
190  *output_height = out_frame->height;
191 
192  av_frame_free(&out_frame);
193  av_frame_free(&in_frame);
194  return ret;
195 }
196 
197 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
198 {
199  TFContext *ctx = &tf_model->ctx;
200  TF_Buffer *graph_def;
201  TF_ImportGraphDefOptions *graph_opts;
202  TF_SessionOptions *sess_opts;
203  const TF_Operation *init_op;
205  int sess_config_length = 0;
206 
207  // prepare the sess config data
208  if (tf_model->ctx.options.sess_config != NULL) {
209  /*
210  tf_model->ctx.options.sess_config is hex to present the serialized proto
211  required by TF_SetConfig below, so we need to first generate the serialized
212  proto in a python script, the following is a script example to generate
213  serialized proto which specifies one GPU, we can change the script to add
214  more options.
215 
216  import tensorflow as tf
217  gpu_options = tf.GPUOptions(visible_device_list='0')
218  config = tf.ConfigProto(gpu_options=gpu_options)
219  s = config.SerializeToString()
220  b = ''.join("%02x" % int(ord(b)) for b in s[::-1])
221  print('0x%s' % b)
222 
223  the script output looks like: 0xab...cd, and then pass 0xab...cd to sess_config.
224  */
225  char tmp[3];
226  tmp[2] = '\0';
227 
228  if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
229  av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
230  return DNN_ERROR;
231  }
232 
233  sess_config_length = strlen(tf_model->ctx.options.sess_config);
234  if (sess_config_length % 2 != 0) {
235  av_log(ctx, AV_LOG_ERROR, "the length of sess_config is not even (%s), "
236  "please re-generate the config.\n",
237  tf_model->ctx.options.sess_config);
238  return DNN_ERROR;
239  }
240 
241  sess_config_length -= 2; //ignore the first '0x'
242  sess_config_length /= 2; //get the data length in byte
243 
244  sess_config = av_malloc(sess_config_length);
245  if (!sess_config) {
246  av_log(ctx, AV_LOG_ERROR, "failed to allocate memory\n");
247  return DNN_ERROR;
248  }
249 
250  for (int i = 0; i < sess_config_length; i++) {
251  int index = 2 + (sess_config_length - 1 - i) * 2;
252  tmp[0] = tf_model->ctx.options.sess_config[index];
253  tmp[1] = tf_model->ctx.options.sess_config[index + 1];
254  sess_config[i] = strtol(tmp, NULL, 16);
255  }
256  }
257 
258  graph_def = read_graph(model_filename);
259  if (!graph_def){
260  av_log(ctx, AV_LOG_ERROR, "Failed to read model \"%s\" graph\n", model_filename);
261  av_freep(&sess_config);
262  return DNN_ERROR;
263  }
264  tf_model->graph = TF_NewGraph();
265  tf_model->status = TF_NewStatus();
266  graph_opts = TF_NewImportGraphDefOptions();
267  TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
268  TF_DeleteImportGraphDefOptions(graph_opts);
269  TF_DeleteBuffer(graph_def);
270  if (TF_GetCode(tf_model->status) != TF_OK){
271  TF_DeleteGraph(tf_model->graph);
272  TF_DeleteStatus(tf_model->status);
273  av_log(ctx, AV_LOG_ERROR, "Failed to import serialized graph to model graph\n");
274  av_freep(&sess_config);
275  return DNN_ERROR;
276  }
277 
278  init_op = TF_GraphOperationByName(tf_model->graph, "init");
279  sess_opts = TF_NewSessionOptions();
280 
281  if (sess_config) {
282  TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->status);
283  av_freep(&sess_config);
284  if (TF_GetCode(tf_model->status) != TF_OK) {
285  av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
286  tf_model->ctx.options.sess_config);
287  return DNN_ERROR;
288  }
289  }
290 
291  tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
292  TF_DeleteSessionOptions(sess_opts);
293  if (TF_GetCode(tf_model->status) != TF_OK)
294  {
295  av_log(ctx, AV_LOG_ERROR, "Failed to create new session with model graph\n");
296  return DNN_ERROR;
297  }
298 
299  // Run initialization operation with name "init" if it is present in graph
300  if (init_op){
301  TF_SessionRun(tf_model->session, NULL,
302  NULL, NULL, 0,
303  NULL, NULL, 0,
304  &init_op, 1, NULL, tf_model->status);
305  if (TF_GetCode(tf_model->status) != TF_OK)
306  {
307  av_log(ctx, AV_LOG_ERROR, "Failed to run session when initializing\n");
308  return DNN_ERROR;
309  }
310  }
311 
312  return DNN_SUCCESS;
313 }
314 
315 #define NAME_BUFFER_SIZE 256
316 
317 static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
318  ConvolutionalParams* params, const int layer)
319 {
320  TFContext *ctx = &tf_model->ctx;
321  TF_Operation *op;
322  TF_OperationDescription *op_desc;
323  TF_Output input;
324  int64_t strides[] = {1, 1, 1, 1};
325  TF_Tensor *tensor;
326  int64_t dims[4];
327  int dims_len;
328  char name_buffer[NAME_BUFFER_SIZE];
329  int32_t size;
330 
331  size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
332  input.index = 0;
333 
334  snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
335  op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
336  TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
337  dims[0] = params->output_num;
338  dims[1] = params->kernel_size;
339  dims[2] = params->kernel_size;
340  dims[3] = params->input_num;
341  dims_len = 4;
342  tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
343  memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
344  TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
345  if (TF_GetCode(tf_model->status) != TF_OK){
346  av_log(ctx, AV_LOG_ERROR, "Failed to set value for kernel of conv layer %d\n", layer);
347  return DNN_ERROR;
348  }
349  op = TF_FinishOperation(op_desc, tf_model->status);
350  if (TF_GetCode(tf_model->status) != TF_OK){
351  av_log(ctx, AV_LOG_ERROR, "Failed to add kernel to conv layer %d\n", layer);
352  return DNN_ERROR;
353  }
354 
355  snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
356  op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
357  input.oper = op;
358  TF_AddInput(op_desc, input);
359  input.oper = transpose_op;
360  TF_AddInput(op_desc, input);
361  TF_SetAttrType(op_desc, "T", TF_FLOAT);
362  TF_SetAttrType(op_desc, "Tperm", TF_INT32);
363  op = TF_FinishOperation(op_desc, tf_model->status);
364  if (TF_GetCode(tf_model->status) != TF_OK){
365  av_log(ctx, AV_LOG_ERROR, "Failed to add transpose to conv layer %d\n", layer);
366  return DNN_ERROR;
367  }
368 
369  snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
370  op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
371  input.oper = *cur_op;
372  TF_AddInput(op_desc, input);
373  input.oper = op;
374  TF_AddInput(op_desc, input);
375  TF_SetAttrType(op_desc, "T", TF_FLOAT);
376  TF_SetAttrIntList(op_desc, "strides", strides, 4);
377  TF_SetAttrString(op_desc, "padding", "VALID", 5);
378  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
379  if (TF_GetCode(tf_model->status) != TF_OK){
380  av_log(ctx, AV_LOG_ERROR, "Failed to add conv2d to conv layer %d\n", layer);
381  return DNN_ERROR;
382  }
383 
384  snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
385  op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
386  TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
387  dims[0] = params->output_num;
388  dims_len = 1;
389  tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
390  memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
391  TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
392  if (TF_GetCode(tf_model->status) != TF_OK){
393  av_log(ctx, AV_LOG_ERROR, "Failed to set value for conv_biases of conv layer %d\n", layer);
394  return DNN_ERROR;
395  }
396  op = TF_FinishOperation(op_desc, tf_model->status);
397  if (TF_GetCode(tf_model->status) != TF_OK){
398  av_log(ctx, AV_LOG_ERROR, "Failed to add conv_biases to conv layer %d\n", layer);
399  return DNN_ERROR;
400  }
401 
402  snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
403  op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
404  input.oper = *cur_op;
405  TF_AddInput(op_desc, input);
406  input.oper = op;
407  TF_AddInput(op_desc, input);
408  TF_SetAttrType(op_desc, "T", TF_FLOAT);
409  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
410  if (TF_GetCode(tf_model->status) != TF_OK){
411  av_log(ctx, AV_LOG_ERROR, "Failed to add bias_add to conv layer %d\n", layer);
412  return DNN_ERROR;
413  }
414 
415  snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
416  switch (params->activation){
417  case RELU:
418  op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
419  break;
420  case TANH:
421  op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
422  break;
423  case SIGMOID:
424  op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
425  break;
426  default:
427  av_log(ctx, AV_LOG_ERROR, "Unsupported convolutional activation function\n");
428  return DNN_ERROR;
429  }
430  input.oper = *cur_op;
431  TF_AddInput(op_desc, input);
432  TF_SetAttrType(op_desc, "T", TF_FLOAT);
433  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
434  if (TF_GetCode(tf_model->status) != TF_OK){
435  av_log(ctx, AV_LOG_ERROR, "Failed to add activation function to conv layer %d\n", layer);
436  return DNN_ERROR;
437  }
438 
439  return DNN_SUCCESS;
440 }
441 
442 static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
443  DepthToSpaceParams *params, const int layer)
444 {
445  TFContext *ctx = &tf_model->ctx;
446  TF_OperationDescription *op_desc;
447  TF_Output input;
448  char name_buffer[NAME_BUFFER_SIZE];
449 
450  snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
451  op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
452  input.oper = *cur_op;
453  input.index = 0;
454  TF_AddInput(op_desc, input);
455  TF_SetAttrType(op_desc, "T", TF_FLOAT);
456  TF_SetAttrInt(op_desc, "block_size", params->block_size);
457  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
458  if (TF_GetCode(tf_model->status) != TF_OK){
459  av_log(ctx, AV_LOG_ERROR, "Failed to add depth_to_space to layer %d\n", layer);
460  return DNN_ERROR;
461  }
462 
463  return DNN_SUCCESS;
464 }
465 
466 static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
467  LayerPadParams *params, const int layer)
468 {
469  TFContext *ctx = &tf_model->ctx;
470  TF_Operation *op;
471  TF_Tensor *tensor;
472  TF_OperationDescription *op_desc;
473  TF_Output input;
474  int32_t *pads;
475  int64_t pads_shape[] = {4, 2};
476 
477  char name_buffer[NAME_BUFFER_SIZE];
478  snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
479 
480  op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
481  TF_SetAttrType(op_desc, "dtype", TF_INT32);
482  tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
483  pads = (int32_t *)TF_TensorData(tensor);
484  pads[0] = params->paddings[0][0];
485  pads[1] = params->paddings[0][1];
486  pads[2] = params->paddings[1][0];
487  pads[3] = params->paddings[1][1];
488  pads[4] = params->paddings[2][0];
489  pads[5] = params->paddings[2][1];
490  pads[6] = params->paddings[3][0];
491  pads[7] = params->paddings[3][1];
492  TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
493  if (TF_GetCode(tf_model->status) != TF_OK){
494  av_log(ctx, AV_LOG_ERROR, "Failed to set value for pad of layer %d\n", layer);
495  return DNN_ERROR;
496  }
497  op = TF_FinishOperation(op_desc, tf_model->status);
498  if (TF_GetCode(tf_model->status) != TF_OK){
499  av_log(ctx, AV_LOG_ERROR, "Failed to add pad to layer %d\n", layer);
500  return DNN_ERROR;
501  }
502 
503  op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
504  input.oper = *cur_op;
505  input.index = 0;
506  TF_AddInput(op_desc, input);
507  input.oper = op;
508  TF_AddInput(op_desc, input);
509  TF_SetAttrType(op_desc, "T", TF_FLOAT);
510  TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
511  TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
512  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
513  if (TF_GetCode(tf_model->status) != TF_OK){
514  av_log(ctx, AV_LOG_ERROR, "Failed to add mirror_pad to layer %d\n", layer);
515  return DNN_ERROR;
516  }
517 
518  return DNN_SUCCESS;
519 }
520 
521 static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
522  DnnLayerMaximumParams *params, const int layer)
523 {
524  TFContext *ctx = &tf_model->ctx;
525  TF_Operation *op;
526  TF_Tensor *tensor;
527  TF_OperationDescription *op_desc;
528  TF_Output input;
529  float *y;
530 
531  char name_buffer[NAME_BUFFER_SIZE];
532  snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
533 
534  op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
535  TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
536  tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
537  y = (float *)TF_TensorData(tensor);
538  *y = params->val.y;
539  TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
540  if (TF_GetCode(tf_model->status) != TF_OK){
541  av_log(ctx, AV_LOG_ERROR, "Failed to set value for maximum/y of layer %d", layer);
542  return DNN_ERROR;
543  }
544  op = TF_FinishOperation(op_desc, tf_model->status);
545  if (TF_GetCode(tf_model->status) != TF_OK){
546  av_log(ctx, AV_LOG_ERROR, "Failed to add maximum/y to layer %d\n", layer);
547  return DNN_ERROR;
548  }
549 
550  snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
551  op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
552  input.oper = *cur_op;
553  input.index = 0;
554  TF_AddInput(op_desc, input);
555  input.oper = op;
556  TF_AddInput(op_desc, input);
557  TF_SetAttrType(op_desc, "T", TF_FLOAT);
558  *cur_op = TF_FinishOperation(op_desc, tf_model->status);
559  if (TF_GetCode(tf_model->status) != TF_OK){
560  av_log(ctx, AV_LOG_ERROR, "Failed to add maximum to layer %d\n", layer);
561  return DNN_ERROR;
562  }
563 
564  return DNN_SUCCESS;
565 }
566 
567 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
568 {
569  TFContext *ctx = &tf_model->ctx;
570  int32_t layer;
571  TF_OperationDescription *op_desc;
572  TF_Operation *op;
573  TF_Operation *transpose_op;
574  TF_Tensor *tensor;
575  TF_Output input;
577  int64_t transpose_perm_shape[] = {4};
578  int64_t input_shape[] = {1, -1, -1, -1};
579  DNNReturnType layer_add_res;
580  DNNModel *model = NULL;
581  NativeModel *native_model;
582 
583  model = ff_dnn_load_model_native(model_filename, NULL, NULL);
584  if (!model){
585  av_log(ctx, AV_LOG_ERROR, "Failed to load native model\n");
586  return DNN_ERROR;
587  }
588 
589  native_model = (NativeModel *)model->model;
590  tf_model->graph = TF_NewGraph();
591  tf_model->status = TF_NewStatus();
592 
593 #define CLEANUP_ON_ERROR(tf_model) \
594  { \
595  TF_DeleteGraph(tf_model->graph); \
596  TF_DeleteStatus(tf_model->status); \
597  av_log(ctx, AV_LOG_ERROR, "Failed to set value or add operator to layer\n"); \
598  return DNN_ERROR; \
599  }
600 
601  op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
602  TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
603  TF_SetAttrShape(op_desc, "shape", input_shape, 4);
604  op = TF_FinishOperation(op_desc, tf_model->status);
605  if (TF_GetCode(tf_model->status) != TF_OK){
606  CLEANUP_ON_ERROR(tf_model);
607  }
608 
609  op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
610  TF_SetAttrType(op_desc, "dtype", TF_INT32);
611  tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
612  transpose_perm = (int32_t *)TF_TensorData(tensor);
613  transpose_perm[0] = 1;
614  transpose_perm[1] = 2;
615  transpose_perm[2] = 3;
616  transpose_perm[3] = 0;
617  TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
618  if (TF_GetCode(tf_model->status) != TF_OK){
619  CLEANUP_ON_ERROR(tf_model);
620  }
621  transpose_op = TF_FinishOperation(op_desc, tf_model->status);
622 
623  for (layer = 0; layer < native_model->layers_num; ++layer){
624  switch (native_model->layers[layer].type){
625  case DLT_INPUT:
626  layer_add_res = DNN_SUCCESS;
627  break;
628  case DLT_CONV2D:
629  layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
630  (ConvolutionalParams *)native_model->layers[layer].params, layer);
631  break;
632  case DLT_DEPTH_TO_SPACE:
633  layer_add_res = add_depth_to_space_layer(tf_model, &op,
634  (DepthToSpaceParams *)native_model->layers[layer].params, layer);
635  break;
636  case DLT_MIRROR_PAD:
637  layer_add_res = add_pad_layer(tf_model, &op,
638  (LayerPadParams *)native_model->layers[layer].params, layer);
639  break;
640  case DLT_MAXIMUM:
641  layer_add_res = add_maximum_layer(tf_model, &op,
642  (DnnLayerMaximumParams *)native_model->layers[layer].params, layer);
643  break;
644  default:
645  CLEANUP_ON_ERROR(tf_model);
646  }
647 
648  if (layer_add_res != DNN_SUCCESS){
649  CLEANUP_ON_ERROR(tf_model);
650  }
651  }
652 
653  op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
654  input.oper = op;
655  input.index = 0;
656  TF_AddInput(op_desc, input);
657  TF_FinishOperation(op_desc, tf_model->status);
658  if (TF_GetCode(tf_model->status) != TF_OK){
659  CLEANUP_ON_ERROR(tf_model);
660  }
661 
662  ff_dnn_free_model_native(&model);
663 
664  return DNN_SUCCESS;
665 }
666 
667 DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata)
668 {
669  DNNModel *model = NULL;
670  TFModel *tf_model = NULL;
671 
672  model = av_mallocz(sizeof(DNNModel));
673  if (!model){
674  return NULL;
675  }
676 
677  tf_model = av_mallocz(sizeof(TFModel));
678  if (!tf_model){
679  av_freep(&model);
680  return NULL;
681  }
682  tf_model->ctx.class = &dnn_tensorflow_class;
683  tf_model->model = model;
684 
685  //parse options
686  av_opt_set_defaults(&tf_model->ctx);
687  if (av_opt_set_from_string(&tf_model->ctx, options, NULL, "=", "&") < 0) {
688  av_log(&tf_model->ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
689  av_freep(&tf_model);
690  av_freep(&model);
691  return NULL;
692  }
693 
694  if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
695  if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
696  av_freep(&tf_model);
697  av_freep(&model);
698 
699  return NULL;
700  }
701  }
702 
703  model->model = (void *)tf_model;
704  model->get_input = &get_input_tf;
705  model->get_output = &get_output_tf;
706  model->options = options;
707  model->userdata = userdata;
708 
709  return model;
710 }
711 
712 static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
713  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
714  int do_ioproc)
715 {
716  TF_Output *tf_outputs;
717  TFModel *tf_model = (TFModel *)model->model;
718  TFContext *ctx = &tf_model->ctx;
720  TF_Tensor **output_tensors;
721  TF_Output tf_input;
722  TF_Tensor *input_tensor;
723 
724  if (get_input_tf(tf_model, &input, input_name) != DNN_SUCCESS)
725  return DNN_ERROR;
726  input.height = in_frame->height;
727  input.width = in_frame->width;
728 
729  tf_input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
730  if (!tf_input.oper){
731  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
732  return DNN_ERROR;
733  }
734  tf_input.index = 0;
735  input_tensor = allocate_input_tensor(&input);
736  if (!input_tensor){
737  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input tensor\n");
738  return DNN_ERROR;
739  }
740  input.data = (float *)TF_TensorData(input_tensor);
741 
742  if (do_ioproc) {
743  if (tf_model->model->pre_proc != NULL) {
744  tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata);
745  } else {
746  proc_from_frame_to_dnn(in_frame, &input, ctx);
747  }
748  }
749 
750  if (nb_output != 1) {
751  // currently, the filter does not need multiple outputs,
752  // so we just pending the support until we really need it.
753  av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n");
754  return DNN_ERROR;
755  }
756 
757  tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
758  if (tf_outputs == NULL) {
759  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *tf_outputs\n"); \
761  }
762 
763  output_tensors = av_mallocz_array(nb_output, sizeof(*output_tensors));
764  if (!output_tensors) {
765  av_freep(&tf_outputs);
766  av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output tensor\n"); \
768  }
769 
770  for (int i = 0; i < nb_output; ++i) {
771  tf_outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
772  if (!tf_outputs[i].oper) {
773  av_freep(&tf_outputs);
774  av_freep(&output_tensors);
775  av_log(ctx, AV_LOG_ERROR, "Could not find output \"%s\" in model\n", output_names[i]); \
777  }
778  tf_outputs[i].index = 0;
779  }
780 
781  TF_SessionRun(tf_model->session, NULL,
782  &tf_input, &input_tensor, 1,
783  tf_outputs, output_tensors, nb_output,
784  NULL, 0, NULL, tf_model->status);
785  if (TF_GetCode(tf_model->status) != TF_OK) {
786  av_freep(&tf_outputs);
787  av_freep(&output_tensors);
788  av_log(ctx, AV_LOG_ERROR, "Failed to run session when executing model\n");
789  return DNN_ERROR;
790  }
791 
792  for (uint32_t i = 0; i < nb_output; ++i) {
793  output.height = TF_Dim(output_tensors[i], 1);
794  output.width = TF_Dim(output_tensors[i], 2);
795  output.channels = TF_Dim(output_tensors[i], 3);
796  output.data = TF_TensorData(output_tensors[i]);
797  output.dt = TF_TensorType(output_tensors[i]);
798 
799  if (do_ioproc) {
800  if (tf_model->model->post_proc != NULL) {
801  tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata);
802  } else {
803  proc_from_dnn_to_frame(out_frame, &output, ctx);
804  }
805  } else {
806  out_frame->width = output.width;
807  out_frame->height = output.height;
808  }
809  }
810 
811  for (uint32_t i = 0; i < nb_output; ++i) {
812  if (output_tensors[i]) {
813  TF_DeleteTensor(output_tensors[i]);
814  }
815  }
816  TF_DeleteTensor(input_tensor);
817  av_freep(&output_tensors);
818  av_freep(&tf_outputs);
819  return DNN_SUCCESS;
820 }
821 
822 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
823  const char **output_names, uint32_t nb_output, AVFrame *out_frame)
824 {
825  TFModel *tf_model = (TFModel *)model->model;
826  TFContext *ctx = &tf_model->ctx;
827 
828  if (!in_frame) {
829  av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
830  return DNN_ERROR;
831  }
832 
833  if (!out_frame) {
834  av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
835  return DNN_ERROR;
836  }
837 
838  return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
839 }
840 
842 {
843  TFModel *tf_model;
844 
845  if (*model){
846  tf_model = (TFModel *)(*model)->model;
847  if (tf_model->graph){
848  TF_DeleteGraph(tf_model->graph);
849  }
850  if (tf_model->session){
851  TF_CloseSession(tf_model->session, tf_model->status);
852  TF_DeleteSession(tf_model->session, tf_model->status);
853  }
854  if (tf_model->status){
855  TF_DeleteStatus(tf_model->status);
856  }
857  av_freep(&tf_model);
858  av_freep(model);
859  }
860 }
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize a AVIOContext for accessing the resource indicated by url.
Definition: aviobuf.c:1141
void * model
Definition: dnn_interface.h:46
#define NULL
Definition: coverity.c:32
Bytestream IO Context.
Definition: avio.h:161
int64_t avio_size(AVIOContext *s)
Get the filesize.
Definition: aviobuf.c:346
Buffered I/O operations.
This structure describes decoded (raw) audio or video data.
Definition: frame.h:314
TFOptions options
AVOption.
Definition: opt.h:248
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:100
static TF_Buffer * read_graph(const char *model_filename)
void av_opt_set_defaults(void *s)
Set the values of all AVOption fields to their default values.
Definition: opt.c:1357
int channels
Definition: dnn_interface.h:41
DNN inference functions interface for native backend.
#define AVIO_FLAG_READ
read-only
Definition: avio.h:674
static const AVOption dnn_tensorflow_options[]
TFContext ctx
char * sess_config
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
const char * options
Definition: dnn_interface.h:48
DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:25
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
Definition: opt.c:1558
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
uint8_t
#define av_malloc(s)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
TF_Status * status
static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
DNN inference functions interface for TensorFlow backend.
ptrdiff_t size
Definition: opengl_enc.c:100
union DnnLayerMaximumParams::@207 val
#define av_log(a,...)
int avio_read(AVIOContext *s, unsigned char *buf, int size)
Read size bytes from AVIOContext into buf.
Definition: aviobuf.c:637
#define OFFSET(x)
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:53
int width
Definition: frame.h:372
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op, DnnLayerMaximumParams *params, const int layer)
int height
Definition: dnn_interface.h:41
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
Definition: frame.c:203
void * data
Definition: dnn_interface.h:39
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
GLenum GLint * params
Definition: opengl_enc.c:113
simple assert() macros that are a bit more flexible than ISO C assert().
GLsizei GLsizei * length
Definition: opengl_enc.c:114
static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data)
Definition: dnn_interface.h:59
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
#define NAME_BUFFER_SIZE
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, void *user_data)
Definition: dnn_interface.h:62
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
DNN inference functions interface for native backend.
const AVClass * class
DNN inference functions interface for native backend.
DNNReturnType
Definition: dnn_interface.h:32
void ff_dnn_free_model_native(DNNModel **model)
if(ret)
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op, ConvolutionalParams *params, const int layer)
static TF_Tensor * allocate_input_tensor(const DNNData *input)
static void free_buffer(void *data, size_t length)
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op, DepthToSpaceParams *params, const int layer)
DNNModel * model
static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
DNN input&output process between AVFrame and DNNData.
void * userdata
Definition: dnn_interface.h:50
Describe the class of an AVClass context structure.
Definition: log.h:67
layer pad (equivalent to tf.pad) for native backend.
int index
Definition: gxfenc.c:89
... they must not be accessed directly. The fifo field contains the frames that are queued in the input for processing by the filter. The status_in and status_out fields contain the queued status (EOF or error) of the link.
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
#define snprintf
Definition: snprintf.h:34
DNNLayerType type
AVFILTER_DEFINE_CLASS(dnn_tensorflow)
TF_Session * session
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:75
const OptionDef options[]
Definition: ffmpeg_opt.c:3400
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
Definition: dnn_interface.h:55
DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:80
TF_Graph * graph
void ff_dnn_free_model_tf(DNNModel **model)
DNNModel * ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata)
#define FLAGS
void * params
static void transpose_perm(int16_t *out, int16_t *in, int num_vect, const uint8_t line_len[2], int length_div)
Interpret the input data as in the following table:
Definition: twinvq.c:620
... these buffered frames must be flushed immediately if a new input produces new output. If the input frame is not enough to produce output, the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values: if request_frame could produce a frame, or at least make progress towards producing a frame, it should return 0.
int height
Definition: frame.h:372
#define av_freep(p)
static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op, LayerPadParams *params, const int layer)
#define av_malloc_array(a, b)
DNNDataType dt
Definition: dnn_interface.h:40
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL...
Definition: aviobuf.c:1196
DNNModel * ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata)
int i
Definition: input.c:407
#define CLEANUP_ON_ERROR(tf_model)
void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.c:190
static uint8_t tmp[11]
Definition: aes_ctr.c:26