/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DNN TensorFlow backend implementation.
 */

#include "dnn_backend_tf.h"
#include "dnn_backend_native.h"
#include "dnn_backend_native_layer_conv2d.h"
#include "dnn_backend_native_layer_depth2space.h"
#include "libavformat/avio.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_pad.h"
#include "dnn_backend_native_layer_maximum.h"
#include "dnn_io_proc.h"

#include <tensorflow/c/c_api.h>

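/*
 * Usage sketch (illustrative only): filters normally reach this backend
 * through the generic DNN interface rather than calling it directly. The
 * model path "srcnn.pb" below is an assumption for the sake of the example;
 * the tensor names "x" and "y" match what load_native_model generates but
 * are otherwise model-specific:
 *
 *     DNNModel *model = ff_dnn_load_model_tf("srcnn.pb", NULL, NULL);
 *     const char *output_name = "y";
 *     if (model) {
 *         if (ff_dnn_execute_model_tf(model, "x", in_frame,
 *                                     &output_name, 1, out_frame) != DNN_SUCCESS)
 *             av_log(NULL, AV_LOG_ERROR, "inference failed\n");
 *         ff_dnn_free_model_tf(&model);
 *     }
 */
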
typedef struct TFContext {
    const AVClass *class;
} TFContext;

typedef struct TFModel{
    TFContext ctx;
    DNNModel *model;
    TF_Graph *graph;
    TF_Session *session;
    TF_Status *status;
} TFModel;

static const AVClass dnn_tensorflow_class = {
    .class_name = "dnn_tensorflow",
    .item_name  = av_default_item_name,
    .option     = NULL,
    .version    = LIBAVUTIL_VERSION_INT,
    .category   = AV_CLASS_CATEGORY_FILTER,
};

static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
                                      int do_ioproc);

static void free_buffer(void *data, size_t length)
{
    av_freep(&data);
}

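/* Read the serialized TensorFlow GraphDef from model_filename into a
 * TF_Buffer, going through avio so any protocol FFmpeg can open works. */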
static TF_Buffer *read_graph(const char *model_filename)
{
    TF_Buffer *graph_buf;
    unsigned char *graph_data = NULL;
    AVIOContext *model_file_context;
    long size, bytes_read;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }

    size = avio_size(model_file_context);

    graph_data = av_malloc(size);
    if (!graph_data){
        avio_closep(&model_file_context);
        return NULL;
    }
    bytes_read = avio_read(model_file_context, graph_data, size);
    avio_closep(&model_file_context);
    if (bytes_read != size){
        av_freep(&graph_data);
        return NULL;
    }

    graph_buf = TF_NewBuffer();
    graph_buf->data = (void *)graph_data;
    graph_buf->length = size;
    graph_buf->data_deallocator = free_buffer;

    return graph_buf;
}

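/* Allocate an empty NHWC input tensor (batch size 1) whose shape and element
 * type match the given DNNData description; the caller fills in the data. */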
static TF_Tensor *allocate_input_tensor(const DNNData *input)
{
    TF_DataType dt;
    size_t size;
    int64_t input_dims[] = {1, input->height, input->width, input->channels};
    switch (input->dt) {
    case DNN_FLOAT:
        dt = TF_FLOAT;
        size = sizeof(float);
        break;
    case DNN_UINT8:
        dt = TF_UINT8;
        size = 1;
        break;
    default:
        av_assert0(!"should not reach here");
    }

    return TF_AllocateTensor(dt, input_dims, 4,
                             input_dims[1] * input_dims[2] * input_dims[3] * size);
}

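/* Query the data type and NHWC shape of the named input operation so the
 * caller knows what the model expects. */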
static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
{
    TFModel *tf_model = (TFModel *)model;
    TFContext *ctx = &tf_model->ctx;
    TF_Status *status;
    int64_t dims[4];

    TF_Output tf_output;
    tf_output.oper = TF_GraphOperationByName(tf_model->graph, input_name);
    if (!tf_output.oper) {
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
        return DNN_ERROR;
    }

    tf_output.index = 0;
    input->dt = TF_OperationOutputType(tf_output);

    status = TF_NewStatus();
    TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status);
    if (TF_GetCode(status) != TF_OK){
        TF_DeleteStatus(status);
        av_log(ctx, AV_LOG_ERROR, "Failed to get input tensor shape: number of dimensions incorrect\n");
        return DNN_ERROR;
    }
    TF_DeleteStatus(status);

    // currently only NHWC is supported
    av_assert0(dims[0] == 1);
    input->height = dims[1];
    input->width = dims[2];
    input->channels = dims[3];

    return DNN_SUCCESS;
}

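/* Determine the output dimensions for a given input size by running the
 * model once on a dummy frame with I/O processing disabled. */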
static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height,
                                   const char *output_name, int *output_width, int *output_height)
{
    DNNReturnType ret;
    TFModel *tf_model = (TFModel *)model;
    AVFrame *in_frame = av_frame_alloc();
    AVFrame *out_frame = av_frame_alloc();
    if (!in_frame || !out_frame) {
        av_frame_free(&in_frame);
        av_frame_free(&out_frame);
        av_log(&tf_model->ctx, AV_LOG_ERROR, "Failed to allocate memory for input/output frame\n");
        return DNN_ERROR;
    }
    in_frame->width = input_width;
    in_frame->height = input_height;

    ret = execute_model_tf(tf_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
    *output_width = out_frame->width;
    *output_height = out_frame->height;

    av_frame_free(&out_frame);
    av_frame_free(&in_frame);
    return ret;
}

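/* Load a frozen TensorFlow GraphDef from file, import it into a new graph,
 * create a session for it, and run the optional "init" operation. */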
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
{
    TFContext *ctx = &tf_model->ctx;
    TF_Buffer *graph_def;
    TF_ImportGraphDefOptions *graph_opts;
    TF_SessionOptions *sess_opts;
    const TF_Operation *init_op;

    graph_def = read_graph(model_filename);
    if (!graph_def){
        av_log(ctx, AV_LOG_ERROR, "Failed to read model \"%s\" graph\n", model_filename);
        return DNN_ERROR;
    }
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();
    graph_opts = TF_NewImportGraphDefOptions();
    TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
    TF_DeleteImportGraphDefOptions(graph_opts);
    TF_DeleteBuffer(graph_def);
    if (TF_GetCode(tf_model->status) != TF_OK){
        TF_DeleteGraph(tf_model->graph);
        TF_DeleteStatus(tf_model->status);
        av_log(ctx, AV_LOG_ERROR, "Failed to import serialized graph to model graph\n");
        return DNN_ERROR;
    }

    init_op = TF_GraphOperationByName(tf_model->graph, "init");
    sess_opts = TF_NewSessionOptions();
    tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
    TF_DeleteSessionOptions(sess_opts);
    if (TF_GetCode(tf_model->status) != TF_OK)
    {
        av_log(ctx, AV_LOG_ERROR, "Failed to create new session with model graph\n");
        return DNN_ERROR;
    }

    // Run initialization operation with name "init" if it is present in graph
    if (init_op){
        TF_SessionRun(tf_model->session, NULL,
                      NULL, NULL, 0,
                      NULL, NULL, 0,
                      &init_op, 1, NULL, tf_model->status);
        if (TF_GetCode(tf_model->status) != TF_OK)
        {
            av_log(ctx, AV_LOG_ERROR, "Failed to run session when initializing\n");
            return DNN_ERROR;
        }
    }

    return DNN_SUCCESS;
}

#define NAME_BUFFER_SIZE 256

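/* Translate one native convolutional layer into TF operations: a Const
 * kernel (stored OHWI, transposed to the HWIO layout Conv2D expects),
 * then Conv2D, BiasAdd and the activation function. */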
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
                                    ConvolutionalParams* params, const int layer)
{
    TFContext *ctx = &tf_model->ctx;
    TF_Operation *op;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int64_t strides[] = {1, 1, 1, 1};
    TF_Tensor *tensor;
    int64_t dims[4];
    int dims_len;
    char name_buffer[NAME_BUFFER_SIZE];
    int32_t size;

    size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
    input.index = 0;

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims[1] = params->kernel_size;
    dims[2] = params->kernel_size;
    dims[3] = params->input_num;
    dims_len = 4;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
    memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to set value for kernel of conv layer %d\n", layer);
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add kernel to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
    input.oper = op;
    TF_AddInput(op_desc, input);
    input.oper = transpose_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tperm", TF_INT32);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add transpose to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrIntList(op_desc, "strides", strides, 4);
    TF_SetAttrString(op_desc, "padding", "VALID", 5);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add conv2d to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims_len = 1;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
    memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to set value for conv_biases of conv layer %d\n", layer);
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add conv_biases to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add bias_add to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
    switch (params->activation){
    case RELU:
        op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
        break;
    case TANH:
        op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
        break;
    case SIGMOID:
        op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
        break;
    default:
        av_log(ctx, AV_LOG_ERROR, "Unsupported convolutional activation function\n");
        return DNN_ERROR;
    }
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add activation function to conv layer %d\n", layer);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

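/* Map a native depth-to-space layer onto the TF DepthToSpace operation. */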
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
                                              DepthToSpaceParams *params, const int layer)
{
    TFContext *ctx = &tf_model->ctx;
    TF_OperationDescription *op_desc;
    TF_Output input;
    char name_buffer[NAME_BUFFER_SIZE];

    snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrInt(op_desc, "block_size", params->block_size);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add depth_to_space to layer %d\n", layer);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

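/* Map a native mirror-pad layer onto MirrorPad in SYMMETRIC mode, feeding it
 * the paddings as a [4, 2] Const tensor. */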
static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
                                   LayerPadParams *params, const int layer)
{
    TFContext *ctx = &tf_model->ctx;
    TF_Operation *op;
    TF_Tensor *tensor;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int32_t *pads;
    int64_t pads_shape[] = {4, 2};

    char name_buffer[NAME_BUFFER_SIZE];
    snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);

    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
    pads = (int32_t *)TF_TensorData(tensor);
    pads[0] = params->paddings[0][0];
    pads[1] = params->paddings[0][1];
    pads[2] = params->paddings[1][0];
    pads[3] = params->paddings[1][1];
    pads[4] = params->paddings[2][0];
    pads[5] = params->paddings[2][1];
    pads[6] = params->paddings[3][0];
    pads[7] = params->paddings[3][1];
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to set value for pad of layer %d\n", layer);
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add pad to layer %d\n", layer);
        return DNN_ERROR;
    }

    op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
    TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add mirror_pad to layer %d\n", layer);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

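/* Map a native maximum layer onto the TF Maximum operation against a scalar
 * Const holding the comparison value. */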
static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
                                       DnnLayerMaximumParams *params, const int layer)
{
    TFContext *ctx = &tf_model->ctx;
    TF_Operation *op;
    TF_Tensor *tensor;
    TF_OperationDescription *op_desc;
    TF_Output input;
    float *y;

    char name_buffer[NAME_BUFFER_SIZE];
    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);

    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
    y = (float *)TF_TensorData(tensor);
    *y = params->val.y;
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to set value for maximum/y of layer %d\n", layer);
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add maximum/y to layer %d\n", layer);
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        av_log(ctx, AV_LOG_ERROR, "Failed to add maximum to layer %d\n", layer);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

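/* Fallback loader: parse the file as a native model and rebuild it as a TF
 * graph with a Placeholder named "x" as input and an Identity named "y" as
 * output. */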
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
{
    TFContext *ctx = &tf_model->ctx;
    int32_t layer;
    TF_OperationDescription *op_desc;
    TF_Operation *op;
    TF_Operation *transpose_op;
    TF_Tensor *tensor;
    TF_Output input;
    int32_t *transpose_perm;
    int64_t transpose_perm_shape[] = {4};
    int64_t input_shape[] = {1, -1, -1, -1};
    DNNReturnType layer_add_res;
    DNNModel *model = NULL;
    NativeModel *native_model;

    model = ff_dnn_load_model_native(model_filename, NULL, NULL);
    if (!model){
        av_log(ctx, AV_LOG_ERROR, "Failed to load native model\n");
        return DNN_ERROR;
    }

    native_model = (NativeModel *)model->model;
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();

#define CLEANUP_ON_ERROR(tf_model) \
    { \
        TF_DeleteGraph(tf_model->graph); \
        TF_DeleteStatus(tf_model->status); \
        av_log(ctx, AV_LOG_ERROR, "Failed to set value or add operator to layer\n"); \
        return DNN_ERROR; \
    }

    op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    TF_SetAttrShape(op_desc, "shape", input_shape, 4);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
    transpose_perm = (int32_t *)TF_TensorData(tensor);
    transpose_perm[0] = 1;
    transpose_perm[1] = 2;
    transpose_perm[2] = 3;
    transpose_perm[3] = 0;
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }
    transpose_op = TF_FinishOperation(op_desc, tf_model->status);

    for (layer = 0; layer < native_model->layers_num; ++layer){
        switch (native_model->layers[layer].type){
        case DLT_INPUT:
            layer_add_res = DNN_SUCCESS;
            break;
        case DLT_CONV2D:
            layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
                                           (ConvolutionalParams *)native_model->layers[layer].params, layer);
            break;
        case DLT_DEPTH_TO_SPACE:
            layer_add_res = add_depth_to_space_layer(tf_model, &op,
                                                     (DepthToSpaceParams *)native_model->layers[layer].params, layer);
            break;
        case DLT_MIRROR_PAD:
            layer_add_res = add_pad_layer(tf_model, &op,
                                          (LayerPadParams *)native_model->layers[layer].params, layer);
            break;
        case DLT_MAXIMUM:
            layer_add_res = add_maximum_layer(tf_model, &op,
                                              (DnnLayerMaximumParams *)native_model->layers[layer].params, layer);
            break;
        default:
            CLEANUP_ON_ERROR(tf_model);
        }

        if (layer_add_res != DNN_SUCCESS){
            CLEANUP_ON_ERROR(tf_model);
        }
    }

    op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
    input.oper = op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    ff_dnn_free_model_native(&model);

    return DNN_SUCCESS;
}

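/* Public entry point: try the file as a TensorFlow GraphDef first and fall
 * back to converting a native model on failure. */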
DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata)
{
    DNNModel *model = NULL;
    TFModel *tf_model = NULL;

    model = av_mallocz(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    tf_model = av_mallocz(sizeof(TFModel));
    if (!tf_model){
        av_freep(&model);
        return NULL;
    }
    tf_model->ctx.class = &dnn_tensorflow_class;
    tf_model->model = model;

    if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
        if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
            av_freep(&tf_model);
            av_freep(&model);

            return NULL;
        }
    }

    model->model = (void *)tf_model;
    model->get_input = &get_input_tf;
    model->get_output = &get_output_tf;
    model->options = options;
    model->userdata = userdata;

    return model;
}

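/* Core inference routine: fill the input tensor from in_frame (optionally via
 * the pre_proc callback), run the session, and copy the single output back
 * into out_frame (optionally via post_proc). With do_ioproc disabled, only
 * the output dimensions are propagated, which is how get_output_tf probes
 * them. */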
static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
                                      int do_ioproc)
{
    TF_Output *tf_outputs;
    TFModel *tf_model = (TFModel *)model->model;
    TFContext *ctx = &tf_model->ctx;
    DNNData input, output;
    TF_Tensor **output_tensors;
    TF_Output tf_input;
    TF_Tensor *input_tensor;

    if (get_input_tf(tf_model, &input, input_name) != DNN_SUCCESS)
        return DNN_ERROR;
    input.height = in_frame->height;
    input.width = in_frame->width;

    tf_input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
    if (!tf_input.oper){
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
        return DNN_ERROR;
    }
    tf_input.index = 0;
    input_tensor = allocate_input_tensor(&input);
    if (!input_tensor){
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input tensor\n");
        return DNN_ERROR;
    }
    input.data = (float *)TF_TensorData(input_tensor);

    if (do_ioproc) {
        if (tf_model->model->pre_proc != NULL) {
            tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata);
        } else {
            proc_from_frame_to_dnn(in_frame, &input, ctx);
        }
    }

    if (nb_output != 1) {
        // currently, the filter does not need multiple outputs,
        // so we postpone the support until it is actually needed.
        av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n");
        return DNN_ERROR;
    }

    tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
    if (tf_outputs == NULL) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *tf_outputs\n");
        return DNN_ERROR;
    }

    output_tensors = av_mallocz_array(nb_output, sizeof(*output_tensors));
    if (!output_tensors) {
        av_freep(&tf_outputs);
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output tensor\n");
        return DNN_ERROR;
    }

    for (int i = 0; i < nb_output; ++i) {
        tf_outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
        if (!tf_outputs[i].oper) {
            av_freep(&tf_outputs);
            av_freep(&output_tensors);
            av_log(ctx, AV_LOG_ERROR, "Could not find output \"%s\" in model\n", output_names[i]);
            return DNN_ERROR;
        }
        tf_outputs[i].index = 0;
    }

    TF_SessionRun(tf_model->session, NULL,
                  &tf_input, &input_tensor, 1,
                  tf_outputs, output_tensors, nb_output,
                  NULL, 0, NULL, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK) {
        av_freep(&tf_outputs);
        av_freep(&output_tensors);
        av_log(ctx, AV_LOG_ERROR, "Failed to run session when executing model\n");
        return DNN_ERROR;
    }

    for (uint32_t i = 0; i < nb_output; ++i) {
        output.height = TF_Dim(output_tensors[i], 1);
        output.width = TF_Dim(output_tensors[i], 2);
        output.channels = TF_Dim(output_tensors[i], 3);
        output.data = TF_TensorData(output_tensors[i]);
        output.dt = TF_TensorType(output_tensors[i]);

        if (do_ioproc) {
            if (tf_model->model->post_proc != NULL) {
                tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata);
            } else {
                proc_from_dnn_to_frame(out_frame, &output, ctx);
            }
        } else {
            out_frame->width = output.width;
            out_frame->height = output.height;
        }
    }

    for (uint32_t i = 0; i < nb_output; ++i) {
        if (output_tensors[i]) {
            TF_DeleteTensor(output_tensors[i]);
        }
    }
    TF_DeleteTensor(input_tensor);
    av_freep(&output_tensors);
    av_freep(&tf_outputs);
    return DNN_SUCCESS;
}

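/* Public inference wrapper: validate the frames and run with I/O processing
 * enabled. */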
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
{
    TFModel *tf_model = (TFModel *)model->model;
    TFContext *ctx = &tf_model->ctx;

    if (!in_frame) {
        av_log(ctx, AV_LOG_ERROR, "in_frame is NULL when executing the model.\n");
        return DNN_ERROR;
    }

    if (!out_frame) {
        av_log(ctx, AV_LOG_ERROR, "out_frame is NULL when executing the model.\n");
        return DNN_ERROR;
    }

    return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
}

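/* Free the session, graph, and status owned by the model, then the model
 * itself. */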
void ff_dnn_free_model_tf(DNNModel **model)
{
    TFModel *tf_model;

    if (*model){
        tf_model = (TFModel *)(*model)->model;
        if (tf_model->graph){
            TF_DeleteGraph(tf_model->graph);
        }
        if (tf_model->session){
            TF_CloseSession(tf_model->session, tf_model->status);
            TF_DeleteSession(tf_model->session, tf_model->status);
        }
        if (tf_model->status){
            TF_DeleteStatus(tf_model->status);
        }
        av_freep(&tf_model);
        av_freep(model);
    }
}