FFmpeg
convert_from_tensorflow.py
# Copyright (c) 2019 Guo Yejun
#
# This file is part of FFmpeg.
#
# FFmpeg is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# FFmpeg is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with FFmpeg; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ==============================================================================

import tensorflow as tf
import numpy as np
import sys, struct
import convert_header as header

__all__ = ['convert_from_tensorflow']

class Operand(object):
    IOTYPE_INPUT = 1
    IOTYPE_OUTPUT = 2
    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
    DTYPE_FLOAT = 1
    DTYPE_UINT8 = 4
    index = 0
    def __init__(self, name, dtype, dims):
        self.name = name
        self.dtype = dtype
        self.dims = dims
        self.iotype = 0
        self.used_count = 0
        self.index = Operand.index
        Operand.index = Operand.index + 1
        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}

    def add_iotype(self, iotype):
        self.iotype = self.iotype | iotype
        if iotype == Operand.IOTYPE_INPUT:
            self.used_count = self.used_count + 1

    def __str__(self):
        return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index,
                self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype],
                self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count)

    def __lt__(self, other):
        return self.index < other.index

class TFConverter:
    def __init__(self, graph_def, nodes, outfile, dump4tb):
        self.graph_def = graph_def
        self.nodes = nodes
        self.outfile = outfile
        self.dump4tb = dump4tb
        self.layer_number = 0
        self.output_names = []
        self.name_node_dict = {}
        self.edges = {}
        self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
        self.conv_paddings = {'VALID':0, 'SAME':1}
        self.pool_paddings = {'VALID':0, 'SAME':1}
        self.converted_nodes = set()
        self.conv2d_scope_names = set()
        self.conv2d_scopename_inputname_dict = {}
        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4,
                        'MathBinary':5, 'MathUnary':6, 'AvgPool':7}
        self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4, 'FloorMod':5}
        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4,
                            'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10,
                            'Acosh':11, 'Atanh':12, 'Ceil':13, 'Floor':14, 'Round':15}
        self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
        self.name_operand_dict = {}


    def add_operand(self, name, type):
        node = self.name_node_dict[name]
        if name not in self.name_operand_dict:
            # node.attr['dtype'] / node.attr['T'] hold TensorFlow DataType enum
            # values; DT_FLOAT (1) and DT_UINT8 (4) match Operand.DTYPE_FLOAT
            # and Operand.DTYPE_UINT8 above.
            dtype = node.attr['dtype'].type
            if dtype == 0:
                dtype = node.attr['T'].type
            dims = [-1,-1,-1,-1]
            if 'shape' in node.attr:
                dims[0] = node.attr['shape'].shape.dim[0].size
                dims[1] = node.attr['shape'].shape.dim[1].size
                dims[2] = node.attr['shape'].shape.dim[2].size
                dims[3] = node.attr['shape'].shape.dim[3].size
            operand = Operand(name, dtype, dims)
            self.name_operand_dict[name] = operand
        self.name_operand_dict[name].add_iotype(type)
        return self.name_operand_dict[name].index


    def dump_for_tensorboard(self):
        graph = tf.get_default_graph()
        tf.import_graph_def(self.graph_def, name="")
        tf.summary.FileWriter('/tmp/graph', graph)
        print('graph saved, run "tensorboard --logdir=/tmp/graph" to see it')


    def get_conv2d_params(self, conv2d_scope_name):
        knode = self.name_node_dict[conv2d_scope_name + '/kernel']
        bnode = self.name_node_dict[conv2d_scope_name + '/bias']

        if conv2d_scope_name + '/dilation_rate' in self.name_node_dict:
            dnode = self.name_node_dict[conv2d_scope_name + '/dilation_rate']
        else:
            dnode = None

        # the BiasAdd name may have been renamed to the output name, when the
        # activation is None and BiasAdd.next is the last op (an Identity node)
        if conv2d_scope_name + '/BiasAdd' in self.edges:
            anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
            if anode.op not in self.conv_activations:
                anode = None
        else:
            anode = None
        return knode, bnode, dnode, anode


    def dump_complex_conv2d_to_file(self, node, f):
        assert(node.op == 'Conv2D')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)

        scope_name = TFConverter.get_scope_name(node.name)
        # knode for kernel, bnode for bias, dnode for dilation, anode for activation
        knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)

        if dnode is not None:
            dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
        else:
            dilation = 1

        if anode is not None:
            activation = anode.op
        else:
            activation = 'None'

        padding = node.attr['padding'].s.decode("utf-8")
        # conv2d with dilation > 1 expands into dozens of nodes that are not easy
        # to parse, so fall back to this heuristic for the padding mode.
        if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
            if self.name_node_dict[scope_name + '/stack'].op == "Const":
                padding = 'SAME'
        padding = self.conv_paddings[padding]

        ktensor = knode.attr['value'].tensor
        filter_height = ktensor.tensor_shape.dim[0].size
        filter_width = ktensor.tensor_shape.dim[1].size
        in_channels = ktensor.tensor_shape.dim[2].size
        out_channels = ktensor.tensor_shape.dim[3].size
        kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

        has_bias = 1
        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
        kernel.tofile(f)

        btensor = bnode.attr['value'].tensor
        if btensor.tensor_shape.dim[0].size == 1:
            bias = struct.pack("f", btensor.float_val[0])
        else:
            bias = btensor.tensor_content
        f.write(bias)

        input_name = self.conv2d_scopename_inputname_dict[scope_name]
        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)

        if anode is not None:
            output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
        else:
            output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)

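    # For reference, the record written above for one dilated Conv2D layer is,
    # in order: [op2code['Conv2D'], dilation, padding, activation, in_channels,
    # out_channels, filter_height, has_bias] as uint32, the kernel weights as
    # float32 in (out, height, width, in) order, the bias as float32, and then
    # [input_operand_index, output_operand_index] as uint32. Note that only
    # filter_height is stored for the kernel size.
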
    def dump_simple_conv2d_to_file(self, node, f):
        assert(node.op == 'Conv2D')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)

        node0 = self.name_node_dict[node.input[0]]
        node1 = self.name_node_dict[node.input[1]]
        if node0.op == 'Const':
            knode = node0
            input_name = node.input[1]
        else:
            knode = node1
            input_name = node.input[0]

        ktensor = knode.attr['value'].tensor
        filter_height = ktensor.tensor_shape.dim[0].size
        filter_width = ktensor.tensor_shape.dim[1].size
        in_channels = ktensor.tensor_shape.dim[2].size
        out_channels = ktensor.tensor_shape.dim[3].size
        if filter_height * filter_width * in_channels * out_channels == 1:
            kernel = np.float32(ktensor.float_val[0])
        else:
            kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

        has_bias = 0
        dilation = 1
        padding = node.attr['padding'].s.decode("utf-8")
        np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
                  in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
        kernel.tofile(f)

        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)

    def dump_depth2space_to_file(self, node, f):
        assert(node.op == 'DepthToSpace')
        self.layer_number = self.layer_number + 1
        block_size = node.attr['block_size'].i
        np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
        self.converted_nodes.add(node.name)
        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)


    def dump_mirrorpad_to_file(self, node, f):
        assert(node.op == 'MirrorPad')
        self.layer_number = self.layer_number + 1
        mode = node.attr['mode'].s
        mode = self.mirrorpad_mode[mode.decode("utf-8")]
        np.array([self.op2code[node.op], mode], dtype=np.uint32).tofile(f)
        pnode = self.name_node_dict[node.input[1]]
        self.converted_nodes.add(pnode.name)
        paddings = pnode.attr['value'].tensor.tensor_content
        f.write(paddings)
        self.converted_nodes.add(node.name)
        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)


    def dump_maximum_to_file(self, node, f):
        assert(node.op == 'Maximum')
        self.layer_number = self.layer_number + 1
        ynode = self.name_node_dict[node.input[1]]
        y = ynode.attr['value'].tensor.float_val[0]
        np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
        np.array([y], dtype=np.float32).tofile(f)
        self.converted_nodes.add(node.name)
        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)

    def dump_mathbinary_to_file(self, node, f):
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)
        i0_node = self.name_node_dict[node.input[0]]
        i1_node = self.name_node_dict[node.input[1]]
        np.array([self.op2code['MathBinary'], self.mathbin2code[node.op]], dtype=np.uint32).tofile(f)
        if i0_node.op == 'Const':
            scalar = i0_node.attr['value'].tensor.float_val[0]
            np.array([1], dtype=np.uint32).tofile(f)  # broadcast: 1
            np.array([scalar], dtype=np.float32).tofile(f)
            np.array([0], dtype=np.uint32).tofile(f)  # broadcast: 0
            input_operand_index = self.add_operand(i1_node.name, Operand.IOTYPE_INPUT)
            np.array([input_operand_index], dtype=np.uint32).tofile(f)
        elif i1_node.op == 'Const':
            scalar = i1_node.attr['value'].tensor.float_val[0]
            np.array([0], dtype=np.uint32).tofile(f)
            input_operand_index = self.add_operand(i0_node.name, Operand.IOTYPE_INPUT)
            np.array([input_operand_index], dtype=np.uint32).tofile(f)
            np.array([1], dtype=np.uint32).tofile(f)
            np.array([scalar], dtype=np.float32).tofile(f)
        else:
            np.array([0], dtype=np.uint32).tofile(f)
            input_operand_index = self.add_operand(i0_node.name, Operand.IOTYPE_INPUT)
            np.array([input_operand_index], dtype=np.uint32).tofile(f)
            np.array([0], dtype=np.uint32).tofile(f)
            input_operand_index = self.add_operand(i1_node.name, Operand.IOTYPE_INPUT)
            np.array([input_operand_index], dtype=np.uint32).tofile(f)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([output_operand_index], dtype=np.uint32).tofile(f)

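    # In other words, a MathBinary layer is written as [op2code['MathBinary'],
    # mathbin2code[op]] followed, for each of the two inputs, by a uint32 flag:
    # 1 means the input is a constant and a float32 scalar follows, 0 means a
    # uint32 operand index follows; the output operand index comes last.
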
    def dump_mathunary_to_file(self, node, f):
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)
        i0_node = self.name_node_dict[node.input[0]]
        np.array([self.op2code['MathUnary'], self.mathun2code[node.op]], dtype=np.uint32).tofile(f)
        input_operand_index = self.add_operand(i0_node.name, Operand.IOTYPE_INPUT)
        np.array([input_operand_index], dtype=np.uint32).tofile(f)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([output_operand_index], dtype=np.uint32).tofile(f)

    def dump_avg_pool_to_file(self, node, f):
        assert(node.op == 'AvgPool')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)
        node0 = self.name_node_dict[node.input[0]]
        strides = node.attr['strides']

        # TensorFlow does not support pooling strides in the batch dimension, and
        # the current native NN does not support pooling strides in the channel
        # dimension, hence the assert()s here.
        assert(strides.list.i[1]==strides.list.i[2])
        assert(strides.list.i[0]==1)
        assert(strides.list.i[3]==1)
        strides = strides.list.i[1]
        filter_node = node.attr['ksize']
        input_name = node.input[0]

        # TensorFlow does not support a pooling ksize in the batch or channel dimension.
        assert(filter_node.list.i[0]==1)
        assert(filter_node.list.i[3]==1)
        filter_height = filter_node.list.i[1]
        filter_width = filter_node.list.i[2]

        padding = node.attr['padding'].s.decode("utf-8")
        np.array([self.op2code[node.op], strides, self.pool_paddings[padding], filter_height],
                 dtype=np.uint32).tofile(f)

        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)

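    # The AvgPool record is therefore [op2code['AvgPool'], stride, padding,
    # kernel size] as uint32, followed by the input and output operand indices.
    # Only filter_height is written to the file; filter_width is read above but
    # not serialized.
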
    def dump_layers_to_file(self, f):
        for node in self.nodes:
            if node.name in self.converted_nodes:
                continue

            # conv2d with dilation generates a very complex sub-graph, so handle it specially
            if self.in_conv2d_scope(node.name):
                if node.op == 'Conv2D':
                    self.dump_complex_conv2d_to_file(node, f)
                continue

            if node.op == 'Conv2D':
                self.dump_simple_conv2d_to_file(node, f)
            elif node.op == 'AvgPool':
                self.dump_avg_pool_to_file(node, f)
            elif node.op == 'DepthToSpace':
                self.dump_depth2space_to_file(node, f)
            elif node.op == 'MirrorPad':
                self.dump_mirrorpad_to_file(node, f)
            elif node.op == 'Maximum':
                self.dump_maximum_to_file(node, f)
            elif node.op in self.mathbin2code:
                self.dump_mathbinary_to_file(node, f)
            elif node.op in self.mathun2code:
                self.dump_mathunary_to_file(node, f)

    def dump_operands_to_file(self, f):
        operands = sorted(self.name_operand_dict.values())
        for operand in operands:
            #print('{}'.format(operand))
            np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
            f.write(operand.name.encode('utf-8'))
            np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
            np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)

    def dump_to_file(self):
        with open(self.outfile, 'wb') as f:
            f.write(header.str.encode('utf-8'))
            np.array([header.major, header.minor], dtype=np.uint32).tofile(f)
            self.dump_layers_to_file(f)
            self.dump_operands_to_file(f)
            np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)

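    # Overall layout of the generated model file, as produced above: the
    # header.str magic bytes, [header.major, header.minor] as uint32, one
    # record per layer (see the dump_*_to_file methods), one record per
    # operand ([index, name length] as uint32, the name bytes, [iotype, dtype]
    # as uint32, four uint32 dims), and finally [layer_number, operand count]
    # as uint32.
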
    def generate_name_node_dict(self):
        for node in self.nodes:
            self.name_node_dict[node.name] = node


    def generate_output_names(self):
        used_names = []
        for node in self.nodes:
            for input in node.input:
                used_names.append(input)

        for node in self.nodes:
            if node.name not in used_names:
                self.output_names.append(node.name)

    def remove_identity(self):
        id_nodes = []
        id_dict = {}
        for node in self.nodes:
            if node.op == 'Identity':
                name = node.name
                input = node.input[0]
                id_nodes.append(node)
                # do not change the output name
                if name in self.output_names:
                    self.name_node_dict[input].name = name
                    self.name_node_dict[name] = self.name_node_dict[input]
                    del self.name_node_dict[input]
                else:
                    id_dict[name] = input

        for idnode in id_nodes:
            self.nodes.remove(idnode)

        for node in self.nodes:
            for i in range(len(node.input)):
                input = node.input[i]
                if input in id_dict:
                    node.input[i] = id_dict[input]

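    # Example: for a chain A -> Identity(id1) -> B, remove_identity() drops the
    # id1 node and rewrites B's input to point directly at A. When the Identity
    # node is a graph output, A is renamed to the Identity's name instead, so
    # the output name the caller refers to is preserved.
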
    def generate_edges(self):
        for node in self.nodes:
            for input in node.input:
                if input in self.edges:
                    self.edges[input].append(node)
                else:
                    self.edges[input] = [node]

    @staticmethod
    def get_scope_name(name):
        index = name.rfind('/')
        if index == -1:
            return ""
        return name[0:index]

    def in_conv2d_scope(self, name):
        inner_scope = TFConverter.get_scope_name(name)
        if inner_scope == "":
            return False
        for scope in self.conv2d_scope_names:
            index = inner_scope.find(scope)
            if index == 0:
                return True
        return False

    def generate_conv2d_scope_info(self):
        # mostly, conv2d is a sub-block in the graph; collect its scope name
        for node in self.nodes:
            if node.op == 'Conv2D':
                scope = TFConverter.get_scope_name(node.name)
                # for the case tf.nn.conv2d is called directly
                if scope == '':
                    continue
                # for the case tf.nn.conv2d is called within a scope
                if scope + '/kernel' not in self.name_node_dict:
                    continue
                self.conv2d_scope_names.add(scope)

        # get the input name to the conv2d sub-block
        for node in self.nodes:
            scope = TFConverter.get_scope_name(node.name)
            if scope in self.conv2d_scope_names:
                if node.op == 'Conv2D' or node.op == 'Shape':
                    for inp in node.input:
                        if TFConverter.get_scope_name(inp) != scope:
                            self.conv2d_scopename_inputname_dict[scope] = inp

    def run(self):
        self.generate_name_node_dict()
        self.generate_output_names()
        self.remove_identity()
        self.generate_edges()
        self.generate_conv2d_scope_info()

        if self.dump4tb:
            self.dump_for_tensorboard()

        self.dump_to_file()

def convert_from_tensorflow(infile, outfile, dump4tb):
    with open(infile, 'rb') as f:
        # read the frozen graph (a serialized GraphDef protobuf)
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        nodes = graph_def.node

    converter = TFConverter(graph_def, nodes, outfile, dump4tb)
    converter.run()
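

# A minimal usage sketch (in the FFmpeg tree this module is normally driven by
# tools/python/convert.py; the file names below are placeholders):
#
#     from convert_from_tensorflow import convert_from_tensorflow
#     convert_from_tensorflow('model.pb', 'model.model', False)
#
# This reads the frozen TensorFlow graph 'model.pb' and writes the native model
# file 'model.model' consumed by FFmpeg's DNN module.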