FFmpeg
convert_from_tensorflow.py
Go to the documentation of this file.
1 # Copyright (c) 2019 Guo Yejun
2 #
3 # This file is part of FFmpeg.
4 #
5 # FFmpeg is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
9 #
10 # FFmpeg is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with FFmpeg; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 # ==============================================================================
19 
20 import tensorflow as tf
21 import numpy as np
22 import sys, struct
23 import convert_header as header
24 
25 __all__ = ['convert_from_tensorflow']
26 
class Operand(object):
    """One model operand (a tensor) with its io role, data type and 4-D dims."""
    IOTYPE_INPUT = 1
    IOTYPE_OUTPUT = 2
    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
    DTYPE_FLOAT = 1
    DTYPE_UINT8 = 4
    # class-wide counter: each new Operand takes the next index
    index = 0

    def __init__(self, name, dtype, dims):
        self.name = name
        self.dtype = dtype
        self.dims = dims
        self.iotype = 0
        self.used_count = 0
        # snapshot the shared counter, then advance it
        self.index = Operand.index
        Operand.index += 1
        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}

    def add_iotype(self, iotype):
        """OR *iotype* into this operand's role; count every use as an input."""
        self.iotype |= iotype
        if iotype == Operand.IOTYPE_INPUT:
            self.used_count += 1

    def __str__(self):
        return ("{}: (name: {}, iotype: {}, dtype: {}, "
                "dims: ({},{},{},{}) used_count: {})").format(
                    self.index, self.name, self.iotype2str[self.iotype],
                    self.dtype2str[self.dtype], self.dims[0], self.dims[1],
                    self.dims[2], self.dims[3], self.used_count)

    def __lt__(self, other):
        # operands sort in creation order
        return self.index < other.index
57 
59  def __init__(self, graph_def, nodes, outfile, dump4tb):
60  self.graph_def = graph_def
61  self.nodes = nodes
62  self.outfile = outfile
63  self.dump4tb = dump4tb
64  self.layer_number = 0
65  self.output_names = []
66  self.name_node_dict = {}
67  self.edges = {}
68  self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
69  self.conv_paddings = {'VALID':0, 'SAME':1}
73  self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
74  self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
76 
77 
78  def add_operand(self, name, type):
79  node = self.name_node_dict[name]
80  if name not in self.name_operand_dict:
81  dtype = node.attr['dtype'].type
82  if dtype == 0:
83  dtype = node.attr['T'].type
84  dims = [-1,-1,-1,-1]
85  if 'shape' in node.attr:
86  dims[0] = node.attr['shape'].shape.dim[0].size
87  dims[1] = node.attr['shape'].shape.dim[1].size
88  dims[2] = node.attr['shape'].shape.dim[2].size
89  dims[3] = node.attr['shape'].shape.dim[3].size
90  operand = Operand(name, dtype, dims)
91  self.name_operand_dict[name] = operand;
92  self.name_operand_dict[name].add_iotype(type)
93  return self.name_operand_dict[name].index
94 
95 
97  graph = tf.get_default_graph()
98  tf.import_graph_def(self.graph_def, name="")
99  tf.summary.FileWriter('/tmp/graph', graph)
100  print('graph saved, run "tensorboard --logdir=/tmp/graph" to see it')
101 
102 
103  def get_conv2d_params(self, conv2d_scope_name):
104  knode = self.name_node_dict[conv2d_scope_name + '/kernel']
105  bnode = self.name_node_dict[conv2d_scope_name + '/bias']
106 
107  if conv2d_scope_name + '/dilation_rate' in self.name_node_dict:
108  dnode = self.name_node_dict[conv2d_scope_name + '/dilation_rate']
109  else:
110  dnode = None
111 
112  # the BiasAdd name is possible be changed into the output name,
113  # if activation is None, and BiasAdd.next is the last op which is Identity
114  if conv2d_scope_name + '/BiasAdd' in self.edges:
115  anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
116  else:
117  anode = None
118  return knode, bnode, dnode, anode
119 
120 
121  def dump_complex_conv2d_to_file(self, node, f):
122  assert(node.op == 'Conv2D')
123  self.layer_number = self.layer_number + 1
124  self.converted_nodes.add(node.name)
125 
126  scope_name = TFConverter.get_scope_name(node.name)
127  #knode for kernel, bnode for bias, dnode for dilation, anode for activation
128  knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)
129 
130  if dnode is not None:
131  dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
132  else:
133  dilation = 1
134 
135  if anode is not None:
136  activation = anode.op
137  else:
138  activation = 'None'
139 
140  padding = node.attr['padding'].s.decode("utf-8")
141  # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use this tricky method.
142  if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
143  if self.name_node_dict[scope_name + '/stack'].op == "Const":
144  padding = 'SAME'
145  padding = self.conv_paddings[padding]
146 
147  ktensor = knode.attr['value'].tensor
148  filter_height = ktensor.tensor_shape.dim[0].size
149  filter_width = ktensor.tensor_shape.dim[1].size
150  in_channels = ktensor.tensor_shape.dim[2].size
151  out_channels = ktensor.tensor_shape.dim[3].size
152  kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
153  kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
154  kernel = np.transpose(kernel, [3, 0, 1, 2])
155 
156  has_bias = 1
157  np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
158  kernel.tofile(f)
159 
160  btensor = bnode.attr['value'].tensor
161  if btensor.tensor_shape.dim[0].size == 1:
162  bias = struct.pack("f", btensor.float_val[0])
163  else:
164  bias = btensor.tensor_content
165  f.write(bias)
166 
167  input_name = self.conv2d_scopename_inputname_dict[scope_name]
168  input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
169 
170  if anode is not None:
171  output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
172  else:
173  output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
174  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
175 
176 
177  def dump_simple_conv2d_to_file(self, node, f):
178  assert(node.op == 'Conv2D')
179  self.layer_number = self.layer_number + 1
180  self.converted_nodes.add(node.name)
181 
182  node0 = self.name_node_dict[node.input[0]]
183  node1 = self.name_node_dict[node.input[1]]
184  if node0.op == 'Const':
185  knode = node0
186  input_name = node.input[1]
187  else:
188  knode = node1
189  input_name = node.input[0]
190 
191  ktensor = knode.attr['value'].tensor
192  filter_height = ktensor.tensor_shape.dim[0].size
193  filter_width = ktensor.tensor_shape.dim[1].size
194  in_channels = ktensor.tensor_shape.dim[2].size
195  out_channels = ktensor.tensor_shape.dim[3].size
196  kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
197  kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
198  kernel = np.transpose(kernel, [3, 0, 1, 2])
199 
200  has_bias = 0
201  dilation = 1
202  padding = node.attr['padding'].s.decode("utf-8")
203  np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
204  in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
205  kernel.tofile(f)
206 
207  input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
208  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
209  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
210 
211 
212  def dump_depth2space_to_file(self, node, f):
213  assert(node.op == 'DepthToSpace')
214  self.layer_number = self.layer_number + 1
215  block_size = node.attr['block_size'].i
216  np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
217  self.converted_nodes.add(node.name)
218  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
219  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
220  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
221 
222 
223  def dump_mirrorpad_to_file(self, node, f):
224  assert(node.op == 'MirrorPad')
225  self.layer_number = self.layer_number + 1
226  mode = node.attr['mode'].s
227  mode = self.mirrorpad_mode[mode.decode("utf-8")]
228  np.array([self.op2code[node.op], mode], dtype=np.uint32).tofile(f)
229  pnode = self.name_node_dict[node.input[1]]
230  self.converted_nodes.add(pnode.name)
231  paddings = pnode.attr['value'].tensor.tensor_content
232  f.write(paddings)
233  self.converted_nodes.add(node.name)
234  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
235  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
236  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
237 
238 
239  def dump_maximum_to_file(self, node, f):
240  assert(node.op == 'Maximum')
241  self.layer_number = self.layer_number + 1
242  ynode = self.name_node_dict[node.input[1]]
243  y = ynode.attr['value'].tensor.float_val[0]
244  np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
245  np.array([y], dtype=np.float32).tofile(f)
246  self.converted_nodes.add(node.name)
247  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
248  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
249  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
250 
251 
252  def dump_layers_to_file(self, f):
253  for node in self.nodes:
254  if node.name in self.converted_nodes:
255  continue
256 
257  # conv2d with dilation generates very complex nodes, so handle it in special
258  scope_name = TFConverter.get_scope_name(node.name)
259  if scope_name in self.conv2d_scope_names:
260  if node.op == 'Conv2D':
261  self.dump_complex_conv2d_to_file(node, f)
262  continue
263 
264  if node.op == 'Conv2D':
265  self.dump_simple_conv2d_to_file(node, f)
266  elif node.op == 'DepthToSpace':
267  self.dump_depth2space_to_file(node, f)
268  elif node.op == 'MirrorPad':
269  self.dump_mirrorpad_to_file(node, f)
270  elif node.op == 'Maximum':
271  self.dump_maximum_to_file(node, f)
272 
273 
274  def dump_operands_to_file(self, f):
275  operands = sorted(self.name_operand_dict.values())
276  for operand in operands:
277  #print('{}'.format(operand))
278  np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
279  f.write(operand.name.encode('utf-8'))
280  np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
281  np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
282 
283 
284  def dump_to_file(self):
285  with open(self.outfile, 'wb') as f:
286  f.write(header.str.encode('utf-8'))
287  np.array([header.major, header.minor], dtype=np.uint32).tofile(f)
288  self.dump_layers_to_file(f)
289  self.dump_operands_to_file(f)
290  np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
291 
292 
294  for node in self.nodes:
295  self.name_node_dict[node.name] = node
296 
297 
299  used_names = []
300  for node in self.nodes:
301  for input in node.input:
302  used_names.append(input)
303 
304  for node in self.nodes:
305  if node.name not in used_names:
306  self.output_names.append(node.name)
307 
308 
309  def remove_identity(self):
310  id_nodes = []
311  id_dict = {}
312  for node in self.nodes:
313  if node.op == 'Identity':
314  name = node.name
315  input = node.input[0]
316  id_nodes.append(node)
317  # do not change the output name
318  if name in self.output_names:
319  self.name_node_dict[input].name = name
320  self.name_node_dict[name] = self.name_node_dict[input]
321  del self.name_node_dict[input]
322  else:
323  id_dict[name] = input
324 
325  for idnode in id_nodes:
326  self.nodes.remove(idnode)
327 
328  for node in self.nodes:
329  for i in range(len(node.input)):
330  input = node.input[i]
331  if input in id_dict:
332  node.input[i] = id_dict[input]
333 
334 
335  def generate_edges(self):
336  for node in self.nodes:
337  for input in node.input:
338  if input in self.edges:
339  self.edges[input].append(node)
340  else:
341  self.edges[input] = [node]
342 
343 
344  @staticmethod
345  def get_scope_name(name):
346  index = name.rfind('/')
347  if index == -1:
348  return ""
349  return name[0:index]
350 
351 
353  # mostly, conv2d is a sub block in graph, get the scope name
354  for node in self.nodes:
355  if node.op == 'Conv2D':
356  scope = TFConverter.get_scope_name(node.name)
357  # for the case tf.nn.conv2d is called directly
358  if scope == '':
359  continue
360  # for the case tf.nn.conv2d is called within a scope
361  if scope + '/kernel' not in self.name_node_dict:
362  continue
363  self.conv2d_scope_names.add(scope)
364 
365  # get the input name to the conv2d sub block
366  for node in self.nodes:
367  scope = TFConverter.get_scope_name(node.name)
368  if scope in self.conv2d_scope_names:
369  if node.op == 'Conv2D' or node.op == 'Shape':
370  for inp in node.input:
371  if TFConverter.get_scope_name(inp) != scope:
372  self.conv2d_scopename_inputname_dict[scope] = inp
373 
374 
375  def run(self):
377  self.generate_output_names()
378  self.remove_identity()
379  self.generate_edges()
381 
382  if self.dump4tb:
383  self.dump_for_tensorboard()
384 
385  self.dump_to_file()
386 
387 
def convert_from_tensorflow(infile, outfile, dump4tb):
    """Parse the frozen TF graph at *infile* and write the native model file
    to *outfile*; optionally dump the graph for tensorboard."""
    with open(infile, 'rb') as model_file:
        # read the file in .proto format
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(model_file.read())
        TFConverter(graph_def, graph_def.node, outfile, dump4tb).run()
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
static void set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v)
Definition: swresample.c:59
static uint8_t * append(uint8_t *buf, const uint8_t *src, int size)
def __init__(self, name, dtype, dims)
def __init__(self, graph_def, nodes, outfile, dump4tb)
def get_conv2d_params(self, conv2d_scope_name)
int len
def convert_from_tensorflow(infile, outfile, dump4tb)
static void print(AVTreeNode *t, int depth)
Definition: tree.c:44