# FFmpeg tools/python/convert_from_tensorflow.py
# (recovered from a Doxygen-generated documentation listing)
1 # Copyright (c) 2019 Guo Yejun
2 #
3 # This file is part of FFmpeg.
4 #
5 # FFmpeg is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
9 #
10 # FFmpeg is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with FFmpeg; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 # ==============================================================================
19 
20 import tensorflow as tf
21 import numpy as np
22 import sys, struct
23 import convert_header as header
24 
# Public API of this module: only the top-level conversion entry point.
__all__ = ['convert_from_tensorflow']
26 
class Operand(object):
    """A named tensor slot in the converted model (an input, output or both)."""

    # io roles; INTERMEDIATE is both bits set
    IOTYPE_INPUT = 1
    IOTYPE_OUTPUT = 2
    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
    # data types understood by the native model format
    DTYPE_FLOAT = 1
    DTYPE_UINT8 = 4
    # class-wide counter handing out sequential operand indices
    index = 0

    def __init__(self, name, dtype, dims):
        self.name = name
        self.dtype = dtype
        self.dims = dims
        self.iotype = 0
        self.used_count = 0
        self.index = Operand.index
        Operand.index += 1
        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}

    def add_iotype(self, iotype):
        """Merge *iotype* into this operand's role; each input use is counted."""
        self.iotype |= iotype
        if iotype == Operand.IOTYPE_INPUT:
            self.used_count += 1

    def __str__(self):
        return (f"{self.index}: (name: {self.name}, "
                f"iotype: {self.iotype2str[self.iotype]}, "
                f"dtype: {self.dtype2str[self.dtype]}, "
                f"dims: ({self.dims[0]},{self.dims[1]},{self.dims[2]},{self.dims[3]}) "
                f"used_count: {self.used_count})")

    def __lt__(self, other):
        # sort operands by creation order
        return self.index < other.index
57 
59  def __init__(self, graph_def, nodes, outfile, dump4tb):
60  self.graph_def = graph_def
61  self.nodes = nodes
62  self.outfile = outfile
63  self.dump4tb = dump4tb
64  self.layer_number = 0
65  self.output_names = []
66  self.name_node_dict = {}
67  self.edges = {}
68  self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
69  self.conv_paddings = {'VALID':0, 'SAME':1}
73  self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
74  self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
76 
77 
78  def add_operand(self, name, type):
79  node = self.name_node_dict[name]
80  if name not in self.name_operand_dict:
81  dtype = node.attr['dtype'].type
82  if dtype == 0:
83  dtype = node.attr['T'].type
84  dims = [-1,-1,-1,-1]
85  if 'shape' in node.attr:
86  dims[0] = node.attr['shape'].shape.dim[0].size
87  dims[1] = node.attr['shape'].shape.dim[1].size
88  dims[2] = node.attr['shape'].shape.dim[2].size
89  dims[3] = node.attr['shape'].shape.dim[3].size
90  operand = Operand(name, dtype, dims)
91  self.name_operand_dict[name] = operand;
92  self.name_operand_dict[name].add_iotype(type)
93  return self.name_operand_dict[name].index
94 
95 
97  graph = tf.get_default_graph()
98  tf.import_graph_def(self.graph_def, name="")
99  tf.summary.FileWriter('/tmp/graph', graph)
100  print('graph saved, run "tensorboard --logdir=/tmp/graph" to see it')
101 
102 
103  def get_conv2d_params(self, conv2d_scope_name):
104  knode = self.name_node_dict[conv2d_scope_name + '/kernel']
105  bnode = self.name_node_dict[conv2d_scope_name + '/bias']
106 
107  if conv2d_scope_name + '/dilation_rate' in self.name_node_dict:
108  dnode = self.name_node_dict[conv2d_scope_name + '/dilation_rate']
109  else:
110  dnode = None
111 
112  # the BiasAdd name is possible be changed into the output name,
113  # if activation is None, and BiasAdd.next is the last op which is Identity
114  if conv2d_scope_name + '/BiasAdd' in self.edges:
115  anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
116  else:
117  anode = None
118  return knode, bnode, dnode, anode
119 
120 
    def dump_complex_conv2d_to_file(self, node, f):
        """Serialize a scoped (layer-style) Conv2D sub-block as one layer.

        Stream layout written to f: uint32[op, dilation, padding, activation,
        in_channels, out_channels, filter_height, has_bias], the kernel as
        float32 transposed to (out, h, w, in), the raw bias bytes, then
        uint32[input_operand_index, output_operand_index].
        """
        assert(node.op == 'Conv2D')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)

        scope_name = TFConverter.get_scope_name(node.name)
        # knode for kernel, bnode for bias, dnode for dilation, anode for activation
        knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)

        if dnode is not None:
            # first int32 of the dilation_rate const tensor's raw bytes
            dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
        else:
            dilation = 1

        if anode is not None:
            activation = anode.op
        else:
            activation = 'None'

        padding = node.attr['padding'].s.decode("utf-8")
        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them,
        # so use this tricky method: a const '/stack' node implies SAME padding.
        if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
            if self.name_node_dict[scope_name + '/stack'].op == "Const":
                padding = 'SAME'
        padding = self.conv_paddings[padding]

        ktensor = knode.attr['value'].tensor
        filter_height = ktensor.tensor_shape.dim[0].size
        filter_width = ktensor.tensor_shape.dim[1].size
        in_channels = ktensor.tensor_shape.dim[2].size
        out_channels = ktensor.tensor_shape.dim[3].size
        kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        # reorder from TF's (h, w, in, out) to (out, h, w, in)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

        has_bias = 1
        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
        kernel.tofile(f)

        btensor = bnode.attr['value'].tensor
        if btensor.tensor_shape.dim[0].size == 1:
            # a single-element bias is stored in float_val, not tensor_content
            bias = struct.pack("f", btensor.float_val[0])
        else:
            bias = btensor.tensor_content
        f.write(bias)

        input_name = self.conv2d_scopename_inputname_dict[scope_name]
        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)

        if anode is not None:
            output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
        else:
            # no activation: the layer output is whatever consumes BiasAdd
            output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
175 
176 
    def dump_simple_conv2d_to_file(self, node, f):
        """Serialize a direct tf.nn.conv2d call (the kernel is a plain Const).

        Writes the same fixed layer header as the complex variant, but with
        no bias, activation 'None' and dilation fixed to 1.
        """
        assert(node.op == 'Conv2D')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)

        # one of the two inputs is the const kernel, the other is the data
        node0 = self.name_node_dict[node.input[0]]
        node1 = self.name_node_dict[node.input[1]]
        if node0.op == 'Const':
            knode = node0
            input_name = node.input[1]
        else:
            knode = node1
            input_name = node.input[0]

        ktensor = knode.attr['value'].tensor
        filter_height = ktensor.tensor_shape.dim[0].size
        filter_width = ktensor.tensor_shape.dim[1].size
        in_channels = ktensor.tensor_shape.dim[2].size
        out_channels = ktensor.tensor_shape.dim[3].size
        if filter_height * filter_width * in_channels * out_channels == 1:
            # a 1x1x1x1 kernel is stored in float_val instead of tensor_content
            kernel = np.float32(ktensor.float_val[0])
        else:
            kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        # reorder from TF's (h, w, in, out) to (out, h, w, in)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

        has_bias = 0
        dilation = 1
        padding = node.attr['padding'].s.decode("utf-8")
        np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
                  in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
        kernel.tofile(f)

        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
213 
214 
215  def dump_depth2space_to_file(self, node, f):
216  assert(node.op == 'DepthToSpace')
217  self.layer_number = self.layer_number + 1
218  block_size = node.attr['block_size'].i
219  np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
220  self.converted_nodes.add(node.name)
221  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
222  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
223  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
224 
225 
    def dump_mirrorpad_to_file(self, node, f):
        """Serialize a MirrorPad layer: uint32[op2code, mode], the raw bytes of
        the paddings const tensor, then the input/output operand indices."""
        assert(node.op == 'MirrorPad')
        self.layer_number = self.layer_number + 1
        mode = node.attr['mode'].s
        mode = self.mirrorpad_mode[mode.decode("utf-8")]
        np.array([self.op2code[node.op], mode], dtype=np.uint32).tofile(f)
        # second input is the paddings const; mark it converted so it is not
        # visited again as a standalone node
        pnode = self.name_node_dict[node.input[1]]
        self.converted_nodes.add(pnode.name)
        # raw tensor bytes are written as-is
        paddings = pnode.attr['value'].tensor.tensor_content
        f.write(paddings)
        self.converted_nodes.add(node.name)
        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
240 
241 
242  def dump_maximum_to_file(self, node, f):
243  assert(node.op == 'Maximum')
244  self.layer_number = self.layer_number + 1
245  ynode = self.name_node_dict[node.input[1]]
246  y = ynode.attr['value'].tensor.float_val[0]
247  np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
248  np.array([y], dtype=np.float32).tofile(f)
249  self.converted_nodes.add(node.name)
250  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
251  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
252  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
253 
254 
255  def dump_layers_to_file(self, f):
256  for node in self.nodes:
257  if node.name in self.converted_nodes:
258  continue
259 
260  # conv2d with dilation generates very complex nodes, so handle it in special
261  scope_name = TFConverter.get_scope_name(node.name)
262  if scope_name in self.conv2d_scope_names:
263  if node.op == 'Conv2D':
264  self.dump_complex_conv2d_to_file(node, f)
265  continue
266 
267  if node.op == 'Conv2D':
268  self.dump_simple_conv2d_to_file(node, f)
269  elif node.op == 'DepthToSpace':
270  self.dump_depth2space_to_file(node, f)
271  elif node.op == 'MirrorPad':
272  self.dump_mirrorpad_to_file(node, f)
273  elif node.op == 'Maximum':
274  self.dump_maximum_to_file(node, f)
275 
276 
277  def dump_operands_to_file(self, f):
278  operands = sorted(self.name_operand_dict.values())
279  for operand in operands:
280  #print('{}'.format(operand))
281  np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
282  f.write(operand.name.encode('utf-8'))
283  np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
284  np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
285 
286 
287  def dump_to_file(self):
288  with open(self.outfile, 'wb') as f:
289  f.write(header.str.encode('utf-8'))
290  np.array([header.major, header.minor], dtype=np.uint32).tofile(f)
291  self.dump_layers_to_file(f)
292  self.dump_operands_to_file(f)
293  np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
294 
295 
297  for node in self.nodes:
298  self.name_node_dict[node.name] = node
299 
300 
302  used_names = []
303  for node in self.nodes:
304  for input in node.input:
305  used_names.append(input)
306 
307  for node in self.nodes:
308  if node.name not in used_names:
309  self.output_names.append(node.name)
310 
311 
312  def remove_identity(self):
313  id_nodes = []
314  id_dict = {}
315  for node in self.nodes:
316  if node.op == 'Identity':
317  name = node.name
318  input = node.input[0]
319  id_nodes.append(node)
320  # do not change the output name
321  if name in self.output_names:
322  self.name_node_dict[input].name = name
323  self.name_node_dict[name] = self.name_node_dict[input]
324  del self.name_node_dict[input]
325  else:
326  id_dict[name] = input
327 
328  for idnode in id_nodes:
329  self.nodes.remove(idnode)
330 
331  for node in self.nodes:
332  for i in range(len(node.input)):
333  input = node.input[i]
334  if input in id_dict:
335  node.input[i] = id_dict[input]
336 
337 
338  def generate_edges(self):
339  for node in self.nodes:
340  for input in node.input:
341  if input in self.edges:
342  self.edges[input].append(node)
343  else:
344  self.edges[input] = [node]
345 
346 
347  @staticmethod
348  def get_scope_name(name):
349  index = name.rfind('/')
350  if index == -1:
351  return ""
352  return name[0:index]
353 
354 
356  # mostly, conv2d is a sub block in graph, get the scope name
357  for node in self.nodes:
358  if node.op == 'Conv2D':
359  scope = TFConverter.get_scope_name(node.name)
360  # for the case tf.nn.conv2d is called directly
361  if scope == '':
362  continue
363  # for the case tf.nn.conv2d is called within a scope
364  if scope + '/kernel' not in self.name_node_dict:
365  continue
366  self.conv2d_scope_names.add(scope)
367 
368  # get the input name to the conv2d sub block
369  for node in self.nodes:
370  scope = TFConverter.get_scope_name(node.name)
371  if scope in self.conv2d_scope_names:
372  if node.op == 'Conv2D' or node.op == 'Shape':
373  for inp in node.input:
374  if TFConverter.get_scope_name(inp) != scope:
375  self.conv2d_scopename_inputname_dict[scope] = inp
376 
377 
378  def run(self):
380  self.generate_output_names()
381  self.remove_identity()
382  self.generate_edges()
384 
385  if self.dump4tb:
386  self.dump_for_tensorboard()
387 
388  self.dump_to_file()
389 
390 
def convert_from_tensorflow(infile, outfile, dump4tb):
    """Convert a frozen TensorFlow graph (.pb) at *infile* into the native
    model format at *outfile*; optionally dump the graph for TensorBoard."""
    # the input file is a binary-serialized GraphDef protobuf
    with open(infile, 'rb') as f:
        serialized = f.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)

    converter = TFConverter(graph_def, graph_def.node, outfile, dump4tb)
    converter.run()
# NOTE(review): Doxygen cross-reference residue removed here — the generator
# appended unrelated C symbol definitions (swresample.c, tree.c) and duplicate
# Python signatures that are not part of this script.