FFmpeg
convert_from_tensorflow.py
Go to the documentation of this file.
1 # Copyright (c) 2019 Guo Yejun
2 #
3 # This file is part of FFmpeg.
4 #
5 # FFmpeg is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
9 #
10 # FFmpeg is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with FFmpeg; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 # ==============================================================================
19 
20 import tensorflow as tf
21 import numpy as np
22 import sys, struct
23 import convert_header as header
24 
25 __all__ = ['convert_from_tensorflow']
26 
class Operand(object):
    """A named tensor that is consumed and/or produced by converted layers.

    Every instance receives a unique, increasing ``index`` taken from the
    class-wide counter ``Operand.index``. ``dims`` must hold at least four
    entries; only the first four are printed by ``__str__``.
    """
    # Bit flags recording how the operand is used. An operand that is both
    # consumed and produced ends up with both bits set (INTERMEDIATE).
    IOTYPE_INPUT = 1
    IOTYPE_OUTPUT = 2
    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
    # Data type codes known to this converter.
    DTYPE_FLOAT = 1
    DTYPE_UINT8 = 4
    # Next index to hand out; shared by all instances.
    index = 0
    # Display names used by __str__. They never change, so they are shared
    # by all instances instead of being rebuilt in every constructor call.
    iotype2str = {IOTYPE_INPUT: 'in', IOTYPE_OUTPUT: 'out', IOTYPE_INTERMEDIATE: 'inout'}
    dtype2str = {DTYPE_FLOAT: 'DT_FLOAT', DTYPE_UINT8: 'DT_UINT8'}

    def __init__(self, name, dtype, dims):
        """Create an operand with no recorded usage yet.

        name  -- operand name
        dtype -- data type code (see the DTYPE_* constants)
        dims  -- sequence with the four dimension sizes
        """
        self.name = name
        self.dtype = dtype
        self.dims = dims
        self.iotype = 0
        self.used_count = 0
        self.index = Operand.index
        Operand.index = Operand.index + 1

    def add_iotype(self, iotype):
        """Record one more usage of the operand.

        The flag is OR-ed into ``self.iotype``; ``used_count`` only counts
        usages as an input.
        """
        self.iotype = self.iotype | iotype
        if iotype == Operand.IOTYPE_INPUT:
            self.used_count = self.used_count + 1

    def __str__(self):
        # Fall back to a readable placeholder instead of raising KeyError
        # when no usage has been recorded yet (iotype == 0) or the dtype
        # code is not one of the DTYPE_* constants.
        iotype = self.iotype2str.get(self.iotype, 'unknown({})'.format(self.iotype))
        dtype = self.dtype2str.get(self.dtype, 'unknown({})'.format(self.dtype))
        return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index,
                            self.name, iotype, dtype,
                            self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count)

    def __lt__(self, other):
        # Operands sort by creation order.
        return self.index < other.index
57 
59  def __init__(self, graph_def, nodes, outfile, dump4tb):
60  self.graph_def = graph_def
61  self.nodes = nodes
62  self.outfile = outfile
63  self.dump4tb = dump4tb
64  self.layer_number = 0
65  self.output_names = []
66  self.name_node_dict = {}
67  self.edges = {}
68  self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
69  self.conv_paddings = {'VALID':0, 'SAME':1}
73  self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
74  self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
76 
77 
78  def add_operand(self, name, type):
79  node = self.name_node_dict[name]
80  if name not in self.name_operand_dict:
81  dtype = node.attr['dtype'].type
82  if dtype == 0:
83  dtype = node.attr['T'].type
84  dims = [-1,-1,-1,-1]
85  if 'shape' in node.attr:
86  dims[0] = node.attr['shape'].shape.dim[0].size
87  dims[1] = node.attr['shape'].shape.dim[1].size
88  dims[2] = node.attr['shape'].shape.dim[2].size
89  dims[3] = node.attr['shape'].shape.dim[3].size
90  operand = Operand(name, dtype, dims)
91  self.name_operand_dict[name] = operand;
92  self.name_operand_dict[name].add_iotype(type)
93  return self.name_operand_dict[name].index
94 
95 
97  graph = tf.get_default_graph()
98  tf.import_graph_def(self.graph_def, name="")
99  tf.summary.FileWriter('/tmp/graph', graph)
100  print('graph saved, run "tensorboard --logdir=/tmp/graph" to see it')
101 
102 
103  def get_conv2d_params(self, conv2d_scope_name):
104  knode = self.name_node_dict[conv2d_scope_name + '/kernel']
105  bnode = self.name_node_dict[conv2d_scope_name + '/bias']
106 
107  if conv2d_scope_name + '/dilation_rate' in self.name_node_dict:
108  dnode = self.name_node_dict[conv2d_scope_name + '/dilation_rate']
109  else:
110  dnode = None
111 
112  # the BiasAdd name is possible be changed into the output name,
113  # if activation is None, and BiasAdd.next is the last op which is Identity
114  if conv2d_scope_name + '/BiasAdd' in self.edges:
115  anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
116  else:
117  anode = None
118  return knode, bnode, dnode, anode
119 
120 
121  def dump_conv2d_to_file(self, node, f):
122  assert(node.op == 'Conv2D')
123  self.layer_number = self.layer_number + 1
124  self.converted_nodes.add(node.name)
125 
126  scope_name = TFConverter.get_scope_name(node.name)
127  #knode for kernel, bnode for bias, dnode for dilation, anode for activation
128  knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)
129 
130  if dnode is not None:
131  dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
132  else:
133  dilation = 1
134 
135  if anode is not None:
136  activation = anode.op
137  else:
138  activation = 'None'
139 
140  padding = node.attr['padding'].s.decode("utf-8")
141  # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use this tricky method.
142  if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
143  if self.name_node_dict[scope_name + '/stack'].op == "Const":
144  padding = 'SAME'
145  padding = self.conv_paddings[padding]
146 
147  ktensor = knode.attr['value'].tensor
148  filter_height = ktensor.tensor_shape.dim[0].size
149  filter_width = ktensor.tensor_shape.dim[1].size
150  in_channels = ktensor.tensor_shape.dim[2].size
151  out_channels = ktensor.tensor_shape.dim[3].size
152  kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
153  kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
154  kernel = np.transpose(kernel, [3, 0, 1, 2])
155 
156  np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
157  kernel.tofile(f)
158 
159  btensor = bnode.attr['value'].tensor
160  if btensor.tensor_shape.dim[0].size == 1:
161  bias = struct.pack("f", btensor.float_val[0])
162  else:
163  bias = btensor.tensor_content
164  f.write(bias)
165 
166  input_name = self.conv2d_scopename_inputname_dict[scope_name]
167  input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
168 
169  if anode is not None:
170  output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
171  else:
172  output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
173  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
174 
175 
176  def dump_depth2space_to_file(self, node, f):
177  assert(node.op == 'DepthToSpace')
178  self.layer_number = self.layer_number + 1
179  block_size = node.attr['block_size'].i
180  np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
181  self.converted_nodes.add(node.name)
182  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
183  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
184  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
185 
186 
187  def dump_mirrorpad_to_file(self, node, f):
188  assert(node.op == 'MirrorPad')
189  self.layer_number = self.layer_number + 1
190  mode = node.attr['mode'].s
191  mode = self.mirrorpad_mode[mode.decode("utf-8")]
192  np.array([self.op2code[node.op], mode], dtype=np.uint32).tofile(f)
193  pnode = self.name_node_dict[node.input[1]]
194  self.converted_nodes.add(pnode.name)
195  paddings = pnode.attr['value'].tensor.tensor_content
196  f.write(paddings)
197  self.converted_nodes.add(node.name)
198  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
199  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
200  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
201 
202 
203  def dump_maximum_to_file(self, node, f):
204  assert(node.op == 'Maximum')
205  self.layer_number = self.layer_number + 1
206  ynode = self.name_node_dict[node.input[1]]
207  y = ynode.attr['value'].tensor.float_val[0]
208  np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
209  np.array([y], dtype=np.float32).tofile(f)
210  self.converted_nodes.add(node.name)
211  input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
212  output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
213  np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
214 
215 
216  def dump_layers_to_file(self, f):
217  for node in self.nodes:
218  if node.name in self.converted_nodes:
219  continue
220 
221  # conv2d with dilation generates very complex nodes, so handle it in special
222  scope_name = TFConverter.get_scope_name(node.name)
223  if scope_name in self.conv2d_scope_names:
224  if node.op == 'Conv2D':
225  self.dump_conv2d_to_file(node, f)
226  continue
227 
228  if node.op == 'DepthToSpace':
229  self.dump_depth2space_to_file(node, f)
230  elif node.op == 'MirrorPad':
231  self.dump_mirrorpad_to_file(node, f)
232  elif node.op == 'Maximum':
233  self.dump_maximum_to_file(node, f)
234 
235 
236  def dump_operands_to_file(self, f):
237  operands = sorted(self.name_operand_dict.values())
238  for operand in operands:
239  #print('{}'.format(operand))
240  np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
241  f.write(operand.name.encode('utf-8'))
242  np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
243  np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
244 
245 
246  def dump_to_file(self):
247  with open(self.outfile, 'wb') as f:
248  f.write(header.str.encode('utf-8'))
249  np.array([header.major, header.minor], dtype=np.uint32).tofile(f)
250  self.dump_layers_to_file(f)
251  self.dump_operands_to_file(f)
252  np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
253 
254 
256  for node in self.nodes:
257  self.name_node_dict[node.name] = node
258 
259 
261  used_names = []
262  for node in self.nodes:
263  for input in node.input:
264  used_names.append(input)
265 
266  for node in self.nodes:
267  if node.name not in used_names:
268  self.output_names.append(node.name)
269 
270 
271  def remove_identity(self):
272  id_nodes = []
273  id_dict = {}
274  for node in self.nodes:
275  if node.op == 'Identity':
276  name = node.name
277  input = node.input[0]
278  id_nodes.append(node)
279  # do not change the output name
280  if name in self.output_names:
281  self.name_node_dict[input].name = name
282  self.name_node_dict[name] = self.name_node_dict[input]
283  del self.name_node_dict[input]
284  else:
285  id_dict[name] = input
286 
287  for idnode in id_nodes:
288  self.nodes.remove(idnode)
289 
290  for node in self.nodes:
291  for i in range(len(node.input)):
292  input = node.input[i]
293  if input in id_dict:
294  node.input[i] = id_dict[input]
295 
296 
297  def generate_edges(self):
298  for node in self.nodes:
299  for input in node.input:
300  if input in self.edges:
301  self.edges[input].append(node)
302  else:
303  self.edges[input] = [node]
304 
305 
306  @staticmethod
307  def get_scope_name(name):
308  index = name.rfind('/')
309  if index == -1:
310  return ""
311  return name[0:index]
312 
313 
315  # conv2d is a sub block in graph, get the scope name
316  for node in self.nodes:
317  if node.op == 'Conv2D':
318  scope = TFConverter.get_scope_name(node.name)
319  self.conv2d_scope_names.add(scope)
320 
321  # get the input name to the conv2d sub block
322  for node in self.nodes:
323  scope = TFConverter.get_scope_name(node.name)
324  if scope in self.conv2d_scope_names:
325  if node.op == 'Conv2D' or node.op == 'Shape':
326  for inp in node.input:
327  if TFConverter.get_scope_name(inp) != scope:
328  self.conv2d_scopename_inputname_dict[scope] = inp
329 
330 
331  def run(self):
333  self.generate_output_names()
334  self.remove_identity()
335  self.generate_edges()
337 
338  if self.dump4tb:
339  self.dump_for_tensorboard()
340 
341  self.dump_to_file()
342 
343 
def convert_from_tensorflow(infile, outfile, dump4tb):
    """Convert the TensorFlow model file `infile` into the model file `outfile`.

    infile  -- path of a file holding a serialized GraphDef
    outfile -- path of the file to write
    dump4tb -- when true, the graph is also dumped for TensorBoard
    """
    with open(infile, 'rb') as model_file:
        serialized_graph = model_file.read()

    # the file is in .proto format
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized_graph)

    TFConverter(graph_def, graph_def.node, outfile, dump4tb).run()
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout, sample format (the sample packing is implied by the sample format) and sample rate. The lists are not just lists...
static void set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v)
Definition: swresample.c:59
static uint8_t * append(uint8_t *buf, const uint8_t *src, int size)
def __init__(self, name, dtype, dims)
def __init__(self, graph_def, nodes, outfile, dump4tb)
def get_conv2d_params(self, conv2d_scope_name)
int len
def convert_from_tensorflow(infile, outfile, dump4tb)
static void print(AVTreeNode *t, int depth)
Definition: tree.c:44