[FFmpeg-cvslog] convert_from_tensorflow.py: support conv2d with dilation
Guo, Yejun
git at videolan.org
Thu Aug 15 20:59:39 EEST 2019
ffmpeg | branch: master | Guo, Yejun <yejun.guo at intel.com> | Tue Jul 30 09:26:18 2019 +0800| [ddd92ba2c6c6d0b5f3d5b4496ab07fbcf73b58a2] | committer: Pedro Arthur
convert_from_tensorflow.py: support conv2d with dilation
conv2d with dilation > 1 generates tens of nodes in the graph. It is not
easy to parse each node one by one, so we use a special trick to parse
the conv2d layer.
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
Signed-off-by: Pedro Arthur <bygrandao at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ddd92ba2c6c6d0b5f3d5b4496ab07fbcf73b58a2
---
tools/python/convert_from_tensorflow.py | 80 ++++++++++++++++++++++++---------
1 file changed, 59 insertions(+), 21 deletions(-)
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 804c14f5f2..34454b86de 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -33,9 +33,10 @@ class TFConverter:
self.output_names = []
self.name_node_dict = {}
self.edges = {}
- self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'LeakyRelu':4}
+ self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
self.conv_paddings = {'VALID':0, 'SAME':1}
self.converted_nodes = set()
+ self.conv2d_scope_names = set()
self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
@@ -47,30 +48,45 @@ class TFConverter:
print('graph saved, run "tensorboard --logdir=/tmp/graph" to see it')
- def get_conv2d_params(self, node):
- knode = self.name_node_dict[node.input[1]]
- bnode = None
- activation = 'None'
- next = self.edges[node.name][0]
- if next.op == 'BiasAdd':
- self.converted_nodes.add(next.name)
- bnode = self.name_node_dict[next.input[1]]
- next = self.edges[next.name][0]
- if next.op in self.conv_activations:
- self.converted_nodes.add(next.name)
- activation = next.op
- return knode, bnode, activation
+ def get_conv2d_params(self, conv2d_scope_name):
+ knode = self.name_node_dict[conv2d_scope_name + '/kernel']
+ bnode = self.name_node_dict[conv2d_scope_name + '/bias']
+
+ if conv2d_scope_name + '/dilation_rate' in self.name_node_dict:
+ dnode = self.name_node_dict[conv2d_scope_name + '/dilation_rate']
+ else:
+ dnode = None
+
+ # the BiasAdd node may have been renamed to the output name
+ # if activation is None and the op after BiasAdd is the last op, which is Identity
+ if conv2d_scope_name + '/BiasAdd' in self.edges:
+ activation = self.edges[conv2d_scope_name + '/BiasAdd'][0]
+ activation = activation.op
+ else:
+ activation = 'None'
+ return knode, bnode, dnode, activation
def dump_conv2d_to_file(self, node, f):
assert(node.op == 'Conv2D')
self.layer_number = self.layer_number + 1
self.converted_nodes.add(node.name)
- knode, bnode, activation = self.get_conv2d_params(node)
- dilation = node.attr['dilations'].list.i[0]
- padding = node.attr['padding'].s
- padding = self.conv_paddings[padding.decode("utf-8")]
+ scope_name = TFConverter.get_scope_name(node.name)
+ #knode for kernel, bnode for bias, dnode for dilation
+ knode, bnode, dnode, activation = self.get_conv2d_params(scope_name)
+
+ if dnode is not None:
+ dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
+ else:
+ dilation = 1
+
+ padding = node.attr['padding'].s.decode("utf-8")
+ # conv2d with dilation > 1 generates tens of nodes that are not easy to parse, so use a trick.
+ if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
+ if self.name_node_dict[scope_name + '/stack'].op == "Const":
+ padding = 'SAME'
+ padding = self.conv_paddings[padding]
ktensor = knode.attr['value'].tensor
filter_height = ktensor.tensor_shape.dim[0].size
@@ -126,9 +142,15 @@ class TFConverter:
for node in self.nodes:
if node.name in self.converted_nodes:
continue
- if node.op == 'Conv2D':
- self.dump_conv2d_to_file(node, f)
- elif node.op == 'DepthToSpace':
+
+ # conv2d with dilation generates very complex nodes, so handle it specially
+ scope_name = TFConverter.get_scope_name(node.name)
+ if scope_name in self.conv2d_scope_names:
+ if node.op == 'Conv2D':
+ self.dump_conv2d_to_file(node, f)
+ continue
+
+ if node.op == 'DepthToSpace':
self.dump_depth2space_to_file(node, f)
elif node.op == 'MirrorPad':
self.dump_mirrorpad_to_file(node, f)
@@ -192,11 +214,27 @@ class TFConverter:
self.edges[input] = [node]
+ @staticmethod
+ def get_scope_name(name):
+ index = name.rfind('/')
+ if index == -1:
+ return ""
+ return name[0:index]
+
+
+ def generate_conv2d_scope_names(self):
+ for node in self.nodes:
+ if node.op == 'Conv2D':
+ scope = TFConverter.get_scope_name(node.name)
+ self.conv2d_scope_names.add(scope)
+
+
def run(self):
self.generate_name_node_dict()
self.generate_output_names()
self.remove_identity()
self.generate_edges()
+ self.generate_conv2d_scope_names()
if self.dump4tb:
self.dump_for_tensorboard()
More information about the ffmpeg-cvslog
mailing list