dnn-layer-conv2d-test.c
/*
 * Copyright (c) 2019 Guo Yejun
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdio.h>
#include <string.h>
#include <math.h>
#include "libavfilter/dnn/dnn_backend_native_layer_conv2d.h"

/* tolerance when comparing the layer output against the TensorFlow reference */
#define EPSON 0.00001

static int test_with_same_dilate(void)
{
    // The input data and expected data are generated with the Python code below.
    /*
    x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='same', dilation_rate=(2, 2), bias_initializer=tf.keras.initializers.he_normal())
    data = np.random.rand(1, 5, 6, 3);

    sess=tf.Session()
    sess.run(tf.global_variables_initializer())

    weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
    kernel = weights['conv2d/kernel:0']
    kernel = np.transpose(kernel, [3, 0, 1, 2])
    print("kernel:")
    print(kernel.shape)
    print(list(kernel.flatten()))

    bias = weights['conv2d/bias:0']
    print("bias:")
    print(bias.shape)
    print(list(bias.flatten()))

    output = sess.run(y, feed_dict={x: data})

    print("input:")
    print(data.shape)
    print(list(data.flatten()))

    print("output:")
    print(output.shape)
    print(list(output.flatten()))
    */

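    /*
     * Weights, bias, input and reference output captured from the TensorFlow
     * run above. With 'same' padding the 5x6 spatial size is preserved, so the
     * expected output holds 1*5*6*2 values.
     */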
    ConvolutionalParams params;
    DnnOperand operands[2];
    int32_t input_indexes[1];
    float input[1*5*6*3] = {
        0.7012556460308194, 0.4233847954643357, 0.19515900664313612, 0.16343083004926495, 0.5758261611052848, 0.9510767434014871, 0.11014085055947687,
        0.906327053637727, 0.8136794715542507, 0.45371764543639526, 0.5768443343523952, 0.19543668786046986, 0.15648326047898609, 0.2099500241141279,
        0.17658777090552413, 0.059335724777169196, 0.1729991838469117, 0.8150514704819208, 0.4435535466703049, 0.3752188477566878, 0.749936650421431,
        0.6823494635284907, 0.10776389679424747, 0.34247481674596836, 0.5147867256244629, 0.9063709728129032, 0.12423605800856818, 0.6064872945412728,
        0.5891681538551459, 0.9865836236466314, 0.9002163879294677, 0.003968273184274618, 0.8628374809643967, 0.1327176268279583, 0.8449799925703798,
        0.1937671869354366, 0.41524410152707425, 0.02038786604756837, 0.49792466069597496, 0.8881874553848784, 0.9683921035597336, 0.4122972568010813,
        0.843553550993252, 0.9588482762501964, 0.5190350762645546, 0.4283584264145317, 0.09781496073714646, 0.9501058833776156, 0.8665541760152776,
        0.31669272550095806, 0.07133074675453632, 0.606438007334886, 0.7007157020538224, 0.4827996264130444, 0.5167615606392761, 0.6385043039312651,
        0.23069664707810555, 0.058233497329354456, 0.06323892961591071, 0.24816458893245974, 0.8646369065257812, 0.24742185893094837, 0.09991225948167437,
        0.625700606979606, 0.7678541502111257, 0.6215834594679912, 0.5623003956582483, 0.07389123942681242, 0.7659100715711249, 0.486061471642225,
        0.9947455699829012, 0.9094911797643259, 0.7644355876253265, 0.05384315321492239, 0.13565394382783613, 0.9810628204953316, 0.007386389078887889,
        0.226182754156241, 0.2609021390764772, 0.24182802076928933, 0.13264782451941648, 0.2035816485767682, 0.005504188177612557, 0.7014619934040155,
        0.956215988391991, 0.5670398541013633, 0.9809764721750784, 0.6886338100487461, 0.5758152317218274, 0.7137823176776179
    };
    float expected_output[1*5*6*2] = {
        -0.9480655, -0.7169147, -0.9404794, -0.5567385, -0.8991124, -0.8306558, -0.94487447, -0.8932543, -0.88238764, -0.7301602,
        -0.8974813, -0.7026703, -0.8858988, -0.53203243, -0.92881465, -0.5648504, -0.8871471, -0.7000097, -0.91754407, -0.79684794,
        -0.760465, -0.117928326, -0.88302773, -0.8975289, -0.70615053, 0.19231977, -0.8318776, -0.386184, -0.80698484, -0.8556624,
        -0.7336671, -0.6168619, -0.7658234, -0.63449603, -0.73314047, -0.87502456, -0.58158904, -0.4184259, -0.52618927, -0.13613208,
        -0.5093187, -0.21027721, -0.39455596, -0.44507834, -0.22269244, -0.73400885, -0.77655095, -0.74408925, -0.57313335, -0.15333457,
        -0.74620694, -0.34858236, -0.42586932, -0.5240488, 0.1634339, -0.2447881, -0.57927346, -0.62732303, -0.82287043, -0.8474058
    };
    float *output;
    float kernel[2*3*3*3] = {
        0.26025516, 0.16536498, -0.24351254, 0.33892477, -0.34005195, 0.35202783, 0.34056443, 0.01422739, 0.13799345, 0.29489166,
        0.2781723, 0.178585, 0.22122234, 0.044115514, 0.13134438, 0.31705368, 0.22527462, -0.021323413, 0.115134746, -0.18216397,
        -0.21197563, -0.027848959, -0.01704529, -0.12401503, -0.23415318, -0.12661739, -0.35338148, 0.20049328, -0.076153606,
        -0.23642601, -0.3125769, -0.025851756, -0.30006272, 0.050762743, 0.32003498, 0.3052225, -0.0017385483, 0.25337684, -0.25664508,
        0.27846587, -0.3112659, 0.2066065, 0.31499845, 0.113178134, 0.09449363, -0.11828774, -0.12671001, -0.36259216, 0.2710235,
        -0.19676702, 0.023612618, -0.2596915, -0.34949252, -0.108270735
    };
    float bias[2] = { -1.6574852, -0.72915393 };

    NativeContext ctx;
    ctx.class = NULL;
    ctx.options.conv2d_threads = 1;

    params.activation = TANH;
    params.has_bias = 1;
    params.biases = bias;
    params.dilation = 2;
    params.input_num = 3;
    params.kernel = kernel;
    params.kernel_size = 3;
    params.output_num = 2;
    params.padding_method = SAME;

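    /* operands[0] is the NHWC input (1x5x6x3); operands[1] receives the
     * layer output, which ff_dnn_execute_layer_conv2d() allocates. */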
    operands[0].data = input;
    operands[0].dims[0] = 1;
    operands[0].dims[1] = 5;
    operands[0].dims[2] = 6;
    operands[0].dims[3] = 3;
    operands[1].data = NULL;

    input_indexes[0] = 0;
    ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);

    output = operands[1].data;
    for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
        if (fabs(output[i] - expected_output[i]) > EPSON) {
            printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
            av_freep(&output);
            return 1;
        }
    }

    av_freep(&output);
    return 0;
}

static int test_with_valid(void)
{
    // The input data and expected data are generated with the Python code below.
    /*
    x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='valid', bias_initializer=tf.keras.initializers.he_normal())
    data = np.random.rand(1, 5, 6, 3);

    sess=tf.Session()
    sess.run(tf.global_variables_initializer())

    weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
    kernel = weights['conv2d/kernel:0']
    kernel = np.transpose(kernel, [3, 0, 1, 2])
    print("kernel:")
    print(kernel.shape)
    print(list(kernel.flatten()))

    bias = weights['conv2d/bias:0']
    print("bias:")
    print(bias.shape)
    print(list(bias.flatten()))

    output = sess.run(y, feed_dict={x: data})

    print("input:")
    print(data.shape)
    print(list(data.flatten()))

    print("output:")
    print(output.shape)
    print(list(output.flatten()))
    */

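    /*
     * With 'valid' padding a 3x3 kernel shrinks the 5x6 input to 3x4, so the
     * expected output holds 1*3*4*2 values.
     */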
    ConvolutionalParams params;
    DnnOperand operands[2];
    int32_t input_indexes[1];
    float input[1*5*6*3] = {
        0.26126657468269665, 0.42762216215337556, 0.7466274030131497, 0.802550266787863, 0.3709323443076644, 0.5919817068197668, 0.49274512279324967,
        0.7170132295090351, 0.0911793215410649, 0.5134213878288361, 0.670132600785118, 0.49417034512633484, 0.03887389460089885, 0.436785102836845,
        0.1490231658611978, 0.6413606121498127, 0.8595987991375995, 0.9132593077586231, 0.7075959004873255, 0.17754995944845464, 0.5212507214937141,
        0.35379732738215475, 0.25205107358505296, 0.3928792840544273, 0.09485294189485782, 0.8685115437448666, 0.6489046799288605, 0.509253797582924,
        0.8993255536791972, 0.18740056466602373, 0.34237617336313986, 0.3871438962989183, 0.1488532571774911, 0.5187002331293636, 0.8137098818752955,
        0.521761863717401, 0.4622312310118274, 0.29038411334638825, 0.16194915718170566, 0.5175999923925211, 0.8852230040101133, 0.0218263385047206,
        0.08482355352852367, 0.3463638568376264, 0.28627127120619733, 0.9553293378948409, 0.4803391055970835, 0.841635695030805, 0.3556828280031952,
        0.06778527221541808, 0.28193560357091596, 0.8399957619031576, 0.03305536359456385, 0.6625039162109645, 0.9300552020023897, 0.8551529138204146,
        0.6133216915522418, 0.222427800857393, 0.1315422686800336, 0.6189144989185527, 0.5346184916866876, 0.8348888624532548, 0.6544834567840291,
        0.2844062293389934, 0.28780026600883324, 0.5372272015684924, 0.6250226011503823, 0.28119106062279453, 0.49655812908420094, 0.6451488959145951,
        0.7362580606834843, 0.44815578616664087, 0.6454760235835586, 0.6794062414265861, 0.045378883014935756, 0.9008388543865096, 0.7949752851269782,
        0.4179928876222264, 0.28733419007048644, 0.996902319501908, 0.5690851338677467, 0.9511814013279738, 0.025323788678181636, 0.5594359732604794,
        0.1213732595086251, 0.7172624313368294, 0.6759328959074691, 0.07252138454885071, 0.17557735158403442, 0.5988895455048769
    };
    float expected_output[1*3*4*2] = {
        -0.556947, -0.42143887, -0.092070885, 0.27404794, -0.41886684, 0.0862887, -0.25001016, -0.342721, 0.020730592, 0.04016919, -0.69839877,
        -0.06136704, 0.14186388, -0.11655602, -0.23489095, -0.3845829, -0.19017771, 0.1595885, -0.18308741, -0.3071209, -0.5848686, -0.22509028,
        -0.6023201, -0.14448485
    };
    float *output;
    float kernel[2*3*3*3] = {
        -0.25291282, 0.22402048, 0.028642118, -0.14615723, -0.27362752, -0.34801802, -0.2759148, 0.19594926, -0.25029412, 0.34606284, 0.10376671,
        -0.1015394, 0.23616093, 0.2134214, 0.35285157, 0.05893758, 0.0024731457, -0.17143056, 0.35758412, 0.2186206, -0.28384736, -0.21206513,
        -0.20871592, 0.27070445, 0.25878823, 0.11136332, -0.33737376, 0.08353335, -0.34290665, 0.041805506, -0.09738535, 0.3284936, -0.16838405,
        -0.032494456, -0.29193437, 0.033259362, -0.09272635, -0.2802651, -0.28648436, 0.3542878, 0.2432127, -0.24551713, 0.27813476, 0.21024024,
        -0.013690501, -0.1350077, -0.07826337, -0.34563828, 0.3220685, -0.07571727, 0.19420576, 0.20783454, 0.18738335, 0.16672492
    };
    float bias[2] = { -0.4773722, -0.19620377 };

    NativeContext ctx;
    ctx.class = NULL;
    ctx.options.conv2d_threads = 1;

    params.activation = TANH;
    params.has_bias = 1;
    params.biases = bias;
    params.dilation = 1;
    params.input_num = 3;
    params.kernel = kernel;
    params.kernel_size = 3;
    params.output_num = 2;
    params.padding_method = VALID;

    operands[0].data = input;
    operands[0].dims[0] = 1;
    operands[0].dims[1] = 5;
    operands[0].dims[2] = 6;
    operands[0].dims[3] = 3;
    operands[1].data = NULL;

    input_indexes[0] = 0;
    ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);

    output = operands[1].data;
    for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
        if (fabs(output[i] - expected_output[i]) > EPSON) {
            printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
            av_freep(&output);
            return 1;
        }
    }

    av_freep(&output);
    return 0;
}

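/* Run both padding variants; any mismatch yields a non-zero exit status,
 * which fails the test when run under FATE. */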
int main(int argc, char **argv)
{
    if (test_with_valid())
        return 1;
    if (test_with_same_dilate())
        return 1;

    return 0;
}