FFmpeg
dnn_backend_native_layer_dense.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
23 
24 int ff_dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
25 {
26  DenseParams *dense_params;
27  int kernel_size;
28  int dnn_size = 0;
29  dense_params = av_malloc(sizeof(*dense_params));
30  if (!dense_params)
31  return 0;
32 
33  dense_params->activation = (int32_t)avio_rl32(model_file_context);
34  dense_params->input_num = (int32_t)avio_rl32(model_file_context);
35  dense_params->output_num = (int32_t)avio_rl32(model_file_context);
36  dense_params->has_bias = (int32_t)avio_rl32(model_file_context);
37  dnn_size += 16;
38 
39  kernel_size = dense_params->input_num * dense_params->output_num;
40  dnn_size += kernel_size * 4;
41  if (dense_params->has_bias)
42  dnn_size += dense_params->output_num * 4;
43 
44  if (dnn_size > file_size || dense_params->input_num <= 0 ||
45  dense_params->output_num <= 0){
46  av_freep(&dense_params);
47  return 0;
48  }
49 
50  dense_params->kernel = av_malloc(kernel_size * sizeof(float));
51  if (!dense_params->kernel) {
52  av_freep(&dense_params);
53  return 0;
54  }
55  for (int i = 0; i < kernel_size; ++i) {
56  dense_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
57  }
58 
59  dense_params->biases = NULL;
60  if (dense_params->has_bias) {
61  dense_params->biases = av_malloc(dense_params->output_num * sizeof(float));
62  if (!dense_params->biases){
63  av_freep(&dense_params->kernel);
64  av_freep(&dense_params);
65  return 0;
66  }
67  for (int i = 0; i < dense_params->output_num; ++i){
68  dense_params->biases[i] = av_int2float(avio_rl32(model_file_context));
69  }
70  }
71 
72  layer->params = dense_params;
73 
74  layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
75  layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
76  dnn_size += 8;
77 
78  if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
79  return 0;
80  }
81 
82  return dnn_size;
83 }
84 
85 int ff_dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes,
86  int32_t output_operand_index, const void *parameters, NativeContext *ctx)
87 {
88  float *output;
89  int32_t input_operand_index = input_operand_indexes[0];
90  int number = operands[input_operand_index].dims[0];
91  int height = operands[input_operand_index].dims[1];
92  int width = operands[input_operand_index].dims[2];
93  int channel = operands[input_operand_index].dims[3];
94  const float *input = operands[input_operand_index].data;
95  const DenseParams *dense_params = parameters;
96 
97  int src_linesize = width * channel;
98  DnnOperand *output_operand = &operands[output_operand_index];
99  output_operand->dims[0] = number;
100  output_operand->dims[1] = height;
101  output_operand->dims[2] = width;
102  output_operand->dims[3] = dense_params->output_num;
103  output_operand->data_type = operands[input_operand_index].data_type;
104  output_operand->length = ff_calculate_operand_data_length(output_operand);
105  if (output_operand->length <= 0) {
106  av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
107  return AVERROR(EINVAL);
108  }
109  output_operand->data = av_realloc(output_operand->data, output_operand->length);
110  if (!output_operand->data) {
111  av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
112  return AVERROR(ENOMEM);
113  }
114  output = output_operand->data;
115 
116  av_assert0(channel == dense_params->input_num);
117 
118  for (int y = 0; y < height; ++y) {
119  for (int x = 0; x < width; ++x) {
120  for (int n_filter = 0; n_filter < dense_params->output_num; ++n_filter) {
121  if (dense_params->has_bias)
122  output[n_filter] = dense_params->biases[n_filter];
123  else
124  output[n_filter] = 0.f;
125 
126  for (int ch = 0; ch < dense_params->input_num; ++ch) {
127  float input_pel;
128  input_pel = input[y * src_linesize + x * dense_params->input_num + ch];
129  output[n_filter] += input_pel * dense_params->kernel[n_filter*dense_params->input_num + ch];
130  }
131  switch (dense_params->activation){
132  case RELU:
133  output[n_filter] = FFMAX(output[n_filter], 0.0);
134  break;
135  case TANH:
136  output[n_filter] = 2.0f / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
137  break;
138  case SIGMOID:
139  output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
140  break;
141  case NONE:
142  break;
143  case LEAKY_RELU:
144  output[n_filter] = FFMAX(output[n_filter], 0.0) + 0.2 * FFMIN(output[n_filter], 0.0);
145  }
146  }
147  output += dense_params->output_num;
148  }
149  }
150  return 0;
151 }
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
NONE
@ NONE
Definition: af_afade.c:56
ff_dnn_load_layer_dense
int ff_dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
Load the Densely-Connected Layer.
Definition: dnn_backend_native_layer_dense.c:24
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
ff_dnn_execute_layer_dense
int ff_dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const void *parameters, NativeContext *ctx)
Execute the Densely-Connected Layer.
Definition: dnn_backend_native_layer_dense.c:85
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
av_int2float
static av_always_inline float av_int2float(uint32_t i)
Reinterpret a 32-bit integer as a float.
Definition: intfloat.h:40
ff_calculate_operand_data_length
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
Definition: dnn_backend_native.c:503
SIGMOID
@ SIGMOID
Definition: dnn_backend_native.h:55
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
width
#define width
TANH
@ TANH
Definition: dnn_backend_native.h:55
DnnOperand::data
void * data
data pointer with data length in bytes.
Definition: dnn_backend_native.h:104
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
DnnOperand::data_type
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now.
Definition: dnn_backend_native.h:85
DenseParams::biases
float * biases
Definition: dnn_backend_native_layer_dense.h:31
ctx
AVFormatContext * ctx
Definition: movenc.c:48
Layer::params
void * params
Definition: dnn_backend_native.h:66
NULL
#define NULL
Definition: coverity.c:32
av_realloc
void * av_realloc(void *ptr, size_t size)
Allocate, reallocate, or free a block of memory.
Definition: mem.c:153
DenseParams::activation
DNNActivationFunc activation
Definition: dnn_backend_native_layer_dense.h:28
DnnOperand::dims
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
Definition: dnn_backend_native.h:74
exp
int8_t exp
Definition: eval.c:72
DnnOperand::length
int32_t length
Definition: dnn_backend_native.h:105
avio_rl32
unsigned int avio_rl32(AVIOContext *s)
Definition: aviobuf.c:761
f
f
Definition: af_crystalizer.c:122
AVIOContext
Bytestream IO Context.
Definition: avio.h:166
Layer::output_operand_index
int32_t output_operand_index
Definition: dnn_backend_native.h:65
NativeContext
Definition: dnn_backend_native.h:118
Layer
Definition: dnn_backend_native.h:57
Layer::input_operand_indexes
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
Definition: dnn_backend_native.h:64
DenseParams::input_num
int32_t input_num
Definition: dnn_backend_native_layer_dense.h:27
height
#define height
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
RELU
@ RELU
Definition: dnn_backend_native.h:55
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
DenseParams
Definition: dnn_backend_native_layer_dense.h:26
DenseParams::kernel
float * kernel
Definition: dnn_backend_native_layer_dense.h:30
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
DenseParams::output_num
int32_t output_num
Definition: dnn_backend_native_layer_dense.h:27
DnnOperand
Definition: dnn_backend_native.h:69
dnn_backend_native_layer_dense.h
LEAKY_RELU
@ LEAKY_RELU
Definition: dnn_backend_native.h:55
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
channel
channel
Definition: ebur128.h:39
DenseParams::has_bias
int32_t has_bias
Definition: dnn_backend_native_layer_dense.h:29