26 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x))) 48 conv_params =
av_malloc(
sizeof(*conv_params));
63 dnn_size += kernel_size * 4;
67 if (dnn_size > file_size || conv_params->
input_num <= 0 ||
74 if (!conv_params->
kernel) {
78 for (
int i = 0;
i < kernel_size; ++
i) {
95 layer->
params = conv_params;
115 int height = operands[input_operand_index].
dims[1];
116 int width = operands[input_operand_index].
dims[2];
117 int channel = operands[input_operand_index].
dims[3];
118 const float *
input = operands[input_operand_index].
data;
122 int src_linesize = width * conv_params->
input_num;
124 int filter_size = conv_params->
kernel_size * filter_linesize;
132 for (
int y = thread_param->
thread_start; y < thread_param->thread_end; ++y) {
133 for (
int x = pad_size; x < width - pad_size; ++x) {
134 for (
int n_filter = 0; n_filter < conv_params->
output_num; ++n_filter) {
136 output[n_filter] = conv_params->
biases[n_filter];
138 output[n_filter] = 0.f;
140 for (
int ch = 0; ch < conv_params->
input_num; ++ch) {
141 for (
int kernel_y = 0; kernel_y < conv_params->
kernel_size; ++kernel_y) {
142 for (
int kernel_x = 0; kernel_x < conv_params->
kernel_size; ++kernel_x) {
147 input_pel = input[y_pos * src_linesize + x_pos * conv_params->
input_num + ch];
149 int y_pos = y + (kernel_y - radius) * conv_params->
dilation;
150 int x_pos = x + (kernel_x - radius) * conv_params->
dilation;
151 input_pel = (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >=
height) ? 0.0 :
152 input[y_pos * src_linesize + x_pos * conv_params->
input_num + ch];
156 output[n_filter] += input_pel * conv_params->
kernel[n_filter * filter_size + kernel_y * filter_linesize +
163 output[n_filter] =
FFMAX(output[n_filter], 0.0);
166 output[n_filter] = 2.0f / (1.0f +
exp(-2.0
f * output[n_filter])) - 1.0
f;
169 output[n_filter] = 1.0f / (1.0f +
exp(-output[n_filter]));
174 output[n_filter] =
FFMAX(output[n_filter], 0.0) + 0.2 *
FFMIN(output[n_filter], 0.0);
189 #if HAVE_PTHREAD_CANCEL 196 int height = operands[input_operand_indexes[0]].
dims[1];
197 int width = operands[input_operand_indexes[0]].
dims[2];
201 output_operand->
dims[0] = operands[input_operand_indexes[0]].
dims[0];
202 output_operand->
dims[1] = height - pad_size * 2;
203 output_operand->
dims[2] = width - pad_size * 2;
207 if (output_operand->
length <= 0) {
212 if (!output_operand->
data) {
221 thread_common_param.
ctx =
ctx;
223 #if HAVE_PTHREAD_CANCEL 224 thread_stride = (height - pad_size * 2) / thread_num;
226 for (
int i = 0;
i < thread_num;
i++){
227 thread_param[
i] =
av_malloc(
sizeof(*thread_param[0]));
230 thread_param[
i]->
thread_end = (
i == thread_num - 1) ? (height - pad_size) : (thread_param[
i]->
thread_start + thread_stride);
235 for (
int i = 0;
i < thread_num;
i++){
242 for (
int i = 0;
i < thread_num;
i++){
246 thread_param[0] =
av_malloc(
sizeof(*thread_param[0]));
249 thread_param[0]->
thread_end = height - pad_size;
void * av_realloc(void *ptr, size_t size)
Allocate, reallocate, or free a block of memory.
static av_always_inline float av_int2float(uint32_t i)
Reinterpret a 32-bit integer as a float.
DNNActivationFunc activation
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
DNNPaddingParam padding_method
#define av_assert0(cond)
assert() equivalent, that is always enabled.
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now...
int ff_dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
unsigned int avio_rl32(AVIOContext *s)
void * data
data pointer with data length in bytes.
ThreadCommonParam * thread_common_param
simple assert() macros that are a bit more flexible than ISO C assert().
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
static av_always_inline int pthread_join(pthread_t thread, void **value_ptr)
int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const void *parameters, NativeContext *ctx)
static av_always_inline int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
#define CLAMP_TO_EDGE(x, w)
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
const int32_t * input_operand_indexes
static void * dnn_execute_layer_conv2d_thread(void *threadarg)
channel
Use these values when setting the channel map with ebur128_set_channel().
#define av_malloc_array(a, b)
int32_t output_operand_index
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
int32_t output_operand_index