FFmpeg
rv40dsp_init_arm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/attributes.h"
24 #include "libavcodec/avcodec.h"
25 #include "libavcodec/rv34dsp.h"
26 #include "libavutil/arm/cpu.h"
27 
/*
 * Prototype generators for the NEON quarter-pel motion compensation
 * routines, which are defined outside this file.
 *
 * DECL_QPEL3(type, w, pos) declares one function named
 *     ff_<type>_rv40_qpel<w>_mc<pos>_neon(uint8_t *dst, const uint8_t *src,
 *                                         ptrdiff_t stride)
 * DECL_QPEL2 adds the put/avg pair, DECL_QPEL_XY adds the 16 and 8 widths,
 * and DECL_QPEL_Y(y) walks x = 0..3 for a fixed y.
 *
 * None of the macros ends in a semicolon: the user supplies it.  This keeps
 * `DECL_QPEL_Y(n);` from expanding to `...;;`, an empty declaration at file
 * scope that ISO C does not allow, and avoids a dangling line continuation
 * after the last macro line.
 */
#define DECL_QPEL3(type, w, pos)                                              \
void ff_ ## type ## _rv40_qpel ## w ## _mc ## pos ## _neon(uint8_t *dst,      \
                                                           const uint8_t *src,\
                                                           ptrdiff_t stride)

#define DECL_QPEL2(w, pos)                                                    \
    DECL_QPEL3(put, w, pos);                                                  \
    DECL_QPEL3(avg, w, pos)

#define DECL_QPEL_XY(x, y)                                                    \
    DECL_QPEL2(16, x ## y);                                                   \
    DECL_QPEL2(8,  x ## y)

#define DECL_QPEL_Y(y)                                                        \
    DECL_QPEL_XY(0, y);                                                       \
    DECL_QPEL_XY(1, y);                                                       \
    DECL_QPEL_XY(2, y);                                                       \
    DECL_QPEL_XY(3, y)

/* 4 invocations x 4 x-positions x 2 widths x {put, avg} = 64 prototypes. */
DECL_QPEL_Y(0);
DECL_QPEL_Y(1);
DECL_QPEL_Y(2);
DECL_QPEL_Y(3);
51 
/*
 * NEON chroma motion compensation routines (8- and 4-wide, put and avg).
 * NOTE(review): presumably installed into put_chroma_pixels_tab /
 * avg_chroma_pixels_tab of RV34DSPContext; the assigning statements are
 * missing from this listing, so confirm the parameter meaning against the
 * h264_chroma_mc_func typedef.
 */
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);

void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);

/*
 * NEON weighting routines for 16- and 8-wide blocks.
 * NOTE(review): presumably installed into rv40_weight_pixels_tab; the
 * assigning statements are missing from this listing - confirm.
 */
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
60 
/*
 * NEON loop-filter strength routines, horizontal (h) and vertical (v).
 * The first line of each declaration was dropped from the listing; the
 * return type, name and leading parameters are restored from the complete
 * signatures given in the cross-reference section of this listing.
 */
int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
                                        int beta, int beta2, int edge,
                                        int *p1, int *q1);
int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
                                        int beta, int beta2, int edge,
                                        int *p1, int *q1);
67 
/*
 * NEON weak loop filters, horizontal (h) and vertical (v).
 * Both take the same nine parameters and return nothing.
 */
void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
                                     int filter_q1, int alpha, int beta,
                                     int lim_p0q0, int lim_q1, int lim_p1);
void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
                                     int filter_q1, int alpha, int beta,
                                     int lim_p0q0, int lim_q1, int lim_p1);
74 
/*
 * Body of the NEON table initialiser.
 *
 * NOTE(review): the function header (listing line 75) is missing here.  The
 * cross-reference section of this listing gives it as
 *     static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
 *
 * The visible statements store NEON quarter-pel functions into
 * c->put_pixels_tab and c->avg_pixels_tab.  Row [0] receives the qpel16
 * functions and row [1] the qpel8 functions.  A function named mc<x><y> is
 * stored at column x + 4 * y.  Columns 0, 2 and 8 are not written by the
 * statements visible here.
 */
76 {
/* put, 16-wide */
77  c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
78  c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon;
79  c->put_pixels_tab[0][ 4] = ff_put_rv40_qpel16_mc01_neon;
80  c->put_pixels_tab[0][ 5] = ff_put_rv40_qpel16_mc11_neon;
81  c->put_pixels_tab[0][ 6] = ff_put_rv40_qpel16_mc21_neon;
82  c->put_pixels_tab[0][ 7] = ff_put_rv40_qpel16_mc31_neon;
83  c->put_pixels_tab[0][ 9] = ff_put_rv40_qpel16_mc12_neon;
84  c->put_pixels_tab[0][10] = ff_put_rv40_qpel16_mc22_neon;
85  c->put_pixels_tab[0][11] = ff_put_rv40_qpel16_mc32_neon;
86  c->put_pixels_tab[0][12] = ff_put_rv40_qpel16_mc03_neon;
87  c->put_pixels_tab[0][13] = ff_put_rv40_qpel16_mc13_neon;
88  c->put_pixels_tab[0][14] = ff_put_rv40_qpel16_mc23_neon;
89  c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_neon;
/* avg, 16-wide */
90  c->avg_pixels_tab[0][ 1] = ff_avg_rv40_qpel16_mc10_neon;
91  c->avg_pixels_tab[0][ 3] = ff_avg_rv40_qpel16_mc30_neon;
92  c->avg_pixels_tab[0][ 4] = ff_avg_rv40_qpel16_mc01_neon;
93  c->avg_pixels_tab[0][ 5] = ff_avg_rv40_qpel16_mc11_neon;
94  c->avg_pixels_tab[0][ 6] = ff_avg_rv40_qpel16_mc21_neon;
95  c->avg_pixels_tab[0][ 7] = ff_avg_rv40_qpel16_mc31_neon;
96  c->avg_pixels_tab[0][ 9] = ff_avg_rv40_qpel16_mc12_neon;
97  c->avg_pixels_tab[0][10] = ff_avg_rv40_qpel16_mc22_neon;
98  c->avg_pixels_tab[0][11] = ff_avg_rv40_qpel16_mc32_neon;
99  c->avg_pixels_tab[0][12] = ff_avg_rv40_qpel16_mc03_neon;
100  c->avg_pixels_tab[0][13] = ff_avg_rv40_qpel16_mc13_neon;
101  c->avg_pixels_tab[0][14] = ff_avg_rv40_qpel16_mc23_neon;
102  c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_neon;
/* put, 8-wide */
103  c->put_pixels_tab[1][ 1] = ff_put_rv40_qpel8_mc10_neon;
104  c->put_pixels_tab[1][ 3] = ff_put_rv40_qpel8_mc30_neon;
105  c->put_pixels_tab[1][ 4] = ff_put_rv40_qpel8_mc01_neon;
106  c->put_pixels_tab[1][ 5] = ff_put_rv40_qpel8_mc11_neon;
107  c->put_pixels_tab[1][ 6] = ff_put_rv40_qpel8_mc21_neon;
108  c->put_pixels_tab[1][ 7] = ff_put_rv40_qpel8_mc31_neon;
109  c->put_pixels_tab[1][ 9] = ff_put_rv40_qpel8_mc12_neon;
110  c->put_pixels_tab[1][10] = ff_put_rv40_qpel8_mc22_neon;
111  c->put_pixels_tab[1][11] = ff_put_rv40_qpel8_mc32_neon;
112  c->put_pixels_tab[1][12] = ff_put_rv40_qpel8_mc03_neon;
113  c->put_pixels_tab[1][13] = ff_put_rv40_qpel8_mc13_neon;
114  c->put_pixels_tab[1][14] = ff_put_rv40_qpel8_mc23_neon;
115  c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_neon;
/* avg, 8-wide */
116  c->avg_pixels_tab[1][ 1] = ff_avg_rv40_qpel8_mc10_neon;
117  c->avg_pixels_tab[1][ 3] = ff_avg_rv40_qpel8_mc30_neon;
118  c->avg_pixels_tab[1][ 4] = ff_avg_rv40_qpel8_mc01_neon;
119  c->avg_pixels_tab[1][ 5] = ff_avg_rv40_qpel8_mc11_neon;
120  c->avg_pixels_tab[1][ 6] = ff_avg_rv40_qpel8_mc21_neon;
121  c->avg_pixels_tab[1][ 7] = ff_avg_rv40_qpel8_mc31_neon;
122  c->avg_pixels_tab[1][ 9] = ff_avg_rv40_qpel8_mc12_neon;
123  c->avg_pixels_tab[1][10] = ff_avg_rv40_qpel8_mc22_neon;
124  c->avg_pixels_tab[1][11] = ff_avg_rv40_qpel8_mc32_neon;
125  c->avg_pixels_tab[1][12] = ff_avg_rv40_qpel8_mc03_neon;
126  c->avg_pixels_tab[1][13] = ff_avg_rv40_qpel8_mc13_neon;
127  c->avg_pixels_tab[1][14] = ff_avg_rv40_qpel8_mc23_neon;
128  c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_neon;
129 
/*
 * NOTE(review): listing lines 130-133 are missing.  Presumably they assign
 * the four ff_{put,avg}_rv40_chroma_mc{8,4}_neon functions to
 * put_chroma_pixels_tab / avg_chroma_pixels_tab - restore from upstream.
 */
134 
/*
 * NOTE(review): listing lines 135-136 are missing.  Presumably they assign
 * ff_rv40_weight_func_{16,8}_neon into rv40_weight_pixels_tab; the indices
 * cannot be determined from this listing - restore from upstream.
 */
137 
/*
 * NOTE(review): listing lines 138-141 are missing.  Presumably they assign
 * the h/v strength and weak loop-filter functions to
 * rv40_loop_filter_strength[] and rv40_weak_loop_filter[] - restore from
 * upstream.
 */
142 }
143 
145 {
146  int cpu_flags = av_get_cpu_flags();
147 
148  if (have_neon(cpu_flags))
150 }
qpel_mc_func put_pixels_tab[4][16]
Definition: rv34dsp.h:58
void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1, int filter_q1, int alpha, int beta, int lim_p0q0, int lim_q1, int lim_p1)
rv40_loop_filter_strength_func rv40_loop_filter_strength[2]
Definition: rv34dsp.h:74
av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1, int filter_q1, int alpha, int beta, int lim_p0q0, int lim_q1, int lim_p1)
static atomic_int cpu_flags
Definition: cpu.c:50
static const uint8_t q1[256]
Definition: twofish.c:96
#define src
Definition: vp8dsp.c:254
Macro definitions for various function/variable attributes.
uint8_t
#define av_cold
Definition: attributes.h:82
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ... c
Definition: undefined.txt:32
rv40_weak_loop_filter_func rv40_weak_loop_filter[2]
Definition: rv34dsp.h:72
rv40_weight_func rv40_weight_pixels_tab[2][2]
Biweight functions, first dimension is transform size (16/8), second is whether the weight is prescal...
Definition: rv34dsp.h:67
static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
qpel_mc_func avg_pixels_tab[4][16]
Definition: rv34dsp.h:59
RV30/40 decoder motion compensation functions.
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int)
void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int)
#define have_neon(flags)
Definition: cpu.h:26
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int)
Libavcodec external API header.
static const int16_t alpha[]
Definition: ilbcdata.h:55
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t)
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t)
#define DECL_QPEL_Y(y)
int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
h264_chroma_mc_func avg_chroma_pixels_tab[3]
Definition: rv34dsp.h:61
h264_chroma_mc_func put_chroma_pixels_tab[3]
Definition: rv34dsp.h:60
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int)