Go to the documentation of this file.
/*
 * Output operations selectable through OP_U8_ALTIVEC.
 *   PUT: d receives s unchanged; the dst argument is ignored.
 *   AVG: d receives vec_avg(dst, s).
 */
36 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
37 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
/*
 * "put" instantiation of h264qpel_template.c.
 * OP_U8_ALTIVEC is bound to the PUT operation and every PREFIX_* name is
 * mapped to its put_* / altivec_put_* counterpart before the template is
 * included; the PREFIX_* names are undefined again afterwards so they can be
 * redefined for another instantiation.
 * NOTE(review): presumably the template defines its functions through these
 * PREFIX_* names -- the template itself is not visible here.
 */
39 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
40 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
41 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
42 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
43 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
44 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
45 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
46 #include "h264qpel_template.c"
48 #undef PREFIX_h264_qpel16_h_lowpass_altivec
49 #undef PREFIX_h264_qpel16_h_lowpass_num
50 #undef PREFIX_h264_qpel16_v_lowpass_altivec
51 #undef PREFIX_h264_qpel16_v_lowpass_num
52 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
53 #undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * "avg" instantiation of h264qpel_template.c.
 * Same pattern as the put instantiation, but OP_U8_ALTIVEC is bound to the
 * AVG operation and the PREFIX_* names map to avg_* / altivec_avg_* names.
 * The PREFIX_* names are undefined again after the include.
 */
55 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
56 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
57 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
58 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
59 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
60 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
61 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
62 #include "h264qpel_template.c"
64 #undef PREFIX_h264_qpel16_h_lowpass_altivec
65 #undef PREFIX_h264_qpel16_h_lowpass_num
66 #undef PREFIX_h264_qpel16_v_lowpass_altivec
67 #undef PREFIX_h264_qpel16_v_lowpass_num
68 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
69 #undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *   <OPNAME>h264_qpel<SIZE>_mcXY_<CODETYPE>   (X, Y in 0..3),
 * each taking (uint8_t *dst, const uint8_t *src, ptrdiff_t stride).
 *
 *  - mc00 forwards to ff_<OPNAME>pixels<SIZE>_<CODETYPE>(dst, src, stride, SIZE).
 *  - mc20 / mc02 / mc22 call the <OPNAME>-prefixed h / v / hv lowpass function
 *    with dst as the output (mc22 passes an int16_t scratch buffer of
 *    SIZE*(SIZE+8) elements).
 *  - Every other position first fills one or two 16-byte-aligned SIZE*SIZE
 *    intermediate buffers with the put_ lowpass functions (or uses src itself
 *    as one input), then combines the two inputs into dst with
 *    <OPNAME>pixels<SIZE>_l2_<CODETYPE>.
 *
 * The src offsets (src+1, src+stride) passed to the filters differ per
 * position and are significant.
 * NOTE(review): the brace-only lines of each generated function do not appear
 * in this listing (the embedded source line numbers skip them).
 */
71 #define H264_MC(OPNAME, SIZE, CODETYPE) \
72 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
74 ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
77 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
79 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
80 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
81 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
84 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
86 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
89 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
91 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
92 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
93 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
96 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
98 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
105 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
108 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
110 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
111 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
112 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
115 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
117 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
118 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
119 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
120 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
121 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
124 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
126 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
127 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
128 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
129 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
130 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
133 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
135 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
136 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
137 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
138 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
139 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
142 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
144 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
145 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
146 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
147 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
148 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
151 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
153 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
154 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
157 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
159 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
160 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
161 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
162 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
163 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
164 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
167 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
169 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
170 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
171 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
172 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
177 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
179 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
180 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
181 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
182 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
183 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
184 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
187 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
189 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
190 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
191 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
192 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
193 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
194 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
/*
 * put_unligned_store(s, dest) -- store the 16-byte vector s at dest.
 *
 * This variant loads the vectors at byte offsets 0 and 15 from dest, merges
 * s with those existing bytes through permutes driven by vec_lvsl / vec_lvsr
 * of dest, and writes both vectors back with vec_st.
 * It assigns to tmp1, tmp2, mask, edges and align, none of which the macro
 * declares: they must exist in the scope where the macro is expanded.
 */
198 #define put_unligned_store(s, dest) { \
199 tmp1 = vec_ld(0, dest); \
200 mask = vec_lvsl(0, dest); \
201 tmp2 = vec_ld(15, dest); \
202 edges = vec_perm(tmp2, tmp1, mask); \
203 align = vec_lvsr(0, dest); \
204 tmp2 = vec_perm(s, edges, align); \
205 tmp1 = vec_perm(edges, s, align); \
206 vec_st(tmp2, 15, dest); \
207 vec_st(tmp1, 0 , dest); \
/*
 * Alternative definition: a single vec_vsx_st of s at offset 0 from dest.
 * NOTE(review): presumably a preprocessor conditional that is not visible in
 * this listing selects between the two definitions -- confirm in the file.
 */
210 #define put_unligned_store(s, dest) vec_vsx_st(s, 0, dest);
/*
 * put_pixels16_l2_altivec -- combine two sources row by row into dst.
 *
 * For each of the h rows, one vector is read from src1 at byte offset
 * i * src_stride1 and one from src2 at byte offset i * 16, and a vector d is
 * written to dst through put_unligned_store.
 * NOTE(review): the local declarations, the statement that computes d from
 * a and b, and whatever advances dst by dst_stride are not visible in this
 * listing -- confirm against the full file.
 */
213 static inline void put_pixels16_l2_altivec( uint8_t *
dst,
const uint8_t *
src1,
214 const uint8_t *
src2,
int dst_stride,
215 int src_stride1,
int h)
/* Permute mask for src2, computed once before the loop and reused per row. */
221 mask_ = vec_lvsl(0,
src2);
224 for (
i = 0;
i <
h;
i++) {
225 a = unaligned_load(
i * src_stride1,
src1);
226 b = load_with_perm_vec(
i * 16,
src2, mask_);
228 put_unligned_store(d,
dst);
/*
 * avg_unligned_store(s, dest) -- average s with the 16 bytes currently at
 * dest and store the result back at dest.
 *
 * This variant loads the vectors at byte offsets 0 and 15 from dest,
 * computes a = vec_avg(<permuted current contents>, s), then merges a with
 * the existing bytes via vec_lvsl / vec_lvsr permutes and writes both vectors
 * back with vec_st.
 * It assigns to tmp1, tmp2, mask, edges, align and a, none of which the macro
 * declares: they must exist in the scope where the macro is expanded, and any
 * previous value of a is overwritten.
 */
234 #define avg_unligned_store(s, dest){ \
235 tmp1 = vec_ld(0, dest); \
236 mask = vec_lvsl(0, dest); \
237 tmp2 = vec_ld(15, dest); \
238 a = vec_avg(vec_perm(tmp1, tmp2, mask), s); \
239 edges = vec_perm(tmp2, tmp1, mask); \
240 align = vec_lvsr(0, dest); \
241 tmp2 = vec_perm(a, edges, align); \
242 tmp1 = vec_perm(edges, a, align); \
243 vec_st(tmp2, 15, dest); \
244 vec_st(tmp1, 0 , dest); \
/*
 * avg_unligned_store(s, dest), vec_vsx_ld / vec_vsx_st variant.
 *
 * Loads the 16 bytes at dest, averages them with s (vec_avg) into the
 * caller-scope variable a, and stores a back at dest.
 * The body refers to the macro parameter dest for both the load and the
 * store, so the macro works for any destination expression rather than only
 * when the call site's argument happens to be named dst.
 * a is not declared by the macro; it must exist where the macro is expanded
 * and its previous value is overwritten.
 */
247 #define avg_unligned_store(s, dest){ \
248 a = vec_avg(vec_vsx_ld(0, dest), s); \
249 vec_vsx_st(a, 0, dest); \
/*
 * avg_pixels16_l2_altivec -- like put_pixels16_l2_altivec, but the result is
 * written through avg_unligned_store instead of put_unligned_store.
 *
 * For each of the h rows, one vector is read from src1 at byte offset
 * i * src_stride1 and one from src2 at byte offset i * 16, and a vector d is
 * passed to avg_unligned_store together with dst.
 * NOTE(review): the local declarations, the statement that computes d from
 * a and b, and whatever advances dst by dst_stride are not visible in this
 * listing -- confirm against the full file.
 */
253 static inline void avg_pixels16_l2_altivec( uint8_t *
dst,
const uint8_t *
src1,
254 const uint8_t *
src2,
int dst_stride,
255 int src_stride1,
int h)
/* Permute mask for src2, computed once before the loop and reused per row. */
262 mask_ = vec_lvsl(0,
src2);
265 for (
i = 0;
i <
h;
i++) {
266 a = unaligned_load(
i * src_stride1,
src1);
267 b = load_with_perm_vec(
i * 16,
src2, mask_);
269 avg_unligned_store(d,
dst);
/* True when bit_depth exceeds 8; the macro below is defined inside the
 * branch taken only when this is false. */
286 const int high_bit_depth =
bit_depth > 8;
291 if (!high_bit_depth) {
/*
 * dspfunc(PFX, IDX, NUM) -- fill the 16 entries of c->PFX_pixels_tab[IDX]
 * with the functions PFX<NUM>_mcXY_altivec.
 * Entry index is X + 4*Y for position mcXY (e.g. [1] = mc10, [4] = mc01,
 * [15] = mc33). The expansion has no trailing semicolon; the user supplies it.
 */
292 #define dspfunc(PFX, IDX, NUM) \
293 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
294 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
295 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
296 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
297 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
298 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
299 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
300 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
301 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
302 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
303 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
304 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
305 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
306 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
307 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
308 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
#define H264_MC(OPNAME, SIZE)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[are about signed overflows ...] (text truncated in the source)
#define PPC_ALTIVEC(flags)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: "SUINT" is the C "unsigned" type, but it holds a signed "int". To use the same example: SUINT a
#define i(width, name, range_min, range_max)
#define dspfunc(PFX, IDX, NUM)