FFmpeg
h264qpel.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "config.h"
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/intreadwrite.h"
26 #include "libavutil/ppc/cpu.h"
28 
29 #include "libavcodec/h264qpel.h"
30 
31 #include "hpeldsp_altivec.h"
32 
33 #if HAVE_ALTIVEC
34 
35 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
36 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
37 
38 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
39 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
40 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
41 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
42 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
43 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
44 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
45 #include "h264qpel_template.c"
46 #undef OP_U8_ALTIVEC
47 #undef PREFIX_h264_qpel16_h_lowpass_altivec
48 #undef PREFIX_h264_qpel16_h_lowpass_num
49 #undef PREFIX_h264_qpel16_v_lowpass_altivec
50 #undef PREFIX_h264_qpel16_v_lowpass_num
51 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
52 #undef PREFIX_h264_qpel16_hv_lowpass_num
53 
54 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
55 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
56 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
57 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
58 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
59 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
60 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
61 #include "h264qpel_template.c"
62 #undef OP_U8_ALTIVEC
63 #undef PREFIX_h264_qpel16_h_lowpass_altivec
64 #undef PREFIX_h264_qpel16_h_lowpass_num
65 #undef PREFIX_h264_qpel16_v_lowpass_altivec
66 #undef PREFIX_h264_qpel16_v_lowpass_num
67 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
68 #undef PREFIX_h264_qpel16_hv_lowpass_num
69 
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Emits the sixteen static functions
 *     <OPNAME>h264_qpel<SIZE>_mcXY_<CODETYPE>(dst, src, stride)
 * for X, Y in 0..3.  Every variant is assembled from the same primitives:
 *   - ff_<OPNAME>pixels<SIZE>_<CODETYPE>()       block copy/average (mc00),
 *   - <put_|OPNAME>h264_qpel<SIZE>_{h,v,hv}_lowpass_<CODETYPE>()
 *                                                 the lowpass filters,
 *   - <OPNAME>pixels<SIZE>_l2_<CODETYPE>()       average of two sources,
 *                                                 written with OPNAME.
 * Intermediate planes are always produced with the put_ filters into
 * 16-byte aligned SIZE x SIZE scratch buffers (row pitch SIZE); only the
 * last step uses OPNAME.  The hv filter additionally needs an int16_t work
 * area of SIZE * (SIZE + 8) elements.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
/* mc00: forwarded to the plain pixel routine */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE); \
} \
\
/* mc10 / mc30: h-filtered plane averaged with src resp. src + 1 */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, filt)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(filt, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, filt, stride, stride, SIZE); \
} \
\
/* mc20: h filter written straight to dst */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, filt)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(filt, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src + 1, filt, stride, stride, SIZE); \
} \
\
/* mc01 / mc03: v-filtered plane averaged with src resp. src + stride */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, filt)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(filt, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, filt, stride, stride, SIZE); \
} \
\
/* mc02: v filter written straight to dst */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, filt)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(filt, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src + stride, filt, stride, stride, SIZE); \
} \
\
/* mc11 / mc31 / mc13 / mc33: average of an h plane and a v plane; the */ \
/* variants differ only in the source offsets fed to the two filters   */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ver, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src + 1, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ver, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ver, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src + 1, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ver, stride, SIZE, SIZE); \
} \
\
/* mc22: hv filter written straight to dst */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, int16_t, work)[SIZE * (SIZE + 8)]; \
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, work, src, stride, SIZE, stride); \
} \
\
/* mc21 / mc23: average of an h plane (from src resp. src + stride) */ \
/* and the hv plane                                                  */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ctr)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, int16_t, work)[SIZE * (SIZE + 8)]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(ctr, work, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ctr, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, hor)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ctr)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, int16_t, work)[SIZE * (SIZE + 8)]; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hor, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(ctr, work, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hor, ctr, stride, SIZE, SIZE); \
} \
\
/* mc12 / mc32: average of a v plane (from src resp. src + 1) */ \
/* and the hv plane                                            */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ctr)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, int16_t, work)[SIZE * (SIZE + 8)]; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(ctr, work, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, ver, ctr, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) \
{ \
    DECLARE_ALIGNED(16, uint8_t, ver)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, uint8_t, ctr)[SIZE * SIZE]; \
    DECLARE_ALIGNED(16, int16_t, work)[SIZE * (SIZE + 8)]; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(ver, src + 1, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(ctr, work, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, ver, ctr, stride, SIZE, SIZE); \
}
195 
/*
 * put_unligned_store(s, dest): store the 16-byte vector s at the possibly
 * unaligned address dest.
 *
 * Big-endian: vec_st only writes aligned vectors, so the two aligned
 * vectors touched by dest[0..15] are loaded, the bytes outside that window
 * are kept in `edges`, s is merged in, and both vectors are written back.
 * The expansion uses the caller's vec_u8 locals tmp1, tmp2, mask, edges and
 * align as scratch; they must be declared at the call site.
 *
 * Little-endian: a single vec_vsx_st.
 *
 * Both variants expand to exactly one statement without a trailing
 * semicolon, so the macro can be used safely in unbraced if/else bodies.
 */
#if HAVE_BIGENDIAN
#define put_unligned_store(s, dest) do {          \
    tmp1  = vec_ld(0, (dest));                    \
    mask  = vec_lvsl(0, (dest));                  \
    tmp2  = vec_ld(15, (dest));                   \
    edges = vec_perm(tmp2, tmp1, mask);           \
    align = vec_lvsr(0, (dest));                  \
    tmp2  = vec_perm((s), edges, align);          \
    tmp1  = vec_perm(edges, (s), align);          \
    vec_st(tmp2, 15, (dest));                     \
    vec_st(tmp1, 0 , (dest));                     \
} while (0)
#else
#define put_unligned_store(s, dest) vec_vsx_st((s), 0, (dest))
#endif /* HAVE_BIGENDIAN */
211 
212 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
213  const uint8_t * src2, int dst_stride,
214  int src_stride1, int h)
215 {
216  int i;
217  vec_u8 a, b, d, mask_;
218 #if HAVE_BIGENDIAN
219  vec_u8 tmp1, tmp2, mask, edges, align;
220  mask_ = vec_lvsl(0, src2);
221 #endif
222 
223  for (i = 0; i < h; i++) {
224  a = unaligned_load(i * src_stride1, src1);
225  b = load_with_perm_vec(i * 16, src2, mask_);
226  d = vec_avg(a, b);
227  put_unligned_store(d, dst);
228  dst += dst_stride;
229  }
230 }
231 
/*
 * avg_unligned_store(s, dest): replace the 16 bytes at the possibly
 * unaligned address dest with vec_avg(current contents, s).
 *
 * Big-endian: the two aligned vectors touched by dest[0..15] are loaded,
 * the current 16 bytes are extracted and averaged with s, and the result is
 * merged back between the untouched edge bytes before both vectors are
 * stored.  Uses the caller's vec_u8 locals tmp1, tmp2, mask, edges, align.
 *
 * Little-endian: unaligned VSX load, average, unaligned VSX store.
 *
 * In both variants the averaged vector is left in the caller's variable
 * `a`, which must therefore be an assignable vec_u8 at the call site.
 * Both variants operate on the `dest` argument and expand to a single
 * statement without a trailing semicolon.
 */
#if HAVE_BIGENDIAN
#define avg_unligned_store(s, dest) do {                \
    tmp1  = vec_ld(0, (dest));                          \
    mask  = vec_lvsl(0, (dest));                        \
    tmp2  = vec_ld(15, (dest));                         \
    a     = vec_avg(vec_perm(tmp1, tmp2, mask), (s));   \
    edges = vec_perm(tmp2, tmp1, mask);                 \
    align = vec_lvsr(0, (dest));                        \
    tmp2  = vec_perm(a, edges, align);                  \
    tmp1  = vec_perm(edges, a, align);                  \
    vec_st(tmp2, 15, (dest));                           \
    vec_st(tmp1, 0 , (dest));                           \
} while (0)
#else
#define avg_unligned_store(s, dest) do {                \
    a = vec_avg(vec_vsx_ld(0, (dest)), (s));            \
    vec_vsx_st(a, 0, (dest));                           \
} while (0)
#endif /* HAVE_BIGENDIAN */
251 
252 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
253  const uint8_t * src2, int dst_stride,
254  int src_stride1, int h)
255 {
256  int i;
257  vec_u8 a, b, d, mask_;
258 
259 #if HAVE_BIGENDIAN
260  vec_u8 tmp1, tmp2, mask, edges, align;
261  mask_ = vec_lvsl(0, src2);
262 #endif
263 
264  for (i = 0; i < h; i++) {
265  a = unaligned_load(i * src_stride1, src1);
266  b = load_with_perm_vec(i * 16, src2, mask_);
267  d = vec_avg(a, b);
268  avg_unligned_store(d, dst);
269  dst += dst_stride;
270  }
271 }
272 
273 /* Implemented but could be faster
274 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
275 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
276  */
277 
/* Expand H264_MC twice to define the static put_h264_qpel16_mcXY_altivec and
 * avg_h264_qpel16_mcXY_altivec functions (X, Y in 0..3) for 16x16 blocks. */
278 H264_MC(put_, 16, altivec)
279 H264_MC(avg_, 16, altivec)
280 #endif /* HAVE_ALTIVEC */
281 
283 {
284 #if HAVE_ALTIVEC
285  const int high_bit_depth = bit_depth > 8;
286 
288  return;
289 
290  if (!high_bit_depth) {
291 #define dspfunc(PFX, IDX, NUM) \
292  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
293  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
294  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
295  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
296  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
297  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
298  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
299  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
300  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
301  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
302  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
303  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
304  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
305  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
306  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
307  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
308 
309  dspfunc(put_h264_qpel, 0, 16);
310  dspfunc(avg_h264_qpel, 0, 16);
311 #undef dspfunc
312  }
313 #endif /* HAVE_ALTIVEC */
314 }
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:254
b
#define b
Definition: input.c:41
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
h264qpel.h
av_cold
#define av_cold
Definition: attributes.h:90
mask
static const uint16_t mask[17]
Definition: lzw.c:38
ff_h264qpel_init_ppc
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
Definition: h264qpel.c:282
intreadwrite.h
H264_MC
#define H264_MC(OPNAME, SIZE)
Definition: h264qpel_template.c:380
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
cpu.h
a
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a
Definition: undefined.txt:41
vec_u8
#define vec_u8
Definition: util_altivec.h:34
attributes.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
dspfunc
#define dspfunc(PFX, IDX, NUM)
uint8_t
uint8_t
Definition: audio_convert.c:194
H264QpelContext
Definition: h264qpel.h:27
src1
#define src1
Definition: h264qpel.c:48
hpeldsp_altivec.h
util_altivec.h
cpu.h
h
h
Definition: vp9dsp_template.c:2038