FFmpeg
h264_qpel.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
3  * Copyright (c) 2011 Daniel Kang
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavutil/cpu.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/h264qpel.h"
28 #include "libavcodec/pixels.h"
29 #include "fpel.h"
30 
31 #if HAVE_X86ASM
/* Prototypes of the two-source ("l2") put/avg pixel routines for blocks that
 * are 4, 8 and 16 pixels wide. Only declarations appear here; the definitions
 * live outside this file (the whole section is guarded by HAVE_X86ASM).
 * NOTE(review): presumably each output pixel combines src1 and src2; only
 * dstStride and src1Stride are passed, so the src2 pitch looks implied by the
 * block width -- confirm against the implementation. */
32 void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
33  int dstStride, int src1Stride, int h);
34 void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
35  int dstStride, int src1Stride, int h);
36 void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
37  int dstStride, int src1Stride, int h);
38 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
39  int dstStride, int src1Stride, int h);
40 void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
41  int dstStride, int src1Stride, int h);
42 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
43  int dstStride, int src1Stride, int h);
/* Name aliases: the macros below build function names by pasting a CPU suffix,
 * so every suffix/name combination they can produce must resolve to a real
 * routine. The sse2 l2 names reuse the mmxext functions and the mmxext
 * put_pixels names reuse the mmx ones. */
44 #define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext
45 #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext
46 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
47 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
48 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
49 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
50 #define ff_put_pixels4_mmxext ff_put_pixels4_mmx
51 
/* DEF_QPEL(OPNAME) declares the external low-pass primitives that the inline
 * wrappers in this file are built from, for one operation name. It is
 * instantiated below for "avg" and "put". The declared families are:
 *   - h_lowpass / h_lowpass_l2 (4- and 8-wide mmxext, 8-wide ssse3),
 *   - v_lowpass (4-wide mmxext, 8or16 sse2 taking an explicit height h),
 *   - the two-stage hv helpers (hv_lowpass_v/_h for 4-wide, hv1/hv2 for 8or16),
 *   - pixels{4,8}_l2_shift5, which take an int16_t source plus a uint8_t source.
 * Here OPNAME is pasted as ff_<OPNAME>_h264_..., i.e. without a trailing
 * underscore, whereas the later macros are invoked with "put_" / "avg_" and
 * paste ff_<OPNAME>h264_...; both spellings yield the same identifiers. */
52 #define DEF_QPEL(OPNAME)\
53 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
54 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
55 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
56 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
57 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
58 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
59 void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
60 void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
61 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\
62 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
63 void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
64 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\
65 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\
66 void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\
67 void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);
68 
69 DEF_QPEL(avg)
70 DEF_QPEL(put)
71 
/* QPEL_H264(OPNAME, OP, MMX) generates static always-inline wrappers named
 * ff_<OPNAME>..._<MMX> on top of the external primitives. OP is accepted but
 * never referenced in the expansion.
 *
 *  - qpel4_hv_lowpass: moves src 2 rows up and 2 columns left, runs the
 *    vertical stage on three consecutive 4-column strips into tmp, rewinds
 *    tmp by those 3*4 entries and runs the horizontal stage once into dst.
 *    tmpStride is not used.
 *  - qpel8or16_hv2_lowpass: calls the hv2 op once per 8-column strip. With
 *    w = size>>4 and a do/while(w--) loop this is one pass for size 8 and two
 *    for size 16. tmpStride is not used; 0 is passed for the op's "unused"
 *    argument and size for its h argument.
 *  - qpel16_h_lowpass and qpel16_h_lowpass_l2: a 16-wide block built from four
 *    calls to the 8-wide routine (left and right halves, then the same again
 *    8 rows further down). The _l2 variant has no source-stride parameter and
 *    advances src by 8*dstStride, so it relies on src and dst sharing a pitch.
 *  - pixels16_l2_shift5: two 8-wide calls at column offsets 0 and 8. */
72 #define QPEL_H264(OPNAME, OP, MMX)\
73 static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
74  int w=3;\
75  src -= 2*srcStride+2;\
76  while(w--){\
77  ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
78  tmp += 4;\
79  src += 4;\
80  }\
81  tmp -= 3*4;\
82  ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
83 }\
84 \
85 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
86  int w = size>>4;\
87  do{\
88  ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\
89  tmp += 8;\
90  dst += 8;\
91  }while(w--);\
92 }\
93 \
94 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
95  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
96  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
97  src += 8*srcStride;\
98  dst += 8*dstStride;\
99  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
100  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
101 }\
102 \
103 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
104  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
105  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
106  src += 8*dstStride;\
107  dst += 8*dstStride;\
108  src2 += 8*src2Stride;\
109  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
110  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
111 }\
112 \
113 static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\
114 {\
115  ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
116  ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
117 }\
118 
119 
/* QPEL_H264_H16_XMM(OPNAME, OP, MMX) provides the 16-wide h_lowpass_l2
 * routine for the XMM variants, in one of two ways:
 *  - ARCH_X86_64: the macro expands to nothing and the ssse3 put/avg versions
 *    are declared as external functions instead.
 *  - otherwise: the macro emits an inline wrapper that covers the 16-wide
 *    block with four calls to the 8-wide routine. As in QPEL_H264, src is
 *    advanced by 8*dstStride because there is no separate source stride. */
120 #if ARCH_X86_64
121 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
122 
123 void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
124 void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
125 
126 #else // ARCH_X86_64
127 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
128 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
129  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
130  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
131  src += 8*dstStride;\
132  dst += 8*dstStride;\
133  src2 += 8*src2Stride;\
134  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
135  ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
136 }
137 #endif // ARCH_X86_64
138 
/* QPEL_H264_H_XMM(OPNAME, OP, MMX) first expands QPEL_H264_H16_XMM (the
 * 16-wide h_lowpass_l2 routine, possibly empty) and then defines the 16-wide
 * plain h_lowpass wrapper as four calls to the 8-wide routine: left and right
 * halves of the top 8 rows, then of the bottom 8 rows. */
139 #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
140 QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
141 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
142  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
143  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
144  src += 8*srcStride;\
145  dst += 8*dstStride;\
146  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
147  ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
148 }\
149 
/* QPEL_H264_V_XMM(OPNAME, OP, MMX) derives fixed-size vertical low-pass
 * wrappers from the shared 8or16 routine, which takes the height as its last
 * argument: the 8-wide wrapper makes one call with height 8, the 16-wide
 * wrapper makes two calls with height 16, at column offsets 0 and 8. */
150 #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
151 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
152  ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
153 }\
154 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
155  ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
156  ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
157 }
158 
/* Runs ff_put_h264_qpel8or16_hv1_lowpass_op_sse2 over the source in strips of
 * 8 columns, writing 16-bit intermediates to tmp.
 *
 * tmp       destination of the intermediate values, advanced by 8 per strip
 * src       source block; processing starts 2 rows above and 2 columns left
 * tmpStride not used by this function
 * srcStride source pitch, forwarded to the op
 * size      forwarded to the op; also sets the strip count (size+8)>>3,
 *           i.e. 2 strips for size 8 and 3 strips for size 16
 * NOTE(review): the "hv1" name suggests this is the first stage of the
 * two-stage hv filter -- confirm against the op's implementation. */
159 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
160  const uint8_t *src,
161  int tmpStride,
162  int srcStride,
163  int size)
164 {
165  int w = (size+8)>>3;
166  src -= 2*srcStride+2;
167  while(w--){
168  ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
169  tmp += 8;
170  src += 8;
171  }
172 }
173 
/* QPEL_H264_HV_XMM(OPNAME, OP, MMX) builds the combined hv low-pass:
 *  - qpel8or16_hv_lowpass runs the first stage through
 *    put_h264_qpel8or16_hv1_lowpass_sse2 (always that helper, whatever OPNAME
 *    and MMX are) and then the OPNAME/MMX-specific hv2 stage into dst;
 *  - qpel8_hv_lowpass and qpel16_hv_lowpass fix size to 8 and 16. */
174 #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
175 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
176  put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
177  ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
178 }\
179 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
180  ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
181 }\
182 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
183  ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
184 }\
185 
/* Suffix aliases. The H264_MC_* generators paste one CPU suffix into every
 * helper name they call, so suffixes without a dedicated implementation are
 * mapped onto an existing one:
 *  - sse2 h_lowpass_l2 (8 and 16)  -> mmxext versions
 *  - ssse3 v_lowpass (8 and 16)    -> sse2 versions
 *  - sse2 8or16 hv2_lowpass        -> mmxext wrappers */
186 #define ff_put_h264_qpel8_h_lowpass_l2_sse2 ff_put_h264_qpel8_h_lowpass_l2_mmxext
187 #define ff_avg_h264_qpel8_h_lowpass_l2_sse2 ff_avg_h264_qpel8_h_lowpass_l2_mmxext
188 #define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
189 #define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext
190 
191 #define ff_put_h264_qpel8_v_lowpass_ssse3 ff_put_h264_qpel8_v_lowpass_sse2
192 #define ff_avg_h264_qpel8_v_lowpass_ssse3 ff_avg_h264_qpel8_v_lowpass_sse2
193 #define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2
194 #define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2
195 
196 #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
197 #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
198 
/* Generator bundles, usable as the QPEL argument of H264_MC:
 *  - H264_MC_C_H emits the copy (mc00) and horizontal (mc10/20/30) functions;
 *  - H264_MC_C_V_H_HV emits copy, vertical, horizontal and the remaining
 *    two-dimensional positions, i.e. the complete set of 16. */
199 #define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \
200 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
201 H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
202 
203 #define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
204 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
205 H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
206 H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
207 H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
208 
/* mc00 entry for 16-wide blocks, put variant: forwards to
 * ff_put_pixels16_sse2 with a height of 16 rows. */
209 static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
210  ptrdiff_t stride)
211 {
212  ff_put_pixels16_sse2(dst, src, stride, 16);
213 }
/* mc00 entry for 16-wide blocks, avg variant: forwards to
 * ff_avg_pixels16_sse2 with a height of 16 rows. */
214 static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
215  ptrdiff_t stride)
216 {
217  ff_avg_pixels16_sse2(dst, src, stride, 16);
218 }
/* There is no separate sse2 mc00 for 8-wide blocks; the sse2 names used by
 * H264_QPEL_FUNCS(0, 0, sse2) resolve to the mmxext functions. */
219 #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
220 #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
221 
/* H264_MC_C(OPNAME, SIZE, MMX, ALIGN) emits the mc00 function for one block
 * size: a direct call to ff_<OPNAME>pixels<SIZE>_<MMX> with SIZE rows.
 * ALIGN is not used by this generator. */
222 #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
223 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
224 {\
225  ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
226 }\
227 
/* H264_MC_H(OPNAME, SIZE, MMX, ALIGN) emits the three horizontal-only
 * positions. ALIGN is not used by this generator.
 *  - mc10: h_lowpass_l2 with src itself as the second source;
 *  - mc20: plain h_lowpass;
 *  - mc30: h_lowpass_l2 with src+1 as the second source.
 * NOTE(review): presumably the _l2 routine averages the filtered row with the
 * second source, giving the quarter-sample positions -- confirm in the
 * implementation. */
228 #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
229 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
230 {\
231  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
232 }\
233 \
234 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
235 {\
236  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
237 }\
238 \
239 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
240 {\
241  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
242 }\
243 
/* H264_MC_V(OPNAME, SIZE, MMX, ALIGN) emits the three vertical-only
 * positions.
 *  - mc01: the put v_lowpass result is written to a SIZE*SIZE scratch buffer
 *    (pitch SIZE, alignment ALIGN) and then combined with src by the
 *    OPNAME pixels<SIZE>_l2 routine;
 *  - mc02: v_lowpass straight into dst;
 *  - mc03: like mc01, but combined with src+stride (the row below).
 * The scratch pass always uses the put variant; OPNAME only selects the final
 * write into dst. */
244 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
245 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
246 {\
247  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
248  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
249  ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
250 }\
251 \
252 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
253 {\
254  ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
255 }\
256 \
257 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
258 {\
259  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
260  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
261  ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
262 }\
263 
/* H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) emits the nine positions that need
 * both directions. Scratch buffers are declared with LOCAL_ALIGNED(ALIGN, ...).
 *
 *  - mc11 / mc31 / mc13 / mc33: put v_lowpass of src (mc11, mc13) or src+1
 *    (mc31, mc33) into a SIZE*SIZE scratch buffer, then h_lowpass_l2 of src
 *    (mc11, mc31) or src+stride (mc13, mc33) with that buffer as the second
 *    source (pitch SIZE).
 *  - mc22: hv_lowpass straight into dst, using a 16-bit scratch buffer of
 *    SIZE*(SIZE<8?12:24) elements.
 *    NOTE(review): that buffer is declared uint16_t but passed to an
 *    int16_t * parameter.
 *  - mc21 / mc23: one byte buffer is split into halfHV (first SIZE*SIZE bytes,
 *    final hv result) and halfV (the 16-bit intermediates after it). The put
 *    hv_lowpass fills both; h_lowpass_l2 of src (mc21) or src+stride (mc23) is
 *    then combined with halfHV.
 *  - mc12 / mc32: same hv pass, then pixels<SIZE>_l2_shift5 combines halfV+2
 *    (mc12) or halfV+3 (mc32) with halfHV. The shift5 helper is always the
 *    mmxext one, whatever MMX is.
 * The av_assert2 lines check that the scratch buffer is 8-byte aligned. */
264 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
265 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
266 {\
267  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
268  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
269  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
270 }\
271 \
272 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
273 {\
274  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
275  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
276  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
277 }\
278 \
279 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
280 {\
281  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
282  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
283  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
284 }\
285 \
286 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
287 {\
288  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
289  ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
290  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
291 }\
292 \
293 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
294 {\
295  LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\
296  ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
297 }\
298 \
299 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
300 {\
301  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
302  uint8_t * const halfHV= temp;\
303  int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
304  av_assert2(((uintptr_t)temp & 7) == 0);\
305  ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
306  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
307 }\
308 \
309 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
310 {\
311  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
312  uint8_t * const halfHV= temp;\
313  int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
314  av_assert2(((uintptr_t)temp & 7) == 0);\
315  ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
316  ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
317 }\
318 \
319 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
320 {\
321  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
322  uint8_t * const halfHV= temp;\
323  int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
324  av_assert2(((uintptr_t)temp & 7) == 0);\
325  ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
326  ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
327 }\
328 \
329 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
330 {\
331  LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
332  uint8_t * const halfHV= temp;\
333  int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
334  av_assert2(((uintptr_t)temp & 7) == 0);\
335  ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
336  ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
337 }\
338 
/* Instantiation helpers. QPEL is the name of one of the generator macros.
 *  - H264_MC expands QPEL for put_ and avg_ at one block size, CPU suffix and
 *    scratch alignment;
 *  - H264_MC_816 expands QPEL for put_ and avg_ at sizes 8 and 16 with a
 *    fixed scratch alignment of 16. */
339 #define H264_MC(QPEL, SIZE, MMX, ALIGN)\
340 QPEL(put_, SIZE, MMX, ALIGN) \
341 QPEL(avg_, SIZE, MMX, ALIGN) \
342 
343 #define H264_MC_816(QPEL, XMM)\
344 QPEL(put_, 8, XMM, 16)\
345 QPEL(put_, 16,XMM, 16)\
346 QPEL(avg_, 8, XMM, 16)\
347 QPEL(avg_, 16,XMM, 16)\
348 
/* Instantiate the inline low-pass wrappers. PUT_OP / AVG_MMXEXT_OP land in
 * the OP parameter, which none of these macros reference, so the names are
 * never expanded. */
349 QPEL_H264(put_, PUT_OP, mmxext)
350 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
351 QPEL_H264_V_XMM(put_, PUT_OP, sse2)
352 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
353 QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
354 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
355 QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
356 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
357 QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
358 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
359 
/* Instantiate the mcXY entry points:
 *  - 4-wide, mmxext: all 16 positions;
 *  - 8- and 16-wide, mmxext: copy and horizontal positions only;
 *  - 8- and 16-wide, sse2: vertical and two-dimensional positions;
 *  - 8- and 16-wide, ssse3: horizontal and two-dimensional positions. */
360 H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8)
361 H264_MC(H264_MC_C_H, 8, mmxext, 8)
362 H264_MC(H264_MC_C_H, 16, mmxext, 8)
363 H264_MC_816(H264_MC_V, sse2)
364 H264_MC_816(H264_MC_HV, sse2)
365 H264_MC_816(H264_MC_H, ssse3)
366 H264_MC_816(H264_MC_HV, ssse3)
367 
368 
369 //10bit
/* Prototypes of the 10-bit entry points; no wrappers are generated for them
 * in this file.
 *  - LUMA_MC_OP declares ff_<OP>_h264_qpel<NUM>_<TYPE>_<DEPTH>_<OPT> with the
 *    (dst, src, stride) signature;
 *  - LUMA_MC_4 declares the put and avg variants for 4-wide blocks;
 *  - LUMA_MC_816 declares the put and avg variants for 8- and 16-wide blocks. */
370 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
371 void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
372  (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
373 
374 #define LUMA_MC_4(DEPTH, TYPE, OPT) \
375  LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
376  LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT)
377 
378 #define LUMA_MC_816(DEPTH, TYPE, OPT) \
379  LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
380  LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
381  LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
382  LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
383 
/* 4-wide, 10-bit: all 16 positions, mmxext only. */
384 LUMA_MC_4(10, mc00, mmxext)
385 LUMA_MC_4(10, mc10, mmxext)
386 LUMA_MC_4(10, mc20, mmxext)
387 LUMA_MC_4(10, mc30, mmxext)
388 LUMA_MC_4(10, mc01, mmxext)
389 LUMA_MC_4(10, mc11, mmxext)
390 LUMA_MC_4(10, mc21, mmxext)
391 LUMA_MC_4(10, mc31, mmxext)
392 LUMA_MC_4(10, mc02, mmxext)
393 LUMA_MC_4(10, mc12, mmxext)
394 LUMA_MC_4(10, mc22, mmxext)
395 LUMA_MC_4(10, mc32, mmxext)
396 LUMA_MC_4(10, mc03, mmxext)
397 LUMA_MC_4(10, mc13, mmxext)
398 LUMA_MC_4(10, mc23, mmxext)
399 LUMA_MC_4(10, mc33, mmxext)
400 
/* 8- and 16-wide, 10-bit: all 16 positions for sse2; the horizontal positions
 * mc10/mc20/mc30 additionally exist in sse2_cache64 and ssse3_cache64 forms. */
401 LUMA_MC_816(10, mc00, sse2)
402 LUMA_MC_816(10, mc10, sse2)
403 LUMA_MC_816(10, mc10, sse2_cache64)
404 LUMA_MC_816(10, mc10, ssse3_cache64)
405 LUMA_MC_816(10, mc20, sse2)
406 LUMA_MC_816(10, mc20, sse2_cache64)
407 LUMA_MC_816(10, mc20, ssse3_cache64)
408 LUMA_MC_816(10, mc30, sse2)
409 LUMA_MC_816(10, mc30, sse2_cache64)
410 LUMA_MC_816(10, mc30, ssse3_cache64)
411 LUMA_MC_816(10, mc01, sse2)
412 LUMA_MC_816(10, mc11, sse2)
413 LUMA_MC_816(10, mc21, sse2)
414 LUMA_MC_816(10, mc31, sse2)
415 LUMA_MC_816(10, mc02, sse2)
416 LUMA_MC_816(10, mc12, sse2)
417 LUMA_MC_816(10, mc22, sse2)
418 LUMA_MC_816(10, mc32, sse2)
419 LUMA_MC_816(10, mc03, sse2)
420 LUMA_MC_816(10, mc13, sse2)
421 LUMA_MC_816(10, mc23, sse2)
422 LUMA_MC_816(10, mc33, sse2)
423 
424 #endif /* HAVE_X86ASM */
425 
/* SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) stores the four functions
 * <PREFIX><PFX><SIZE>_mc{00,10,20,30}_<CPU> into slots 0..3 of
 * c->PFX_pixels_tab[IDX]. It expects a variable named c in the calling scope. */
426 #define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \
427  do { \
428  c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
429  c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
430  c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
431  c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
432  } while (0)
/* SET_QPEL_FUNCS fills all 16 slots of c->PFX_pixels_tab[IDX]: slots 0..3 via
 * SET_QPEL_FUNCS0123, then slots 4..15 so that mcXY lands at index X + 4*Y. */
433 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
434  do { \
435  SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX); \
436  c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
437  c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
438  c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
439  c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
440  c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
441  c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
442  c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
443  c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
444  c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
445  c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
446  c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
447  c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
448  } while (0)
449 
/* H264_QPEL_FUNCS(x, y, CPU) installs the file-local mc<x><y> functions for a
 * single position: put and avg, for 16-wide (table row 0) and 8-wide (table
 * row 1) blocks, at index x + y*4. Table row 2 is not touched. It expects a
 * variable named c in the calling scope. */
450 #define H264_QPEL_FUNCS(x, y, CPU) \
451  do { \
452  c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
453  c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \
454  c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
455  c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \
456  } while (0)
457 
/* H264_QPEL_FUNCS_10 is the 10-bit counterpart: same table slots, but the
 * targets are the external ff_..._10_<CPU> functions. */
458 #define H264_QPEL_FUNCS_10(x, y, CPU) \
459  do { \
460  c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
461  c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
462  c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
463  c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
464  } while (0)
465 
/* Fill the put/avg function tables of an H264QpelContext with the x86
 * routines that the running CPU supports.
 *
 * c          context whose put_h264_qpel_pixels_tab / avg_h264_qpel_pixels_tab
 *            entries are overwritten
 * bit_depth  sample bit depth; 8-bit content (bit_depth <= 8) uses the
 *            wrappers generated in this file, bit_depth == 10 uses the
 *            external ff_*_10_* functions, other depths are left untouched
 *
 * The CPU feature blocks run in a fixed order and later blocks overwrite
 * slots set by earlier ones, so the order of the blocks is significant.
 * Nothing is done when HAVE_X86ASM is 0.
 *
 * Listing lines 466 (the function signature) and 513 (the
 * EXTERNAL_SSE2_FAST test) were missing from this rendering and are restored
 * here; without them the body had no header and one unmatched closing brace. */
466 av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
467 {
468 #if HAVE_X86ASM
469  int high_bit_depth = bit_depth > 8;
470  int cpu_flags = av_get_cpu_flags();
471 
472  if (EXTERNAL_MMXEXT(cpu_flags)) {
473  if (!high_bit_depth) {
/* 8-bit: 16- and 8-wide get only mc00..mc30 here, 4-wide gets all 16. */
474  SET_QPEL_FUNCS0123(put_h264_qpel, 0, 16, mmxext, );
475  SET_QPEL_FUNCS0123(put_h264_qpel, 1, 8, mmxext, );
476  SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
477  SET_QPEL_FUNCS0123(avg_h264_qpel, 0, 16, mmxext, );
478  SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, );
479  SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
480  } else if (bit_depth == 10) {
481  SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
482  SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
483  }
484  }
485 
486  if (EXTERNAL_SSE2(cpu_flags)) {
487  if (!high_bit_depth) {
/* 8-bit: every position with a vertical component (y != 0). */
488  H264_QPEL_FUNCS(0, 1, sse2);
489  H264_QPEL_FUNCS(0, 2, sse2);
490  H264_QPEL_FUNCS(0, 3, sse2);
491  H264_QPEL_FUNCS(1, 1, sse2);
492  H264_QPEL_FUNCS(1, 2, sse2);
493  H264_QPEL_FUNCS(1, 3, sse2);
494  H264_QPEL_FUNCS(2, 1, sse2);
495  H264_QPEL_FUNCS(2, 2, sse2);
496  H264_QPEL_FUNCS(2, 3, sse2);
497  H264_QPEL_FUNCS(3, 1, sse2);
498  H264_QPEL_FUNCS(3, 2, sse2);
499  H264_QPEL_FUNCS(3, 3, sse2);
500  }
501 
502  if (bit_depth == 10) {
/* 10-bit: install the full sse2 set, then replace the horizontal
 * positions mc10/mc20/mc30 with the sse2_cache64 variants. */
503  SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
504  SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
505  SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
506  SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
507  H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
508  H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
509  H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
510  }
511  }
512 
513  if (EXTERNAL_SSE2_FAST(cpu_flags)) {
514  if (!high_bit_depth) {
/* The sse2 mc00 copy is installed only when SSE2 is flagged as fast. */
515  H264_QPEL_FUNCS(0, 0, sse2);
516  }
517  }
518 
519  if (EXTERNAL_SSSE3(cpu_flags)) {
520  if (!high_bit_depth) {
/* 8-bit: every position with a horizontal component (x != 0);
 * this overrides the sse2 entries set above for those slots. */
521  H264_QPEL_FUNCS(1, 0, ssse3);
522  H264_QPEL_FUNCS(1, 1, ssse3);
523  H264_QPEL_FUNCS(1, 2, ssse3);
524  H264_QPEL_FUNCS(1, 3, ssse3);
525  H264_QPEL_FUNCS(2, 0, ssse3);
526  H264_QPEL_FUNCS(2, 1, ssse3);
527  H264_QPEL_FUNCS(2, 2, ssse3);
528  H264_QPEL_FUNCS(2, 3, ssse3);
529  H264_QPEL_FUNCS(3, 0, ssse3);
530  H264_QPEL_FUNCS(3, 1, ssse3);
531  H264_QPEL_FUNCS(3, 2, ssse3);
532  H264_QPEL_FUNCS(3, 3, ssse3);
533  }
534 
535  if (bit_depth == 10) {
536  H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
537  H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
538  H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
539  }
540  }
541 
542  if (EXTERNAL_AVX(cpu_flags)) {
543  /* AVX implies 64 byte cache lines without the need to avoid unaligned
544  * memory accesses that cross the boundary between two cache lines.
545  * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
546  * having to treat SSE2 functions with such properties as AVX. */
547  if (bit_depth == 10) {
548  H264_QPEL_FUNCS_10(1, 0, sse2);
549  H264_QPEL_FUNCS_10(2, 0, sse2);
550  H264_QPEL_FUNCS_10(3, 0, sse2);
551  }
552  }
553 #endif
554 }
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:226
cpu.h
EXTERNAL_SSE2_FAST
#define EXTERNAL_SSE2_FAST(flags)
Definition: cpu.h:60
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
pixels.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:101
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
h264qpel.h
ff_h264qpel_init_x86
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
Definition: h264_qpel.c:466
av_cold
#define av_cold
Definition: attributes.h:90
ff_put_pixels16_l2_mmxext
void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
H264_MC
#define H264_MC(OPNAME, SIZE)
Definition: h264qpel_template.c:380
H264_QPEL_FUNCS
#define H264_QPEL_FUNCS(x, y, CPU)
Definition: h264_qpel.c:450
SET_QPEL_FUNCS0123
#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:426
SET_QPEL_FUNCS
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:433
ff_put_pixels16_sse2
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
Definition: undefined.txt:32
ff_avg_pixels8_l2_mmxext
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
ff_avg_pixels16_sse2
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cpu.h
size
int size
Definition: twinvq_data.h:10344
asm.h
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
src2
const pixel * src2
Definition: h264pred_template.c:422
av_always_inline
#define av_always_inline
Definition: attributes.h:49
H264QpelContext
Definition: h264qpel.h:27
stride
#define stride
Definition: h264pred_template.c:537
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
ff_put_pixels8_l2_mmxext
void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
fpel.h
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
h
h
Definition: vp9dsp_template.c:2038
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
H264_QPEL_FUNCS_10
#define H264_QPEL_FUNCS_10(x, y, CPU)
Definition: h264_qpel.c:458
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
ff_avg_pixels16_l2_mmxext
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)