FFmpeg
h264_qpel.c
Go to the documentation of this file.
/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 * Copyright (c) 2011 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>
#include <stdint.h>

#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264qpel.h"
#include "libavcodec/pixels.h"
#include "fpel.h"

32 #if HAVE_X86ASM
33 void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
34  int dstStride, int src1Stride, int h);
35 void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
36  int dstStride, int src1Stride, int h);
37 void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
38  int dstStride, int src1Stride, int h);
39 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
40  int dstStride, int src1Stride, int h);
41 void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
42  int dstStride, int src1Stride, int h);
43 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
44  int dstStride, int src1Stride, int h);
45 #define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext
46 #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext
47 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
48 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
49 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
50 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
51 #define ff_put_pixels4_mmxext ff_put_pixels4_mmx
52 
/*
 * DEF_QPEL(OPNAME): declare the low-level lowpass / l2 primitives for one
 * operation name (instantiated below for "avg" and "put"). Only prototypes
 * are emitted; the function bodies are not part of this file.
 */
#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\
void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);

DEF_QPEL(avg)
DEF_QPEL(put)

/*
 * QPEL_H264(OPNAME, OP, MMX): generate inline wrappers that compose the
 * primitives declared by DEF_QPEL into larger operations. OPNAME is pasted
 * with its trailing underscore ("put_" / "avg_"); OP is accepted but not
 * used by the expansion.
 *
 *  - qpel4_hv_lowpass:       start 2 rows up and 2 columns left of src, run
 *                            the vertical pass on 3 consecutive 4-column
 *                            groups into tmp, rewind tmp, then run one
 *                            horizontal pass from tmp into dst.
 *                            tmpStride is not used.
 *  - qpel8or16_hv2_lowpass:  call the 8-wide hv2 primitive once for size 8
 *                            and twice for size 16 (size>>4 extra rounds),
 *                            advancing tmp and dst by 8 each time.
 *                            tmpStride is not used.
 *  - qpel16_h_lowpass(_l2):  cover a 16x16 block with four 8-wide calls
 *                            (left/right, then 8 rows further down). The _l2
 *                            variant advances src by dstStride because the
 *                            primitive has no separate source stride.
 *  - pixels16_l2_shift5:     two 8-wide l2_shift5 calls side by side.
 */
#define QPEL_H264(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    int w=3;\
    src -= 2*srcStride+2;\
    while(w--){\
        ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
        tmp += 4;\
        src += 4;\
    }\
    tmp -= 3*4;\
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
    int w = size>>4;\
    do{\
        ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\
        tmp += 8;\
        dst += 8;\
    }while(w--);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src += 8*dstStride;\
    dst += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
    ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst  , src16  , src8  , dstStride, src8Stride, h);\
    ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
}


/*
 * QPEL_H264_H16_XMM(OPNAME, OP, MMX): provide the 16-wide h_lowpass_l2
 * function for an XMM instruction set.
 *
 * On x86-64 the macro expands to nothing and the SSSE3 16-wide functions are
 * declared as external symbols instead. Elsewhere the macro builds the
 * 16-wide function from four 8-wide calls; src is advanced by dstStride
 * because the primitive has no separate source stride argument.
 */
#if ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);

#else // ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src += 8*dstStride;\
    dst += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
}
#endif // ARCH_X86_64

/*
 * QPEL_H264_H_XMM(OPNAME, OP, MMX): emit whatever QPEL_H264_H16_XMM provides
 * for the 16-wide h_lowpass_l2, plus a 16-wide h_lowpass built from four
 * 8-wide calls (left/right halves, then 8 rows further down).
 */
#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}

/*
 * QPEL_H264_V_XMM(OPNAME, OP, MMX): fixed-size vertical lowpass wrappers
 * around the 8-wide "8or16" primitive. The 8x8 version is one call with
 * h = 8; the 16x16 version is two calls with h = 16, the second one shifted
 * 8 columns to the right.
 */
#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}

160 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
161  const uint8_t *src,
162  int tmpStride,
163  int srcStride,
164  int size)
165 {
166  int w = (size+8)>>3;
167  src -= 2*srcStride+2;
168  while(w--){
169  ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
170  tmp += 8;
171  src += 8;
172  }
173 }
174 
/*
 * QPEL_H264_HV_XMM(OPNAME, OP, MMX): two-stage hv lowpass for 8x8 and 16x16.
 * Stage one always uses the "put" SSE2 hv1 helper to fill tmp; stage two
 * uses the OPNAME/MMX-specific hv2 function to produce dst from tmp.
 * The qpel8 / qpel16 wrappers fix size to 8 and 16 respectively.
 */
#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
    ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
}

/*
 * Name aliases for combinations that have no dedicated implementation:
 * the token-pasted name on the left resolves to the function on the right.
 */

/* SSE2 h_lowpass_l2 -> MMXEXT versions */
#define ff_put_h264_qpel8_h_lowpass_l2_sse2  ff_put_h264_qpel8_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel8_h_lowpass_l2_sse2  ff_avg_h264_qpel8_h_lowpass_l2_mmxext
#define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext

/* SSSE3 v_lowpass -> SSE2 versions */
#define ff_put_h264_qpel8_v_lowpass_ssse3  ff_put_h264_qpel8_v_lowpass_sse2
#define ff_avg_h264_qpel8_v_lowpass_ssse3  ff_avg_h264_qpel8_v_lowpass_sse2
#define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2
#define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2

/* SSE2 hv2_lowpass -> MMXEXT versions */
#define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
#define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext

/* Generator bundle: the mc00 function plus the horizontal-only functions. */
#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN)

/* Generator bundle: all sixteen mcXY functions for one size / CPU suffix. */
#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)

/* 16x16 mc00, put flavour: forwards to ff_put_pixels16_sse2 with 16 rows. */
static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}

/* 16x16 mc00, avg flavour: forwards to ff_avg_pixels16_sse2 with 16 rows. */
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}

/* The 8x8 mc00 "sse2" names resolve to the MMXEXT functions. */
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext

/*
 * H264_MC_C(OPNAME, SIZE, MMX, ALIGN): generate the mc00 function, which
 * forwards to ff_<OPNAME>pixels<SIZE>_<MMX> with SIZE rows.
 * ALIGN is not used by this generator.
 */
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
}

/*
 * H264_MC_H(OPNAME, SIZE, MMX, ALIGN): generate the horizontal-only
 * functions. ALIGN is not used by this generator.
 *
 *   mc10: h_lowpass_l2 with src itself as the second source
 *   mc20: plain h_lowpass
 *   mc30: h_lowpass_l2 with src+1 as the second source
 */
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}

/*
 * H264_MC_V(OPNAME, SIZE, MMX, ALIGN): generate the vertical-only functions.
 * ALIGN is the alignment requested for the local temporary.
 *
 *   mc01: "put" v_lowpass into a packed SIZE x SIZE temporary (row pitch
 *         SIZE), then pixels_l2 of src with that temporary
 *   mc02: v_lowpass straight into dst
 *   mc03: like mc01, but pixels_l2 uses src+stride as the first source
 */
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
}

/*
 * H264_MC_HV(OPNAME, SIZE, MMX, ALIGN): generate the functions that combine
 * horizontal and vertical filtering. ALIGN is the alignment requested for
 * the local temporaries.
 *
 *   mc11/mc31/mc13/mc33: "put" v_lowpass of src (or src+1) into a packed
 *       SIZE x SIZE temporary, then h_lowpass_l2 of src (or src+stride)
 *       with that temporary as the second source.
 *   mc22: hv_lowpass straight into dst using a 16-bit scratch buffer. The
 *       scratch is declared int16_t to match the int16_t *tmp parameter of
 *       the hv_lowpass functions.
 *   mc21/mc23: "put" hv_lowpass into halfHV (halfV serves as its scratch),
 *       then h_lowpass_l2 of src (or src+stride) with halfHV.
 *   mc12/mc32: "put" hv_lowpass into halfHV, then pixels_l2_shift5 of the
 *       16-bit data left in halfV (offset 2 or 3) with halfHV.
 *
 * For mc21/mc23/mc12/mc32 a single byte buffer holds halfHV in its first
 * SIZE*SIZE bytes, followed by the int16_t scratch halfV of
 * SIZE*(SIZE<8?12:24) elements; av_assert2 checks that the buffer is
 * 8-byte aligned.
 */
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, int16_t, temp, [SIZE*(SIZE<8?12:24)]);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
}

/* Instantiate generator QPEL for both operations at one size / CPU suffix. */
#define H264_MC(QPEL, SIZE, MMX, ALIGN)\
QPEL(put_, SIZE, MMX, ALIGN) \
QPEL(avg_, SIZE, MMX, ALIGN)

/*
 * Instantiate generator QPEL for both operations at sizes 8 and 16 with
 * 16-byte aligned temporaries.
 */
#define H264_MC_816(QPEL, XMM)\
QPEL(put_, 8, XMM, 16)\
QPEL(put_, 16,XMM, 16)\
QPEL(avg_, 8, XMM, 16)\
QPEL(avg_, 16,XMM, 16)

350 QPEL_H264(put_, PUT_OP, mmxext)
351 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
352 QPEL_H264_V_XMM(put_, PUT_OP, sse2)
353 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
354 QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
355 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
356 QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
357 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
358 QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
359 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
360 
361 H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8)
362 H264_MC(H264_MC_C_H, 8, mmxext, 8)
363 H264_MC(H264_MC_C_H, 16, mmxext, 8)
364 H264_MC_816(H264_MC_V, sse2)
365 H264_MC_816(H264_MC_HV, sse2)
366 H264_MC_816(H264_MC_H, ssse3)
367 H264_MC_816(H264_MC_HV, ssse3)
368 
369 
370 //10bit
371 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
372 void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
373  (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
374 
375 #define LUMA_MC_4(DEPTH, TYPE, OPT) \
376  LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
377  LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT)
378 
379 #define LUMA_MC_816(DEPTH, TYPE, OPT) \
380  LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
381  LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
382  LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
383  LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
384 
385 LUMA_MC_4(10, mc00, mmxext)
386 LUMA_MC_4(10, mc10, mmxext)
387 LUMA_MC_4(10, mc20, mmxext)
388 LUMA_MC_4(10, mc30, mmxext)
389 LUMA_MC_4(10, mc01, mmxext)
390 LUMA_MC_4(10, mc11, mmxext)
391 LUMA_MC_4(10, mc21, mmxext)
392 LUMA_MC_4(10, mc31, mmxext)
393 LUMA_MC_4(10, mc02, mmxext)
394 LUMA_MC_4(10, mc12, mmxext)
395 LUMA_MC_4(10, mc22, mmxext)
396 LUMA_MC_4(10, mc32, mmxext)
397 LUMA_MC_4(10, mc03, mmxext)
398 LUMA_MC_4(10, mc13, mmxext)
399 LUMA_MC_4(10, mc23, mmxext)
400 LUMA_MC_4(10, mc33, mmxext)
401 
402 LUMA_MC_816(10, mc00, sse2)
403 LUMA_MC_816(10, mc10, sse2)
404 LUMA_MC_816(10, mc10, sse2_cache64)
405 LUMA_MC_816(10, mc10, ssse3_cache64)
406 LUMA_MC_816(10, mc20, sse2)
407 LUMA_MC_816(10, mc20, sse2_cache64)
408 LUMA_MC_816(10, mc20, ssse3_cache64)
409 LUMA_MC_816(10, mc30, sse2)
410 LUMA_MC_816(10, mc30, sse2_cache64)
411 LUMA_MC_816(10, mc30, ssse3_cache64)
412 LUMA_MC_816(10, mc01, sse2)
413 LUMA_MC_816(10, mc11, sse2)
414 LUMA_MC_816(10, mc21, sse2)
415 LUMA_MC_816(10, mc31, sse2)
416 LUMA_MC_816(10, mc02, sse2)
417 LUMA_MC_816(10, mc12, sse2)
418 LUMA_MC_816(10, mc22, sse2)
419 LUMA_MC_816(10, mc32, sse2)
420 LUMA_MC_816(10, mc03, sse2)
421 LUMA_MC_816(10, mc13, sse2)
422 LUMA_MC_816(10, mc23, sse2)
423 LUMA_MC_816(10, mc33, sse2)
424 
425 #endif /* HAVE_X86ASM */
426 
/*
 * SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX): fill entries 0..3 of
 * c->PFX_pixels_tab[IDX] with PREFIX PFX SIZE _mc00/_mc10/_mc20/_mc30 _CPU.
 * Expects a variable named c to be in scope at the point of use.
 */
#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX)                          \
    do {                                                                         \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    } while (0)
/*
 * SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX): fill all sixteen entries of
 * c->PFX_pixels_tab[IDX]; entry x + 4*y receives the mc<x><y> function.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                              \
    do {                                                                         \
    SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX);                         \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)

/*
 * H264_QPEL_FUNCS(x, y, CPU): install the file-local mc<x><y> functions for
 * one CPU suffix into entry x + 4*y of the put and avg tables, for table
 * row 0 (16x16) and row 1 (8x8). Expects a variable named c in scope.
 */
#define H264_QPEL_FUNCS(x, y, CPU)                                                            \
    do {                                                                                      \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
    } while (0)

/*
 * H264_QPEL_FUNCS_10(x, y, CPU): same as H264_QPEL_FUNCS, but installs the
 * ff_-prefixed 10-bit functions (ff_<op>_h264_qpel<N>_mc<x><y>_10_<CPU>).
 */
#define H264_QPEL_FUNCS_10(x, y, CPU)                                                               \
    do {                                                                                            \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
    } while (0)

468 {
469 #if HAVE_X86ASM
470  int high_bit_depth = bit_depth > 8;
471  int cpu_flags = av_get_cpu_flags();
472 
473  if (EXTERNAL_MMXEXT(cpu_flags)) {
474  if (!high_bit_depth) {
475  SET_QPEL_FUNCS0123(put_h264_qpel, 0, 16, mmxext, );
476  SET_QPEL_FUNCS0123(put_h264_qpel, 1, 8, mmxext, );
477  SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
478  SET_QPEL_FUNCS0123(avg_h264_qpel, 0, 16, mmxext, );
479  SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, );
480  SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
481  } else if (bit_depth == 10) {
482  SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
483  SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
484  }
485  }
486 
487  if (EXTERNAL_SSE2(cpu_flags)) {
488  if (!high_bit_depth) {
489  H264_QPEL_FUNCS(0, 1, sse2);
490  H264_QPEL_FUNCS(0, 2, sse2);
491  H264_QPEL_FUNCS(0, 3, sse2);
492  H264_QPEL_FUNCS(1, 1, sse2);
493  H264_QPEL_FUNCS(1, 2, sse2);
494  H264_QPEL_FUNCS(1, 3, sse2);
495  H264_QPEL_FUNCS(2, 1, sse2);
496  H264_QPEL_FUNCS(2, 2, sse2);
497  H264_QPEL_FUNCS(2, 3, sse2);
498  H264_QPEL_FUNCS(3, 1, sse2);
499  H264_QPEL_FUNCS(3, 2, sse2);
500  H264_QPEL_FUNCS(3, 3, sse2);
501  }
502 
503  if (bit_depth == 10) {
504  SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
505  SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
506  SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
507  SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
508  H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
509  H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
510  H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
511  }
512  }
513 
515  if (!high_bit_depth) {
516  H264_QPEL_FUNCS(0, 0, sse2);
517  }
518  }
519 
520  if (EXTERNAL_SSSE3(cpu_flags)) {
521  if (!high_bit_depth) {
522  H264_QPEL_FUNCS(1, 0, ssse3);
523  H264_QPEL_FUNCS(1, 1, ssse3);
524  H264_QPEL_FUNCS(1, 2, ssse3);
525  H264_QPEL_FUNCS(1, 3, ssse3);
526  H264_QPEL_FUNCS(2, 0, ssse3);
527  H264_QPEL_FUNCS(2, 1, ssse3);
528  H264_QPEL_FUNCS(2, 2, ssse3);
529  H264_QPEL_FUNCS(2, 3, ssse3);
530  H264_QPEL_FUNCS(3, 0, ssse3);
531  H264_QPEL_FUNCS(3, 1, ssse3);
532  H264_QPEL_FUNCS(3, 2, ssse3);
533  H264_QPEL_FUNCS(3, 3, ssse3);
534  }
535 
536  if (bit_depth == 10) {
537  H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
538  H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
539  H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
540  }
541  }
542 
543  if (EXTERNAL_AVX(cpu_flags)) {
544  /* AVX implies 64 byte cache lines without the need to avoid unaligned
545  * memory accesses that cross the boundary between two cache lines.
546  * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
547  * having to treat SSE2 functions with such properties as AVX. */
548  if (bit_depth == 10) {
549  H264_QPEL_FUNCS_10(1, 0, sse2);
550  H264_QPEL_FUNCS_10(2, 0, sse2);
551  H264_QPEL_FUNCS_10(3, 0, sse2);
552  }
553  }
554 #endif
555 }
cpu.h
EXTERNAL_SSE2_FAST
#define EXTERNAL_SSE2_FAST(flags)
Definition: cpu.h:60
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
pixels.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:245
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
h264qpel.h
ff_h264qpel_init_x86
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
Definition: h264_qpel.c:467
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
ff_put_pixels16_l2_mmxext
void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
H264_MC
#define H264_MC(OPNAME, SIZE)
Definition: h264qpel_template.c:380
H264_QPEL_FUNCS
#define H264_QPEL_FUNCS(x, y, CPU)
Definition: h264_qpel.c:451
SET_QPEL_FUNCS0123
#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:427
SET_QPEL_FUNCS
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:434
ff_put_pixels16_sse2
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_avg_pixels8_l2_mmxext
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
ff_avg_pixels16_sse2
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cpu.h
size
int size
Definition: twinvq_data.h:10344
asm.h
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
src2
const pixel * src2
Definition: h264pred_template.c:422
av_always_inline
#define av_always_inline
Definition: attributes.h:49
H264QpelContext
Definition: h264qpel.h:27
stride
#define stride
Definition: h264pred_template.c:537
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
ff_put_pixels8_l2_mmxext
void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
fpel.h
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
h
h
Definition: vp9dsp_template.c:2038
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
H264_QPEL_FUNCS_10
#define H264_QPEL_FUNCS_10(x, y, CPU)
Definition: h264_qpel.c:459
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
ff_avg_pixels16_l2_mmxext
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)