Go to the documentation of this file.
38 const uint8_t *
src1,
const uint8_t *
src2,
39 ptrdiff_t dstStride, ptrdiff_t src1Stride);
41 const uint8_t *
src1,
const uint8_t *
src2,
42 ptrdiff_t dstStride, ptrdiff_t src1Stride);
/*
 * QPEL_H: for one OPNAME/SIZE/XMM combination, declare the external
 * horizontal lowpass routines
 *   ff_mpeg4_<OPNAME>_qpel<SIZE>_h_lowpass_<XMM>
 *   ff_mpeg4_<OPNAME>_qpel<SIZE>_h_lowpass_l2_<XMM>  (extra trailing l2_offset)
 * and define the static wrappers <OPNAME>_qpel<SIZE>_mc10/mc20/mc30_<XMM>.
 * mc10 and mc30 forward to the _h_lowpass_l2_ routine, mc20 forwards to the
 * plain _h_lowpass_ routine; all three pass dst, src and stride.
 *
 * NOTE(review): the embedded listing numbers skip (e.g. 45 -> 47, 56 -> 60),
 * so parts of the parameter lists and call arguments are not visible here.
 * RND, UNUSED1..UNUSED3 and L2 are not referenced in the visible lines.
 */
44 #define QPEL_H(OPNAME, RND, SIZE, UNUSED1, XMM, UNUSED2, UNUSED3, L2) \
45 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM (uint8_t *dst, \
47 ptrdiff_t dstStride, \
48 ptrdiff_t srcStride, \
50 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(uint8_t *dst, \
52 ptrdiff_t dstStride, \
53 ptrdiff_t srcStride, \
55 ptrdiff_t l2_offset);\
56 static void OPNAME ## _qpel ## SIZE ## _mc10_ ## XMM(uint8_t *dst, \
60 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
64 static void OPNAME ## _qpel ## SIZE ## _mc20_ ## XMM(uint8_t *dst, \
68 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM(dst, src, stride, \
72 static void OPNAME ## _qpel ## SIZE ## _mc30_ ## XMM(uint8_t *dst, \
76 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
/*
 * QPEL_V: for one OPNAME/SIZE/XMM combination, declare the external vertical
 * lowpass routine ff_mpeg4_<OPNAME>_qpel<SIZE>_v_lowpass_<XMM> and define the
 * static wrappers <OPNAME>_qpel<SIZE>_mc01/mc02/mc03_<XMM>.
 *
 *  - mc02 calls the <OPNAME> vertical lowpass directly with dst and src.
 *  - mc01 and mc03 first run the put_<RND> vertical lowpass into a local
 *    SIZE*SIZE scratch buffer `half`, then hand dst, a source pointer and
 *    `half` to ff_<OPNAME>_pixels<SIZE>x<SIZE>_l2_<L2>. mc01 passes `src`,
 *    mc03 passes `src + stride` as that source pointer.
 *
 * NOTE(review): the embedded listing numbers skip (e.g. 81 -> 83, 85 -> 89),
 * so the full parameter lists, some call arguments and the closing braces are
 * not visible here. UNUSED1..UNUSED3 are not referenced in the visible lines.
 */
80 #define QPEL_V(OPNAME, RND, SIZE, UNUSED1, UNUSED2, XMM, UNUSED3, L2) \
81 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM (uint8_t *dst, \
83 ptrdiff_t dstStride, \
84 ptrdiff_t srcStride); \
85 static void OPNAME ## _qpel ## SIZE ## _mc01_ ## XMM(uint8_t *dst, \
89 DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
90 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
92 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src, half, \
96 static void OPNAME ## _qpel ## SIZE ## _mc02_ ## XMM(uint8_t *dst, \
100 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM(dst, src, \
104 static void OPNAME ## _qpel ## SIZE ## _mc03_ ## XMM(uint8_t *dst, \
105 const uint8_t *src, \
108 DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
109 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
111 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src + stride, \
112 half, stride, stride); \
/*
 * QPEL_HV: define the nine static wrappers <OPNAME>_qpel<SIZE>_mcXY_<HVXMM>
 * with X and Y in 1..3 (mc11, mc31, mc13, mc33, mc21, mc23, mc12, mc32, mc22).
 * Each one runs a horizontal pass (HXMM routine) into a scratch buffer and
 * then a vertical pass (VXMM routine) on that buffer.
 *
 * mc11 / mc31 / mc13 / mc33 / mc21 / mc23:
 *   - one scratch array `half` of (SIZE + SIZEP1)*SIZE bytes is split in two:
 *     halfHV = half (first SIZE*SIZE bytes), halfH = half + SIZE*SIZE;
 *   - horizontal pass writes halfH: mcX1/mcX3 with X = 1 or 3 use the
 *     put_<RND> _h_lowpass_l2_ routine with final argument 0 (X = 1) or
 *     1 (X = 3); mc21/mc23 use the plain put_<RND> _h_lowpass_ routine;
 *   - put_<RND> vertical pass reads halfH and writes halfHV;
 *   - ff_<OPNAME>_pixels<SIZE>x<SIZE>_l2_<L2> receives dst, halfH (mcX1) or
 *     halfH + SIZE (mcX3), and halfHV.
 *
 * mc12 / mc32 / mc22:
 *   - scratch array halfH of SIZEP1*SIZE bytes;
 *   - horizontal pass as above (l2 variant with 0 for mc12, 1 for mc32,
 *     plain variant for mc22);
 *   - the <OPNAME> vertical lowpass is called with dst and halfH.
 *
 * NOTE(review): the embedded listing numbers skip (e.g. 117 -> 120,
 * 125 -> 127), so the full parameter lists, trailing call arguments and the
 * closing braces of each wrapper are not visible here.
 */
115 #define QPEL_HV(OPNAME, RND, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
116 static void OPNAME ## _qpel ## SIZE ## _mc11_ ## HVXMM(uint8_t *dst, \
117 const uint8_t *src, \
120 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
121 uint8_t *const halfH = half + SIZE*SIZE; \
122 uint8_t *const halfHV = half; \
123 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
124 stride, SIZEP1, 0); \
125 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
127 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
131 static void OPNAME ## _qpel ## SIZE ## _mc31_ ## HVXMM(uint8_t *dst, \
132 const uint8_t *src, \
135 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
136 uint8_t *const halfH = half + SIZE*SIZE; \
137 uint8_t *const halfHV = half; \
138 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
139 stride, SIZEP1, 1); \
140 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
142 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
146 static void OPNAME ## _qpel ## SIZE ## _mc13_ ## HVXMM(uint8_t *dst, \
147 const uint8_t *src, \
150 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
151 uint8_t *const halfH = half + SIZE*SIZE; \
152 uint8_t *const halfHV = half; \
153 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
154 stride, SIZEP1, 0); \
155 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
157 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
158 halfHV, stride, SIZE); \
161 static void OPNAME ## _qpel ## SIZE ## _mc33_ ## HVXMM(uint8_t *dst, \
162 const uint8_t *src, \
165 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
166 uint8_t *const halfH = half + SIZE*SIZE; \
167 uint8_t *const halfHV = half; \
168 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
169 stride, SIZEP1, 1); \
170 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
172 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
173 halfHV, stride, SIZE); \
176 static void OPNAME ## _qpel ## SIZE ## _mc21_ ## HVXMM(uint8_t *dst, \
177 const uint8_t *src, \
180 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
181 uint8_t *const halfH = half + SIZE*SIZE; \
182 uint8_t *const halfHV = half; \
183 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
185 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
187 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
191 static void OPNAME ## _qpel ## SIZE ## _mc23_ ## HVXMM(uint8_t *dst, \
192 const uint8_t *src, \
195 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
196 uint8_t *const halfH = half + SIZE*SIZE; \
197 uint8_t *const halfHV = half; \
198 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
200 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
202 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
203 halfHV, stride, SIZE); \
206 static void OPNAME ## _qpel ## SIZE ## _mc12_ ## HVXMM(uint8_t *dst, \
207 const uint8_t *src, \
210 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
211 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
212 stride, SIZEP1, 0); \
213 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
217 static void OPNAME ## _qpel ## SIZE ## _mc32_ ## HVXMM(uint8_t *dst, \
218 const uint8_t *src, \
221 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
222 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
223 stride, SIZEP1, 1); \
224 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
228 static void OPNAME ## _qpel ## SIZE ## _mc22_ ## HVXMM(uint8_t *dst, \
229 const uint8_t *src, \
232 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
233 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
235 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
/*
 * QPEL3: expand MACRO three times with the same SIZE/SIZEP1/HXMM/VXMM/HVXMM/L2
 * arguments, once per operation flavour:
 *   OPNAME = put,        RND = (empty)
 *   OPNAME = avg,        RND = (empty)
 *   OPNAME = put_no_rnd, RND = no_rnd_
 * The second argument is deliberately empty for put/avg so that token pasting
 * with RND adds nothing to the generated identifiers.
 */
239 #define QPEL3(MACRO, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
240 MACRO(put,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
241 MACRO(avg,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
242 MACRO(put_no_rnd, no_rnd_, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
/*
 * SET_QPEL_FUNC: store one function pointer into the context `c`, which must
 * be in scope at the expansion site.
 *   table : c-><OP>_qpel_pixels_tab
 *   row   : (SIZE == 8), i.e. 1 when SIZE is 8 and 0 otherwise
 *   slot  : X + 4*Y
 *   value : <PREFIX><OP>_qpel<SIZE>_mc<X><Y>_<CPU>
 * X and Y are token-pasted into the function name as well as used in the
 * index arithmetic, so they have to be single literal tokens. The expansion
 * carries no trailing semicolon.
 */
251 #define SET_QPEL_FUNC(OP, X, Y, SIZE, CPU, PREFIX) \
252 c->OP ## _qpel_pixels_tab[SIZE == 8][X+4*Y] = PREFIX ## OP ## _qpel ## SIZE ## _mc ## X ## Y ## _ ## CPU
/*
 * SET_QPEL_FUNCS3: apply SET_QPEL_FUNC for the same X/Y/SIZE/CPU/PREFIX to the
 * three operation tables, in the order avg, put, put_no_rnd.
 *
 * The expansion is three assignments separated by ';' with no trailing
 * semicolon and no do { } while (0) wrapper, so it is only safe as a
 * statement inside a braced block (an unbraced `if` would guard just the
 * first assignment).
 */
254 #define SET_QPEL_FUNCS3(X, Y, SIZE, CPU, PREFIX) \
255 SET_QPEL_FUNC(avg, X, Y, SIZE, CPU, PREFIX); \
256 SET_QPEL_FUNC(put, X, Y, SIZE, CPU, PREFIX); \
257 SET_QPEL_FUNC(put_no_rnd, X, Y, SIZE, CPU, PREFIX)
/*
 * SET_H_QPEL_FUNCS: install the entries with Y = 0 and X = 1, 2, 3
 * (mc10, mc20, mc30) for the given SIZE/CPU/PREFIX via SET_QPEL_FUNCS3.
 * Expands to several statements without a trailing semicolon; use it inside
 * a braced block.
 */
259 #define SET_H_QPEL_FUNCS(SIZE, CPU, PREFIX) \
260 SET_QPEL_FUNCS3(1, 0, SIZE, CPU, PREFIX); \
261 SET_QPEL_FUNCS3(2, 0, SIZE, CPU, PREFIX); \
262 SET_QPEL_FUNCS3(3, 0, SIZE, CPU, PREFIX)
/*
 * SET_V_QPEL_FUNCS: install the entries with X = 0 and Y = 1, 2, 3
 * (mc01, mc02, mc03) for the given SIZE/CPU/PREFIX via SET_QPEL_FUNCS3.
 * Expands to several statements without a trailing semicolon; use it inside
 * a braced block.
 */
264 #define SET_V_QPEL_FUNCS(SIZE, CPU, PREFIX) \
265 SET_QPEL_FUNCS3(0, 1, SIZE, CPU, PREFIX); \
266 SET_QPEL_FUNCS3(0, 2, SIZE, CPU, PREFIX); \
267 SET_QPEL_FUNCS3(0, 3, SIZE, CPU, PREFIX)
/*
 * SET_HV_QPEL_FUNCS: install the nine entries with both X and Y in 1..3
 * (mc11, mc12, mc13, mc21, mc22, mc23, mc31, mc32, mc33) for the given
 * SIZE/CPU/PREFIX via SET_QPEL_FUNCS3.
 * Expands to several statements without a trailing semicolon; use it inside
 * a braced block.
 */
269 #define SET_HV_QPEL_FUNCS(SIZE, CPU, PREFIX) \
270 SET_QPEL_FUNCS3(1, 1, SIZE, CPU, PREFIX); \
271 SET_QPEL_FUNCS3(1, 2, SIZE, CPU, PREFIX); \
272 SET_QPEL_FUNCS3(1, 3, SIZE, CPU, PREFIX); \
273 SET_QPEL_FUNCS3(2, 1, SIZE, CPU, PREFIX); \
274 SET_QPEL_FUNCS3(2, 2, SIZE, CPU, PREFIX); \
275 SET_QPEL_FUNCS3(2, 3, SIZE, CPU, PREFIX); \
276 SET_QPEL_FUNCS3(3, 1, SIZE, CPU, PREFIX); \
277 SET_QPEL_FUNCS3(3, 2, SIZE, CPU, PREFIX); \
278 SET_QPEL_FUNCS3(3, 3, SIZE, CPU, PREFIX)
285 #if HAVE_MMXEXT_EXTERNAL
289 #if HAVE_SSE2_EXTERNAL
291 c->put_no_rnd_qpel_pixels_tab[0][0] =
293 c->put_no_rnd_qpel_pixels_tab[1][0] =
#define SET_V_QPEL_FUNCS(SIZE, CPU, PREFIX)
#define FF_VISIBILITY_PUSH_HIDDEN
#define SET_H_QPEL_FUNCS(SIZE, CPU, PREFIX)
void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define SET_HV_QPEL_FUNCS(SIZE, CPU, PREFIX)
void ff_put_no_rnd_pixels16x16_l2_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
#define QPEL_H(OPNAME, RND, SIZE, UNUSED1, XMM, UNUSED2, UNUSED3, L2)
#define QPEL_V(OPNAME, RND, SIZE, UNUSED1, UNUSED2, XMM, UNUSED3, L2)
void ff_put_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
void ff_put_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
#define QPEL3(MACRO, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
#define EXTERNAL_SSE2(flags)
#define FF_VISIBILITY_POP_HIDDEN
FF_VISIBILITY_PUSH_HIDDEN void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
#define QPEL_HV(OPNAME, RND, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
#define EXTERNAL_SSSE3(flags)
#define X86_MMXEXT(flags)