FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dsputil_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000, 2001 Fabrice Bellard
3  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config.h"
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/internal.h"
26 #include "libavutil/x86/asm.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/dsputil.h"
29 #include "libavcodec/simple_idct.h"
30 #include "libavcodec/version.h"
31 #include "dsputil_x86.h"
32 #include "idct_xvid.h"
33 
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
35  int dstStride, int src1Stride, int h);
37  uint8_t *src2, int dstStride,
38  int src1Stride, int h);
39 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
40  int dstStride, int src1Stride, int h);
41 void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
42  int dstStride, int src1Stride, int h);
43 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
44  int dstStride, int src1Stride, int h);
46  int dstStride, int src1Stride, int h);
48  int dstStride, int srcStride, int h);
50  int dstStride, int srcStride, int h);
52  int dstStride, int srcStride,
53  int h);
55  int dstStride, int srcStride, int h);
57  int dstStride, int srcStride, int h);
59  int dstStride, int srcStride,
60  int h);
62  int dstStride, int srcStride);
64  int dstStride, int srcStride);
66  int dstStride, int srcStride);
68  int dstStride, int srcStride);
70  int dstStride, int srcStride);
72  int dstStride, int srcStride);
73 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
74 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
75 
76 int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
77  int order);
78 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
79  int order);
80 int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2,
81  const int16_t *v3,
82  int order, int mul);
83 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
84  const int16_t *v3,
85  int order, int mul);
86 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
87  const int16_t *v3,
88  int order, int mul);
89 
90 void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
91 void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
92 
94  const uint8_t *diff, int w,
95  int *left, int *left_top);
97  int w, int left);
99  int w, int left);
100 
101 void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src,
102  int32_t min, int32_t max, unsigned int len);
103 void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src,
104  int32_t min, int32_t max, unsigned int len);
106  int32_t min, int32_t max, unsigned int len);
107 void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src,
108  int32_t min, int32_t max, unsigned int len);
109 
110 #if HAVE_YASM
111 
112 PIXELS16(static, ff_avg, , , _mmxext)
113 PIXELS16(static, ff_put, , , _mmxext)
114 
/**
 * QPEL_OP(OPNAME, RND, MMX) expands to the full set of 16 quarter-pel
 * motion-compensation functions (mc00..mc33) for both 8x8 and 16x16 blocks.
 *
 * OPNAME selects the store operation the generated names start with
 * (put_, avg_, put_no_rnd_), RND selects the rounding variant of the
 * lowpass/average helpers that are called (_ or _no_rnd_), and MMX is the
 * CPU suffix appended to all helper names (instantiated below with mmxext).
 *
 * Naming convention: mcXY interpolates at horizontal half-pel offset X/2 and
 * vertical half-pel offset Y/2 (X, Y in {0,1,2,3}):
 *   - mc00 is a plain copy (or average into dst for avg_);
 *   - mcX0 / mc0Y need only the horizontal / vertical MPEG-4 lowpass filter,
 *     blended with the source (or the shifted source) for the quarter
 *     positions 1 and 3;
 *   - the mixed positions combine an H-filtered plane (halfH) with an
 *     HV-filtered plane (halfHV) via the pixels*_l2 two-source average.
 *
 * Temporaries live on the stack, declared as uint64_t arrays for 8-byte
 * alignment:
 *   - 8x8:   halfH is 8x9 bytes, halfHV 8x8 bytes, packed into
 *            uint64_t half[8 + 9] (halfHV at offset 0, halfH at +64);
 *   - 16x16: halfH is 16x17 bytes, halfHV 16x16 bytes, packed into
 *            uint64_t half[16 * 2 + 17 * 2] (halfH at +256).
 * The +8 / +16 offsets passed to the final l2 call in mc13/mc23/mc33 select
 * the H plane shifted down by one row (the bottom half-pel row).
 */
#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
                                        stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
                                                   stride, stride); \
} \
 \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
                                        stride, 8); \
} \
 \
/* mixed H+V quarter positions: build halfH (H lowpass, optionally */ \
/* averaged with src or src+1), then halfHV (V lowpass of halfH),  */ \
/* and blend the two planes into dst.                              */ \
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half) + 64; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
                                        8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[9]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
 \
/* 16x16 variants: same structure as the 8x8 ones with doubled sizes. */ \
static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
                                           ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
                                                    stride, stride, 16);\
} \
 \
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
                                         stride, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
                                                    stride, stride); \
} \
 \
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t * const half = (uint8_t*)temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
                                         stride, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half) + 256; \
    uint8_t * const halfHV = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t * const halfH = ((uint8_t*)half); \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
}
495 
496 QPEL_OP(put_, _, mmxext)
497 QPEL_OP(avg_, _, mmxext)
498 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
499 #endif /* HAVE_YASM */
500 
/**
 * Fill one 16-entry row of a quarter-pel function-pointer table with the
 * mcXY functions generated by QPEL_OP above.
 *
 * PFX    table/function prefix (e.g. put_qpel, avg_qpel, put_no_rnd_qpel)
 * IDX    row index: 0 for 16x16, 1 for 8x8 (per the SIZE argument)
 * SIZE   block size suffix in the generated function names (16 or 8)
 * CPU    CPU suffix of the generated functions (e.g. mmxext)
 * PREFIX optional extra name prefix; empty for the static wrappers here
 *
 * Table layout: entry (4*y + x) holds mc<x><y>, i.e. horizontal quarter-pel
 * offset x and vertical quarter-pel offset y (assignment order below is
 * mc00, mc10, mc20, mc30, mc01, mc11, ... as visible in the indices).
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
    do {                                                                     \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)
520 
/*
 * MMX-level DSPContext initialization.
 *
 * NOTE(review): this listing is a Doxygen extraction with missing lines.
 * The function's opening signature line (original line 521) is absent —
 * presumably "static av_cold void dsputil_init_mmx(DSPContext *c,
 * AVCodecContext *avctx," — and several body lines (526-529, 532-534,
 * 541, 545) are missing. Restore them from the original source before
 * building; the gaps are marked below.
 */
                             int cpu_flags)
{
#if HAVE_MMX_INLINE
    /* >8 bits per raw sample means the 8-bit MMX routines do not apply. */
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* NOTE(review): extraction gap (lines 526-529) — assignments that
     * belong here are missing from this listing. */

    if (!high_bit_depth) {
        /* NOTE(review): extraction gap (lines 532-534) — 8-bit-only
         * function-pointer assignments missing. */
    }

#if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM)
    /* Inline-asm gmc used when the external-asm build is unavailable. */
    c->gmc = ff_gmc_mmx;
#endif

    /* NOTE(review): extraction gap (line 541). */
#endif /* HAVE_MMX_INLINE */

#if HAVE_MMX_EXTERNAL
    /* NOTE(review): extraction gap (line 545) — external-asm MMX
     * assignments missing. */
#endif /* HAVE_MMX_EXTERNAL */
}
548 
/*
 * MMXEXT-level DSPContext initialization.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (549) and body
 * lines 556-558, 573, 575-576 are missing; restore from the original source.
 */
                                int cpu_flags)
{
#if HAVE_MMXEXT_INLINE
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* Xvid IDCT (MMXEXT flavour) only applies to 8-bit, full-resolution
     * decoding with the user-selected FF_IDCT_XVIDMMX algorithm. */
    if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
        /* NOTE(review): extraction gap (lines 556-558) — idct_put/idct_add/
         * idct assignments for ff_idct_xvid_mmxext presumably go here. */
    }
#endif /* HAVE_MMXEXT_INLINE */

#if HAVE_MMXEXT_EXTERNAL
    /* Quarter-pel MC tables generated by QPEL_OP: row 0 = 16x16, row 1 = 8x8. */
    SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
    SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );

    SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
    SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
    SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
    SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );

    /* slower than cmov version on AMD */
    if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
        /* NOTE(review): extraction gap (line 573) — the guarded assignment
         * (presumably a scalarproduct/median-prediction pointer) is missing. */

    /* NOTE(review): extraction gap (lines 575-576). */
#endif /* HAVE_MMXEXT_EXTERNAL */
}
579 
/*
 * SSE-level DSPContext initialization.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (580) and body
 * lines 586, 593-594 are missing; restore from the original source.
 */
                             int cpu_flags)
{
#if HAVE_SSE_INLINE
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* NOTE(review): extraction gap (line 586). */

    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
    if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
        return;  /* SSE routines need 16-byte alignment; bail out for XvMC */

    if (!high_bit_depth) {
        /* NOTE(review): extraction gap (lines 593-594) — 8-bit-only
         * SSE assignments missing. */
    }
#endif /* HAVE_SSE_INLINE */

#if HAVE_YASM
#if HAVE_INLINE_ASM && CONFIG_VIDEODSP
    c->gmc = ff_gmc_sse;
#endif
#endif /* HAVE_YASM */
}
604 
/*
 * SSE2-level DSPContext initialization.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (605) and body
 * lines 612-613, 615, 620-621, 623, 625, 627 are missing; restore from the
 * original source.
 */
                              int cpu_flags)
{
#if HAVE_SSE2_INLINE
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* Xvid IDCT (SSE2) for 8-bit, full-resolution decoding only. */
    if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) {
        /* NOTE(review): extraction gap (lines 612-613) — idct_put/idct_add
         * assignments presumably precede this. */
        c->idct = ff_idct_xvid_sse2;
        /* NOTE(review): extraction gap (line 615) — idct_permutation_type
         * assignment presumably goes here. */
    }
#endif /* HAVE_SSE2_INLINE */

#if HAVE_SSE2_EXTERNAL
    /* NOTE(review): extraction gap (lines 620-621). */
    if (cpu_flags & AV_CPU_FLAG_ATOM) {
        /* NOTE(review): extraction gap (line 623) — Atom-specific
         * (in-order-friendly) variant assignment missing. */
    } else {
        /* NOTE(review): extraction gap (line 625) — default SSE2 variant
         * assignment missing. */
    }
    /* NOTE(review): extraction gap (line 627). */
#endif /* HAVE_SSE2_EXTERNAL */
}
630 
/*
 * SSSE3-level DSPContext initialization.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (631) and body
 * lines 635, 637, 640-641 are missing; restore from the original source.
 */
                               int cpu_flags)
{
#if HAVE_SSSE3_EXTERNAL
    /* NOTE(review): extraction gap (line 635). */
    if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
        /* NOTE(review): extraction gap (line 637) — guarded assignment
         * missing (uses the SSE4 flag purely as a CPU-generation proxy). */

    if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
        /* NOTE(review): extraction gap (lines 640-641) — guarded assignment
         * missing (avoided on CPUs where cacheline-split loads are cheap). */
#endif /* HAVE_SSSE3_EXTERNAL */
}
644 
/*
 * SSE4.1-level DSPContext initialization.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (645) and body
 * line 649 are missing; line 649 presumably assigns
 * c->vector_clip_int32 = ff_vector_clip_int32_sse4 (declared above) —
 * confirm against the original source.
 */
                              int cpu_flags)
{
#if HAVE_SSE4_EXTERNAL
    /* NOTE(review): extraction gap (line 649). */
#endif /* HAVE_SSE4_EXTERNAL */
}
652 
/*
 * Top-level x86 DSPContext initialization: queries the CPU feature flags
 * once, then dispatches to the per-instruction-set init helpers in
 * ascending capability order (MMX -> MMXEXT -> SSE -> SSE2 -> SSSE3 ->
 * SSE4), so later levels can override earlier assignments.
 *
 * NOTE(review): Doxygen extraction gaps — the signature line (653,
 * presumably "av_cold void ff_dsputil_init_x86(DSPContext *c,
 * AVCodecContext *avctx)") and body lines 659, 668-671, 673-674 are
 * missing; restore from the original source.
 */
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_CMOV)
        /* NOTE(review): extraction gap (line 659) — the cmov-guarded
         * assignment is missing. */
#endif

    if (X86_MMX(cpu_flags)) {
#if HAVE_INLINE_ASM
        const int idct_algo = avctx->idct_algo;

        /* MMX IDCTs are 8-bit, full-resolution only. */
        if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
            if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
                /* NOTE(review): extraction gap (lines 668-671) — simple-MMX
                 * idct_put/idct_add/idct/permutation assignments missing. */
            } else if (idct_algo == FF_IDCT_XVIDMMX) {
                /* NOTE(review): extraction gap (lines 673-674) — Xvid
                 * idct_put/idct_add assignments missing. */
                c->idct = ff_idct_xvid_mmx;
            }
        }
#endif /* HAVE_INLINE_ASM */

        dsputil_init_mmx(c, avctx, cpu_flags);
    }

    if (X86_MMXEXT(cpu_flags))
        dsputil_init_mmxext(c, avctx, cpu_flags);

    if (X86_SSE(cpu_flags))
        dsputil_init_sse(c, avctx, cpu_flags);

    if (X86_SSE2(cpu_flags))
        dsputil_init_sse2(c, avctx, cpu_flags);

    if (EXTERNAL_SSSE3(cpu_flags))
        dsputil_init_ssse3(c, avctx, cpu_flags);

    if (EXTERNAL_SSE4(cpu_flags))
        dsputil_init_sse4(c, avctx, cpu_flags);

    if (CONFIG_ENCODERS)
        ff_dsputilenc_init_mmx(c, avctx);
}