00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
#include <stdint.h>

#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
#include "dsputil_mmx.h"
00027
00028 #if HAVE_YASM
/*
 * pixels4 / pixels8 put and avg primitives (mmxext).
 * Only prototypes appear here; no definition is visible in this section
 * (presumably provided by the assembly sources, given the HAVE_YASM
 * guard -- confirm).
 */
void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);
/**
 * 16-wide put built from the 8-wide mmxext primitive: the primitive is
 * invoked once at byte offset 0 and once at byte offset 8 of both
 * buffers, with the same line_size and h.
 */
static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                                   int line_size, int h)
{
    int half;

    for (half = 0; half < 2; half++)
        ff_put_pixels8_mmxext(block + 8 * half, pixels + 8 * half,
                              line_size, h);
}
/**
 * 16-wide avg built from the 8-wide mmxext primitive: the primitive is
 * invoked once at byte offset 0 and once at byte offset 8 of both
 * buffers, with the same line_size and h.
 */
static void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                                   int line_size, int h)
{
    int half;

    for (half = 0; half < 2; half++)
        ff_avg_pixels8_mmxext(block + 8 * half, pixels + 8 * half,
                              line_size, h);
}
/*
 * "l2" put/avg primitives: two source pointers (src1, src2), a destination
 * stride and a stride for src1.  No stride argument is passed for src2.
 * Prototypes only.
 */
void ff_put_pixels4_l2_mmxext (uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels4_l2_mmxext (uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_pixels8_l2_mmxext (uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext (uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
/* 16-wide put/avg primitives (sse2).  Prototypes only. */
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          int line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          int line_size, int h);
/* The sse2 names of the l2 primitives resolve to the mmxext functions. */
#define ff_put_pixels8_l2_sse2   ff_put_pixels8_l2_mmxext
#define ff_avg_pixels8_l2_sse2   ff_avg_pixels8_l2_mmxext
#define ff_put_pixels16_l2_sse2  ff_put_pixels16_l2_mmxext
#define ff_avg_pixels16_l2_sse2  ff_avg_pixels16_l2_mmxext
00065
/*
 * Declare the quarter-pel lowpass primitives for one operation.
 * OPNAME (without underscores) is pasted between "ff_" and "_h264..." /
 * "_pixels..." in every symbol name.  The expansion consists solely of
 * prototypes; it is instantiated for `avg` and `put` right below.
 */
#define DEF_QPEL(OPNAME) \
    void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride); \
    void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride); \
    void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride); \
    void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext( \
        uint8_t *dst, uint8_t *src, uint8_t *src2, \
        int dstStride, int src2Stride); \
    void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext( \
        uint8_t *dst, uint8_t *src, uint8_t *src2, \
        int dstStride, int src2Stride); \
    void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3( \
        uint8_t *dst, uint8_t *src, uint8_t *src2, \
        int dstStride, int src2Stride); \
    void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride); \
    void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h); \
    void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2( \
        uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h); \
    void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext( \
        uint8_t *src, int16_t *tmp, int srcStride); \
    void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext( \
        int16_t *tmp, uint8_t *dst, int dstStride); \
    void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext( \
        uint8_t *src, int16_t *tmp, int srcStride, int size); \
    void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2( \
        uint8_t *src, int16_t *tmp, int srcStride, int size); \
    void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext( \
        uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h); \
    void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3( \
        uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size); \
    void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext( \
        uint8_t *dst, int16_t *src16, uint8_t *src8, \
        int dstStride, int src8Stride, int h); \
    void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext( \
        uint8_t *dst, int16_t *src16, uint8_t *src8, \
        int dstStride, int src8Stride, int h);

DEF_QPEL(avg)
DEF_QPEL(put)
00087
/*
 * Generate the C wrappers that tile the mmxext lowpass primitives into
 * the block sizes used by the motion-compensation functions.
 *
 *   OPNAME  operation prefix including its trailing underscore
 *           (instantiated with put_ and avg_ in this file)
 *   OP      accepted for symmetry; not referenced by the expansion
 *   MMX     suffix pasted onto the names of the generated functions
 *
 * tmpStride is accepted by several wrappers but never read by them.
 */
#define QPEL_H264(OPNAME, OP, MMX) \
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride) \
{ \
    int col; \
    /* step back two rows and two bytes before the vertical pass */ \
    src -= 2 * srcStride + 2; \
    /* three vertical passes, each 4 entries further into src and tmp */ \
    for (col = 0; col < 3; col++) \
        ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src + 4 * col, \
                                                        tmp + 4 * col, \
                                                        srcStride); \
    /* the horizontal pass reads from the start of tmp */ \
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride, int h) \
{ \
    /* the primitive is handed a pointer two rows above src */ \
    uint8_t *top = src - 2 * srcStride; \
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst,     top, \
                                            dstStride, srcStride, h); \
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst + 4, top + 4, \
                                            dstStride, srcStride, h); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, \
        int tmpStride, int srcStride, int size) \
{ \
    int left; \
    src -= 2 * srcStride + 2; \
    /* (size + 8) / 4 passes, advancing src and tmp by 4 after each */ \
    for (left = (size + 8) >> 2; left != 0; left--) { \
        ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, \
                                                      srcStride, size); \
        tmp += 4; \
        src += 4; \
    } \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        int dstStride, int tmpStride, int size) \
{ \
    int more = size >> 4; \
    int pass = 0; \
    /* runs (size >> 4) + 1 times: once for size 8, twice for size 16 */ \
    do { \
        ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst + 8 * pass, \
                                                              tmp + 8 * pass, \
                                                              dstStride, 0, \
                                                              size); \
        pass++; \
    } while (more--); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst, src, dstStride, \
                                                      srcStride, 8); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    int half; \
    /* left and right 8-wide halves, 16 rows each */ \
    for (half = 0; half < 2; half++) \
        ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst + 8 * half, \
                                                          src + 8 * half, \
                                                          dstStride, \
                                                          srcStride, 16); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    uint8_t *dst_low = dst + 8 * dstStride; \
    uint8_t *src_low = src + 8 * srcStride; \
    /* four 8-wide calls: upper left, upper right, lower left, lower right */ \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst,         src, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst + 8,     src + 8, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst_low,     src_low, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst_low + 8, src_low + 8, \
                                                  dstStride, srcStride); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, \
        uint8_t *src2, int dstStride, int src2Stride) \
{ \
    /* there is no source-stride parameter: src is stepped by dstStride */ \
    uint8_t *src_low  = src  + 8 * dstStride; \
    uint8_t *dst_low  = dst  + 8 * dstStride; \
    uint8_t *src2_low = src2 + 8 * src2Stride; \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst, src, src2, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst + 8, src + 8, \
                                                     src2 + 8, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst_low, src_low, \
                                                     src2_low, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst_low + 8, src_low + 8, \
                                                     src2_low + 8, \
                                                     dstStride, src2Stride); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride, int size) \
{ \
    /* the first stage always uses the put_ variant, whatever OPNAME is */ \
    ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, \
                                              srcStride, size); \
    ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, \
                                                        tmpStride, size); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, \
                                                       dstStride, tmpStride, \
                                                       srcStride, 8); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, \
                                                       dstStride, tmpStride, \
                                                       srcStride, 16); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, \
        uint8_t *src8, int dstStride, int src8Stride, int h) \
{ \
    int half; \
    /* two 8-wide calls; all three pointers advance by 8 elements */ \
    for (half = 0; half < 2; half++) \
        ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst   + 8 * half, \
                                                   src16 + 8 * half, \
                                                   src8  + 8 * half, \
                                                   dstStride, src8Stride, h); \
}
00170
00171
#if ARCH_X86_64
/*
 * On x86-64 the 16-wide ssse3 h_lowpass_l2 functions are declared as
 * external symbols below, so the generator expands to nothing.
 */
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src,
                                           uint8_t *src2,
                                           int dstStride, int src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src,
                                           uint8_t *src2,
                                           int dstStride, int src2Stride);

#else // ARCH_X86_64
/*
 * Elsewhere the 16-wide function is assembled from four calls to the
 * 8-wide one.  OP is not referenced by the expansion.
 */
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX) \
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, \
        uint8_t *src2, int dstStride, int src2Stride) \
{ \
    /* there is no source-stride parameter: src is stepped by dstStride */ \
    uint8_t *src_low  = src  + 8 * dstStride; \
    uint8_t *dst_low  = dst  + 8 * dstStride; \
    uint8_t *src2_low = src2 + 8 * src2Stride; \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst, src, src2, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst + 8, src + 8, \
                                                     src2 + 8, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst_low, src_low, \
                                                     src2_low, \
                                                     dstStride, src2Stride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst_low + 8, src_low + 8, \
                                                     src2_low + 8, \
                                                     dstStride, src2Stride); \
}
#endif // ARCH_X86_64
00190
/*
 * Horizontal wrappers for an XMM flavour: expands QPEL_H264_H16_XMM first,
 * then defines the 16-wide h_lowpass as four calls to the 8-wide one.
 * OP is only forwarded to QPEL_H264_H16_XMM.
 */
#define QPEL_H264_H_XMM(OPNAME, OP, MMX) \
QPEL_H264_H16_XMM(OPNAME, OP, MMX) \
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    uint8_t *dst_low = dst + 8 * dstStride; \
    uint8_t *src_low = src + 8 * srcStride; \
    /* upper left, upper right, lower left, lower right */ \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst,         src, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst + 8,     src + 8, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst_low,     src_low, \
                                                  dstStride, srcStride); \
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst_low + 8, src_low + 8, \
                                                  dstStride, srcStride); \
}
00201
/*
 * Vertical wrappers for an XMM flavour, built on the 8or16 v_lowpass of
 * the same suffix: the 8-wide version is one call with h = 8, the 16-wide
 * version is two calls (byte offsets 0 and 8) with h = 16.
 * OP is not referenced by the expansion.
 */
#define QPEL_H264_V_XMM(OPNAME, OP, MMX) \
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst, src, dstStride, \
                                                      srcStride, 8); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, \
        int dstStride, int srcStride) \
{ \
    int half; \
    for (half = 0; half < 2; half++) \
        ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst + 8 * half, \
                                                          src + 8 * half, \
                                                          dstStride, \
                                                          srcStride, 16); \
}
00210
00211 static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
00212 int w = (size+8)>>3;
00213 src -= 2*srcStride+2;
00214 while(w--){
00215 ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
00216 tmp += 8;
00217 src += 8;
00218 }
00219 }
00220
/*
 * hv lowpass wrappers for an XMM flavour.  The first stage is always
 * ff_put_h264_qpel8or16_hv1_lowpass_sse2, regardless of OPNAME and MMX;
 * the second stage is the OPNAME/MMX-specific hv2 function.
 * OP is not referenced by the expansion.
 */
#define QPEL_H264_HV_XMM(OPNAME, OP, MMX) \
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride, int size) \
{ \
    ff_put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, \
                                           srcStride, size); \
    ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, \
                                                        tmpStride, size); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, \
                                                       dstStride, tmpStride, \
                                                       srcStride, 8); \
} \
\
static av_always_inline void \
ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \
        uint8_t *src, int dstStride, int tmpStride, int srcStride) \
{ \
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, \
                                                       dstStride, tmpStride, \
                                                       srcStride, 16); \
}
00232
/* sse2 names of the h_lowpass_l2 wrappers resolve to the mmxext ones. */
#define ff_put_h264_qpel8_h_lowpass_l2_sse2   ff_put_h264_qpel8_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel8_h_lowpass_l2_sse2   ff_avg_h264_qpel8_h_lowpass_l2_mmxext
#define ff_put_h264_qpel16_h_lowpass_l2_sse2  ff_put_h264_qpel16_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel16_h_lowpass_l2_sse2  ff_avg_h264_qpel16_h_lowpass_l2_mmxext

/* ssse3 names of the v_lowpass wrappers resolve to the sse2 ones. */
#define ff_put_h264_qpel8_v_lowpass_ssse3     ff_put_h264_qpel8_v_lowpass_sse2
#define ff_avg_h264_qpel8_v_lowpass_ssse3     ff_avg_h264_qpel8_v_lowpass_sse2
#define ff_put_h264_qpel16_v_lowpass_ssse3    ff_put_h264_qpel16_v_lowpass_sse2
#define ff_avg_h264_qpel16_v_lowpass_ssse3    ff_avg_h264_qpel16_v_lowpass_sse2

/* sse2 names of the hv2 stage resolve to the mmxext ones. */
#define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
#define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
00245
/*
 * Emit the complete set of mc functions for one (OPNAME, SIZE, MMX)
 * combination by expanding the four per-group generators in turn.
 */
#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
    H264_MC_C(OPNAME, SIZE, MMX, ALIGN)   \
    H264_MC_V(OPNAME, SIZE, MMX, ALIGN)   \
    H264_MC_H(OPNAME, SIZE, MMX, ALIGN)   \
    H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)
00251
/** 16-wide mc00 put for sse2: forwards to ff_put_pixels16_sse2 with h = 16. */
static void put_h264_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, int stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}
/** 16-wide mc00 avg for sse2: forwards to ff_avg_pixels16_sse2 with h = 16. */
static void avg_h264_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, int stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
/* The 8-wide mc00 sse2 names resolve to the mmxext functions. */
#define put_h264_qpel8_mc00_sse2  put_h264_qpel8_mc00_mmxext
#define avg_h264_qpel8_mc00_sse2  avg_h264_qpel8_mc00_mmxext
00260
/*
 * mc00 generator: forwards to ff_<OPNAME>pixels<SIZE>_<MMX> with SIZE as
 * the row count.  ALIGN is not referenced by the expansion.
 */
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE); \
}
00265
/*
 * mc10 / mc20 / mc30 generator.
 *   mc10: h_lowpass_l2 with src itself as the second source
 *   mc20: plain h_lowpass
 *   mc30: h_lowpass_l2 with src + 1 as the second source
 * ALIGN is not referenced by the expansion.
 */
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, \
                                                                stride, \
                                                                stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, \
                                                             stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, \
                                                                src + 1, \
                                                                stride, \
                                                                stride); \
}
00278
/*
 * mc01 / mc02 / mc03 generator.
 *   mc01: put v_lowpass into a SIZE x SIZE scratch block (stride SIZE),
 *         then the l2 primitive on src and the scratch block
 *   mc02: v_lowpass straight into dst
 *   mc03: as mc01, but the l2 primitive reads src + stride
 * ALIGN is the alignment requested for the scratch block.
 */
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); \
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, \
                                                   stride, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, \
                                                             stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); \
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src + stride, temp, \
                                                   stride, stride, SIZE); \
}
00295
/*
 * Generator for the nine mc functions that combine both directions:
 * mc11, mc31, mc13, mc33, mc22, mc21, mc23, mc12 and mc32.
 *
 *   OPNAME  operation prefix including its trailing underscore
 *   SIZE    block size pasted into the names and used for scratch sizing
 *   MMX     instruction-set suffix of the helpers to call
 *   ALIGN   alignment requested for the scratch buffers
 *
 * mc11/mc31/mc13/mc33 run a put v_lowpass into a SIZE x SIZE scratch block
 * (on src or src + 1) and feed it as the second source of h_lowpass_l2
 * (on src or src + stride).
 *
 * mc21/mc23/mc12/mc32 share one scratch buffer: the first SIZE*SIZE bytes
 * hold the 8-bit hv result (halfHV), the remainder is handed to hv_lowpass
 * as its 16-bit intermediate buffer (halfV).
 *
 * The alignment assertions convert the address through uintptr_t: a cast
 * to int is narrower than a pointer on LP64 targets.  The mc22 scratch is
 * declared int16_t to match the int16_t *tmp parameter it is passed to.
 */
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, \
                                                                temp, stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src + 1, SIZE, \
                                                   stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, \
                                                                temp, stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, \
                                                                src + stride, \
                                                                temp, stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * SIZE]; \
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src + 1, SIZE, \
                                                   stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, \
                                                                src + stride, \
                                                                temp, stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    /* int16_t, not uint16_t: hv_lowpass takes an int16_t *tmp */ \
    DECLARE_ALIGNED(ALIGN, int16_t, temp)[SIZE * (SIZE < 8 ? 12 : 24)]; \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, \
                                                              stride, SIZE, \
                                                              stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * (SIZE < 8 ? 12 : 24) * 2 \
                                          + SIZE * SIZE]; \
    uint8_t * const halfHV = temp; \
    int16_t * const halfV  = (int16_t *)(temp + SIZE * SIZE); \
    /* the scratch buffer must start on an 8-byte boundary */ \
    assert(((uintptr_t)temp & 7) == 0); \
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, \
                                                    SIZE, SIZE, stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, \
                                                                halfHV, \
                                                                stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * (SIZE < 8 ? 12 : 24) * 2 \
                                          + SIZE * SIZE]; \
    uint8_t * const halfHV = temp; \
    int16_t * const halfV  = (int16_t *)(temp + SIZE * SIZE); \
    /* the scratch buffer must start on an 8-byte boundary */ \
    assert(((uintptr_t)temp & 7) == 0); \
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, \
                                                    SIZE, SIZE, stride); \
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, \
                                                                src + stride, \
                                                                halfHV, \
                                                                stride, \
                                                                SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * (SIZE < 8 ? 12 : 24) * 2 \
                                          + SIZE * SIZE]; \
    uint8_t * const halfHV = temp; \
    int16_t * const halfV  = (int16_t *)(temp + SIZE * SIZE); \
    /* the scratch buffer must start on an 8-byte boundary */ \
    assert(((uintptr_t)temp & 7) == 0); \
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, \
                                                    SIZE, SIZE, stride); \
    /* always the mmxext shift5 routine; reads halfV from entry 2 */ \
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV + 2, \
                                                         halfHV, stride, \
                                                         SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         int stride) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE * (SIZE < 8 ? 12 : 24) * 2 \
                                          + SIZE * SIZE]; \
    uint8_t * const halfHV = temp; \
    int16_t * const halfV  = (int16_t *)(temp + SIZE * SIZE); \
    /* the scratch buffer must start on an 8-byte boundary */ \
    assert(((uintptr_t)temp & 7) == 0); \
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, \
                                                    SIZE, SIZE, stride); \
    /* always the mmxext shift5 routine; reads halfV from entry 3 */ \
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV + 3, \
                                                         halfHV, stride, \
                                                         SIZE, SIZE); \
}
00361
/*
 * Emit every mc function for sizes 4, 8 and 16, for both put_ and avg_,
 * with the scratch alignment argument fixed to 8.
 */
#define H264_MC_4816(MMX)      \
    H264_MC(put_,  4, MMX, 8)  \
    H264_MC(put_,  8, MMX, 8)  \
    H264_MC(put_, 16, MMX, 8)  \
    H264_MC(avg_,  4, MMX, 8)  \
    H264_MC(avg_,  8, MMX, 8)  \
    H264_MC(avg_, 16, MMX, 8)
00369
/*
 * Apply one generator macro (QPEL) to sizes 8 and 16, for both put_ and
 * avg_, with the scratch alignment argument fixed to 16.
 */
#define H264_MC_816(QPEL, XMM)  \
    QPEL(put_,  8, XMM, 16)     \
    QPEL(put_, 16, XMM, 16)     \
    QPEL(avg_,  8, XMM, 16)     \
    QPEL(avg_, 16, XMM, 16)
00375
00376 #undef PAVGB
00377 #define PAVGB "pavgb"
00378 QPEL_H264(put_, PUT_OP, mmxext)
00379 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
00380 QPEL_H264_V_XMM(put_, PUT_OP, sse2)
00381 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
00382 QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
00383 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
00384 QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
00385 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
00386 QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
00387 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
00388 #undef PAVGB
00389
00390 H264_MC_4816(mmxext)
00391 H264_MC_816(H264_MC_V, sse2)
00392 H264_MC_816(H264_MC_HV, sse2)
00393 H264_MC_816(H264_MC_H, ssse3)
00394 H264_MC_816(H264_MC_HV, ssse3)
00395
00396
00397
/*
 * Declare one external mc function named
 * ff_<OP>_h264_qpel<NUM>_<TYPE>_<DEPTH>_<OPT>(dst, src, stride).
 */
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
    void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT( \
        uint8_t *dst, uint8_t *src, int stride);
00401
/* Declare the put and avg functions of one TYPE for sizes 4, 8 and 16. */
#define LUMA_MC_ALL(DEPTH, TYPE, OPT)      \
    LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
00409
/* Declare the put and avg functions of one TYPE for sizes 8 and 16 only. */
#define LUMA_MC_816(DEPTH, TYPE, OPT)      \
    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT)  \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
00415
00416 LUMA_MC_ALL(10, mc00, mmxext)
00417 LUMA_MC_ALL(10, mc10, mmxext)
00418 LUMA_MC_ALL(10, mc20, mmxext)
00419 LUMA_MC_ALL(10, mc30, mmxext)
00420 LUMA_MC_ALL(10, mc01, mmxext)
00421 LUMA_MC_ALL(10, mc11, mmxext)
00422 LUMA_MC_ALL(10, mc21, mmxext)
00423 LUMA_MC_ALL(10, mc31, mmxext)
00424 LUMA_MC_ALL(10, mc02, mmxext)
00425 LUMA_MC_ALL(10, mc12, mmxext)
00426 LUMA_MC_ALL(10, mc22, mmxext)
00427 LUMA_MC_ALL(10, mc32, mmxext)
00428 LUMA_MC_ALL(10, mc03, mmxext)
00429 LUMA_MC_ALL(10, mc13, mmxext)
00430 LUMA_MC_ALL(10, mc23, mmxext)
00431 LUMA_MC_ALL(10, mc33, mmxext)
00432
00433 LUMA_MC_816(10, mc00, sse2)
00434 LUMA_MC_816(10, mc10, sse2)
00435 LUMA_MC_816(10, mc10, sse2_cache64)
00436 LUMA_MC_816(10, mc10, ssse3_cache64)
00437 LUMA_MC_816(10, mc20, sse2)
00438 LUMA_MC_816(10, mc20, sse2_cache64)
00439 LUMA_MC_816(10, mc20, ssse3_cache64)
00440 LUMA_MC_816(10, mc30, sse2)
00441 LUMA_MC_816(10, mc30, sse2_cache64)
00442 LUMA_MC_816(10, mc30, ssse3_cache64)
00443 LUMA_MC_816(10, mc01, sse2)
00444 LUMA_MC_816(10, mc11, sse2)
00445 LUMA_MC_816(10, mc21, sse2)
00446 LUMA_MC_816(10, mc31, sse2)
00447 LUMA_MC_816(10, mc02, sse2)
00448 LUMA_MC_816(10, mc12, sse2)
00449 LUMA_MC_816(10, mc22, sse2)
00450 LUMA_MC_816(10, mc32, sse2)
00451 LUMA_MC_816(10, mc03, sse2)
00452 LUMA_MC_816(10, mc13, sse2)
00453 LUMA_MC_816(10, mc23, sse2)
00454 LUMA_MC_816(10, mc33, sse2)
00455
/*
 * Define the externally visible 16-wide "_10_" mc function for one
 * (OP, MC, MMX) combination as four calls to the matching qpel8 function:
 * byte offsets 0 and 16 on the current rows, then the same two offsets
 * eight strides further down.
 * NOTE(review): the 16-byte step presumably corresponds to 8 samples of
 * two bytes each for the 10-bit variants -- confirm against the asm.
 */
#define QPEL16_OPMC(OP, MC, MMX) \
void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, \
                                                     uint8_t *src, \
                                                     int stride) \
{ \
    uint8_t *dst_low = dst + 8 * stride; \
    uint8_t *src_low = src + 8 * stride; \
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst,          src, \
                                                   stride); \
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst + 16,     src + 16, \
                                                   stride); \
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst_low,      src_low, \
                                                   stride); \
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst_low + 16, src_low + 16, \
                                                   stride); \
}
00465
/* Define both the put and the avg 16-wide function for one mc type. */
#define QPEL16_OP(MC, MMX)     \
    QPEL16_OPMC(put, MC, MMX)  \
    QPEL16_OPMC(avg, MC, MMX)
00469
/* Define the 16-wide put/avg functions for all sixteen mc types. */
#define QPEL16(MMX)       \
    QPEL16_OP(mc00, MMX)  \
    QPEL16_OP(mc01, MMX)  \
    QPEL16_OP(mc02, MMX)  \
    QPEL16_OP(mc03, MMX)  \
    QPEL16_OP(mc10, MMX)  \
    QPEL16_OP(mc11, MMX)  \
    QPEL16_OP(mc12, MMX)  \
    QPEL16_OP(mc13, MMX)  \
    QPEL16_OP(mc20, MMX)  \
    QPEL16_OP(mc21, MMX)  \
    QPEL16_OP(mc22, MMX)  \
    QPEL16_OP(mc23, MMX)  \
    QPEL16_OP(mc30, MMX)  \
    QPEL16_OP(mc31, MMX)  \
    QPEL16_OP(mc32, MMX)  \
    QPEL16_OP(mc33, MMX)
00487
/*
 * The mmxext 16-wide wrappers are only generated on 32-bit x86 builds
 * with yasm and the H.264 qpel code enabled.
 */
#if ARCH_X86_32 && HAVE_YASM && CONFIG_H264QPEL // ARCH_X86_64 implies SSE2+
QPEL16(mmxext)
#endif
00491
00492 #endif