00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/cpu.h"
00024 #include "libavutil/x86_cpu.h"
00025 #include "libavcodec/vp8dsp.h"
00026
00027 #if HAVE_YASM
00028
00029
00030
00031
/*
 * Sub-pixel ("epel") put functions.  They are only declared here; the
 * definitions live outside this file (presumably in the yasm sources, since
 * this whole section is compiled only when HAVE_YASM is set).
 *
 * Naming scheme: ff_put_vp8_epel<W>_<dir><taps>_<isa>
 *   <W>    width suffix (4 or 8)
 *   <dir>  h or v
 *   <taps> 4 or 6
 *   <isa>  mmx2, sse2 or ssse3
 *
 * All of them share one signature so that they can be stored in
 * put_vp8_epel_pixels_tab and chained by the wrapper macros further down.
 * NOTE(review): mx/my look like the horizontal/vertical sub-pixel position;
 * confirm against the implementations.
 */

/* MMX2: width 4 only. */
extern void ff_put_vp8_epel4_h4_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);

/* SSE2: width 8 only. */
extern void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);

/* SSSE3: widths 4 and 8. */
extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                       uint8_t *src, ptrdiff_t srcstride,
                                       int height, int mx, int my);
00082
/*
 * Bilinear put functions, declared here and defined outside this file.
 *
 * Naming scheme: ff_put_vp8_bilinear<W>_<dir>_<isa>, with <W> = 4 or 8,
 * <dir> = h or v and <isa> = mmx2 (width 4), sse2 (width 8) or ssse3 (both).
 * The signature is the same as for the epel functions so that both families
 * can be stored in tables with a common function-pointer type.
 */

/* Horizontal. */
extern void ff_put_vp8_bilinear4_h_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);

/* Vertical. */
extern void ff_put_vp8_bilinear4_v_mmx2  (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                          uint8_t *src, ptrdiff_t srcstride,
                                          int height, int mx, int my);
00108
00109
/*
 * Put functions installed in the [0][0] slot of both MC tables (see
 * ff_vp8dsp_init_x86).  Declared here, defined outside this file.  They take
 * the same arguments as the filtering functions so they fit the same
 * function-pointer tables.
 * NOTE(review): the [0][0] slot presumably means "no sub-pixel offset", i.e.
 * a plain block copy; confirm against the implementations.
 */
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
                                    uint8_t *src, ptrdiff_t srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride,
                                    uint8_t *src, ptrdiff_t srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
                                    uint8_t *src, ptrdiff_t srcstride,
                                    int height, int mx, int my);
00119
/*
 * TAP_W16(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>
 * as two calls to the corresponding 8-wide function
 *     ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>,
 * first at column offset 0 and then at column offset 8 of both dst and src.
 * Strides, height, mx and my are forwarded unchanged.
 *
 * The 8-wide function must already be declared or defined at the point of
 * instantiation.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst, dststride, src, srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst + 8, dststride, src + 8, srcstride, height, mx, my); \
}
/*
 * TAP_W8(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>
 * as two calls to the corresponding 4-wide function
 *     ff_put_vp8_<FILTERTYPE>4_<TAPTYPE>_<OPT>,
 * first at column offset 0 and then at column offset 4 of both dst and src.
 * Strides, height, mx and my are forwarded unchanged.
 *
 * The 4-wide function must already be declared or defined at the point of
 * instantiation.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst, dststride, src, srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
}
00140
00141 #if ARCH_X86_32
00142 TAP_W8 (mmx2, epel, h4)
00143 TAP_W8 (mmx2, epel, h6)
00144 TAP_W16(mmx2, epel, h6)
00145 TAP_W8 (mmx2, epel, v4)
00146 TAP_W8 (mmx2, epel, v6)
00147 TAP_W16(mmx2, epel, v6)
00148 TAP_W8 (mmx2, bilinear, h)
00149 TAP_W16(mmx2, bilinear, h)
00150 TAP_W8 (mmx2, bilinear, v)
00151 TAP_W16(mmx2, bilinear, v)
00152 #endif
00153
00154 TAP_W16(sse2, epel, h6)
00155 TAP_W16(sse2, epel, v6)
00156 TAP_W16(sse2, bilinear, h)
00157 TAP_W16(sse2, bilinear, v)
00158
00159 TAP_W16(ssse3, epel, h6)
00160 TAP_W16(ssse3, epel, v6)
00161 TAP_W16(ssse3, bilinear, h)
00162 TAP_W16(ssse3, bilinear, v)
00163
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function
 *     ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>.
 *
 * Pass 1 runs ff_put_vp8_epel<SIZE>_h<TAPNUMX>_<OPT> into a stack buffer
 *        `tmp` (row stride SIZE, alignment ALIGN).  It starts
 *        TAPNUMY / 2 - 1 rows above `src` and produces
 *        height + TAPNUMY - 1 rows, i.e. the extra rows the second pass
 *        reads around each output row.
 * Pass 2 runs ff_put_vp8_epel<SIZE>_v<TAPNUMY>_<OPT> from `tmpptr`, which
 *        points TAPNUMY / 2 - 1 rows into `tmp` (the row that corresponds to
 *        the caller's original `src`), into `dst`.
 *
 * `tmp` holds SIZE * (MAXHEIGHT + TAPNUMY - 1) bytes, so the caller must pass
 * height <= MAXHEIGHT; this is not checked here.  Both passes receive both mx
 * and my unchanged.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
    src -= srcstride * (TAPNUMY / 2 - 1); \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        tmp, SIZE, src, srcstride, height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, tmpptr, SIZE, height, mx, my); \
}
00177
00178 #if ARCH_X86_32
00179 #define HVTAPMMX(x, y) \
00180 HVTAP(mmx2, 8, x, y, 4, 8) \
00181 HVTAP(mmx2, 8, x, y, 8, 16)
00182
00183 HVTAP(mmx2, 8, 6, 6, 16, 16)
00184 #else
00185 #define HVTAPMMX(x, y) \
00186 HVTAP(mmx2, 8, x, y, 4, 8)
00187 #endif
00188
00189 HVTAPMMX(4, 4)
00190 HVTAPMMX(4, 6)
00191 HVTAPMMX(6, 4)
00192 HVTAPMMX(6, 6)
00193
00194 #define HVTAPSSE2(x, y, w) \
00195 HVTAP(sse2, 16, x, y, w, 16) \
00196 HVTAP(ssse3, 16, x, y, w, 16)
00197
00198 HVTAPSSE2(4, 4, 8)
00199 HVTAPSSE2(4, 6, 8)
00200 HVTAPSSE2(6, 4, 8)
00201 HVTAPSSE2(6, 6, 8)
00202 HVTAPSSE2(6, 6, 16)
00203
00204 HVTAP(ssse3, 16, 4, 4, 4, 8)
00205 HVTAP(ssse3, 16, 4, 6, 4, 8)
00206 HVTAP(ssse3, 16, 6, 4, 4, 8)
00207 HVTAP(ssse3, 16, 6, 6, 4, 8)
00208
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function
 *     ff_put_vp8_bilinear<SIZE>_hv_<OPT>.
 *
 * Pass 1 runs ff_put_vp8_bilinear<SIZE>_h_<OPT> from `src` into a stack
 *        buffer `tmp` (row stride SIZE, alignment ALIGN), producing
 *        height + 1 rows.
 * Pass 2 runs ff_put_vp8_bilinear<SIZE>_v_<OPT> from the start of `tmp`
 *        into `dst` for `height` rows.
 *
 * `tmp` holds SIZE * (MAXHEIGHT + 2) bytes, so the caller must pass
 * height <= MAXHEIGHT; this is not checked here.  Both passes receive both mx
 * and my unchanged.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        tmp, SIZE, src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, tmp, SIZE, height, mx, my); \
}
00220
00221 HVBILIN(mmx2, 8, 4, 8)
00222 #if ARCH_X86_32
00223 HVBILIN(mmx2, 8, 8, 16)
00224 HVBILIN(mmx2, 8, 16, 16)
00225 #endif
00226 HVBILIN(sse2, 8, 8, 16)
00227 HVBILIN(sse2, 8, 16, 16)
00228 HVBILIN(ssse3, 8, 4, 8)
00229 HVBILIN(ssse3, 8, 8, 16)
00230 HVBILIN(ssse3, 8, 16, 16)
00231
00232 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16],
00233 ptrdiff_t stride);
00234 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16],
00235 ptrdiff_t stride);
00236 extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16],
00237 ptrdiff_t stride);
00238 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16],
00239 ptrdiff_t stride);
00240 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16],
00241 ptrdiff_t stride);
00242 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
00243 extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
00244 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16],
00245 ptrdiff_t stride);
00246 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16],
00247 ptrdiff_t stride);
00248
/*
 * DECLARE_LOOP_FILTER(NAME)
 *
 * Declares the full set of external loop-filter entry points for one
 * instruction-set suffix NAME.  For each of the v and h directions that is:
 *
 *   ff_vp8_<d>_loop_filter_simple_<NAME>    (dst, stride, flim)
 *   ff_vp8_<d>_loop_filter16y_inner_<NAME>  (dst, stride, e, i, hvt)
 *   ff_vp8_<d>_loop_filter8uv_inner_<NAME>  (dstU, dstV, s, e, i, hvt)
 *   ff_vp8_<d>_loop_filter16y_mbedge_<NAME> (dst, stride, e, i, hvt)
 *   ff_vp8_<d>_loop_filter8uv_mbedge_<NAME> (dstU, dstV, s, e, i, hvt)
 *
 * The expansion already ends in ';', so the macro is invoked without a
 * trailing semicolon.
 * NOTE(review): flim / e / i / hvt look like filter limit and threshold
 * values; confirm their exact meaning against the implementations.
 */
#define DECLARE_LOOP_FILTER(NAME)\
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, \
                                                 ptrdiff_t stride, \
                                                 int flim);\
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, \
                                                 ptrdiff_t stride, \
                                                 int flim);\
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, \
                                                    ptrdiff_t stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, \
                                                    ptrdiff_t stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \
                                                    uint8_t *dstV,\
                                                    ptrdiff_t s, \
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \
                                                    uint8_t *dstV,\
                                                    ptrdiff_t s, \
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \
                                                    ptrdiff_t stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \
                                                    ptrdiff_t stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
                                                    uint8_t *dstV,\
                                                    ptrdiff_t s, \
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
                                                    uint8_t *dstV,\
                                                    ptrdiff_t s, \
                                                    int e, int i, int hvt);

/* One full prototype set per instruction set referenced by the init code. */
DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmx2)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
00290
00291 #endif
00292
/*
 * Table-fill helpers for put_vp8_epel_pixels_tab.  Both macros expect a
 * pointer named `c` to be in scope and paste together the function name
 * ff_put_vp8_epel<SIZE>_<variant>_<OPT> for every slot they write.
 *
 * The bodies are wrapped in do { } while (0) so that each macro behaves as a
 * single statement: `if (x) VP8_MC_FUNC(...);` guards all assignments, not
 * just the first one.  Invoke them with a trailing semicolon.
 */

/*
 * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT): fill the three slots that use only the
 * six-tap variants ([0][2] = h6, [2][0] = v6, [2][2] = h6v6) of table IDX.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/*
 * VP8_MC_FUNC(IDX, SIZE, OPT): fill all eight filtered slots of table IDX,
 * i.e. the five slots involving a four-tap variant plus the three six-tap
 * slots handled by VP8_LUMA_MC_FUNC.  Slot [0][0] is not touched.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
00305
/*
 * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT)
 *
 * Fill the eight filtered slots of put_vp8_bilinear_pixels_tab[IDX] through a
 * pointer named `c` that must be in scope.  Only three distinct functions are
 * used, selected by whether each index is zero:
 *   [0][1], [0][2]                 -> ff_put_vp8_bilinear<SIZE>_h_<OPT>
 *   [1][0], [2][0]                 -> ff_put_vp8_bilinear<SIZE>_v_<OPT>
 *   [1][1], [1][2], [2][1], [2][2] -> ff_put_vp8_bilinear<SIZE>_hv_<OPT>
 * Slot [0][0] is not touched.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: `if (x) VP8_BILINEAR_MC_FUNC(...);` guards all
 * assignments, not just the first one.  Invoke it with a trailing semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
00315
00316
00317 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
00318 {
00319 #if HAVE_YASM
00320 int mm_flags = av_get_cpu_flags();
00321
00322 if (mm_flags & AV_CPU_FLAG_MMX) {
00323 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
00324 c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
00325 #if ARCH_X86_32
00326 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
00327 c->vp8_idct_add = ff_vp8_idct_add_mmx;
00328 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
00329 c->put_vp8_epel_pixels_tab[0][0][0] =
00330 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
00331 #endif
00332 c->put_vp8_epel_pixels_tab[1][0][0] =
00333 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
00334
00335 #if ARCH_X86_32
00336 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
00337 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
00338
00339 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
00340 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
00341 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
00342 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
00343
00344 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
00345 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
00346 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
00347 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
00348 #endif
00349 }
00350
00351
00352
00353 if (mm_flags & AV_CPU_FLAG_MMX2) {
00354 VP8_MC_FUNC(2, 4, mmx2);
00355 VP8_BILINEAR_MC_FUNC(2, 4, mmx2);
00356 #if ARCH_X86_32
00357 VP8_LUMA_MC_FUNC(0, 16, mmx2);
00358 VP8_MC_FUNC(1, 8, mmx2);
00359 VP8_BILINEAR_MC_FUNC(0, 16, mmx2);
00360 VP8_BILINEAR_MC_FUNC(1, 8, mmx2);
00361
00362 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx2;
00363 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx2;
00364
00365 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx2;
00366 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx2;
00367 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx2;
00368 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx2;
00369
00370 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx2;
00371 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx2;
00372 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx2;
00373 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx2;
00374 #endif
00375 }
00376
00377 if (mm_flags & AV_CPU_FLAG_SSE) {
00378 c->vp8_idct_add = ff_vp8_idct_add_sse;
00379 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
00380 c->put_vp8_epel_pixels_tab[0][0][0] =
00381 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
00382 }
00383
00384 if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
00385 VP8_LUMA_MC_FUNC(0, 16, sse2);
00386 VP8_MC_FUNC(1, 8, sse2);
00387 VP8_BILINEAR_MC_FUNC(0, 16, sse2);
00388 VP8_BILINEAR_MC_FUNC(1, 8, sse2);
00389
00390 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
00391
00392 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
00393 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
00394
00395 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
00396 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
00397 }
00398
00399 if (mm_flags & AV_CPU_FLAG_SSE2) {
00400 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
00401
00402 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
00403
00404 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
00405 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
00406
00407 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
00408 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
00409 }
00410
00411 if (mm_flags & AV_CPU_FLAG_SSSE3) {
00412 VP8_LUMA_MC_FUNC(0, 16, ssse3);
00413 VP8_MC_FUNC(1, 8, ssse3);
00414 VP8_MC_FUNC(2, 4, ssse3);
00415 VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
00416 VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
00417 VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
00418
00419 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
00420 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
00421
00422 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
00423 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
00424 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
00425 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
00426
00427 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
00428 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
00429 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
00430 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
00431 }
00432
00433 if (mm_flags & AV_CPU_FLAG_SSE4) {
00434 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
00435
00436 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
00437 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
00438 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
00439 }
00440 #endif
00441 }