00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "libavutil/cpu.h"
00028 #include "libavutil/x86/cpu.h"
00029 #include "libavcodec/vc1dsp.h"
00030 #include "vc1dsp.h"
00031 #include "config.h"
00032
/*
 * LOOP_FILTER(EXT): declare the ff_vc1_{v,h}_loop_filter{4,8}_EXT routines
 * and define two static wrappers, vc1_v_loop_filter16_EXT and
 * vc1_h_loop_filter16_EXT, on top of them.
 *
 * Each wrapper calls the matching filter8 routine twice with the same
 * stride and pq:
 *   - vertical:   first at src, then at src + 8
 *   - horizontal: first at src, then at src + 8 * stride
 */
#define LOOP_FILTER(EXT)                                                    \
void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq);       \
void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq);       \
void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq);       \
void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq);       \
                                                                            \
static void vc1_v_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq)   \
{                                                                           \
    uint8_t *const second = src + 8;                                        \
                                                                            \
    ff_vc1_v_loop_filter8_ ## EXT(src,    stride, pq);                      \
    ff_vc1_v_loop_filter8_ ## EXT(second, stride, pq);                      \
}                                                                           \
                                                                            \
static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq)   \
{                                                                           \
    uint8_t *const second = src + 8 * stride;                               \
                                                                            \
    ff_vc1_h_loop_filter8_ ## EXT(src,    stride, pq);                      \
    ff_vc1_h_loop_filter8_ ## EXT(second, stride, pq);                      \
}
00050
#if HAVE_YASM
/* Prototypes and 16-variant wrappers for each instruction set. */
LOOP_FILTER(mmxext)
LOOP_FILTER(sse2)
LOOP_FILTER(ssse3)

/*
 * For SSE4 only the horizontal filter8 routine is declared here, so its
 * 16-variant wrapper is spelled out instead of going through LOOP_FILTER().
 */
void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq);

/* Call the SSE4 horizontal filter8 at src and again at src + 8 * stride. */
static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
{
    uint8_t *const second = src + 8 * stride;

    ff_vc1_h_loop_filter8_sse4(src,    stride, pq);
    ff_vc1_h_loop_filter8_sse4(second, stride, pq);
}
#endif /* HAVE_YASM */
00064
/*
 * Prototypes for the put/avg "nornd" VC-1 chroma mc8 routines that
 * ff_vc1dsp_init_x86() stores in slot 0 of
 * dsp->put_no_rnd_vc1_chroma_pixels_tab / dsp->avg_no_rnd_vc1_chroma_pixels_tab.
 */
void ff_put_vc1_chroma_mc8_nornd_mmx(uint8_t *dst, uint8_t *src,
                                     int stride, int h, int x, int y);

void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y);

void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);

void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);

void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
00075
00076
00077 av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
00078 {
00079 int mm_flags = av_get_cpu_flags();
00080
00081 if (INLINE_MMX(mm_flags))
00082 ff_vc1dsp_init_mmx(dsp);
00083
00084 if (INLINE_MMXEXT(mm_flags))
00085 ff_vc1dsp_init_mmxext(dsp);
00086
00087 #define ASSIGN_LF(EXT) \
00088 dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \
00089 dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT; \
00090 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \
00091 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \
00092 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
00093 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
00094
00095 #if HAVE_YASM
00096 if (mm_flags & AV_CPU_FLAG_MMX) {
00097 dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
00098 }
00099
00100 if (mm_flags & AV_CPU_FLAG_MMXEXT) {
00101 ASSIGN_LF(mmxext);
00102 dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
00103 } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
00104 dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
00105 }
00106
00107 if (mm_flags & AV_CPU_FLAG_SSE2) {
00108 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
00109 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
00110 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
00111 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
00112 }
00113 if (mm_flags & AV_CPU_FLAG_SSSE3) {
00114 ASSIGN_LF(ssse3);
00115 dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
00116 dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
00117 }
00118 if (mm_flags & AV_CPU_FLAG_SSE4) {
00119 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;
00120 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
00121 }
00122 #endif
00123 }