00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef AVCODEC_X86_DSPUTIL_MMX_H
00023 #define AVCODEC_X86_DSPUTIL_MMX_H
00024
00025 #include <stdint.h>
00026 #include "libavcodec/dsputil.h"
00027 #include "libavutil/x86/asm.h"
00028
/**
 * Two 64-bit unsigned fields, i.e. 16 bytes of payload.
 *
 * Used in this header as the type of the wider ff_pw_* / ff_pb_* constants.
 * NOTE(review): the name suggests the contents of one 128-bit XMM register;
 * no alignment is requested by this typedef - confirm how the definitions
 * are aligned before using them as aligned memory operands.
 */
00029 typedef struct { uint64_t a, b; } xmm_reg;
00030
/*
 * 64-bit constants declared here and defined in another translation unit.
 * NOTE(review): the values are not visible from this header; the names hint
 * at "1 in every byte" and "2 in every word" - confirm at the definitions.
 */
00031 extern const uint64_t ff_bone;
00032 extern const uint64_t ff_wtwo;
00033
/*
 * Two-element array of 64-bit constants (16 bytes), defined elsewhere.
 * NOTE(review): presumably 0x80000000 replicated in every 32-bit lane, going
 * by the name - confirm at the definition.
 */
00034 extern const uint64_t ff_pdw_80000000[2];
00035
/*
 * ff_pw_<N> constants, defined in another translation unit.
 *
 * Note that the declared type differs from constant to constant: some are
 * 16-byte xmm_reg objects, others are plain 8-byte uint64_t. Code that loads
 * one of these must use an access width matching the declaration below.
 *
 * NOTE(review): presumably each holds the decimal value N from its name
 * replicated into every 16-bit word - the values are not visible here.
 */
00036 extern const xmm_reg ff_pw_3;
00037 extern const xmm_reg ff_pw_4;
00038 extern const xmm_reg ff_pw_5;
00039 extern const xmm_reg ff_pw_8;
00040 extern const uint64_t ff_pw_15;
00041 extern const xmm_reg ff_pw_16;
00042 extern const xmm_reg ff_pw_18;
00043 extern const uint64_t ff_pw_20;
00044 extern const xmm_reg ff_pw_27;
00045 extern const xmm_reg ff_pw_28;
00046 extern const xmm_reg ff_pw_32;
00047 extern const uint64_t ff_pw_42;
00048 extern const uint64_t ff_pw_53;
00049 extern const xmm_reg ff_pw_63;
00050 extern const xmm_reg ff_pw_64;
00051 extern const uint64_t ff_pw_96;
00052 extern const uint64_t ff_pw_128;
00053 extern const uint64_t ff_pw_255;
00054
/*
 * ff_pb_<X> constants, defined in another translation unit.
 *
 * As with the ff_pw_* group, the declared width varies: xmm_reg entries are
 * 16 bytes, uint64_t entries are 8 bytes.
 *
 * NOTE(review): presumably each holds the value X from its name (hex for the
 * names containing letters) replicated into every byte - confirm at the
 * definitions.
 */
00055 extern const xmm_reg ff_pb_1;
00056 extern const xmm_reg ff_pb_3;
00057 extern const uint64_t ff_pb_7;
00058 extern const uint64_t ff_pb_1F;
00059 extern const uint64_t ff_pb_3F;
00060 extern const uint64_t ff_pb_81;
00061 extern const xmm_reg ff_pb_A1;
00062 extern const xmm_reg ff_pb_F8;
00063 extern const uint64_t ff_pb_FC;
00064 extern const xmm_reg ff_pb_FE;
00065
/*
 * Two-element arrays of double, defined in another translation unit.
 * NOTE(review): presumably both elements are 1.0 and 2.0 respectively, going
 * by the names - the values are not visible from this header.
 */
00066 extern const double ff_pd_1[2];
00067 extern const double ff_pd_2[2];
00068
/**
 * Assembly text for one interleave ("butterfly") step.
 *
 * The arguments are stringized and spliced into three instructions:
 *   mov<m>     a, t    copy a into t
 *   punpckl<n> b, a    a := low  halves of a and b, interleaved
 *   punpckh<n> b, t    t := high halves of the old a and b, interleaved
 * (The descriptions assume "source, destination" operand order, which is the
 * order MOVQ_WONE in this header uses.)
 *
 * @param a  first input; overwritten with the low-half interleave
 * @param b  second input; only appears as a source operand
 * @param t  temporary; overwritten with the high-half interleave
 * @param n  suffix appended to punpckl / punpckh (TRANSPOSE4 passes wd, dq)
 * @param m  suffix appended to mov (TRANSPOSE4 passes q)
 *
 * The expansion is a run of adjacent string literals, so it is meant to be
 * placed inside an asm statement. Operands are emitted verbatim: the macro
 * adds no register prefix of its own.
 *
 * NOTE(review): the last line of the definition ends in a backslash, so the
 * macro continues onto the following blank line. Anything ever placed on
 * that line would silently become part of this macro.
 */
00069 #define SBUTTERFLY(a,b,t,n,m)\
00070 "mov" #m " " #a ", " #t " \n\t" \
00071 "punpckl" #n " " #b ", " #a " \n\t" \
00072 "punpckh" #n " " #b ", " #t " \n\t" \
00073
/**
 * Assembly text for four SBUTTERFLY steps: two with the "wd" suffix followed
 * by two with the "dq" suffix, all using movq for the register copies.
 *
 * Tracing the four expansions, the results end up in a, d, t, c (in that
 * order); b is overwritten by the second step and only holds an
 * intermediate value afterwards. In short: a b c d -> a d t c.
 *
 * NOTE(review): with 64-bit registers holding four 16-bit elements each,
 * this sequence amounts to a 4x4 transpose, as the name suggests - confirm
 * the register width against the call sites.
 *
 * @param a,b,c,d  the four input registers (all are overwritten)
 * @param t        an additional register, used as temporary and as output
 */
00074 #define TRANSPOSE4(a,b,c,d,t)\
00075 SBUTTERFLY(a,b,t,wd,q) \
00076 SBUTTERFLY(c,d,b,wd,q) \
00077 SBUTTERFLY(a,c,d,dq,q) \
00078 SBUTTERFLY(t,b,c,dq,q)
00079
/**
 * Emit an asm statement that sets every 16-bit word of register regd to 1.
 *
 * pcmpeqd compares the register with itself, which sets all of its bits;
 * psrlw $15 then shifts each 16-bit word right by 15, leaving the value 1
 * in every word.
 *
 * @param regd  register name without the '%' prefix; the macro prepends it
 *
 * The statement has empty operand lists and no clobber list, so the compiler
 * is not told that regd is modified. The expansion has no trailing
 * semicolon; the caller supplies it.
 */
00080 #define MOVQ_WONE(regd) \
00081 __asm__ volatile ( \
00082 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
00083 "psrlw $15, %%" #regd ::)
00084
/*
 * Initialisation entry points; the definitions live outside this header.
 * Both receive a writable DSPContext and the codec context.
 * NOTE(review): presumably they install x86-specific routines into *c -
 * confirm against the implementations.
 */
00085 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
00086 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
00087
/*
 * Three routines sharing one signature: a read-only DCTELEM block, a
 * writable 8-bit pixel buffer and a line size.
 * NOTE(review): presumably they add / store / store-with-sign-offset the
 * coefficients into pixels with saturation to the 8-bit range, and
 * line_size is the byte distance between rows - confirm in the
 * implementations, which are not part of this header.
 */
00088 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
00089 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
00090 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
00091
/*
 * put/avg routines for 8- and 16-sized blocks, taking a destination, a
 * source and a stride. Note that src is not const-qualified in these
 * prototypes.
 * NOTE(review): "mc00" presumably denotes the zero-offset (plain copy /
 * average) case for CAVS - confirm in the implementations.
 */
00092 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00093 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00094 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00095 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00096
/*
 * put/avg routines taking a destination, a read-only source, a stride and
 * an extra rnd argument.
 * NOTE(review): whether rnd is actually used for the mc00 case cannot be
 * told from the prototypes - confirm in the implementations.
 */
00097 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
00098 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
00099
/*
 * put/avg routines for 8- and 16-sized blocks; each takes two non-const
 * 8-bit buffers (block, pixels) and a line size.
 * NOTE(review): presumably block is the destination and pixels the source,
 * following the parameter names - the prototypes do not establish this.
 */
00100 void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
00101 void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
00102 void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
00103 void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
00104
/*
 * IDCT entry points operating on a writable DCTELEM block; nothing is
 * returned.
 * NOTE(review): presumably the transform is done in place on an 8x8 block -
 * the block size is not visible from the prototypes.
 */
00105 void ff_mmx_idct(DCTELEM *block);
00106 void ff_mmxext_idct(DCTELEM *block);
00107
00108
/**
 * Deinterlacing routine with one writable output line and five read-only
 * input lines.
 *
 * @param dst                 output buffer
 * @param lum_m4,lum_m3,lum_m2,lum_m1,lum
 *                            read-only input lines
 * @param size                NOTE(review): presumably the number of pixels
 *                            to process per line - confirm in the
 *                            implementation
 */
00109 void ff_deinterlace_line_mmx(uint8_t *dst,
00110 const uint8_t *lum_m4, const uint8_t *lum_m3,
00111 const uint8_t *lum_m2, const uint8_t *lum_m1,
00112 const uint8_t *lum,
00113 int size);
00114
/**
 * Variant of the deinterlacing routine without a separate output pointer.
 *
 * NOTE(review): every pointer parameter is const-qualified although the
 * name says "inplace"; if the implementation writes through any of these
 * pointers, the prototype understates that. Confirm which line(s) are
 * modified before relying on the const qualifiers.
 *
 * @param lum_m4,lum_m3,lum_m2,lum_m1,lum  the five lines operated on
 * @param size                             NOTE(review): presumably the
 *                                         number of pixels per line
 */
00115 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
00116 const uint8_t *lum_m3,
00117 const uint8_t *lum_m2,
00118 const uint8_t *lum_m1,
00119 const uint8_t *lum, int size);
00120
00121 #endif