/*
 * PHADDD(a, t): horizontal add of the two 32-bit halves of the 64-bit
 * register a, using t as scratch.
 *
 * Expands to a fragment of inline-asm text (AT&T operand order: source
 * first, destination last):
 *   movq  a, t     copy a into t
 *   psrlq $32, a   logical right shift of a by 32 bits, so the former
 *                  high dword becomes the low dword and the high dword
 *                  becomes zero
 *   paddd t, a     packed 32-bit add of t into a
 *
 * Result: low dword of a = old low + old high; high dword of a = old high.
 * t is overwritten with the original value of a.
 *
 * Both arguments are stringized with '#', so they must be spelled exactly
 * as they should appear in the asm text.
 */
35 #define PHADDD(a, t) \
36 "movq " #a ", " #t " \n\t" \
37 "psrlq $32, " #a " \n\t" \
38 "paddd " #t ", " #a " \n\t"
/*
 * PMULHRW(x, y, s, o): apply the same scaled multiply to the two registers
 * x and y, built from three instructions per register.
 *
 * For each packed signed 16-bit word of x (and likewise y):
 *   pmulhw s, reg   keep the high 16 bits of the signed product reg * s
 *   paddw  o, reg   add o (wrapping 16-bit add)
 *   psraw  $1, reg  arithmetic shift right by one bit
 * i.e. reg = (((reg * s) >> 16) + o) >> 1.
 *
 * The two registers are interleaved instruction by instruction rather than
 * processed one after the other; x and y are modified in place, s and o are
 * only read.
 *
 * NOTE(review): o presumably holds a per-word rounding constant so that the
 * sequence behaves as a rounded high multiply -- confirm at the use site,
 * which is not visible here.
 */
45 #define PMULHRW(x, y, s, o) \
46 "pmulhw " #s ", " #x " \n\t" \
47 "pmulhw " #s ", " #y " \n\t" \
48 "paddw " #o ", " #x " \n\t" \
49 "paddw " #o ", " #y " \n\t" \
50 "psraw $1, " #x " \n\t" \
51 "psraw $1, " #y " \n\t"
/*
 * Parameter set for the "_mmx" flavour.
 * NOTE(review): these are presumably consumed by code included or expanded
 * after this point; the consumer is not visible here -- confirm.
 */
/* DEF(x) token-pastes the suffix _mmx onto the identifier x. */
52 #define DEF(x) x ## _mmx
/* SET_RND is an alias for MOVQ_WONE, which is defined outside this view. */
53 #define SET_RND MOVQ_WONE
/* Integer constant 1; its meaning is fixed by whatever code reads SCALE_OFFSET. */
54 #define SCALE_OFFSET 1
/*
 * Parameter set for the "_3dnow" flavour.
 * NOTE(review): the consumer of these defines is not visible here -- confirm
 * how DEF and SCALE_OFFSET are used before changing either.
 */
/* DEF(x) token-pastes the suffix _3dnow onto the identifier x. */
63 #define DEF(x) x ## _3dnow
/* Integer constant 0; its meaning is fixed by whatever code reads SCALE_OFFSET. */
65 #define SCALE_OFFSET 0
/*
 * PMULHRW(x, y, s, o): multiply registers x and y by s using a single
 * "pmulhrw" instruction each (operands in AT&T order, so x and y are the
 * destinations and are modified in place; s is only read).
 *
 * The parameter o is accepted so the macro keeps a four-argument signature,
 * but it is not referenced in the expansion.
 *
 * NOTE(review): pmulhrw is the AMD 3DNow! packed 16-bit multiply-high with
 * rounding; check the AMD manual for the exact rounding behaviour before
 * relying on bit-exact results.
 */
66 #define PMULHRW(x, y, s, o) \
67 "pmulhrw " #s ", " #x " \n\t" \
68 "pmulhrw " #s ", " #y " \n\t"
/*
 * Parameter set for the "_ssse3" flavour.
 * NOTE(review): the consumer of these defines is not visible here -- confirm
 * how DEF and SCALE_OFFSET are used before changing either.
 */
/* DEF(x) token-pastes the suffix _ssse3 onto the identifier x. */
79 #define DEF(x) x ## _ssse3
/* Integer constant -1; its meaning is fixed by whatever code reads SCALE_OFFSET. */
81 #define SCALE_OFFSET -1
/*
 * PHADDD(a, t): horizontal add of the two 32-bit halves of the 64-bit
 * register a, using t as scratch.
 *
 * Expands to a fragment of inline-asm text (AT&T operand order):
 *   pshufw $0x0E, a, t   selector 0x0E picks source words 2,3,0,0, so the
 *                        low dword of t receives the high dword of a
 *   paddd  t, a          packed 32-bit add of t into a
 *
 * Result: low dword of a = old low + old high. The high dword of a is also
 * modified (old high plus the duplicated low word of a) and should be
 * treated as not meaningful. t is clobbered.
 *
 * Both arguments are stringized with '#', so they must be spelled exactly
 * as they should appear in the asm text.
 */
83 #define PHADDD(a, t) \
84 "pshufw $0x0E, " #a ", " #t " \n\t" \
86 "paddd " #t ", " #a " \n\t"
/*
 * PMULHRW(x, y, s, o): multiply registers x and y by s using a single
 * "pmulhrsw" instruction each (operands in AT&T order, so x and y are the
 * destinations and are modified in place; s is only read).
 *
 * The parameter o is accepted so the macro keeps a four-argument signature,
 * but it is not referenced in the expansion.
 *
 * NOTE(review): pmulhrsw is the SSSE3 packed multiply-high with round and
 * scale, documented as ((x * s >> 14) + 1) >> 1 per signed 16-bit word;
 * verify against the Intel instruction reference before relying on
 * bit-exact results.
 */
88 #define PMULHRW(x, y, s, o) \
89 "pmulhrsw " #s ", " #x " \n\t" \
90 "pmulhrsw " #s ", " #y " \n\t"
104 int w,
int h,
int sides)
109 last_line = buf + (height - 1) * wrap;
115 "movd (%0), %%mm0 \n\t"
116 "punpcklbw %%mm0, %%mm0 \n\t"
117 "punpcklwd %%mm0, %%mm0 \n\t"
118 "punpckldq %%mm0, %%mm0 \n\t"
119 "movq %%mm0, -8(%0) \n\t"
120 "movq -8(%0, %2), %%mm1 \n\t"
121 "punpckhbw %%mm1, %%mm1 \n\t"
122 "punpckhwd %%mm1, %%mm1 \n\t"
123 "punpckhdq %%mm1, %%mm1 \n\t"
124 "movq %%mm1, (%0, %2) \n\t"
130 "r" (ptr + wrap * height));
131 }
else if (w == 16) {
134 "movd (%0), %%mm0 \n\t"
135 "punpcklbw %%mm0, %%mm0 \n\t"
136 "punpcklwd %%mm0, %%mm0 \n\t"
137 "punpckldq %%mm0, %%mm0 \n\t"
138 "movq %%mm0, -8(%0) \n\t"
139 "movq %%mm0, -16(%0) \n\t"
140 "movq -8(%0, %2), %%mm1 \n\t"
141 "punpckhbw %%mm1, %%mm1 \n\t"
142 "punpckhwd %%mm1, %%mm1 \n\t"
143 "punpckhdq %%mm1, %%mm1 \n\t"
144 "movq %%mm1, (%0, %2) \n\t"
145 "movq %%mm1, 8(%0, %2) \n\t"
156 "movd (%0), %%mm0 \n\t"
157 "punpcklbw %%mm0, %%mm0 \n\t"
158 "punpcklwd %%mm0, %%mm0 \n\t"
159 "movd %%mm0, -4(%0) \n\t"
160 "movd -4(%0, %2), %%mm1 \n\t"
161 "punpcklbw %%mm1, %%mm1 \n\t"
162 "punpckhwd %%mm1, %%mm1 \n\t"
163 "punpckhdq %%mm1, %%mm1 \n\t"
164 "movd %%mm1, (%0, %2) \n\t"
170 "r" (ptr + wrap * height));
175 for (i = 0; i <
h; i += 4) {
176 ptr = buf - (i + 1) * wrap - w;
179 "movq (%1, %0), %%mm0 \n\t"
180 "movq %%mm0, (%0) \n\t"
181 "movq %%mm0, (%0, %2) \n\t"
182 "movq %%mm0, (%0, %2, 2) \n\t"
183 "movq %%mm0, (%0, %3) \n\t"
190 "r" (ptr + width + 2 * w));
195 for (i = 0; i <
h; i += 4) {
196 ptr = last_line + (i + 1) * wrap - w;
199 "movq (%1, %0), %%mm0 \n\t"
200 "movq %%mm0, (%0) \n\t"
201 "movq %%mm0, (%0, %2) \n\t"
202 "movq %%mm0, (%0, %2, 2) \n\t"
203 "movq %%mm0, (%0, %3) \n\t"
210 "r" (ptr + width + 2 * w));
262 #if HAVE_SSSE3_INLINE
#define EXTERNAL_MMX(flags)
int(* try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, AVCodecContext *avctx)
void(* add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
Macro definitions for various function/variable attributes.
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
#define EXTERNAL_SSE2(flags)
#define INLINE_MMX(flags)
simple assert() macros that are a bit more flexible than ISO C assert().
int ff_pix_sum16_sse2(uint8_t *pix, int line_size)
Libavcodec external API header.
#define INLINE_SSSE3(flags)
int ff_pix_sum16_mmxext(uint8_t *pix, int line_size)
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
#define INLINE_AMD3DNOW(flags)
int(* pix_sum)(uint8_t *pix, int line_size)
main external API structure.
BYTE int const BYTE int int int height
int(* pix_norm1)(uint8_t *pix, int line_size)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
int ff_pix_norm1_sse2(uint8_t *pix, int line_size)
#define EXTERNAL_XOP(flags)
int ff_pix_sum16_xop(uint8_t *pix, int line_size)