35 #define PHADDD(a, t) \ 36 "movq " #a ", " #t " \n\t" \ 37 "psrlq $32, " #a " \n\t" \ 38 "paddd " #t ", " #a " \n\t" 45 #define PMULHRW(x, y, s, o) \ 46 "pmulhw " #s ", " #x " \n\t" \ 47 "pmulhw " #s ", " #y " \n\t" \ 48 "paddw " #o ", " #x " \n\t" \ 49 "paddw " #o ", " #y " \n\t" \ 50 "psraw $1, " #x " \n\t" \ 51 "psraw $1, " #y " \n\t" 52 #define DEF(x) x ## _mmx 53 #define SET_RND MOVQ_WONE 54 #define SCALE_OFFSET 1 63 #define DEF(x) x ## _3dnow 65 #define SCALE_OFFSET 0 66 #define PMULHRW(x, y, s, o) \ 67 "pmulhrw " #s ", " #x " \n\t" \ 68 "pmulhrw " #s ", " #y " \n\t" 79 #define DEF(x) x ## _ssse3 81 #define SCALE_OFFSET -1 83 #define PHADDD(a, t) \ 84 "pshufw $0x0E, " #a ", " #t " \n\t" \ 86 "paddd " #t ", " #a " \n\t" 88 #define PMULHRW(x, y, s, o) \ 89 "pmulhrsw " #s ", " #x " \n\t" \ 90 "pmulhrsw " #s ", " #y " \n\t" 104 int w,
int h,
int sides)
109 last_line = buf + (height - 1) * wrap;
115 "movd (%0), %%mm0 \n\t" 116 "punpcklbw %%mm0, %%mm0 \n\t" 117 "punpcklwd %%mm0, %%mm0 \n\t" 118 "punpckldq %%mm0, %%mm0 \n\t" 119 "movq %%mm0, -8(%0) \n\t" 120 "movq -8(%0, %2), %%mm1 \n\t" 121 "punpckhbw %%mm1, %%mm1 \n\t" 122 "punpckhwd %%mm1, %%mm1 \n\t" 123 "punpckhdq %%mm1, %%mm1 \n\t" 124 "movq %%mm1, (%0, %2) \n\t" 130 "r" (ptr + wrap * height));
131 }
else if (w == 16) {
134 "movd (%0), %%mm0 \n\t" 135 "punpcklbw %%mm0, %%mm0 \n\t" 136 "punpcklwd %%mm0, %%mm0 \n\t" 137 "punpckldq %%mm0, %%mm0 \n\t" 138 "movq %%mm0, -8(%0) \n\t" 139 "movq %%mm0, -16(%0) \n\t" 140 "movq -8(%0, %2), %%mm1 \n\t" 141 "punpckhbw %%mm1, %%mm1 \n\t" 142 "punpckhwd %%mm1, %%mm1 \n\t" 143 "punpckhdq %%mm1, %%mm1 \n\t" 144 "movq %%mm1, (%0, %2) \n\t" 145 "movq %%mm1, 8(%0, %2) \n\t" 156 "movd (%0), %%mm0 \n\t" 157 "punpcklbw %%mm0, %%mm0 \n\t" 158 "punpcklwd %%mm0, %%mm0 \n\t" 159 "movd %%mm0, -4(%0) \n\t" 160 "movd -4(%0, %2), %%mm1 \n\t" 161 "punpcklbw %%mm1, %%mm1 \n\t" 162 "punpckhwd %%mm1, %%mm1 \n\t" 163 "punpckhdq %%mm1, %%mm1 \n\t" 164 "movd %%mm1, (%0, %2) \n\t" 170 "r" (ptr + wrap * height));
175 for (i = 0; i <
h; i += 4) {
176 ptr = buf - (i + 1) * wrap - w;
179 "movq (%1, %0), %%mm0 \n\t" 180 "movq %%mm0, (%0) \n\t" 181 "movq %%mm0, (%0, %2) \n\t" 182 "movq %%mm0, (%0, %2, 2) \n\t" 183 "movq %%mm0, (%0, %3) \n\t" 190 "r" (ptr + width + 2 *
w));
195 for (i = 0; i <
h; i += 4) {
196 ptr = last_line + (i + 1) * wrap - w;
199 "movq (%1, %0), %%mm0 \n\t" 200 "movq %%mm0, (%0) \n\t" 201 "movq %%mm0, (%0, %2) \n\t" 202 "movq %%mm0, (%0, %2, 2) \n\t" 203 "movq %%mm0, (%0, %3) \n\t" 210 "r" (ptr + width + 2 *
w));
262 #if HAVE_SSSE3_INLINE #define EXTERNAL_MMX(flags)
int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, AVCodecContext *avctx)
static atomic_int cpu_flags
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
Macro definitions for various function/variable attributes.
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c… (text truncated in the source)
#define EXTERNAL_SSE2(flags)
#define INLINE_MMX(flags)
int flags
AV_CODEC_FLAG_*.
simple assert() macros that are a bit more flexible than ISO C assert().
int ff_pix_sum16_sse2(uint8_t *pix, int line_size)
int(* pix_norm1)(uint8_t *pix, int line_size)
int(* pix_sum)(uint8_t *pix, int line_size)
#define INLINE_SSSE3(flags)
int ff_pix_sum16_mmxext(uint8_t *pix, int line_size)
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
#define INLINE_AMD3DNOW(flags)
Libavcodec external API header.
main external API structure.
void(* add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
int(* try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
int ff_pix_norm1_sse2(uint8_t *pix, int line_size)
#define EXTERNAL_XOP(flags)
int ff_pix_sum16_xop(uint8_t *pix, int line_size)