#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
Defines

#define NORMALIZE_MMX(SHIFT)
    Add rounder from mm7 to mm3 and pack result at destination.
#define TRANSFER_DO_PACK
#define TRANSFER_DONT_PACK
#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)
#define LOAD_ROUNDER_MMX(ROUND)
    Compute the rounder 32-r or 8-r and unpack it to mm7.
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)
#define DECLARE_FUNCTION(a, b)
    Macro to ease bicubic filter interpolation function declarations.

Functions

DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift)
    Sacrificing mm6 allows pipelining loads from src.
static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, const int16_t *src, int rnd)
    Data is already unpacked, so some operations can be performed directly from memory.
static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset)
    Purely vertical or horizontal 1/2 shift interpolation.
DECLARE_ASM_CONST(16, uint64_t, ff_pw_53)
    Filter coefficients made global to allow access by all 1/4 and 3/4 shift interpolation functions.
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
#define DECLARE_FUNCTION(a, b)
Value:
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
    vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}
Definition at line 446 of file vc1dsp_mmx.c.
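For illustration, expanding the macro mechanically for one shift pair gives a thin wrapper that forwards to the common vc1_mspel_mc() dispatcher:

/* Hypothetical expansion of DECLARE_FUNCTION(1, 3): a wrapper passing the
 * quarter-pel shift pair (1, 3) through to vc1_mspel_mc(). */
static void put_vc1_mspel_mc13_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, 1, 3, rnd);
}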
#define DONT_UNPACK(reg)
Definition at line 48 of file vc1dsp_mmx.c.
#define LOAD_ROUNDER_MMX(ROUND)
Value:
"movd      "ROUND", %%mm7          \n\t" \
"punpcklwd %%mm7, %%mm7            \n\t" \
"punpckldq %%mm7, %%mm7            \n\t"
Definition at line 51 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx(), vc1_put_shift2_mmx(), and vc1_put_ver_16b_shift2_mmx().
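As a reading aid, a scalar C model of what the three instructions leave in mm7: the 16-bit rounder replicated into all four word lanes. The function name is illustrative.

#include <stdint.h>

/* Scalar model of LOAD_ROUNDER_MMX: broadcast a 16-bit rounder to all
 * four word lanes of a 64-bit MMX register. */
static uint64_t load_rounder_model(uint16_t round)
{
    uint64_t r = round;   /* movd:      0|0|0|r */
    r |= r << 16;         /* punpcklwd: 0|0|r|r */
    r |= r << 32;         /* punpckldq: r|r|r|r */
    return r;
}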
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)
Value:
static void \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
                        x86_reg stride, int rnd, x86_reg offset) \
{ \
    int h = 8; \
    src -= offset; \
    rnd = 32-rnd; \
    __asm__ volatile ( \
        LOAD_ROUNDER_MMX("%6") \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
        NORMALIZE_MMX("$6") \
        TRANSFER_DO_PACK \
        "add %5, %1 \n\t" \
        "add %5, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
        : "memory" \
    ); \
}
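A scalar sketch of the function this macro generates, using the tap weights annotated in MSPEL_FILTER13_CORE (-3, 18, 53, -4). The assignment of taps to A1..A4 depends on the instantiation, so the ordering below is one plausible arrangement, not a statement about any particular NAME:

#include <stdint.h>

/* Scalar model of a MSPEL_FILTER13_8B instance (illustrative tap order).
 * `offset` selects the filter direction, exactly as the x86_reg offset
 * argument does: 1 for horizontal, stride for vertical. */
static void vc1_put_filter13_model(uint8_t *dst, const uint8_t *src,
                                   int stride, int rnd, int offset)
{
    int x, y;
    rnd = 32 - rnd;                          /* same rounder flip as the macro */
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++) {
            int t = -3 * src[x - offset] + 18 * src[x]
                  + 53 * src[x + offset] -  4 * src[x + 2 * offset];
            t = (t + rnd) >> 6;              /* NORMALIZE_MMX("$6") */
            dst[x] = t < 0 ? 0 : t > 255 ? 255 : t;  /* packuswb saturation */
        }
        src += stride;
        dst += stride;
    }
}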
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
Value:
MOVQ "*0+"A1", %%mm1       \n\t" \
MOVQ "*4+"A1", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \
"pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \
MOVQ "*0+"A2", %%mm3       \n\t" \
MOVQ "*4+"A2", %%mm4       \n\t" \
UNPACK("%%mm3") \
UNPACK("%%mm4") \
"pmullw %%mm6, %%mm3       \n\t" /* *18 */ \
"pmullw %%mm6, %%mm4       \n\t" /* *18 */ \
"psubw  %%mm1, %%mm3       \n\t" /* 18,-3 */ \
"psubw  %%mm2, %%mm4       \n\t" /* 18,-3 */ \
MOVQ "*0+"A4", %%mm1       \n\t" \
MOVQ "*4+"A4", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"psllw  $2, %%mm1          \n\t" /* 4* */ \
"psllw  $2, %%mm2          \n\t" /* 4* */ \
"psubw  %%mm1, %%mm3       \n\t" /* -4,18,-3 */ \
"psubw  %%mm2, %%mm4       \n\t" /* -4,18,-3 */ \
MOVQ "*0+"A3", %%mm1       \n\t" \
MOVQ "*4+"A3", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw %%mm5, %%mm1       \n\t" /* *53 */ \
"pmullw %%mm5, %%mm2       \n\t" /* *53 */ \
"paddw  %%mm1, %%mm3       \n\t" /* 4,53,18,-3 */ \
"paddw  %%mm2, %%mm4       \n\t"
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)
Value:
static void \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
                                 const int16_t *src, int rnd) \
{ \
    int h = 8; \
    src -= 1; \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
    __asm__ volatile( \
        LOAD_ROUNDER_MMX("%4") \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \
        NORMALIZE_MMX("$7") \
        /* Remove bias */ \
        "paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \
        "paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \
        TRANSFER_DO_PACK \
        "add $24, %1 \n\t" \
        "add %3, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(stride), "m"(rnd) \
        : "memory" \
    ); \
}
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define NORMALIZE_MMX(SHIFT)
Value:
"paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \
"paddw %%mm7, %%mm4 \n\t" /* +bias-r */ \
"psraw "SHIFT", %%mm3 \n\t" \
"psraw "SHIFT", %%mm4 \n\t"
Definition at line 32 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx(), and vc1_put_shift2_mmx().
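Per 16-bit lane of mm3/mm4 this amounts to the following scalar operation; names are illustrative, and `rounder` is the value LOAD_ROUNDER_MMX broadcast into mm7:

#include <stdint.h>

/* Scalar equivalent of NORMALIZE_MMX for one word lane. */
static int16_t normalize_model(int16_t v, int16_t rounder, int shift)
{
    return (int16_t)((v + rounder) >> shift);
}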
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
Value:
"paddw     %%mm"#R2", %%mm"#R1"    \n\t" \
"movd      (%0,%3), %%mm"#R0"      \n\t" \
"pmullw    %%mm6, %%mm"#R1"        \n\t" \
"punpcklbw %%mm0, %%mm"#R0"        \n\t" \
"movd      (%0,%2), %%mm"#R3"      \n\t" \
"psubw     %%mm"#R0", %%mm"#R1"    \n\t" \
"punpcklbw %%mm0, %%mm"#R3"        \n\t" \
"paddw     %%mm7, %%mm"#R1"        \n\t" \
"psubw     %%mm"#R3", %%mm"#R1"    \n\t" \
"psraw     %4, %%mm"#R1"           \n\t" \
"movq      %%mm"#R1", "#OFF"(%1)   \n\t" \
"add       %2, %0                  \n\t"
Definition at line 56 of file vc1dsp_mmx.c.
Referenced by vc1_put_ver_16b_shift2_mmx().
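In scalar terms, one SHIFT2_LINE step evaluates the (-1, 9, 9, -1) half-pel filter (the 9 being the ff_pw_9 tap held in mm6) for four pixels and stores unclamped 16-bit results. The row names below are illustrative:

#include <stdint.h>

/* Scalar model of one SHIFT2_LINE invocation: b and c are the two middle
 * source rows, a and d the outer ones; mm7 holds the rounder. */
static void shift2_line_model(int16_t *dst, const uint8_t *a, const uint8_t *b,
                              const uint8_t *c, const uint8_t *d,
                              int rounder, int shift)
{
    int i;
    for (i = 0; i < 4; i++)   /* movd loads 4 pixels at a time */
        dst[i] = (int16_t)((9 * (b[i] + c[i]) - a[i] - d[i] + rounder) >> shift);
}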
#define TRANSFER_DO_PACK |
Value:
"packuswb %%mm4, %%mm3 \n\t" \ "movq %%mm3, (%2) \n\t"
Definition at line 38 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx().
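The packuswb step saturates each signed 16-bit lane to an unsigned byte before the store; in scalar terms (illustrative name):

#include <stdint.h>

/* Scalar model of one packuswb lane: clamp a signed word to [0, 255]. */
static uint8_t packuswb_model(int16_t v)
{
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}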
#define TRANSFER_DONT_PACK |
Value:
"movq %%mm3, 0(%2) \n\t" \ "movq %%mm4, 8(%2) \n\t"
Definition at line 42 of file vc1dsp_mmx.c.
DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL
DECLARE_ASM_CONST(16, uint64_t, ff_pw_53)
Filter coefficients made global to allow access by all 1/4 and 3/4 shift interpolation functions.
Definition at line 206 of file vc1dsp_mmx.c.
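Given the *53 and *18 pmullw annotations in MSPEL_FILTER13_CORE, these are presumably the two bicubic taps broadcast to all four word lanes. A sketch of the likely definitions (0x0035 == 53, 0x0012 == 18), not verified against the source:

DECLARE_ASM_CONST(16, uint64_t, ff_pw_53) = 0x0035003500350035ULL;
DECLARE_ASM_CONST(16, uint64_t, ff_pw_18) = 0x0012001200120012ULL;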
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
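mc00 denotes a zero shift in both directions, so this is presumably a plain 8x8 block copy; a scalar sketch under that assumption:

#include <string.h>
#include <stdint.h>

/* Hypothetical scalar equivalent of ff_put_vc1_mspel_mc00_mmx: copy an
 * 8x8 block; rnd has no effect on a pure copy. */
static void put_vc1_mspel_mc00_model(uint8_t *dst, const uint8_t *src,
                                     int stride, int rnd)
{
    int y;
    (void)rnd;
    for (y = 0; y < 8; y++) {
        memcpy(dst, src, 8);
        dst += stride;
        src += stride;
    }
}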
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
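Following the usual FFmpeg init pattern, this presumably installs the MMX implementations into the DSPContext function-pointer tables; the table name and indexing below are assumptions for illustration, not the verified layout:

/* Sketch of the expected wiring, assuming DSPContext exposes a
 * put_vc1_mspel_pixels_tab[] of 16 quarter-pel variants. */
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
{
    dsp->put_vc1_mspel_pixels_tab[0] = ff_put_vc1_mspel_mc00_mmx;
    /* ... the remaining entries would point at the put_vc1_mspel_mcXY_mmx
     * wrappers generated by DECLARE_FUNCTION(a, b) ... */
}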
static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, const int16_t *src, int rnd) [static]
Data is already unpacked, so some operations can be performed directly from memory.
Definition at line 110 of file vc1dsp_mmx.c.
static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset) [static]
Purely vertical or horizontal 1/2 shift interpolation.
Sacrifice mm6 for the *9 factor.
Definition at line 154 of file vc1dsp_mmx.c.
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift) [static]
Sacrificing mm6 allows pipelining loads from src.
Definition at line 73 of file vc1dsp_mmx.c.