#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
Defines

#define NORMALIZE_MMX(SHIFT)
    Add rounder from mm7 to mm3 and pack result at destination.
#define TRANSFER_DO_PACK
#define TRANSFER_DONT_PACK
#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)
#define LOAD_ROUNDER_MMX(ROUND)
    Compute the rounder 32-r or 8-r and unpack it to mm7.
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)
#define DECLARE_FUNCTION(a, b)
    Macro to ease bicubic filter interpolation function declarations.

Functions

DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift)
    Sacrificing mm6 allows pipelining loads from src.
static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, const int16_t *src, int rnd)
    Data is already unpacked, so some operations can be performed directly from memory.
static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset)
    Purely vertical or horizontal 1/2 shift interpolation.
DECLARE_ASM_CONST(16, uint64_t, ff_pw_53)
    Filter coefficients made global to allow access by all 1/4 and 3/4 shift interpolation functions.
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
#define DECLARE_FUNCTION(a, b)
Value:
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
    vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}
Definition at line 446 of file vc1dsp_mmx.c.
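For illustration, expanding the macro mechanically for one shift pair gives a thin wrapper that forwards to the common vc1_mspel_mc() dispatcher:

/* Hypothetical expansion of DECLARE_FUNCTION(1, 3): a wrapper passing the
 * quarter-pel shift pair (1, 3) through to vc1_mspel_mc(). */
static void put_vc1_mspel_mc13_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, 1, 3, rnd);
}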
#define DONT_UNPACK(reg)
Definition at line 48 of file vc1dsp_mmx.c.
#define LOAD_ROUNDER_MMX(ROUND)
Value:
"movd      "ROUND", %%mm7          \n\t" \
"punpcklwd %%mm7, %%mm7            \n\t" \
"punpckldq %%mm7, %%mm7            \n\t"
Definition at line 51 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx(), vc1_put_shift2_mmx(), and vc1_put_ver_16b_shift2_mmx().
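As a reading aid, a scalar C model of what the three instructions leave in mm7: the 16-bit rounder replicated into all four word lanes. The function name is illustrative.

#include <stdint.h>

/* Scalar model of LOAD_ROUNDER_MMX: broadcast a 16-bit rounder to all
 * four word lanes of a 64-bit MMX register. */
static uint64_t load_rounder_model(uint16_t round)
{
    uint64_t r = round;   /* movd:      0|0|0|r */
    r |= r << 16;         /* punpcklwd: 0|0|r|r */
    r |= r << 32;         /* punpckldq: r|r|r|r */
    return r;
}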
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)
Value:
static void \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
                        x86_reg stride, int rnd, x86_reg offset) \
{ \
    int h = 8; \
    src -= offset; \
    rnd = 32-rnd; \
    __asm__ volatile ( \
        LOAD_ROUNDER_MMX("%6") \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
        NORMALIZE_MMX("$6") \
        TRANSFER_DO_PACK \
        "add %5, %1 \n\t" \
        "add %5, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
        : "memory" \
    ); \
}
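A scalar sketch of the function this macro generates, using the tap weights annotated in MSPEL_FILTER13_CORE (-3, 18, 53, -4). The assignment of taps to A1..A4 depends on the instantiation, so the ordering below is one plausible arrangement, not a statement about any particular NAME:

#include <stdint.h>

/* Scalar model of a MSPEL_FILTER13_8B instance (illustrative tap order).
 * `offset` selects the filter direction, exactly as the x86_reg offset
 * argument does: 1 for horizontal, stride for vertical. */
static void vc1_put_filter13_model(uint8_t *dst, const uint8_t *src,
                                   int stride, int rnd, int offset)
{
    int x, y;
    rnd = 32 - rnd;                          /* same rounder flip as the macro */
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++) {
            int t = -3 * src[x - offset] + 18 * src[x]
                  + 53 * src[x + offset] -  4 * src[x + 2 * offset];
            t = (t + rnd) >> 6;              /* NORMALIZE_MMX("$6") */
            dst[x] = t < 0 ? 0 : t > 255 ? 255 : t;  /* packuswb saturation */
        }
        src += stride;
        dst += stride;
    }
}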
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
Value:
MOVQ "*0+"A1", %%mm1       \n\t" \
MOVQ "*4+"A1", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \
"pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \
MOVQ "*0+"A2", %%mm3       \n\t" \
MOVQ "*4+"A2", %%mm4       \n\t" \
UNPACK("%%mm3") \
UNPACK("%%mm4") \
"pmullw %%mm6, %%mm3       \n\t" /* *18 */ \
"pmullw %%mm6, %%mm4       \n\t" /* *18 */ \
"psubw  %%mm1, %%mm3       \n\t" /* 18,-3 */ \
"psubw  %%mm2, %%mm4       \n\t" /* 18,-3 */ \
MOVQ "*0+"A4", %%mm1       \n\t" \
MOVQ "*4+"A4", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"psllw  $2, %%mm1          \n\t" /* 4* */ \
"psllw  $2, %%mm2          \n\t" /* 4* */ \
"psubw  %%mm1, %%mm3       \n\t" /* -4,18,-3 */ \
"psubw  %%mm2, %%mm4       \n\t" /* -4,18,-3 */ \
MOVQ "*0+"A3", %%mm1       \n\t" \
MOVQ "*4+"A3", %%mm2       \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw %%mm5, %%mm1       \n\t" /* *53 */ \
"pmullw %%mm5, %%mm2       \n\t" /* *53 */ \
"paddw  %%mm1, %%mm3       \n\t" /* 4,53,18,-3 */ \
"paddw  %%mm2, %%mm4       \n\t"
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)
Value:
static void \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
                                 const int16_t *src, int rnd) \
{ \
    int h = 8; \
    src -= 1; \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
    __asm__ volatile( \
        LOAD_ROUNDER_MMX("%4") \
        "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
        "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
        ASMALIGN(3) \
        "1: \n\t" \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \
        NORMALIZE_MMX("$7") \
        /* Remove bias */ \
        "paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \
        "paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \
        TRANSFER_DO_PACK \
        "add $24, %1 \n\t" \
        "add %3, %2 \n\t" \
        "decl %0 \n\t" \
        "jnz 1b \n\t" \
        : "+r"(h), "+r" (src), "+r" (dst) \
        : "r"(stride), "m"(rnd) \
        : "memory" \
    ); \
}
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define NORMALIZE_MMX(SHIFT)
Value:
"paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \
"paddw %%mm7, %%mm4 \n\t" /* +bias-r */ \
"psraw "SHIFT", %%mm3 \n\t" \
"psraw "SHIFT", %%mm4 \n\t"
Definition at line 32 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx(), and vc1_put_shift2_mmx().
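Per 16-bit lane of mm3/mm4 this amounts to the following scalar operation; names are illustrative, and `rounder` is the value LOAD_ROUNDER_MMX broadcast into mm7:

#include <stdint.h>

/* Scalar equivalent of NORMALIZE_MMX for one word lane. */
static int16_t normalize_model(int16_t v, int16_t rounder, int shift)
{
    return (int16_t)((v + rounder) >> shift);
}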
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
Value:
"paddw     %%mm"#R2", %%mm"#R1"    \n\t" \
"movd      (%0,%3), %%mm"#R0"      \n\t" \
"pmullw    %%mm6, %%mm"#R1"        \n\t" \
"punpcklbw %%mm0, %%mm"#R0"        \n\t" \
"movd      (%0,%2), %%mm"#R3"      \n\t" \
"psubw     %%mm"#R0", %%mm"#R1"    \n\t" \
"punpcklbw %%mm0, %%mm"#R3"        \n\t" \
"paddw     %%mm7, %%mm"#R1"        \n\t" \
"psubw     %%mm"#R3", %%mm"#R1"    \n\t" \
"psraw     %4, %%mm"#R1"           \n\t" \
"movq      %%mm"#R1", "#OFF"(%1)   \n\t" \
"add       %2, %0                  \n\t"
Definition at line 56 of file vc1dsp_mmx.c.
Referenced by vc1_put_ver_16b_shift2_mmx().
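In scalar terms, one SHIFT2_LINE step evaluates the (-1, 9, 9, -1) half-pel filter (the 9 being the ff_pw_9 tap held in mm6) for four pixels and stores unclamped 16-bit results. The row names below are illustrative:

#include <stdint.h>

/* Scalar model of one SHIFT2_LINE invocation: b and c are the two middle
 * source rows, a and d the outer ones; mm7 holds the rounder. */
static void shift2_line_model(int16_t *dst, const uint8_t *a, const uint8_t *b,
                              const uint8_t *c, const uint8_t *d,
                              int rounder, int shift)
{
    int i;
    for (i = 0; i < 4; i++)   /* movd loads 4 pixels at a time */
        dst[i] = (int16_t)((9 * (b[i] + c[i]) - a[i] - d[i] + rounder) >> shift);
}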
#define TRANSFER_DO_PACK |
Value:
"packuswb %%mm4, %%mm3 \n\t" \ "movq %%mm3, (%2) \n\t"
Definition at line 38 of file vc1dsp_mmx.c.
Referenced by vc1_put_hor_16b_shift2_mmx().
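The packuswb step saturates each signed 16-bit lane to an unsigned byte before the store; in scalar terms (illustrative name):

#include <stdint.h>

/* Scalar model of one packuswb lane: clamp a signed word to [0, 255]. */
static uint8_t packuswb_model(int16_t v)
{
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}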
#define TRANSFER_DONT_PACK |
Value:
"movq %%mm3, 0(%2) \n\t" \ "movq %%mm4, 8(%2) \n\t"
Definition at line 42 of file vc1dsp_mmx.c.
DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL
DECLARE_ASM_CONST(16, uint64_t, ff_pw_53)
Filter coefficients made global to allow access by all 1/4 and 3/4 shift interpolation functions.
Definition at line 206 of file vc1dsp_mmx.c.
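Given the *53 and *18 pmullw annotations in MSPEL_FILTER13_CORE, these are presumably the two bicubic taps broadcast to all four word lanes. A sketch of the likely definitions (0x0035 == 53, 0x0012 == 18), not verified against the source:

DECLARE_ASM_CONST(16, uint64_t, ff_pw_53) = 0x0035003500350035ULL;
DECLARE_ASM_CONST(16, uint64_t, ff_pw_18) = 0x0012001200120012ULL;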
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd)
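mc00 denotes a zero shift in both directions, so this is presumably a plain 8x8 block copy; a scalar sketch under that assumption:

#include <string.h>
#include <stdint.h>

/* Hypothetical scalar equivalent of ff_put_vc1_mspel_mc00_mmx: copy an
 * 8x8 block; rnd has no effect on a pure copy. */
static void put_vc1_mspel_mc00_model(uint8_t *dst, const uint8_t *src,
                                     int stride, int rnd)
{
    int y;
    (void)rnd;
    for (y = 0; y < 8; y++) {
        memcpy(dst, src, 8);
        dst += stride;
        src += stride;
    }
}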
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
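Following the usual FFmpeg init pattern, this presumably installs the MMX implementations into the DSPContext function-pointer tables; the table name and indexing below are assumptions for illustration, not the verified layout:

/* Sketch of the expected wiring, assuming DSPContext exposes a
 * put_vc1_mspel_pixels_tab[] of 16 quarter-pel variants. */
void ff_vc1dsp_init_mmx(DSPContext *dsp, AVCodecContext *avctx)
{
    dsp->put_vc1_mspel_pixels_tab[0] = ff_put_vc1_mspel_mc00_mmx;
    /* ... the remaining entries would point at the put_vc1_mspel_mcXY_mmx
     * wrappers generated by DECLARE_FUNCTION(a, b) ... */
}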
static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, const int16_t *src, int rnd) [static]
Data is already unpacked, so some operations can be performed directly from memory.
Definition at line 110 of file vc1dsp_mmx.c.
static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset) [static]
Purely vertical or horizontal 1/2 shift interpolation.
Sacrifice mm6 for the *9 factor.
Definition at line 154 of file vc1dsp_mmx.c.
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift) [static]
Sacrificing mm6 allows pipelining loads from src.
Definition at line 73 of file vc1dsp_mmx.c.