[FFmpeg-devel] [PATCH 1/7] x86: sbrdsp: implement SSE/SSE2 qmf_pre_shuffle
Michael Niedermayer
michaelni at gmx.at
Sat Apr 6 14:57:30 CEST 2013
On Sat, Apr 06, 2013 at 10:52:08AM +0000, Christophe Gisquet wrote:
> From 253 to 70(sse)/52(sse2) cycles on Arrandale and Win64.
> 61/55 cycles on SandyBridge.
SSE2 is 41 cycles now on SandyBridge :)
> ---
> libavcodec/x86/sbrdsp.asm | 54 ++++++++++++++++++++++++++++++++++++++++++++
> libavcodec/x86/sbrdsp_init.c | 7 ++++++
> 2 files changed, 61 insertions(+)
>
> diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
> index 999e5af..f3c30d0 100644
> --- a/libavcodec/x86/sbrdsp.asm
> +++ b/libavcodec/x86/sbrdsp.asm
> @@ -242,3 +242,57 @@ cglobal sbr_neg_odd_64, 1,2,4,z
> cmp zq, r1q
> jne .loop
> REP_RET
> +
> +%macro SBR_QMF_PRE_SHUFFLE 0
> +cglobal sbr_qmf_pre_shuffle, 1,4,7,z
> +%define OFFSET (32*4-2*mmsize)
> + mov r3q, OFFSET
> + lea r1q, [zq + (32+1)*4]
> + lea r2q, [zq + 64*4]
> + mova m6, [ps_neg]
> +.loop:
> + movu m0, [r1q]
> + movu m2, [r1q + mmsize]
> + movu m1, [zq + r3q + 4 + mmsize]
> + movu m3, [zq + r3q + 4]
> +%if cpuflag(sse2)
> +%define XOR pxor
> +%define SHUFFLE pshufd
> +%define UNPACKL punpckldq
> +%define UNPACKH punpckhdq
> +%define MOVH movq
> +%else
> +%define XOR xorps
> +%define SHUFFLE shufps
> +%define UNPACKL unpcklps
> +%define UNPACKH unpckhps
> +%define MOVH movlps
> +%endif
> +
> + XOR m2, m6
> + XOR m0, m6
> + SHUFFLE m2, m2, q0123
> + SHUFFLE m0, m0, q0123
Doing the shuffles before the XORs is 1 cycle faster on my SandyBridge.
If it's not faster for you, then ignore this.
@@ -269,10 +269,10 @@ cglobal sbr_qmf_pre_shuffle, 1,4,7,z
%define MOVH movlps
%endif
- XOR m2, m6
- XOR m0, m6
SHUFFLE m2, m2, q0123
SHUFFLE m0, m0, q0123
+ XOR m2, m6
+ XOR m0, m6
mova m5, m2
mova m4, m0
UNPACKL m2, m3
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Many things microsoft did are stupid, but not doing something just because
microsoft did it is even more stupid. If everything ms did were stupid they
would be bankrupt already.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130406/b9caeebc/attachment.asc>
More information about the ffmpeg-devel
mailing list