[FFmpeg-devel] [PATCH] x86/dsputil: implement 3DNow version of vector_clipf
James Almer
jamrial at gmail.com
Tue May 27 20:16:03 CEST 2014
Signed-off-by: James Almer <jamrial at gmail.com>
---
Those old K6-2 and K7 CPUs need some love.
libavcodec/x86/dsputil.asm | 47 +++++++++++++++++++++++++++++++++----------
libavcodec/x86/dsputil_init.c | 11 ++++++++++
libavcodec/x86/dsputil_x86.h | 2 ++
3 files changed, 49 insertions(+), 11 deletions(-)
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 4804682..36c9258 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -630,19 +630,35 @@ PUT_SIGNED_PIXELS_CLAMPED 3
;void ff_vector_clipf(float *dst, const float *src,
; float min, float max, int len)
;-----------------------------------------------------
-INIT_XMM sse
+%macro CLIPF_3DNOW 3
+ pfmin %1, %3
+ pfmax %1, %2
+%endmacro
+
+%macro CLIPF_SSE 3
+ minps %1, %3
+ maxps %1, %2
+%endmacro
+
+; %1 = number of XMM registers used (0 for the MMX/3DNow variant)
+%macro VECTOR_CLIPF 1
%if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
+cglobal vector_clipf, 3,3,%1, dst, src, len
%else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
+cglobal vector_clipf, 5,5,%1, dst, src, min, max, len
%endif
%if WIN64
SWAP 0, 2
SWAP 1, 3
%elif ARCH_X86_32
+%if mmsize == 8
+ movd m0, mind
+ movd m1, maxd
+%else
movss m0, minm
movss m1, maxm
%endif
+%endif
SPLATD m0
SPLATD m1
shl lend, 2
@@ -654,18 +670,27 @@ cglobal vector_clipf, 5,5,6, dst, src, min, max, len
mova m3, [srcq+lenq+mmsize*1]
mova m4, [srcq+lenq+mmsize*2]
mova m5, [srcq+lenq+mmsize*3]
- maxps m2, m0
- maxps m3, m0
- maxps m4, m0
- maxps m5, m0
- minps m2, m1
- minps m3, m1
- minps m4, m1
- minps m5, m1
+ CLIPF m2, m0, m1
+ CLIPF m3, m0, m1
+ CLIPF m4, m0, m1
+ CLIPF m5, m0, m1
mova [dstq+lenq+mmsize*0], m2
mova [dstq+lenq+mmsize*1], m3
mova [dstq+lenq+mmsize*2], m4
mova [dstq+lenq+mmsize*3], m5
add lenq, mmsize*4
jl .loop
+%if mmsize == 8
+ femms
+%endif
REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX 3dnow
+%define CLIPF CLIPF_3DNOW
+VECTOR_CLIPF 0
+%endif
+INIT_XMM sse
+%define CLIPF CLIPF_SSE
+VECTOR_CLIPF 6
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 30829ae..e6fe456 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -550,6 +550,14 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_MMX_EXTERNAL */
}
+static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
+ int cpu_flags, unsigned high_bit_depth)
+{
+#if ARCH_X86_32
+ c->vector_clipf = ff_vector_clipf_3dnow;
+#endif
+}
+
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth)
{
@@ -679,6 +687,9 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
}
+ if (EXTERNAL_AMD3DNOW(cpu_flags))
+ dsputil_init_3dnow(c, avctx, cpu_flags, high_bit_depth);
+
if (X86_MMXEXT(cpu_flags))
dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index 1f4711d..f6247c6 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -64,6 +64,8 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src,
int dxx, int dxy, int dyx, int dyy,
int shift, int r, int width, int height);
+void ff_vector_clipf_3dnow(float *dst, const float *src,
+ float min, float max, int len);
void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len);
--
1.8.5.5
More information about the ffmpeg-devel mailing list