[FFmpeg-cvslog] audiodsp/x86: yasmify vector_clipf_sse

Anton Khirnov git at videolan.org
Mon Mar 20 23:35:32 EET 2017


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Tue Aug  9 20:20:00 2016 +0200| [12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5] | committer: Anton Khirnov

audiodsp/x86: yasmify vector_clipf_sse

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5
---

 libavcodec/x86/Makefile        |  1 -
 libavcodec/x86/audiodsp.asm    | 43 +++++++++++++++++++++++++++++++
 libavcodec/x86/audiodsp_init.c |  2 +-
 libavcodec/x86/audiodsp_mmx.c  | 58 ------------------------------------------
 4 files changed, 44 insertions(+), 60 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 204c856..872b7fa 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 
 # GCC inline assembly optimizations
 # subsystems
-MMX-OBJS-$(CONFIG_AUDIODSP)            += x86/audiodsp_mmx.o
 MMX-OBJS-$(CONFIG_FDCTDSP)             += x86/fdct.o
 MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \
                                           x86/hpeldsp_mmx.o
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index d7e63eb..1bc7e32 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+; void ff_vector_clipf_sse(float *dst, const float *src,
+;                          int len, float min, float max)
+INIT_XMM sse
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+    VBROADCASTSS m0, minm                   ; x86-32: min and max are both read from memory
+    VBROADCASTSS m1, maxm
+%elif WIN64
+    VBROADCASTSS m0, m3                     ; win64: min is the 4th argument, passed in xmm3
+    VBROADCASTSS m1, maxm                   ; win64: max is the 5th argument, read from memory
+%else ; 64bit sysv
+    VBROADCASTSS m0, m0                     ; sysv: min/max are passed in xmm0/xmm1, broadcast in place
+    VBROADCASTSS m1, m1
+%endif
+
+    movsxdifnidn lenq, lend                 ; widen the int len so it can be used in addressing
+
+.loop:                                      ; walks both buffers from the end towards the start
+    mova m2, [srcq + 4 * lenq - 4 * mmsize] ; load the last 4 unprocessed vectors (aligned access)
+    mova m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    maxps m2, m0                            ; raise values below min up to min
+    maxps m3, m0
+    maxps m4, m0
+    maxps m5, m0
+
+    minps m2, m1                            ; lower values above max down to max
+    minps m3, m1
+    minps m4, m1
+    minps m5, m1
+
+    mova [dstq + 4 * lenq - 4 * mmsize], m2 ; store at the same offsets in dst
+    mova [dstq + 4 * lenq - 3 * mmsize], m3
+    mova [dstq + 4 * lenq - 2 * mmsize], m4
+    mova [dstq + 4 * lenq - 1 * mmsize], m5
+
+    sub lenq, mmsize                        ; 4 vectors * mmsize bytes / 4 bytes per float = mmsize floats done
+    jg .loop                                ; NOTE(review): presumably len is a positive multiple of 16 - confirm
+
+    RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8eb2e56..2373115 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
     if (EXTERNAL_MMXEXT(cpu_flags))
         c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
 
-    if (INLINE_SSE(cpu_flags))
+    if (EXTERNAL_SSE(cpu_flags))
         c->vector_clipf = ff_vector_clipf_sse;
 
     if (EXTERNAL_SSE2(cpu_flags)) {
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
deleted file mode 100644
index 04cbb90..0000000
--- a/libavcodec/x86/audiodsp_mmx.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/x86/asm.h"
-#include "audiodsp.h"
-
-#if HAVE_INLINE_ASM
-
-void ff_vector_clipf_sse(float *dst, const float *src,
-                         int len, float min, float max)
-{
-    x86_reg i = (len - 16) * 4;
-    __asm__ volatile (
-        "movss          %3, %%xmm4      \n\t"
-        "movss          %4, %%xmm5      \n\t"
-        "shufps $0, %%xmm4, %%xmm4      \n\t"
-        "shufps $0, %%xmm5, %%xmm5      \n\t"
-        "1:                             \n\t"
-        "movaps   (%2, %0), %%xmm0      \n\t" // 3/1 on intel
-        "movaps 16(%2, %0), %%xmm1      \n\t"
-        "movaps 32(%2, %0), %%xmm2      \n\t"
-        "movaps 48(%2, %0), %%xmm3      \n\t"
-        "maxps      %%xmm4, %%xmm0      \n\t"
-        "maxps      %%xmm4, %%xmm1      \n\t"
-        "maxps      %%xmm4, %%xmm2      \n\t"
-        "maxps      %%xmm4, %%xmm3      \n\t"
-        "minps      %%xmm5, %%xmm0      \n\t"
-        "minps      %%xmm5, %%xmm1      \n\t"
-        "minps      %%xmm5, %%xmm2      \n\t"
-        "minps      %%xmm5, %%xmm3      \n\t"
-        "movaps     %%xmm0,   (%1, %0)  \n\t"
-        "movaps     %%xmm1, 16(%1, %0)  \n\t"
-        "movaps     %%xmm2, 32(%1, %0)  \n\t"
-        "movaps     %%xmm3, 48(%1, %0)  \n\t"
-        "sub           $64, %0          \n\t"
-        "jge            1b              \n\t"
-        : "+&r" (i)
-        : "r" (dst), "r" (src), "m" (min), "m" (max)
-        : "memory");
-}
-
-#endif /* HAVE_INLINE_ASM */



More information about the ffmpeg-cvslog mailing list