[FFmpeg-cvslog] x86/blockdsp: move asm code out of dsputil

James Almer git at videolan.org
Thu Jun 19 13:14:36 CEST 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Thu Jun 19 01:00:17 2014 -0300| [fe782233aa6a74da59e92f21e2a72381a6d3d788] | committer: Michael Niedermayer

x86/blockdsp: move asm code out of dsputil

Also replace INLINE_<opt> with EXTERNAL_<opt>; these checks were wrongly
changed by commit 2b05db4f8102148d013755ac2a7e47f6d79ff7ca.

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fe782233aa6a74da59e92f21e2a72381a6d3d788
---

 libavcodec/x86/Makefile       |    3 +-
 libavcodec/x86/blockdsp.asm   |   84 +++++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/blockdsp_mmx.c |    5 +--
 libavcodec/x86/dsputil.asm    |   60 -----------------------------
 4 files changed, 88 insertions(+), 64 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 002144c..fa03f7c 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -2,6 +2,7 @@ OBJS                                   += x86/constants.o               \
                                           x86/fmtconvert_init.o         \
 
 OBJS-$(CONFIG_AC3DSP)                  += x86/ac3dsp_init.o
+OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp_mmx.o
 OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
 OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o
 OBJS-$(CONFIG_ENCODERS)                += x86/dsputilenc_mmx.o          \
@@ -53,7 +54,6 @@ OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 OBJS-$(CONFIG_WEBP_DECODER)            += x86/vp8dsp_init.o
 
-MMX-OBJS-$(CONFIG_BLOCKDSP)            += x86/blockdsp_mmx.o
 MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o             \
                                           x86/idct_mmx_xvid.o           \
                                           x86/idct_sse2_xvid.o          \
@@ -69,6 +69,7 @@ YASM-OBJS                              += x86/deinterlace.o             \
                                           x86/fmtconvert.o              \
 
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
+YASM-OBJS-$(CONFIG_BLOCKDSP)           += x86/blockdsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
 YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
                                           x86/dwt_yasm.o
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
new file mode 100644
index 0000000..247c750
--- /dev/null
+++ b/libavcodec/x86/blockdsp.asm
@@ -0,0 +1,84 @@
+;******************************************************************************
+;*
+;* SIMD-optimized clear block functions
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+;----------------------------------------
+; void ff_clear_block(int16_t *blocks);   zero-fills %2*8*mmsize bytes at blocks
+;----------------------------------------
+; %1 = number of xmm registers used
+; %2 = number of inline store loops (each repetition emits 8 mova stores)
+%macro CLEAR_BLOCK 2
+cglobal clear_block, 1, 1, %1, blocks
+    ZERO  m0, m0                        ; m0 = 0; ZERO is %defined per instruction set below
+%assign %%i 0
+%rep %2
+    mova  [blocksq+mmsize*(0+%%i)], m0  ; %%i = slot index of the first store in this repetition
+    mova  [blocksq+mmsize*(1+%%i)], m0
+    mova  [blocksq+mmsize*(2+%%i)], m0
+    mova  [blocksq+mmsize*(3+%%i)], m0
+    mova  [blocksq+mmsize*(4+%%i)], m0
+    mova  [blocksq+mmsize*(5+%%i)], m0
+    mova  [blocksq+mmsize*(6+%%i)], m0
+    mova  [blocksq+mmsize*(7+%%i)], m0
+%assign %%i %%i+8
+%endrep
+    RET
+%endmacro
+
+INIT_MMX mmx
+%define ZERO pxor
+CLEAR_BLOCK 0, 2                        ; 2 repetitions; assuming mmsize=8 here: 2*8*8 = 128 bytes (confirm)
+INIT_XMM sse
+%define ZERO xorps
+CLEAR_BLOCK 1, 1                        ; 1 repetition; assuming mmsize=16 here: 1*8*16 = 128 bytes (confirm)
+
+;-----------------------------------------
+; void ff_clear_blocks(int16_t *blocks);  zero-fills the 768 bytes starting at blocks
+;-----------------------------------------
+; %1 = number of xmm registers used
+%macro CLEAR_BLOCKS 1
+cglobal clear_blocks, 1, 2, %1, blocks, len
+    add   blocksq, 768                  ; blocksq = one past the end of the 768-byte region
+    mov      lenq, -768                 ; lenq = negative byte offset from that end, counts up to 0
+    ZERO       m0, m0                   ; m0 = 0; ZERO is %defined per instruction set below
+.loop:
+    mova  [blocksq+lenq+mmsize*0], m0   ; 8 stores of mmsize bytes per iteration
+    mova  [blocksq+lenq+mmsize*1], m0
+    mova  [blocksq+lenq+mmsize*2], m0
+    mova  [blocksq+lenq+mmsize*3], m0
+    mova  [blocksq+lenq+mmsize*4], m0
+    mova  [blocksq+lenq+mmsize*5], m0
+    mova  [blocksq+lenq+mmsize*6], m0
+    mova  [blocksq+lenq+mmsize*7], m0
+    add   lenq, mmsize*8                ; step past the 8 slots just written; sets SF for js
+    js .loop                            ; repeat while the offset is still negative
+    RET
+%endmacro
+
+INIT_MMX mmx
+%define ZERO pxor
+CLEAR_BLOCKS 0                          ; MMX variant: declares 0 xmm registers
+INIT_XMM sse
+%define ZERO xorps
+CLEAR_BLOCKS 1                          ; SSE variant: declares 1 xmm register (m0)
diff --git a/libavcodec/x86/blockdsp_mmx.c b/libavcodec/x86/blockdsp_mmx.c
index 53baad3..7780184 100644
--- a/libavcodec/x86/blockdsp_mmx.c
+++ b/libavcodec/x86/blockdsp_mmx.c
@@ -22,7 +22,6 @@
 #include "libavutil/attributes.h"
 #include "libavutil/internal.h"
 #include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/blockdsp.h"
 #include "libavcodec/version.h"
@@ -43,7 +42,7 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
     int cpu_flags = av_get_cpu_flags();
 
     if (!high_bit_depth) {
-        if (INLINE_MMX(cpu_flags)) {
+        if (EXTERNAL_MMX(cpu_flags)) {
             c->clear_block  = ff_clear_block_mmx;
             c->clear_blocks = ff_clear_blocks_mmx;
         }
@@ -52,7 +51,7 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
     if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
         return;
 
-        if (INLINE_SSE(cpu_flags)) {
+        if (EXTERNAL_SSE(cpu_flags)) {
             c->clear_block  = ff_clear_block_sse;
             c->clear_blocks = ff_clear_blocks_sse;
         }
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 2209c52..3bb5d9c 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -248,66 +248,6 @@ BSWAP32_BUF
 INIT_XMM ssse3
 BSWAP32_BUF
 
-;----------------------------------------
-; void ff_clear_block(int16_t *blocks);
-;----------------------------------------
-; %1 = number of xmm registers used
-; %2 = number of inline store loops
-%macro CLEAR_BLOCK 2
-cglobal clear_block, 1, 1, %1, blocks
-    ZERO  m0, m0
-%assign %%i 0
-%rep %2
-    mova  [blocksq+mmsize*(0+%%i)], m0
-    mova  [blocksq+mmsize*(1+%%i)], m0
-    mova  [blocksq+mmsize*(2+%%i)], m0
-    mova  [blocksq+mmsize*(3+%%i)], m0
-    mova  [blocksq+mmsize*(4+%%i)], m0
-    mova  [blocksq+mmsize*(5+%%i)], m0
-    mova  [blocksq+mmsize*(6+%%i)], m0
-    mova  [blocksq+mmsize*(7+%%i)], m0
-%assign %%i %%i+8
-%endrep
-    RET
-%endmacro
-
-INIT_MMX mmx
-%define ZERO pxor
-CLEAR_BLOCK 0, 2
-INIT_XMM sse
-%define ZERO xorps
-CLEAR_BLOCK 1, 1
-
-;-----------------------------------------
-; void ff_clear_blocks(int16_t *blocks);
-;-----------------------------------------
-; %1 = number of xmm registers used
-%macro CLEAR_BLOCKS 1
-cglobal clear_blocks, 1, 2, %1, blocks, len
-    add   blocksq, 768
-    mov      lenq, -768
-    ZERO       m0, m0
-.loop
-    mova  [blocksq+lenq+mmsize*0], m0
-    mova  [blocksq+lenq+mmsize*1], m0
-    mova  [blocksq+lenq+mmsize*2], m0
-    mova  [blocksq+lenq+mmsize*3], m0
-    mova  [blocksq+lenq+mmsize*4], m0
-    mova  [blocksq+lenq+mmsize*5], m0
-    mova  [blocksq+lenq+mmsize*6], m0
-    mova  [blocksq+lenq+mmsize*7], m0
-    add   lenq, mmsize*8
-    js .loop
-    RET
-%endmacro
-
-INIT_MMX mmx
-%define ZERO pxor
-CLEAR_BLOCKS 0
-INIT_XMM sse
-%define ZERO xorps
-CLEAR_BLOCKS 1
-
 ;--------------------------------------------------------------------------
 ;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
 ;                                  int line_size)



More information about the ffmpeg-cvslog mailing list