[FFmpeg-cvslog] x86: dsputil: Fix h263 loop filter link error in some configurations

Daniel Kang git at videolan.org
Tue Feb 19 12:59:51 CET 2013


ffmpeg | branch: master | Daniel Kang <daniel.d.kang at gmail.com> | Mon Feb 18 10:50:06 2013 -0500| [9acd23d655b5e3a3b56f9916480356fe0e48c70c] | committer: Diego Biurrun

x86: dsputil: Fix h263 loop filter link error in some configurations

This was caused by unconditionally referencing a conditionally compiled
table. Now the code is also compiled conditionally.

Signed-off-by: Diego Biurrun <diego at biurrun.de>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9acd23d655b5e3a3b56f9916480356fe0e48c70c
---

 libavcodec/x86/Makefile            |    2 +
 libavcodec/x86/dsputil.asm         |  162 -------------------------------
 libavcodec/x86/h263_loopfilter.asm |  187 ++++++++++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+), 162 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index c740573..a759e6e 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -44,6 +44,8 @@ YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc.o
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
+YASM-OBJS-$(CONFIG_H263_DECODER)       += x86/h263_loopfilter.o
+YASM-OBJS-$(CONFIG_H263_ENCODER)       += x86/h263_loopfilter.o
 YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
                                           x86/h264_chromamc_10bit.o
 YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 7ea796d..4539e5c 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -22,8 +22,6 @@
 %include "libavutil/x86/x86util.asm"
 
 SECTION_RODATA
-cextern pb_FC
-cextern h263_loop_filter_strength
 pb_f: times 16 db 15
 pb_zzzzzzzz77777777: times 8 db -1
 pb_7: times 8 db 7
@@ -651,163 +649,3 @@ BSWAP32_BUF
 INIT_XMM ssse3
 BSWAP32_BUF
 
-
-%macro H263_LOOP_FILTER 5
-    pxor         m7, m7
-    mova         m0, [%1]
-    mova         m1, [%1]
-    mova         m2, [%4]
-    mova         m3, [%4]
-    punpcklbw    m0, m7
-    punpckhbw    m1, m7
-    punpcklbw    m2, m7
-    punpckhbw    m3, m7
-    psubw        m0, m2
-    psubw        m1, m3
-    mova         m2, [%2]
-    mova         m3, [%2]
-    mova         m4, [%3]
-    mova         m5, [%3]
-    punpcklbw    m2, m7
-    punpckhbw    m3, m7
-    punpcklbw    m4, m7
-    punpckhbw    m5, m7
-    psubw        m4, m2
-    psubw        m5, m3
-    psllw        m4, 2
-    psllw        m5, 2
-    paddw        m4, m0
-    paddw        m5, m1
-    pxor         m6, m6
-    pcmpgtw      m6, m4
-    pcmpgtw      m7, m5
-    pxor         m4, m6
-    pxor         m5, m7
-    psubw        m4, m6
-    psubw        m5, m7
-    psrlw        m4, 3
-    psrlw        m5, 3
-    packuswb     m4, m5
-    packsswb     m6, m7
-    pxor         m7, m7
-    movd         m2, %5
-    punpcklbw    m2, m2
-    punpcklbw    m2, m2
-    punpcklbw    m2, m2
-    psubusb      m2, m4
-    mova         m3, m2
-    psubusb      m3, m4
-    psubb        m2, m3
-    mova         m3, [%2]
-    mova         m4, [%3]
-    pxor         m3, m6
-    pxor         m4, m6
-    paddusb      m3, m2
-    psubusb      m4, m2
-    pxor         m3, m6
-    pxor         m4, m6
-    paddusb      m2, m2
-    packsswb     m0, m1
-    pcmpgtb      m7, m0
-    pxor         m0, m7
-    psubb        m0, m7
-    mova         m1, m0
-    psubusb      m0, m2
-    psubb        m1, m0
-    pand         m1, [pb_FC]
-    psrlw        m1, 2
-    pxor         m1, m7
-    psubb        m1, m7
-    mova         m5, [%1]
-    mova         m6, [%4]
-    psubb        m5, m1
-    paddb        m6, m1
-%endmacro
-
-INIT_MMX mmx
-; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
-cglobal h263_v_loop_filter, 3,5
-    movsxdifnidn r1, r1d
-    movsxdifnidn r2, r2d
-
-    lea          r4, [h263_loop_filter_strength]
-    movzx       r3d, BYTE [r4+r2]
-    movsx        r2, r3b
-    shl          r2, 1
-
-    mov          r3, r0
-    sub          r3, r1
-    mov          r4, r3
-    sub          r4, r1
-    H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d
-
-    mova       [r3], m3
-    mova       [r0], m4
-    mova       [r4], m5
-    mova    [r0+r1], m6
-    RET
-
-%macro TRANSPOSE4X4 2
-    movd      m0, [%1]
-    movd      m1, [%1+r1]
-    movd      m2, [%1+r1*2]
-    movd      m3, [%1+r3]
-    punpcklbw m0, m1
-    punpcklbw m2, m3
-    mova      m1, m0
-    punpcklwd m0, m2
-    punpckhwd m1, m2
-    movd [%2+ 0], m0
-    punpckhdq m0, m0
-    movd [%2+ 8], m0
-    movd [%2+16], m1
-    punpckhdq m1, m1
-    movd [%2+24], m1
-%endmacro
-
-
-; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
-INIT_MMX mmx
-cglobal h263_h_loop_filter, 3,5,0,32
-    movsxdifnidn r1, r1d
-    movsxdifnidn r2, r2d
-
-    lea          r4, [h263_loop_filter_strength]
-    movzx       r3d, BYTE [r4+r2]
-    movsx        r2, r3b
-    shl          r2, 1
-
-    sub          r0, 2
-    lea          r3, [r1*3]
-
-    TRANSPOSE4X4 r0, rsp
-    lea          r4, [r0+r1*4]
-    TRANSPOSE4X4 r4, rsp+4
-
-    H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d
-
-    mova         m1, m5
-    mova         m0, m4
-    punpcklbw    m5, m3
-    punpcklbw    m4, m6
-    punpckhbw    m1, m3
-    punpckhbw    m0, m6
-    mova         m3, m5
-    mova         m6, m1
-    punpcklwd    m5, m4
-    punpcklwd    m1, m0
-    punpckhwd    m3, m4
-    punpckhwd    m6, m0
-    movd       [r0], m5
-    punpckhdq    m5, m5
-    movd  [r0+r1*1], m5
-    movd  [r0+r1*2], m3
-    punpckhdq    m3, m3
-    movd    [r0+r3], m3
-    movd       [r4], m1
-    punpckhdq    m1, m1
-    movd  [r4+r1*1], m1
-    movd  [r4+r1*2], m6
-    punpckhdq    m6, m6
-    movd    [r4+r3], m6
-    RET
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
new file mode 100644
index 0000000..a940aad
--- /dev/null
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -0,0 +1,187 @@
+;******************************************************************************
+;* MMX-optimized H.263 loop filter
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA                       ; x86inc macro; only external declarations follow, no data is defined in this file
+cextern pb_FC                        ; byte-mask constant defined elsewhere; used below before a 2-bit right shift
+cextern h263_loop_filter_strength    ; external table, read one byte at index qscale by both functions below
+
+SECTION_TEXT                         ; code starts here
+
+%macro H263_LOOP_FILTER 5 ; %1..%4 = addresses of four neighbouring pixel lines (called A,B,C,D below), %5 = dword operand whose low byte is the limit
+    pxor         m7, m7              ; m7 = 0, used as the zero half when widening bytes to words
+    mova         m0, [%1]            ; A, twice: one copy per low/high half
+    mova         m1, [%1]
+    mova         m2, [%4]            ; D, twice
+    mova         m3, [%4]
+    punpcklbw    m0, m7              ; m0 = A low bytes as words
+    punpckhbw    m1, m7              ; m1 = A high bytes as words
+    punpcklbw    m2, m7              ; m2 = D low bytes as words
+    punpckhbw    m3, m7              ; m3 = D high bytes as words
+    psubw        m0, m2              ; m0 = A - D (low half), kept until the second stage
+    psubw        m1, m3              ; m1 = A - D (high half)
+    mova         m2, [%2]            ; B, twice
+    mova         m3, [%2]
+    mova         m4, [%3]            ; C, twice
+    mova         m5, [%3]
+    punpcklbw    m2, m7
+    punpckhbw    m3, m7
+    punpcklbw    m4, m7
+    punpckhbw    m5, m7
+    psubw        m4, m2              ; m4 = C - B (low half)
+    psubw        m5, m3              ; m5 = C - B (high half)
+    psllw        m4, 2               ; 4*(C - B)
+    psllw        m5, 2
+    paddw        m4, m0              ; m4 = (A - D) + 4*(C - B)
+    paddw        m5, m1
+    pxor         m6, m6
+    pcmpgtw      m6, m4              ; m6 = all-ones in words where the sum is negative (low half)
+    pcmpgtw      m7, m5              ; same for the high half (m7 was 0)
+    pxor         m4, m6              ; xor with mask then subtract mask = absolute value
+    pxor         m5, m7
+    psubw        m4, m6
+    psubw        m5, m7
+    psrlw        m4, 3               ; |sum| >> 3
+    psrlw        m5, 3
+    packuswb     m4, m5              ; m4 = magnitude, one unsigned byte per pixel
+    packsswb     m6, m7              ; m6 = per-byte sign mask (0x00 or 0xFF)
+    pxor         m7, m7              ; m7 = 0 again
+    movd         m2, %5              ; load the limit
+    punpcklbw    m2, m2              ; three self-interleaves replicate the low byte
+    punpcklbw    m2, m2              ; into all 8 bytes of m2
+    punpcklbw    m2, m2
+    psubusb      m2, m4              ; t = max(limit - mag, 0)
+    mova         m3, m2
+    psubusb      m3, m4              ; max(t - mag, 0)
+    psubb        m2, m3              ; m2 = min(t, mag): magnitude of the correction for B and C
+    mova         m3, [%2]            ; reload B
+    mova         m4, [%3]            ; reload C
+    pxor         m3, m6              ; complement bytes where the sign mask is set, so the
+    pxor         m4, m6              ; saturating add/sub below act in the opposite direction there
+    paddusb      m3, m2              ; B moves by the signed correction, saturating
+    psubusb      m4, m2              ; C moves the opposite way, saturating
+    pxor         m3, m6              ; undo the complement: m3 = filtered B
+    pxor         m4, m6              ; m4 = filtered C
+    paddusb      m2, m2              ; m2 = 2 * correction magnitude (saturating), bound for the second stage
+    packsswb     m0, m1              ; m0 = A - D saturated to signed bytes
+    pcmpgtb      m7, m0              ; m7 = per-byte sign mask of A - D
+    pxor         m0, m7              ; absolute value via xor/subtract with the mask
+    psubb        m0, m7
+    mova         m1, m0
+    psubusb      m0, m2              ; max(|A - D| - bound, 0)
+    psubb        m1, m0              ; m1 = min(|A - D|, bound)
+    pand         m1, [pb_FC]         ; presumably 0xFC in every byte (defined elsewhere): drops the low 2 bits so the word shift cannot leak across bytes
+    psrlw        m1, 2               ; per-byte >> 2
+    pxor         m1, m7              ; restore the sign of A - D
+    psubb        m1, m7              ; m1 = signed correction for A and D
+    mova         m5, [%1]
+    mova         m6, [%4]
+    psubb        m5, m1              ; m5 = filtered A (wrapping byte subtract)
+    paddb        m6, m1              ; m6 = filtered D (wrapping byte add); results: m5=A', m3=B', m4=C', m6=D'
+%endmacro
+
+INIT_MMX mmx                         ; x86inc: m0..m7 map to MMX registers, so each mova below moves 8 bytes
+; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
+cglobal h263_v_loop_filter, 3,5      ; x86inc prologue: 3 arguments (r0=src, r1=stride, r2=qscale), 5 GPRs in total
+    movsxdifnidn r1, r1d             ; x86inc helper; presumably widens the int stride to pointer width where needed
+    movsxdifnidn r2, r2d             ; same for qscale
+
+    lea          r4, [h263_loop_filter_strength] ; r4 = base address of the external strength table
+    movzx       r3d, BYTE [r4+r2]    ; r3d = table[qscale]
+    movsx        r2, r3b             ; r2 = that byte, sign-extended
+    shl          r2, 1               ; r2 = 2 * strength, passed to the macro as its limit
+
+    mov          r3, r0
+    sub          r3, r1              ; r3 = src - stride
+    mov          r4, r3
+    sub          r4, r1              ; r4 = src - 2*stride
+    H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d ; filter the four rows src-2*stride .. src+stride
+
+    mova       [r3], m3              ; write back row src - stride
+    mova       [r0], m4              ; write back row src
+    mova       [r4], m5              ; write back row src - 2*stride
+    mova    [r0+r1], m6              ; write back row src + stride
+    RET
+
+%macro TRANSPOSE4X4 2 ; %1 = source address, %2 = destination address; expects r1 = stride and r3 = 3*stride; overwrites m0-m3
+    movd      m0, [%1]               ; 4 bytes of row 0
+    movd      m1, [%1+r1]            ; 4 bytes of row 1
+    movd      m2, [%1+r1*2]          ; 4 bytes of row 2
+    movd      m3, [%1+r3]            ; 4 bytes of row 3
+    punpcklbw m0, m1                 ; interleave rows 0 and 1 bytewise
+    punpcklbw m2, m3                 ; interleave rows 2 and 3 bytewise
+    mova      m1, m0
+    punpcklwd m0, m2                 ; m0 = column 0 (low dword), column 1 (high dword)
+    punpckhwd m1, m2                 ; m1 = column 2 (low dword), column 3 (high dword)
+    movd [%2+ 0], m0                 ; store column 0
+    punpckhdq m0, m0                 ; bring the high dword down
+    movd [%2+ 8], m0                 ; store column 1
+    movd [%2+16], m1                 ; store column 2
+    punpckhdq m1, m1
+    movd [%2+24], m1                 ; store column 3; the 8-byte spacing leaves room for a second 4-byte group at +4
+%endmacro
+
+
+; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
+INIT_MMX mmx
+cglobal h263_h_loop_filter, 3,5,0,32 ; x86inc prologue: 3 arguments, 5 GPRs, 0 xmm registers, 32 bytes of stack scratch at rsp
+    movsxdifnidn r1, r1d             ; x86inc helper; presumably widens the int stride to pointer width where needed
+    movsxdifnidn r2, r2d             ; same for qscale
+
+    lea          r4, [h263_loop_filter_strength] ; r4 = base address of the external strength table
+    movzx       r3d, BYTE [r4+r2]    ; r3d = table[qscale]
+    movsx        r2, r3b             ; r2 = that byte, sign-extended
+    shl          r2, 1               ; r2 = 2 * strength, passed to the macro as its limit
+
+    sub          r0, 2               ; start two bytes to the left of src
+    lea          r3, [r1*3]          ; r3 = 3*stride, required by TRANSPOSE4X4 and the stores below
+
+    TRANSPOSE4X4 r0, rsp             ; rows 0-3: the four columns go to rsp+0/8/16/24
+    lea          r4, [r0+r1*4]       ; r4 = address of row 4
+    TRANSPOSE4X4 r4, rsp+4           ; rows 4-7: fill the upper 4 bytes of each 8-byte column slot
+
+    H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d ; filter the four columns, each holding 8 rows
+
+    mova         m1, m5              ; m5 = filtered column 0, m3 = column 1, m4 = column 2, m6 = column 3
+    mova         m0, m4
+    punpcklbw    m5, m3              ; columns 0/1 interleaved, rows 0-3
+    punpcklbw    m4, m6              ; columns 2/3 interleaved, rows 0-3
+    punpckhbw    m1, m3              ; columns 0/1 interleaved, rows 4-7
+    punpckhbw    m0, m6              ; columns 2/3 interleaved, rows 4-7
+    mova         m3, m5
+    mova         m6, m1
+    punpcklwd    m5, m4              ; m5 = row 0 (low dword), row 1 (high dword)
+    punpcklwd    m1, m0              ; m1 = row 4, row 5
+    punpckhwd    m3, m4              ; m3 = row 2, row 3
+    punpckhwd    m6, m0              ; m6 = row 6, row 7
+    movd       [r0], m5              ; write back 4 bytes per row
+    punpckhdq    m5, m5              ; bring the high dword down for the next row
+    movd  [r0+r1*1], m5
+    movd  [r0+r1*2], m3
+    punpckhdq    m3, m3
+    movd    [r0+r3], m3
+    movd       [r4], m1              ; rows 4-7, addressed relative to r4
+    punpckhdq    m1, m1
+    movd  [r4+r1*1], m1
+    movd  [r4+r1*2], m6
+    punpckhdq    m6, m6
+    movd    [r4+r3], m6
+    RET



More information about the ffmpeg-cvslog mailing list