[FFmpeg-cvslog] x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high

Hendrik Leppkes git at videolan.org
Fri Jan 8 12:03:11 CET 2016


ffmpeg | branch: master | Hendrik Leppkes <h.leppkes at gmail.com> | Thu Jan  7 03:29:21 2016 +0100| [53ada3af62d566bfd53dfc0a90b79cb91328615e] | committer: Hendrik Leppkes

x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=53ada3af62d566bfd53dfc0a90b79cb91328615e
---

 libavfilter/x86/vf_w3fdif.asm    |   35 +++++++++++++++++++++++++++++++++--
 libavfilter/x86/vf_w3fdif_init.c |    2 +-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
index c3c73ea..52628c3 100644
--- a/libavfilter/x86/vf_w3fdif.asm
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
 REP_RET
 
 %if ARCH_X86_64
-
 cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%else
+cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%endif
     movq                  m2, [coefq]
+%if ARCH_X86_64
     DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    xor              offsetq, offsetq
+%else
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    %define linesized r4mp
+%endif
+
     pshufd                m0, m2, q0000
     SPLATW                m2, m2, 2
     pxor                  m7, m7
-    mov              offsetq, 0
     mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
     mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
     mov       in_lines_cur0q, [in_lines_cur0q]
@@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
     mov       in_lines_adj0q, [in_lines_adj0q]
 
+%if ARCH_X86_32
+    sub in_lines_cur1q, in_lines_cur0q
+    sub in_lines_cur2q, in_lines_cur0q
+    sub in_lines_adj0q, in_lines_cur0q
+    sub in_lines_adj1q, in_lines_cur0q
+    sub in_lines_adj2q, in_lines_cur0q
+    %define offsetq in_lines_cur0q
+%endif
+
 .loop:
+%if ARCH_X86_64
     movh                                   m3, [in_lines_cur0q+offsetq]
+%else
+    movh                                   m3, [in_lines_cur0q]
+%endif
     movh                                   m4, [in_lines_cur1q+offsetq]
     punpcklbw                              m3, m7
     punpcklbw                              m4, m7
@@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     pmaddwd                                m6, m2
     paddd                                  m3, m5
     paddd                                  m4, m6
+%if ARCH_X86_64
     paddd                                  m3, [work_lineq+offsetq*4]
     paddd                                  m4, [work_lineq+offsetq*4+mmsize]
     mova               [work_lineq+offsetq*4], m3
     mova        [work_lineq+offsetq*4+mmsize], m4
+%else
+    paddd                                  m3, [work_lineq]
+    paddd                                  m4, [work_lineq+mmsize]
+    mova                         [work_lineq], m3
+    mova                  [work_lineq+mmsize], m4
+    add                            work_lineq, mmsize*2
+%endif
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
 REP_RET
 
+%if ARCH_X86_64
+
 cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
     movq                  m0, [coefq+0]
     movd                  m4, [coefq+8]
diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
index 72ea657..9bf06e8 100644
--- a/libavfilter/x86/vf_w3fdif_init.c
+++ b/libavfilter/x86/vf_w3fdif_init.c
@@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->filter_simple_low   = ff_w3fdif_simple_low_sse2;
+        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_low  = ff_w3fdif_complex_low_sse2;
         dsp->filter_scale        = ff_w3fdif_scale_sse2;
     }
 
     if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
-        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
     }
 }



More information about the ffmpeg-cvslog mailing list