[FFmpeg-cvslog] x86: hpeldsp: Split off VP3-specific bits into a separate file

Tue Jan 31 19:51:44 EET 2017

ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Mon Jan 11 15:53:41 2016 +0100| [1dfc3cf89d0eb026af28be46294b85d79499ffb5] | committer: Diego Biurrun

x86: hpeldsp: Split off VP3-specific bits into a separate file

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1dfc3cf89d0eb026af28be46294b85d79499ffb5
---

 libavcodec/x86/Makefile           |   2 +
 libavcodec/x86/hpeldsp.asm        |  89 ------------------------------
 libavcodec/x86/hpeldsp.h          |   4 ++
 libavcodec/x86/hpeldsp_init.c     |  25 ++-------
 libavcodec/x86/hpeldsp_vp3.asm    | 111 ++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/hpeldsp_vp3_init.c |  56 +++++++++++++++++++
 6 files changed, 176 insertions(+), 111 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1460197..3208699 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,6 +51,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
+OBJS-$(CONFIG_VP3_DECODER)             += x86/hpeldsp_vp3_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 
@@ -122,5 +123,6 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
+YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/hpeldsp_vp3.o
 YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
 YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9dsp.o
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index b8929b9..8e21114 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -142,53 +142,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2
 
 
-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
-    lea          r4, [r2*3]
-    pcmpeqb      m6, m6
-.loop:
-    mova         m0, [r1]
-    mova         m2, [r1+r2]
-    mova         m1, [r1+1]
-    mova         m3, [r1+r2+1]
-    pxor         m0, m6
-    pxor         m2, m6
-    pxor         m1, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova       [r0], m0
-    mova    [r0+r2], m2
-    mova         m0, [r1+r2*2]
-    mova         m1, [r1+r2*2+1]
-    mova         m2, [r1+r4]
-    mova         m3, [r1+r4+1]
-    pxor         m0, m6
-    pxor         m1, m6
-    pxor         m2, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova  [r0+r2*2], m0
-    mova    [r0+r4], m2
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
-
-
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
 cglobal put_pixels8_y2, 4,5
@@ -260,48 +213,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2
 
 
-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
-    lea          r4, [r2*3]
-    mova         m0, [r1]
-    pcmpeqb      m6, m6
-    add          r1, r2
-    pxor         m0, m6
-.loop:
-    mova         m1, [r1]
-    mova         m2, [r1+r2]
-    pxor         m1, m6
-    pxor         m2, m6
-    PAVGB        m0, m1
-    PAVGB        m1, m2
-    pxor         m0, m6
-    pxor         m1, m6
-    mova       [r0], m0
-    mova    [r0+r2], m1
-    mova         m1, [r1+r2*2]
-    mova         m0, [r1+r4]
-    pxor         m1, m6
-    pxor         m0, m6
-    PAVGB        m2, m1
-    PAVGB        m1, m0
-    pxor         m2, m6
-    pxor         m1, m6
-    mova  [r0+r2*2], m2
-    mova    [r0+r4], m1
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
-
-
 ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8 0
 cglobal avg_pixels8, 4,5
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index 47b0b8b..d624ed9 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,6 +22,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "libavcodec/hpeldsp.h"
+
 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
 
@@ -35,4 +37,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
 
+void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
 #endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 65aac71..6731428 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -43,12 +43,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -57,12 +51,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -209,11 +197,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
         c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
     }
-
-    if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
-    }
 #endif /* HAVE_MMXEXT_EXTERNAL */
 }
 
@@ -243,11 +226,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
         c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
     }
-
-    if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
-    }
 #endif /* HAVE_AMD3DNOW_EXTERNAL */
 }
 
@@ -278,4 +256,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
 
     if (EXTERNAL_SSE2(cpu_flags))
         hpeldsp_init_sse2(c, flags, cpu_flags);
+
+    if (CONFIG_VP3_DECODER)
+        ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
 }
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
new file mode 100644
index 0000000..513f14e
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -0,0 +1,111 @@
+;******************************************************************************
+;* SIMD-optimized halfpel functions for VP3
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+    lea          r4, [r2*3]
+    pcmpeqb      m6, m6
+.loop:
+    mova         m0, [r1]
+    mova         m2, [r1+r2]
+    mova         m1, [r1+1]
+    mova         m3, [r1+r2+1]
+    pxor         m0, m6
+    pxor         m2, m6
+    pxor         m1, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    pxor         m0, m6
+    pxor         m2, m6
+    mova       [r0], m0
+    mova    [r0+r2], m2
+    mova         m0, [r1+r2*2]
+    mova         m1, [r1+r2*2+1]
+    mova         m2, [r1+r4]
+    mova         m3, [r1+r4+1]
+    pxor         m0, m6
+    pxor         m1, m6
+    pxor         m2, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    pxor         m0, m6
+    pxor         m2, m6
+    mova  [r0+r2*2], m0
+    mova    [r0+r4], m2
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_X2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+    lea          r4, [r2*3]
+    mova         m0, [r1]
+    pcmpeqb      m6, m6
+    add          r1, r2
+    pxor         m0, m6
+.loop:
+    mova         m1, [r1]
+    mova         m2, [r1+r2]
+    pxor         m1, m6
+    pxor         m2, m6
+    PAVGB        m0, m1
+    PAVGB        m1, m2
+    pxor         m0, m6
+    pxor         m1, m6
+    mova       [r0], m0
+    mova    [r0+r2], m1
+    mova         m1, [r1+r2*2]
+    mova         m0, [r1+r4]
+    pxor         m1, m6
+    pxor         m0, m6
+    PAVGB        m2, m1
+    PAVGB        m1, m0
+    pxor         m2, m6
+    pxor         m1, m6
+    mova  [r0+r2*2], m2
+    mova    [r0+r4], m1
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_Y2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_Y2_EXACT
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
new file mode 100644
index 0000000..2510c11
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/hpeldsp.h"
+
+#include "hpeldsp.h"
+
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+{
+    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+        if (flags & AV_CODEC_FLAG_BITEXACT) {
+            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+        }
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        if (flags & AV_CODEC_FLAG_BITEXACT) {
+            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+        }
+    }
+}