[FFmpeg-devel] [PATCH/RFC] avutil/pixelutils: port ppc/altivec sad functions

Clément Bœsch u at pkh.me
Sat Aug 23 21:24:33 CEST 2014


---
This is 100% untested and probably doesn't even compile.

Can anyone with PPC/Altivec HW test or provide such access?

Once the AltiVec optimizations are ported, we can drop the duplicated version
in libavcodec entirely.

The fate-pixelutils tests should cover the alignment checks; some fixes may be
needed in this area, as I'm not sure about the alignment requirements.

Regarding the copyright, I credited only Brian Foley, based on commit
59925ef2044e3ba2b5be49a35d37929550e3d6bc. If anyone feels that more
people should be added, please tell me.
---
 libavutil/pixelutils.c             |   6 +--
 libavutil/ppc/Makefile             |   3 ++
 libavutil/ppc/pixelutils.h         |  26 +++++++++
 libavutil/ppc/pixelutils_altivec.c | 106 +++++++++++++++++++++++++++++++++++++
 libavutil/ppc/pixelutils_altivec.h |  30 +++++++++++
 libavutil/ppc/pixelutils_init.c    |  33 ++++++++++++
 6 files changed, 201 insertions(+), 3 deletions(-)
 create mode 100644 libavutil/ppc/pixelutils.h
 create mode 100644 libavutil/ppc/pixelutils_altivec.c
 create mode 100644 libavutil/ppc/pixelutils_altivec.h
 create mode 100644 libavutil/ppc/pixelutils_init.c

diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c
index 10ff7e8..9a7b7f1 100644
--- a/libavutil/pixelutils.c
+++ b/libavutil/pixelutils.c
@@ -23,6 +23,7 @@
 #if CONFIG_PIXELUTILS
 
 #include "x86/pixelutils.h"
+#include "ppc/pixelutils.h"
 
 static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
                                     const uint8_t *src2, ptrdiff_t stride2,
@@ -77,9 +78,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne
     if (w_bits != h_bits) // only squared sad for now
         return NULL;
 
-#if ARCH_X86
-    ff_pixelutils_sad_init_x86(sad, aligned);
-#endif
+    if (ARCH_X86) ff_pixelutils_sad_init_x86(sad, aligned);
+    if (ARCH_PPC) ff_pixelutils_sad_init_ppc(sad, aligned);
 
     return sad[w_bits - 1];
 #endif
diff --git a/libavutil/ppc/Makefile b/libavutil/ppc/Makefile
index 4fd8d6d..295e4e5 100644
--- a/libavutil/ppc/Makefile
+++ b/libavutil/ppc/Makefile
@@ -1,4 +1,7 @@
 OBJS += ppc/cpu.o                                                       \
         ppc/float_dsp_init.o                                            \
 
+OBJS-$(CONFIG_PIXELUTILS) += ppc/pixelutils_init.o
+ALTIVEC-OBJS-$(CONFIG_PIXELUTILS) += ppc/pixelutils_altivec.o
+
ALTIVEC-OBJS += ppc/float_dsp_altivec.o                                 \
diff --git a/libavutil/ppc/pixelutils.h b/libavutil/ppc/pixelutils.h
new file mode 100644
index 0000000..c737a69
--- /dev/null
+++ b/libavutil/ppc/pixelutils.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PPC_PIXELUTILS_H
+#define AVUTIL_PPC_PIXELUTILS_H
+
+#include "libavutil/pixelutils.h"
+
+void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned);
+
+#endif /* AVUTIL_PPC_PIXELUTILS_H */
diff --git a/libavutil/ppc/pixelutils_altivec.c b/libavutil/ppc/pixelutils_altivec.c
new file mode 100644
index 0000000..5cf338a
--- /dev/null
+++ b/libavutil/ppc/pixelutils_altivec.c
@@ -0,0 +1,106 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2002 Brian Foley
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "util_altivec.h"
+#include "pixelutils_altivec.h"
+
+/* SAD of two 8x8 blocks; pointers and strides may have any alignment. */
+int ff_pixelutils_sad_8x8_altivec(const uint8_t *src1, ptrdiff_t stride1,
+                                  const uint8_t *src2, ptrdiff_t stride2)
+{
+    int i, s;
+    const vector unsigned int zero = (vector unsigned int) vec_splat_u32(0);
+    const vector unsigned char permclear = (vector unsigned char)
+        { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
+    vector signed int sumdiffs;
+
+    for (i = 0; i < 8; i++) {
+        /* vec_lvsl() depends on the low 4 address bits, so redo it per row:
+         * a stride that is not a multiple of 16 changes the row alignment. */
+        vector unsigned char perm1 = vec_lvsl(0, src1);
+        vector unsigned char perm2 = vec_lvsl(0, src2);
+        /* Read potentially unaligned pixels into t1 and t2.
+         * Since we're reading 16 pixels, and actually only want 8,
+         * mask out the last 8 pixels. The 0s don't change the sum. */
+        vector unsigned char src1l = vec_ld(0, src1);
+        vector unsigned char src1r = vec_ld(7, src1);
+        vector unsigned char src2l = vec_ld(0, src2);
+        vector unsigned char src2r = vec_ld(7, src2);
+        vector unsigned char t1 = vec_and(vec_perm(src1l, src1r, perm1),
+                                          permclear);
+        vector unsigned char t2 = vec_and(vec_perm(src2l, src2r, perm2),
+                                          permclear);
+
+        /* Calculate a sum of abs differences vector. */
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
+
+        /* Add each 4 pixel group together and put 4 results into sad. */
+        sad = vec_sum4s(t5, sad);
+
+        src1 += stride1;
+        src2 += stride2;
+    }
+
+    /* Sum up the four partial sums, and put the result into s. */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+    return s;
+}
+
+/* 16x16 SAD; every src1 row must be 16-byte aligned, src2 may be unaligned. */
+int ff_pixelutils_sad_16x16_altivec(const uint8_t *src1, ptrdiff_t stride1,
+                                    const uint8_t *src2, ptrdiff_t stride2)
+{
+    int i, s;
+    const vector unsigned int zero = (vector unsigned int) vec_splat_u32(0);
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
+    vector signed int sumdiffs;
+
+    for (i = 0; i < 16; i++) {
+        /* src2 is realigned, with vec_lvsl() redone per row so that stride2
+         * is unconstrained; src1 is loaded as-is and must be 16-byte aligned. */
+        vector unsigned char perm  = vec_lvsl(0, src2);
+        vector unsigned char src2l = vec_ld(0,  src2);
+        vector unsigned char src2r = vec_ld(15, src2);
+        vector unsigned char t1 = vec_ld(0, src1);
+        vector unsigned char t2 = vec_perm(src2l, src2r, perm);
+
+        /* Calculate a sum of abs differences vector. */
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
+
+        /* Add each 4 pixel group together and put 4 results into sad. */
+        sad = vec_sum4s(t5, sad);
+
+        src1 += stride1;
+        src2 += stride2;
+    }
+
+    /* Sum up the four partial sums, and put the result into s. */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+    return s;
+}
diff --git a/libavutil/ppc/pixelutils_altivec.h b/libavutil/ppc/pixelutils_altivec.h
new file mode 100644
index 0000000..435ba36
--- /dev/null
+++ b/libavutil/ppc/pixelutils_altivec.h
@@ -0,0 +1,30 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2002 Brian Foley
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PPC_PIXELUTILS_ALTIVEC_H
+#define AVUTIL_PPC_PIXELUTILS_ALTIVEC_H
+
+int ff_pixelutils_sad_8x8_altivec(const uint8_t *src1, ptrdiff_t stride1,
+                                  const uint8_t *src2, ptrdiff_t stride2);
+
+int ff_pixelutils_sad_16x16_altivec(const uint8_t *src1, ptrdiff_t stride1,
+                                    const uint8_t *src2, ptrdiff_t stride2);
+
+#endif /* AVUTIL_PPC_PIXELUTILS_ALTIVEC_H */
diff --git a/libavutil/ppc/pixelutils_init.c b/libavutil/ppc/pixelutils_init.c
new file mode 100644
index 0000000..ed69232
--- /dev/null
+++ b/libavutil/ppc/pixelutils_init.c
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "pixelutils.h"
+#include "pixelutils_altivec.h"
+
+/* AltiVec SADs: 8x8 takes any alignment, 16x16 needs src1 aligned. */
+void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned)
+{
+    if (PPC_ALTIVEC(av_get_cpu_flags())) {
+        sad[2] = ff_pixelutils_sad_8x8_altivec;
+        if (aligned >= 1) sad[3] = ff_pixelutils_sad_16x16_altivec;
+    }
+}
-- 
2.1.0



More information about the ffmpeg-devel mailing list