[FFmpeg-devel] [PATCH 2/3] libavcodec: arm: Add a NEON implementation of pixblockdsp
Martin Storsjö
martin at martin.st
Wed May 13 16:27:56 EEST 2020
Cortex A7 A8 A9 A53 A72
get_pixels_c: 144.7 146.0 143.0 137.7 69.0
get_pixels_armv6: 112.0 106.7 90.2 95.0 72.5
get_pixels_neon: 69.0 29.7 68.7 40.2 19.0
get_pixels_unaligned_c: 144.7 146.2 143.0 137.7 69.0
get_pixels_unaligned_neon: 77.0 36.5 72.5 48.5 19.0
diff_pixels_c: 376.7 319.7 265.5 307.7 148.0
diff_pixels_armv6: 179.0 159.5 205.5 139.0 142.0
diff_pixels_neon: 69.0 40.2 77.5 53.2 26.0
diff_pixels_unaligned_c: 376.7 319.7 265.5 307.7 148.0
diff_pixels_unaligned_neon: 85.0 54.5 93.5 66.7 26.0
---
libavcodec/arm/Makefile | 1 +
libavcodec/arm/pixblockdsp_init_arm.c | 18 +++++++
libavcodec/arm/pixblockdsp_neon.S | 69 +++++++++++++++++++++++++++
3 files changed, 88 insertions(+)
create mode 100644 libavcodec/arm/pixblockdsp_neon.S
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index c99e8e1bd1..c6be814153 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -126,6 +126,7 @@ NEON-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_neon.o \
NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \
arm/mdct_fixed_neon.o
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
+NEON-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_neon.o
NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o
NEON-OBJS-$(CONFIG_VC1DSP) += arm/vc1dsp_init_neon.o \
arm/vc1dsp_neon.o
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index 59d2b49381..5481c0178c 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -29,6 +29,15 @@ void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels,
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
const uint8_t *s2, ptrdiff_t stride);
+/*
+ * NEON versions, implemented in pixblockdsp_neon.S.
+ * The plain variants load each source row with a 64-bit alignment hint;
+ * the _unaligned variants load the source rows without any alignment hint.
+ */
+void ff_get_pixels_neon(int16_t *block, const uint8_t *pixels,
+ ptrdiff_t stride);
+void ff_get_pixels_unaligned_neon(int16_t *block, const uint8_t *pixels,
+ ptrdiff_t stride);
+void ff_diff_pixels_neon(int16_t *block, const uint8_t *s1,
+ const uint8_t *s2, ptrdiff_t stride);
+void ff_diff_pixels_unaligned_neon(int16_t *block, const uint8_t *s1,
+ const uint8_t *s2, ptrdiff_t stride);
+
av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
AVCodecContext *avctx,
unsigned high_bit_depth)
@@ -40,4 +49,13 @@ av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
c->get_pixels = ff_get_pixels_armv6;
c->diff_pixels = ff_diff_pixels_armv6;
}
+
+ if (have_neon(cpu_flags)) {
+ if (!high_bit_depth) {
+ c->get_pixels_unaligned = ff_get_pixels_unaligned_neon;
+ c->get_pixels = ff_get_pixels_neon;
+ }
+ c->diff_pixels_unaligned = ff_diff_pixels_unaligned_neon;
+ c->diff_pixels = ff_diff_pixels_neon;
+ }
}
diff --git a/libavcodec/arm/pixblockdsp_neon.S b/libavcodec/arm/pixblockdsp_neon.S
new file mode 100644
index 0000000000..25674586ea
--- /dev/null
+++ b/libavcodec/arm/pixblockdsp_neon.S
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+/*
+ * vld1_8: load one 8-byte row into a D register and step the source pointer.
+ *
+ *   dst     - destination D register
+ *   src     - core register holding the source address; post-incremented
+ *   incr    - core register whose value is added to src after the load
+ *   aligned - assemble-time flag: nonzero emits the load with a :64
+ *             alignment hint, so the address must then be 8-byte aligned;
+ *             zero emits the same load with no alignment hint
+ */
+.macro vld1_8 dst, src, incr, aligned
+.if \aligned
+ vld1.8 {\dst}, [\src, :64], \incr
+.else
+ vld1.8 {\dst}, [\src], \incr
+.endif
+.endm
+
+/*
+ * get_pixels: emit ff_get_pixels<suffix>_neon.
+ *
+ * The generated function reads 8 rows of 8 unsigned bytes and writes them
+ * as 64 consecutive 16-bit values, zero-extending each byte.
+ *
+ * Register use (argument order follows the C prototype
+ * (block, pixels, stride) under the ARM procedure call standard):
+ *   r0 - block:  destination, advanced by 32 bytes per loop iteration
+ *   r1 - pixels: source row pointer, advanced by stride after each row
+ *   r2 - stride: byte distance between source rows
+ *   r3 - number of rows still to process
+ *
+ * Macro arguments:
+ *   suffix  - text inserted into the symbol name
+ *   aligned - forwarded to vld1_8; nonzero requires every source row to be
+ *             8-byte aligned
+ *
+ * The store always carries a :128 hint, independent of "aligned", so block
+ * must be 16-byte aligned in both variants; only the source loads differ.
+ */
+.macro get_pixels suffix, aligned
+function ff_get_pixels\suffix\()_neon, export=1
+ /* 8 rows in total, two handled per loop iteration */
+ mov r3, #8
+1:
+ vld1_8 d0, r1, r2, \aligned
+ /* Sets the flags tested by bgt below; the NEON loads, widening moves and
+  * store in between do not modify the condition flags. */
+ subs r3, r3, #2
+ /* Second row goes to d2, not d1: q0 aliases d0/d1, so widening the first
+  * row into q0 overwrites d1. */
+ vld1_8 d2, r1, r2, \aligned
+ /* Zero-extend 8 x u8 to 8 x u16 (q0 = d0:d1, q1 = d2:d3) */
+ vmovl.u8 q0, d0
+ vmovl.u8 q1, d2
+ /* Write both widened rows (32 bytes) and advance r0 past them */
+ vst1.16 {q0, q1}, [r0, :128]!
+ /* Loop while rows remain */
+ bgt 1b
+
+ bx lr
+endfunc
+.endm
+
+/* ff_get_pixels_neon: source rows loaded with the :64 alignment hint */
+get_pixels , aligned=1
+/* ff_get_pixels_unaligned_neon: source rows loaded without alignment hint */
+get_pixels _unaligned, aligned=0
+
+/*
+ * diff_pixels: emit ff_diff_pixels<suffix>_neon.
+ *
+ * The generated function reads 8 rows of 8 unsigned bytes from each of two
+ * sources and writes the 64 per-byte differences (s1 - s2), widened to
+ * 16 bits, to consecutive locations in block.
+ *
+ * Register use (argument order follows the C prototype
+ * (block, s1, s2, stride) under the ARM procedure call standard):
+ *   r0  - block:  destination, advanced by 32 bytes per loop iteration
+ *   r1  - s1:     first source row pointer, advanced by stride per row
+ *   r2  - s2:     second source row pointer, advanced by stride per row
+ *   r3  - stride: byte distance between rows, shared by both sources
+ *   r12 - number of rows still to process (r0-r3 all hold arguments)
+ *
+ * Macro arguments:
+ *   suffix  - text inserted into the symbol name
+ *   aligned - forwarded to vld1_8 (defaults to 0); nonzero requires every
+ *             row of both sources to be 8-byte aligned
+ *
+ * NOTE(review): unlike the store in get_pixels, the store here carries no
+ * :128 alignment hint. If block is guaranteed to be 16-byte aligned the
+ * hint could be added for consistency - confirm against the declaration of
+ * diff_pixels in pixblockdsp.h before changing it.
+ */
+.macro diff_pixels suffix, aligned=0
+function ff_diff_pixels\suffix\()_neon, export=1
+ /* 8 rows in total, two handled per loop iteration */
+ mov r12, #8
+1:
+ /* First row: d0 from s1, d1 from s2 */
+ vld1_8 d0, r1, r3, \aligned
+ vld1_8 d1, r2, r3, \aligned
+ /* Sets the flags tested by bgt below; the NEON instructions in between
+  * do not modify the condition flags. */
+ subs r12, r12, #2
+ /* Second row: d2 from s1 (loaded before q0 = d0:d1 is overwritten) */
+ vld1_8 d2, r1, r3, \aligned
+ /* q0 = d0 - d1, each lane widened from 8 to 16 bits */
+ vsubl.u8 q0, d0, d1
+ /* Second row: d3 from s2 */
+ vld1_8 d3, r2, r3, \aligned
+ /* q1 = d2 - d3, each lane widened from 8 to 16 bits */
+ vsubl.u8 q1, d2, d3
+ /* Write both difference rows (32 bytes) and advance r0 past them */
+ vst1.16 {q0, q1}, [r0]!
+ /* Loop while rows remain */
+ bgt 1b
+
+ bx lr
+endfunc
+.endm
+
+/* ff_diff_pixels_neon: source rows loaded with the :64 alignment hint */
+diff_pixels , aligned=1
+/* ff_diff_pixels_unaligned_neon: source rows loaded without alignment hint */
+diff_pixels _unaligned, aligned=0
--
2.17.1
More information about the ffmpeg-devel
mailing list