[FFmpeg-cvslog] lavc/aarch64: Move non-neon vp9 copy functions out of neon source file.
Carl Eugen Hoyos
git at videolan.org
Wed Mar 11 15:17:04 EET 2020
ffmpeg | branch: master | Carl Eugen Hoyos <ceffmpeg at gmail.com> | Wed Mar 11 13:01:02 2020 +0100| [9a217549043ff25a37973555f71122f4725ba54e] | committer: Carl Eugen Hoyos
lavc/aarch64: Move non-neon vp9 copy functions out of neon source file.
Fixes part of ticket #8565.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9a217549043ff25a37973555f71122f4725ba54e
---
libavcodec/aarch64/Makefile | 1 +
libavcodec/aarch64/vp9mc_16bpp_neon.S | 25 -----------
libavcodec/aarch64/vp9mc_aarch64.S | 81 +++++++++++++++++++++++++++++++++++
libavcodec/aarch64/vp9mc_neon.S | 30 -------------
4 files changed, 82 insertions(+), 55 deletions(-)
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 00f93bf59f..90e7210ee0 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -21,6 +21,7 @@ OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_init_aarch64.o
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9dsp_init_10bpp_aarch64.o \
aarch64/vp9dsp_init_12bpp_aarch64.o \
+ aarch64/vp9mc_aarch64.o \
aarch64/vp9dsp_init_aarch64.o
# ARMv8 optimizations
diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
index cac6428709..53b372c262 100644
--- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
+++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
@@ -25,31 +25,6 @@
// const uint8_t *ref, ptrdiff_t ref_stride,
// int h, int mx, int my);
-function ff_vp9_copy128_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- ldp x9, x10, [x2, #32]
- stp x7, x8, [x0, #16]
- subs w4, w4, #1
- ldp x11, x12, [x2, #48]
- stp x9, x10, [x0, #32]
- stp x11, x12, [x0, #48]
- ldp x5, x6, [x2, #64]
- ldp x7, x8, [x2, #80]
- stp x5, x6, [x0, #64]
- ldp x9, x10, [x2, #96]
- stp x7, x8, [x0, #80]
- ldp x11, x12, [x2, #112]
- stp x9, x10, [x0, #96]
- stp x11, x12, [x0, #112]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg64_16_neon, export=1
mov x5, x0
sub x1, x1, #64
diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
new file mode 100644
index 0000000000..f17a8cf04a
--- /dev/null
+++ b/libavcodec/aarch64/vp9mc_aarch64.S
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+// const uint8_t *ref, ptrdiff_t ref_stride,
+// int h, int mx, int my);
+
+function ff_vp9_copy128_aarch64, export=1 // copies 128 bytes per row for h rows using general-purpose registers only; per the vp9_mc_func signature and AAPCS64: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+1: // row loop; the body runs once before h is tested
+ ldp x5, x6, [x2] // load ref bytes 0-15 (mx/my arrive in w5/w6 but are never read, so x5/x6 are free as scratch)
+ ldp x7, x8, [x2, #16] // load ref bytes 16-31
+ stp x5, x6, [x0] // store dst bytes 0-15
+ ldp x9, x10, [x2, #32] // load ref bytes 32-47
+ stp x7, x8, [x0, #16] // store dst bytes 16-31
+ subs w4, w4, #1 // h -= 1 and set the flags that b.ne tests below; no instruction in between writes the flags
+ ldp x11, x12, [x2, #48] // load ref bytes 48-63
+ stp x9, x10, [x0, #32] // store dst bytes 32-47
+ stp x11, x12, [x0, #48] // store dst bytes 48-63
+ ldp x5, x6, [x2, #64] // x5-x12 are reused for the second 64 bytes of the row: load ref bytes 64-79
+ ldp x7, x8, [x2, #80] // load ref bytes 80-95
+ stp x5, x6, [x0, #64] // store dst bytes 64-79
+ ldp x9, x10, [x2, #96] // load ref bytes 96-111
+ stp x7, x8, [x0, #80] // store dst bytes 80-95
+ ldp x11, x12, [x2, #112] // load ref bytes 112-127
+ stp x9, x10, [x0, #96] // store dst bytes 96-111
+ stp x11, x12, [x0, #112] // store dst bytes 112-127
+ add x2, x2, x3 // ref += ref_stride (plain add, flags from subs are preserved)
+ add x0, x0, x1 // dst += dst_stride
+ b.ne 1b // next row while the decremented h is non-zero
+ ret // only x0, x2, w4 and scratch x5-x12 were modified; the stack is not touched
+endfunc
+
+function ff_vp9_copy64_aarch64, export=1 // copies 64 bytes per row for h rows using general-purpose registers only; per the vp9_mc_func signature and AAPCS64: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+1: // row loop; the body runs once before h is tested
+ ldp x5, x6, [x2] // load ref bytes 0-15 (mx/my arrive in w5/w6 but are never read, so x5/x6 are free as scratch)
+ ldp x7, x8, [x2, #16] // load ref bytes 16-31
+ stp x5, x6, [x0] // store dst bytes 0-15
+ ldp x9, x10, [x2, #32] // load ref bytes 32-47
+ stp x7, x8, [x0, #16] // store dst bytes 16-31
+ subs w4, w4, #1 // h -= 1 and set the flags that b.ne tests below; no instruction in between writes the flags
+ ldp x11, x12, [x2, #48] // load ref bytes 48-63
+ stp x9, x10, [x0, #32] // store dst bytes 32-47
+ stp x11, x12, [x0, #48] // store dst bytes 48-63
+ add x2, x2, x3 // ref += ref_stride (plain add, flags from subs are preserved)
+ add x0, x0, x1 // dst += dst_stride
+ b.ne 1b // next row while the decremented h is non-zero
+ ret // only x0, x2, w4 and scratch x5-x12 were modified; the stack is not touched
+endfunc
+
+function ff_vp9_copy32_aarch64, export=1 // copies 32 bytes per row for h rows using general-purpose registers only; per the vp9_mc_func signature and AAPCS64: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+1: // row loop; the body runs once before h is tested
+ ldp x5, x6, [x2] // load ref bytes 0-15 (mx/my arrive in w5/w6 but are never read, so x5/x6 are free as scratch)
+ ldp x7, x8, [x2, #16] // load ref bytes 16-31
+ stp x5, x6, [x0] // store dst bytes 0-15
+ subs w4, w4, #1 // h -= 1 and set the flags that b.ne tests below; no instruction in between writes the flags
+ stp x7, x8, [x0, #16] // store dst bytes 16-31
+ add x2, x2, x3 // ref += ref_stride (plain add, flags from subs are preserved)
+ add x0, x0, x1 // dst += dst_stride
+ b.ne 1b // next row while the decremented h is non-zero
+ ret // only x0, x2, w4 and scratch x5-x8 were modified; the stack is not touched
+endfunc
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
index f67624ca04..abf2bae9db 100644
--- a/libavcodec/aarch64/vp9mc_neon.S
+++ b/libavcodec/aarch64/vp9mc_neon.S
@@ -25,23 +25,6 @@
// const uint8_t *ref, ptrdiff_t ref_stride,
// int h, int mx, int my);
-function ff_vp9_copy64_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- ldp x9, x10, [x2, #32]
- stp x7, x8, [x0, #16]
- subs w4, w4, #1
- ldp x11, x12, [x2, #48]
- stp x9, x10, [x0, #32]
- stp x11, x12, [x0, #48]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg64_neon, export=1
mov x5, x0
1:
@@ -64,19 +47,6 @@ function ff_vp9_avg64_neon, export=1
ret
endfunc
-function ff_vp9_copy32_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- subs w4, w4, #1
- stp x7, x8, [x0, #16]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg32_neon, export=1
1:
ld1 {v2.16b, v3.16b}, [x2], x3
More information about the ffmpeg-cvslog
mailing list