[FFmpeg-devel] [PATCH v2 1/5] configure: aarch64: Support assembling the dotprod and i8mm arch extensions
Martin Storsjö
martin at martin.st
Tue May 30 15:30:39 EEST 2023
The dotprod and i8mm extensions are available since ARMv8.4-a and
ARMv8.6-a respectively, but can also optionally be available since ARMv8.2-a.
Check if ".arch armv8.2-a" and ".arch_extension {dotprod,i8mm}" are
supported, and check if the instructions can be assembled.
Current clang versions fail to support the dotprod and i8mm
features in the .arch_extension directive, but do support them
if enabled with -march=armv8.4-a on the command line. (Curiously,
lowering the arch level with ".arch armv8.2-a" doesn't make the
extensions unavailable if they were enabled with -march; if that
changes, Clang should also learn to support these extensions via
.arch_extension for them to remain usable here.)
---
v2: Simplified the detection logic somewhat: check whether ".arch armv8.2-a"
and ".arch_extension {dotprod,i8mm}" are supported, then check whether
the instruction can be assembled. This way, we check exactly the same
thing as we are going to assemble in the end, so there shouldn't be
any risk of build breakage due to testing and building subtly different
things.
---
configure | 81 ++++++++++++++++++++++++++++++++++++++++-
libavutil/aarch64/asm.S | 11 ++++++
2 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index 495493aa0e..50eb27ba0e 100755
--- a/configure
+++ b/configure
@@ -454,6 +454,8 @@ Optimization options (experts only):
--disable-armv6t2 disable armv6t2 optimizations
--disable-vfp disable VFP optimizations
--disable-neon disable NEON optimizations
+ --disable-dotprod disable DOTPROD optimizations
+ --disable-i8mm disable I8MM optimizations
--disable-inline-asm disable use of inline assembly
--disable-x86asm disable use of standalone x86 assembly
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
@@ -1154,6 +1156,43 @@ check_insn(){
check_as ${1}_external "$2"
}
+check_arch_level(){
+ log check_arch_level "$@"
+ level="$1"
+ check_as tested_arch_level ".arch $level"
+ enabled tested_arch_level && as_arch_level="$level"
+}
+
+check_archext_insn(){
+ log check_archext_insn "$@"
+ feature="$1"
+ instr="$2"
+ # Check if the assembly is accepted in inline assembly.
+ check_inline_asm ${feature}_inline "\"$instr\""
+ # We don't check if the instruction is supported out of the box by the
+ # external assembler (we don't try to set ${feature}_external) as we don't
+ # need to use these instructions in non-runtime detected codepaths.
+
+ disable $feature
+
+ enabled as_arch_directive && arch_directive=".arch $as_arch_level" || arch_directive=""
+
+ # Test if the assembler supports the .arch_extension $feature directive.
+ arch_extension_directive=".arch_extension $feature"
+ test_as <<EOF && enable as_archext_${feature}_directive || arch_extension_directive=""
+$arch_directive
+$arch_extension_directive
+EOF
+
+ # Test if we can assemble the instruction after potential .arch and
+ # .arch_extension directives.
+ test_as <<EOF && enable ${feature}
+$arch_directive
+$arch_extension_directive
+$instr
+EOF
+}
+
check_x86asm(){
log check_x86asm "$@"
name=$1
@@ -2059,6 +2098,8 @@ ARCH_EXT_LIST_ARM="
armv6
armv6t2
armv8
+ dotprod
+ i8mm
neon
vfp
vfpv3
@@ -2322,6 +2363,8 @@ SYSTEM_LIBRARIES="
TOOLCHAIN_FEATURES="
as_arch_directive
+ as_archext_dotprod_directive
+ as_archext_i8mm_directive
as_dn_directive
as_fpu_directive
as_func
@@ -2622,6 +2665,8 @@ intrinsics_neon_deps="neon"
vfp_deps_any="aarch64 arm"
vfpv3_deps="vfp"
setend_deps="arm"
+dotprod_deps="aarch64 neon"
+i8mm_deps="aarch64 neon"
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
@@ -5988,12 +6033,27 @@ check_inline_asm inline_asm_labels '"1:\n"'
check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
if enabled aarch64; then
+ as_arch_level="armv8-a"
+ check_as as_arch_directive ".arch $as_arch_level"
+ enabled as_arch_directive && check_arch_level armv8.2-a
+
enabled armv8 && check_insn armv8 'prfm pldl1strm, [x0]'
# internal assembler in clang 3.3 does not support this instruction
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
enabled vfp && check_insn vfp 'fmadd d0, d0, d1, d2'
- map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
+ archext_list="dotprod i8mm"
+ enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
+ enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
+
+ # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
+ # assembly support the feature out of the box. Skip this for the features
+ # checked with check_archext_insn above, as that function takes care of
+ # updating all the variables as necessary.
+ for v in $ARCH_EXT_LIST_ARM; do
+ is_in $v $archext_list && continue
+ enabled_any ${v}_external ${v}_inline || disable $v
+ done
elif enabled alpha; then
@@ -6022,6 +6082,12 @@ EOF
warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
fi
+ # Test for various instruction sets, testing support both in inline and
+ # external assembly. This sets the ${v}_inline or ${v}_external flags
+ # if the instruction can be used unconditionally in either inline or
+ # external assembly. This means that if the ${v}_external feature is set,
+ # that feature can be used unconditionally in various support macros
+ # anywhere in external assembly, in any function.
enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
enabled armv6 && check_insn armv6 'sadd16 r0, r0, r0'
enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
@@ -6030,6 +6096,14 @@ EOF
enabled vfpv3 && check_insn vfpv3 'vmov.f32 s0, #1.0'
enabled setend && check_insn setend 'setend be'
+ # If neither inline nor external assembly can use the feature by default,
+ # disable the main unsuffixed feature (e.g. HAVE_NEON).
+ #
+ # For targets that support runtime CPU feature detection, don't disable
+ # the main feature flag - there we assume that all supported toolchains
+ # can assemble code for all instruction set features (e.g. NEON) with
+ # suitable assembly flags (such as ".fpu neon"); we don't check
+ # specifically that they really do.
[ $target_os = linux ] || [ $target_os = android ] ||
map 'enabled_any ${v}_external ${v}_inline || disable $v' \
$ARCH_EXT_LIST_ARM
@@ -7610,6 +7684,8 @@ fi
if enabled aarch64; then
echo "NEON enabled ${neon-no}"
echo "VFP enabled ${vfp-no}"
+ echo "DOTPROD enabled ${dotprod-no}"
+ echo "I8MM enabled ${i8mm-no}"
fi
if enabled arm; then
echo "ARMv5TE enabled ${armv5te-no}"
@@ -7900,6 +7976,9 @@ test -n "$assert_level" &&
test -n "$malloc_prefix" &&
echo "#define MALLOC_PREFIX $malloc_prefix" >>$TMPH
+enabled aarch64 &&
+ echo "#define AS_ARCH_LEVEL $as_arch_level" >>$TMPH
+
if enabled x86asm; then
append config_files $TMPASM
cat > $TMPASM <<EOF
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index a7782415d7..8589cf74fc 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,6 +36,17 @@
# define __has_feature(x) 0
#endif
+#if HAVE_AS_ARCH_DIRECTIVE
+ .arch AS_ARCH_LEVEL
+#endif
+
+#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
+ .arch_extension dotprod
+#endif
+#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
+ .arch_extension i8mm
+#endif
+
/* Support macros for
* - Armv8.3-A Pointer Authentication and
--
2.37.1 (Apple Git-137.1)
More information about the ffmpeg-devel
mailing list