[FFmpeg-cvslog] lavu/x86/tx_float: fix FMA3 implying AVX2 is available
Lynne
git at videolan.org
Sat Apr 24 20:00:43 EEST 2021
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Apr 24 18:45:02 2021 +0200| [e448a4b4ea535aa2ec06f0aee167820df794a299] | committer: Lynne
lavu/x86/tx_float: fix FMA3 implying AVX2 is available
It's the other way around - AVX2 implies FMA3 is available.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e448a4b4ea535aa2ec06f0aee167820df794a299
---
libavutil/x86/tx_float.asm | 4 ++--
libavutil/x86/tx_float_init.c | 11 ++++++++---
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 58ec657116..def61d86c1 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -100,7 +100,7 @@ SECTION .text
; %6 - temporary register (for avx only)
; %7 - temporary register (for avx only, enables vgatherdpd (AVX2) if FMA3 is set)
%macro LOAD64_LUT 5-7
-%if %0 > 6 && cpuflag(fma3)
+%if %0 > 6 && cpuflag(avx2)
pcmpeqd %6, %6 ; pcmpeqq has a 0.5 throughput on Zen 3, this has 0.25
movapd xmm%7, [%3 + %4] ; float mov since vgatherdpd is a float instruction
vgatherdpd %1, [%2 + xmm%7*8], %6 ; must use separate registers for args
@@ -1208,5 +1208,5 @@ FFT_SPLIT_RADIX_DEF 131072
%if ARCH_X86_64
FFT_SPLIT_RADIX_FN avx
-FFT_SPLIT_RADIX_FN fma3
+FFT_SPLIT_RADIX_FN avx2
%endif
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index 993933317c..8b77a5f29f 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -32,7 +32,7 @@ void ff_fft32_float_avx (AVTXContext *s, void *out, void *in, ptrdiff_t stri
void ff_fft32_float_fma3 (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
void ff_split_radix_fft_float_avx (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
-void ff_split_radix_fft_float_fma3(AVTXContext *s, void *out, void *in, ptrdiff_t stride);
+void ff_split_radix_fft_float_avx2(AVTXContext *s, void *out, void *in, ptrdiff_t stride);
av_cold void ff_tx_init_float_x86(AVTXContext *s, av_tx_fn *tx)
{
@@ -87,10 +87,15 @@ av_cold void ff_tx_init_float_x86(AVTXContext *s, av_tx_fn *tx)
#if ARCH_X86_64
else if (s->m == 32)
TXFN(ff_fft32_float_fma3, 1, 8, 2);
- else if (s->m >= 64 && s->m <= 131072 && !(s->flags & AV_TX_INPLACE))
- TXFN(ff_split_radix_fft_float_fma3, 1, 8, 2);
#endif
}
+
+#if ARCH_X86_64
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ if (s->m >= 64 && s->m <= 131072 && !(s->flags & AV_TX_INPLACE))
+ TXFN(ff_split_radix_fft_float_avx2, 1, 8, 2);
+ }
+#endif
}
if (gen_revtab)
More information about the ffmpeg-cvslog mailing list