[FFmpeg-cvslog] x86/tx_float: enable AVX-only split-radix FFT codelets

Lynne git at videolan.org
Sat Sep 24 05:18:39 EEST 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Sep 24 03:51:48 2022 +0200| [f21899db7dae114e4519c0d14dd047efe022e16b] | committer: Lynne

x86/tx_float: enable AVX-only split-radix FFT codelets

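Sandy Bridge, Ivy Bridge and Bulldozer cores support AVX but don't support FMA3,
so provide AVX-only variants of the split-radix FFT codelets alongside the
existing FMA3 and AVX2 ones.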

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f21899db7dae114e4519c0d14dd047efe022e16b
---

 libavutil/x86/tx_float.asm    | 2 ++
 libavutil/x86/tx_float_init.c | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 0fbab99e45..5ed0007530 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -1379,6 +1379,8 @@ cglobal fft_sr_ns_float, 4, 10, 16, 272, ctx, out, in, tmp, len, lut, itab, rtab
 %endmacro
 
 %if ARCH_X86_64
+FFT_SPLIT_RADIX_FN avx, 0
+FFT_SPLIT_RADIX_FN avx, 1
 FFT_SPLIT_RADIX_FN fma3, 0
 FFT_SPLIT_RADIX_FN fma3, 1
 %if HAVE_AVX2_EXTERNAL
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index 20c1ad6869..8e2babb539 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -38,6 +38,8 @@ TX_DECL_FN(fft32,     avx)
 TX_DECL_FN(fft32_ns,  avx)
 TX_DECL_FN(fft32,     fma3)
 TX_DECL_FN(fft32_ns,  fma3)
+TX_DECL_FN(fft_sr,    avx)
+TX_DECL_FN(fft_sr_ns, avx)
 TX_DECL_FN(fft_sr,    fma3)
 TX_DECL_FN(fft_sr_ns, fma3)
 TX_DECL_FN(fft_sr,    avx2)
@@ -57,6 +59,7 @@ TX_DECL_FN(fft16_asm, avx)
 TX_DECL_FN(fft16_asm, fma3)
 TX_DECL_FN(fft32_asm, avx)
 TX_DECL_FN(fft32_asm, fma3)
+TX_DECL_FN(fft_sr_asm, avx)
 TX_DECL_FN(fft_sr_asm, fma3)
 TX_DECL_FN(fft_sr_asm, avx2)
 
@@ -214,6 +217,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = {
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX,  0, AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
+           AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,  AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+           AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 288, b8_i2, fma3,  FMA3,  0, AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),



More information about the ffmpeg-cvslog mailing list