[FFmpeg-cvslog] x86/tx_float: add AV_CPU_FLAG_AVXSLOW/SLOW_GATHER flags where appropriate
Lynne
git at videolan.org
Sat May 21 03:13:58 EEST 2022
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat May 21 00:11:45 2022 +0200| [19c0bb2aa91443d716f12fa3d1e019711243689c] | committer: Lynne
x86/tx_float: add AV_CPU_FLAG_AVXSLOW/SLOW_GATHER flags where appropriate
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=19c0bb2aa91443d716f12fa3d1e019711243689c
---
libavutil/x86/tx_float_init.c | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index e60faf7fa6..108f9b4b04 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -71,24 +71,31 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = {
TX_DEF(fft4_inv, FFT, 4, 4, 2, 0, 128, NULL, sse2, SSE2, AV_TX_INPLACE | FF_TX_INVERSE_ONLY, 0),
TX_DEF(fft8, FFT, 8, 8, 2, 0, 128, b8_i0, sse3, SSE3, AV_TX_INPLACE, 0),
TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
- TX_DEF(fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX, AV_TX_INPLACE, 0),
- TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
- TX_DEF(fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, 0),
- TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
- TX_DEF(fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, 0),
- TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
+ TX_DEF(fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW),
#if ARCH_X86_64
- TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, 0),
- TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
- TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, 0),
- TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
+ TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW),
#if HAVE_AVX2_EXTERNAL
- TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX, 0, 0),
- TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0),
- TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, avx2, AVX2, 0, AV_CPU_FLAG_AVXSLOW),
- TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX, 0, AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
AV_CPU_FLAG_AVXSLOW),
+ TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, avx2, AVX2, 0,
+ AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
+ TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+ AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
#endif
#endif
More information about the ffmpeg-cvslog
mailing list