[FFmpeg-cvslog] lavu/tx: make slow ISA extension penalties smarter
Lynne
git at videolan.org
Sat May 21 03:14:00 EEST 2022
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat May 21 00:04:11 2022 +0200| [805e8d1921438e36cb0c6f92fa42a93298df4053] | committer: Lynne
lavu/tx: make slow ISA extension penalties smarter
Instead of having a fixed -64 prio penalty, make the penalties
more granular.
As the prio is based on the register size in bits, decrementing
it by 129 ensures that AVX functions flagged as slow are avoided
in favor of any SSE version.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=805e8d1921438e36cb0c6f92fa42a93298df4053
---
libavutil/tx.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/libavutil/tx.c b/libavutil/tx.c
index c648d4a1cd..4cc3a98751 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -469,6 +469,15 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
+ static const int slow_penalties[][2] = {
+ { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
+ { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
+ { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
+ { AV_CPU_FLAG_ATOM, 1 + 128 },
+ { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
+ { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
+ };
+
/* Flags the transform wants */
uint64_t req_flags = flags;
@@ -535,8 +544,10 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* If the CPU has a SLOW flag, and the instruction is also flagged
* as being slow for such, reduce its priority */
- if ((cpu_flags & cd->cpu_flags) & slow_mask)
- cd_matches[nb_cd_matches].prio -= 64;
+ for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
+ if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
+ cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
+ }
/* Prioritize aligned-only codelets */
if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
More information about the ffmpeg-cvslog
mailing list