[FFmpeg-cvslog] lavc/g722dsp: optimise R-V V apply_qmf
Rémi Denis-Courmont
git at videolan.org
Thu Nov 23 19:05:09 EET 2023
ffmpeg | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Nov 19 14:49:28 2023 +0200| [b88d4058f95de7ebf8322358d2e72cbeaffec49e] | committer: Rémi Denis-Courmont
lavc/g722dsp: optimise R-V V apply_qmf
This stores the constant coefficients deinterleaved, so that they can be
loaded directly with NF=0, i.e. with two plain unit-stride loads instead of
a segmented load. Unfortunately, we cannot optimise loading the input in the
same way, due to insufficient memory alignment (not 32-bit).
Before:
g722_apply_qmf_c: 82.5
g722_apply_qmf_rvv_i32: 78.2
After:
g722_apply_qmf_c: 82.5
g722_apply_qmf_rvv_i32: 65.2
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b88d4058f95de7ebf8322358d2e72cbeaffec49e
---
libavcodec/riscv/g722dsp_rvv.S | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S
index 350be8dc1f..981d5cecd8 100644
--- a/libavcodec/riscv/g722dsp_rvv.S
+++ b/libavcodec/riscv/g722dsp_rvv.S
@@ -24,7 +24,9 @@ func ff_g722_apply_qmf_rvv, zve32x
lla t0, qmf_coeffs
vsetivli zero, 12, e16, m2, ta, ma
vlseg2e16.v v28, (a0)
- vlseg2e16.v v24, (t0)
+ addi t1, t0, 12 * 2
+ vle16.v v24, (t0)
+ vle16.v v26, (t1)
vwmul.vv v16, v28, v24
vwmul.vv v20, v30, v26
vsetivli zero, 12, e32, m4, ta, ma
@@ -41,26 +43,26 @@ endfunc
const qmf_coeffs, align=2
.short 3
.short -11
- .short -11
- .short 53
.short 12
- .short -156
.short 32
- .short 362
.short -210
- .short -805
.short 951
.short 3876
- .short 3876
- .short 951
.short -805
- .short -210
.short 362
- .short 32
.short -156
- .short 12
.short 53
.short -11
.short -11
+ .short 53
+ .short -156
+ .short 362
+ .short -805
+ .short 3876
+ .short 951
+ .short -210
+ .short 32
+ .short 12
+ .short -11
.short 3
endconst
More information about the ffmpeg-cvslog
mailing list