[FFmpeg-cvslog] x86/ac3dsp: add ff_float_to_fixed24_avx()
James Almer
git at videolan.org
Sun Nov 26 02:51:59 EET 2023
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Nov 22 16:25:54 2023 -0300| [e40ea9f34b196176d80f68c0925de7dc785a5df6] | committer: James Almer
x86/ac3dsp: add ff_float_to_fixed24_avx()
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e40ea9f34b196176d80f68c0925de7dc785a5df6
---
libavcodec/ac3dsp.h | 4 ++--
libavcodec/ac3enc_template.c | 4 ++--
libavcodec/x86/ac3dsp.asm | 24 ++++++++++++++++++++++++
libavcodec/x86/ac3dsp_init.c | 4 ++++
4 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index 9996ef19ec..ec2f598451 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -47,9 +47,9 @@ typedef struct AC3DSPContext {
* [-(1<<24),(1<<24)]
*
* @param dst destination array of int32_t.
- * constraints: 16-byte aligned
+ * constraints: 32-byte aligned
* @param src source array of float.
- * constraints: 16-byte aligned
+ * constraints: 32-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 32 greater than zero
*/
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index be4ecebc9c..ce9ef58a33 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -110,9 +110,9 @@ static void apply_mdct(AC3EncodeContext *s)
*/
static void apply_channel_coupling(AC3EncodeContext *s)
{
- LOCAL_ALIGNED_16(CoefType, cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+ LOCAL_ALIGNED_32(CoefType, cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
#if AC3ENC_FLOAT
- LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+ LOCAL_ALIGNED_32(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
#else
int32_t (*fixed_cpl_coords)[AC3_MAX_CHANNELS][16] = cpl_coords;
#endif
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 42c8310462..0ba980aa7b 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -128,6 +128,30 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
jl .loop
RET
+INIT_YMM avx
+cglobal float_to_fixed24, 3, 3, 5, dst, src, len
+ vbroadcastf128 m0, [pf_1_24]
+ shl lenq, 2
+ add srcq, lenq
+ add dstq, lenq
+ neg lenq
+.loop:
+ mulps m1, m0, [srcq+lenq+mmsize*0]
+ mulps m2, m0, [srcq+lenq+mmsize*1]
+ mulps m3, m0, [srcq+lenq+mmsize*2]
+ mulps m4, m0, [srcq+lenq+mmsize*3]
+ cvtps2dq m1, m1
+ cvtps2dq m2, m2
+ cvtps2dq m3, m3
+ cvtps2dq m4, m4
+ mova [dstq+lenq+mmsize*0], m1
+ mova [dstq+lenq+mmsize*1], m2
+ mova [dstq+lenq+mmsize*2], m3
+ mova [dstq+lenq+mmsize*3], m4
+ add lenq, mmsize*4
+ jl .loop
+ RET
+
;------------------------------------------------------------------------------
; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
;------------------------------------------------------------------------------
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 472d39fa5e..353cf38f86 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -27,6 +27,7 @@
void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, size_t len);
+void ff_float_to_fixed24_avx (int32_t *dst, const float *src, size_t len);
int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
@@ -48,6 +49,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
if (!(cpu_flags & AV_CPU_FLAG_ATOM))
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
}
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ c->float_to_fixed24 = ff_float_to_fixed24_avx;
+ }
}
#define DOWNMIX_FUNC_OPT(ch, opt) \
More information about the ffmpeg-cvslog mailing list