[FFmpeg-devel] [PATCH] avcodec: split off synth_filter

James Almer jamrial at gmail.com
Sun Jan 24 21:53:19 CET 2016


Signed-off-by: James Almer <jamrial at gmail.com>
---
This should make replacing the current dca decoder for foo86's easier, without
having to disable/reenable the compilation of synth_filter files in between
removal/addition commits.

aarch64 and arm changes untested.

 configure                                          |   3 +-
 libavcodec/Makefile                                |   3 +-
 libavcodec/aarch64/Makefile                        |   5 +-
 libavcodec/aarch64/dcadsp_init.c                   |  21 --
 .../aarch64/{dcadsp_init.c => synth_filter_init.c} |  16 +-
 libavcodec/arm/Makefile                            |   9 +-
 libavcodec/arm/dcadsp_init_arm.c                   |  22 --
 .../dcadsp_init.c => arm/synth_filter_init_arm.c}  |  33 +--
 libavcodec/x86/Makefile                            |   2 +
 libavcodec/x86/dcadsp.asm                          | 222 ---------------------
 libavcodec/x86/dcadsp_init.c                       |  51 -----
 libavcodec/x86/{dcadsp.asm => synth_filter.asm}    |  99 ---------
 .../x86/{dcadsp_init.c => synth_filter_init.c}     |  21 +-
 13 files changed, 26 insertions(+), 481 deletions(-)
 copy libavcodec/aarch64/{dcadsp_init.c => synth_filter_init.c} (78%)
 copy libavcodec/{aarch64/dcadsp_init.c => arm/synth_filter_init_arm.c} (66%)
 copy libavcodec/x86/{dcadsp.asm => synth_filter.asm} (76%)
 copy libavcodec/x86/{dcadsp_init.c => synth_filter_init.c} (83%)

diff --git a/configure b/configure
index 8f4642b..1719a5b 100755
--- a/configure
+++ b/configure
@@ -2039,6 +2039,7 @@ CONFIG_EXTRA="
     sinewin
     snappy
     startcode
+    synth_filter
     texturedsp
     texturedspenc
     tpeldsp
@@ -2276,7 +2277,7 @@ comfortnoise_encoder_select="lpc"
 cook_decoder_select="audiodsp mdct sinewin"
 cscd_decoder_select="lzo"
 cscd_decoder_suggest="zlib"
-dca_decoder_select="fmtconvert mdct"
+dca_decoder_select="fmtconvert mdct synth_filter"
 dds_decoder_select="texturedsp"
 dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc"
 dnxhd_decoder_select="blockdsp idctdsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b9ffdb9..3a6a453 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -115,6 +115,7 @@ OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
 OBJS-$(CONFIG_SNAPPY)                  += snappy.o
 OBJS-$(CONFIG_STARTCODE)               += startcode.o
+OBJS-$(CONFIG_SYNTH_FILTER)            += synth_filter.o
 OBJS-$(CONFIG_TEXTUREDSP)              += texturedsp.o
 OBJS-$(CONFIG_TEXTUREDSPENC)           += texturedspenc.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
@@ -223,7 +224,7 @@ OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
 OBJS-$(CONFIG_DCA_DECODER)             += dcadec.o dca.o dcadsp.o      \
                                           dcadata.o dca_exss.o         \
-                                          dca_xll.o synth_filter.o
+                                          dca_xll.o
 OBJS-$(CONFIG_DCA_ENCODER)             += dcaenc.o dca.o dcadata.o
 OBJS-$(CONFIG_DDS_DECODER)             += dds.o
 OBJS-$(CONFIG_DIRAC_DECODER)           += diracdec.o dirac.o diracdsp.o \
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 022ed84..2df6325 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -10,6 +10,7 @@ OBJS-$(CONFIG_IMDCT15)                  += aarch64/imdct15_init.o
 OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
 OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o
+OBJS-$(CONFIG_SYNTH_FILTER)             += aarch64/synth_filter_init.o
 
 OBJS-$(CONFIG_RV40_DECODER)             += aarch64/rv40dsp_init_aarch64.o
 OBJS-$(CONFIG_VC1_DECODER)              += aarch64/vc1dsp_init_aarch64.o
@@ -17,8 +18,7 @@ OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
 
 ARMV8-OBJS-$(CONFIG_VIDEODSP)           += aarch64/videodsp.o
 
-NEON-OBJS-$(CONFIG_DCA_DECODER)         += aarch64/dcadsp_neon.o               \
-                                           aarch64/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER)         += aarch64/dcadsp_neon.o
 NEON-OBJS-$(CONFIG_FFT)                 += aarch64/fft_neon.o
 NEON-OBJS-$(CONFIG_FMTCONVERT)          += aarch64/fmtconvert_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
@@ -31,5 +31,6 @@ NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
 NEON-OBJS-$(CONFIG_IMDCT15)             += aarch64/imdct15_neon.o
 NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
 NEON-OBJS-$(CONFIG_MDCT)                += aarch64/mdct_neon.o
+NEON-OBJS-$(CONFIG_SYNTH_FILTER)        += aarch64/synth_filter_neon.o
 
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)      += aarch64/vorbisdsp_neon.o
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c
index 78642a5..4440e4b 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -24,23 +24,10 @@
 #include "libavutil/attributes.h"
 #include "libavutil/internal.h"
 #include "libavcodec/dcadsp.h"
-#include "libavcodec/fft.h"
-
-#include "asm-offsets.h"
-
-#if HAVE_NEON || HAVE_VFP
-AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
-#endif
 
 void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
 
-void ff_synth_filter_float_neon(FFTContext *imdct,
-                                float *synth_buf_ptr, int *synth_buf_offset,
-                                float synth_buf2[32], const float window[512],
-                                float out[32], const float in[32],
-                                float scale);
-
 av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -50,11 +37,3 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
         s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
     }
 }
-
-av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_neon(cpu_flags))
-        s->synth_filter_float = ff_synth_filter_float_neon;
-}
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/synth_filter_init.c
similarity index 78%
copy from libavcodec/aarch64/dcadsp_init.c
copy to libavcodec/aarch64/synth_filter_init.c
index 78642a5..df51ae8 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/synth_filter_init.c
@@ -23,8 +23,7 @@
 #include "libavutil/aarch64/cpu.h"
 #include "libavutil/attributes.h"
 #include "libavutil/internal.h"
-#include "libavcodec/dcadsp.h"
-#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
 
 #include "asm-offsets.h"
 
@@ -32,25 +31,12 @@
 AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
 #endif
 
-void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
-
 void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float *synth_buf_ptr, int *synth_buf_offset,
                                 float synth_buf2[32], const float window[512],
                                 float out[32], const float in[32],
                                 float scale);
 
-av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_neon(cpu_flags)) {
-        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
-        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
-    }
-}
-
 av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index cdd35b0..f18e800 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -29,6 +29,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC)            += arm/mpegvideoencdsp_init_arm.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += arm/pixblockdsp_init_arm.o
 OBJS-$(CONFIG_RV34DSP)                 += arm/rv34dsp_init_arm.o
+OBJS-$(CONFIG_SYNTH_FILTER)            += arm/synth_filter_init_arm.o
 OBJS-$(CONFIG_VIDEODSP)                += arm/videodsp_init_arm.o
 OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
 OBJS-$(CONFIG_VP8DSP)                  += arm/vp8dsp_init_arm.o
@@ -85,10 +86,10 @@ ARMV6-OBJS-$(CONFIG_STARTCODE)         += arm/startcode_armv6.o
 VFP-OBJS-$(CONFIG_FFT)                 += arm/fft_vfp.o
 VFP-OBJS-$(CONFIG_FMTCONVERT)          += arm/fmtconvert_vfp.o
 VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
+VFP-OBJS-$(CONFIG_SYNTH_FILTER)        += arm/synth_filter_vfp.o
 
 # decoders/encoders
-VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
-                                          arm/synth_filter_vfp.o
+VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o
 
 
 # NEON optimizations
@@ -119,6 +120,7 @@ NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
                                           arm/mdct_fixed_neon.o
 NEON-OBJS-$(CONFIG_MPEGVIDEO)          += arm/mpegvideo_neon.o
 NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o
+NEON-OBJS-$(CONFIG_SYNTH_FILTER)       += arm/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_VP3DSP)             += arm/vp3dsp_neon.o
 NEON-OBJS-$(CONFIG_VP8DSP)             += arm/vp8dsp_init_neon.o        \
                                           arm/vp8dsp_neon.o
@@ -127,8 +129,7 @@ NEON-OBJS-$(CONFIG_VP8DSP)             += arm/vp8dsp_init_neon.o        \
 NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
                                           arm/sbrdsp_neon.o
 NEON-OBJS-$(CONFIG_LLAUDDSP)           += arm/lossless_audiodsp_neon.o
-NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
-                                          arm/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o
 NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
                                           arm/hevcdsp_deblock_neon.o    \
                                           arm/hevcdsp_idct_neon.o       \
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 0f2e4c4..febb444 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -37,18 +37,6 @@ void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
                                 const float window[512], float *samples_out,
                                 float raXin[32], float scale);
 
-void ff_synth_filter_float_vfp(FFTContext *imdct,
-                               float *synth_buf_ptr, int *synth_buf_offset,
-                               float synth_buf2[32], const float window[512],
-                               float out[32], const float in[32],
-                               float scale);
-
-void ff_synth_filter_float_neon(FFTContext *imdct,
-                                float *synth_buf_ptr, int *synth_buf_offset,
-                                float synth_buf2[32], const float window[512],
-                                float out[32], const float in[32],
-                                float scale);
-
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -63,13 +51,3 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
         s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
     }
 }
-
-av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_vfp_vm(cpu_flags))
-        s->synth_filter_float = ff_synth_filter_float_vfp;
-    if (have_neon(cpu_flags))
-        s->synth_filter_float = ff_synth_filter_float_neon;
-}
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/arm/synth_filter_init_arm.c
similarity index 66%
copy from libavcodec/aarch64/dcadsp_init.c
copy to libavcodec/arm/synth_filter_init_arm.c
index 78642a5..3dae5b9 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/arm/synth_filter_init_arm.c
@@ -20,20 +20,15 @@
 
 #include "config.h"
 
-#include "libavutil/aarch64/cpu.h"
+#include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
-#include "libavutil/internal.h"
-#include "libavcodec/dcadsp.h"
-#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
 
-#include "asm-offsets.h"
-
-#if HAVE_NEON || HAVE_VFP
-AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
-#endif
-
-void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
+void ff_synth_filter_float_vfp(FFTContext *imdct,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float synth_buf2[32], const float window[512],
+                               float out[32], const float in[32],
+                               float scale);
 
 void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float *synth_buf_ptr, int *synth_buf_offset,
@@ -41,20 +36,12 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float out[32], const float in[32],
                                 float scale);
 
-av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_neon(cpu_flags)) {
-        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
-        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
-    }
-}
-
-av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (have_vfp_vm(cpu_flags))
+        s->synth_filter_float = ff_synth_filter_float_vfp;
     if (have_neon(cpu_flags))
         s->synth_filter_float = ff_synth_filter_float_neon;
 }
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 0d09fe6..90ff29d 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o           \
 OBJS-$(CONFIG_PIXBLOCKDSP)             += x86/pixblockdsp_init.o
 OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
 OBJS-$(CONFIG_RV34DSP)                 += x86/rv34dsp_init.o
+OBJS-$(CONFIG_SYNTH_FILTER)            += x86/synth_filter_init.o
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
 OBJS-$(CONFIG_VP8DSP)                  += x86/vp8dsp_init.o
@@ -119,6 +120,7 @@ YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
                                           x86/fpel.o                    \
                                           x86/qpel.o
 YASM-OBJS-$(CONFIG_RV34DSP)            += x86/rv34dsp.o
+YASM-OBJS-$(CONFIG_SYNTH_FILTER)       += x86/synth_filter.o
 YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/simple_idct10.o
 YASM-OBJS-$(CONFIG_VIDEODSP)           += x86/videodsp.o
 YASM-OBJS-$(CONFIG_VP3DSP)             += x86/vp3dsp.o
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 502b70a..55e73bc 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -121,225 +121,3 @@ DCA_LFE_FIR 1
 INIT_XMM fma3
 DCA_LFE_FIR 0
 %endif
-
-%macro SETZERO 1
-%if cpuflag(sse2) && notcpuflag(avx)
-    pxor          %1, %1
-%else
-    xorps         %1, %1, %1
-%endif
-%endmacro
-
-%macro SHUF 3
-%if cpuflag(avx)
-    mova          %3, [%2 - 16]
-    vperm2f128    %1, %3, %3, 1
-    vshufps       %1, %1, %1, q0123
-%elif cpuflag(sse2)
-    pshufd        %1, [%2], q0123
-%else
-    mova          %1, [%2]
-    shufps        %1, %1, q0123
-%endif
-%endmacro
-
-%macro INNER_LOOP   1
-    ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
-    ;~ a += window[i + j]      * (-synth_buf[15 - i + j])
-    ;~ b += window[i + j + 16] * (synth_buf[i + j])
-    SHUF          m5,  ptr2 + j + (15 - 3) * 4, m6
-    mova          m6, [ptr1 + j]
-%if ARCH_X86_64
-    SHUF         m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
-    mova         m12, [ptr1 + j + mmsize]
-%endif
-%if cpuflag(fma3)
-    fmaddps       m2, m6,  [win + %1 + j + 16 * 4], m2
-    fnmaddps      m1, m5,  [win + %1 + j], m1
-%if ARCH_X86_64
-    fmaddps       m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
-    fnmaddps      m7, m11, [win + %1 + j + mmsize], m7
-%endif
-%else ; non-FMA
-    mulps         m6, m6,  [win + %1 + j + 16 * 4]
-    mulps         m5, m5,  [win + %1 + j]
-%if ARCH_X86_64
-    mulps        m12, m12, [win + %1 + j + mmsize + 16 * 4]
-    mulps        m11, m11, [win + %1 + j + mmsize]
-%endif
-    addps         m2, m2, m6
-    subps         m1, m1, m5
-%if ARCH_X86_64
-    addps         m8, m8, m12
-    subps         m7, m7, m11
-%endif
-%endif ; cpuflag(fma3)
-    ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
-    ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-    SHUF          m6,  ptr2 + j + (31 - 3) * 4, m5
-    mova          m5, [ptr1 + j + 16 * 4]
-%if ARCH_X86_64
-    SHUF         m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
-    mova         m11, [ptr1 + j + mmsize + 16 * 4]
-%endif
-%if cpuflag(fma3)
-    fmaddps       m3, m5,  [win + %1 + j + 32 * 4], m3
-    fmaddps       m4, m6,  [win + %1 + j + 48 * 4], m4
-%if ARCH_X86_64
-    fmaddps       m9, m11, [win + %1 + j + mmsize + 32 * 4], m9
-    fmaddps      m10, m12, [win + %1 + j + mmsize + 48 * 4], m10
-%endif
-%else ; non-FMA
-    mulps         m5, m5,  [win + %1 + j + 32 * 4]
-    mulps         m6, m6,  [win + %1 + j + 48 * 4]
-%if ARCH_X86_64
-    mulps        m11, m11, [win + %1 + j + mmsize + 32 * 4]
-    mulps        m12, m12, [win + %1 + j + mmsize + 48 * 4]
-%endif
-    addps         m3, m3, m5
-    addps         m4, m4, m6
-%if ARCH_X86_64
-    addps         m9, m9, m11
-    addps        m10, m10, m12
-%endif
-%endif ; cpuflag(fma3)
-    sub            j, 64 * 4
-%endmacro
-
-; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
-;                                  const float window[512], float out[32],
-;                                  intptr_t offset, float scale)
-%macro SYNTH_FILTER 0
-cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
-                              synth_buf, synth_buf2, window, out, off, scale
-%define scale m0
-%if ARCH_X86_32 || WIN64
-%if cpuflag(sse2) && notcpuflag(avx)
-    movd       scale, scalem
-    SPLATD        m0
-%else
-    VBROADCASTSS  m0, scalem
-%endif
-; Make sure offset is in a register and not on the stack
-%define OFFQ  r4q
-%else
-    SPLATD      xmm0
-%if cpuflag(avx)
-    vinsertf128   m0, m0, xmm0, 1
-%endif
-%define OFFQ  offq
-%endif
-    ; prepare inner counter limit 1
-    mov          r5q, 480
-    sub          r5q, offmp
-    and          r5q, -64
-    shl          r5q, 2
-%if ARCH_X86_32 || notcpuflag(avx)
-    mov         OFFQ, r5q
-%define i        r5q
-    mov            i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize  ; main loop counter
-%else
-%define i 0
-%define OFFQ  r5q
-%endif
-
-%define buf2     synth_buf2q
-%if ARCH_X86_32
-    mov         buf2, synth_buf2mp
-%endif
-.mainloop:
-    ; m1 = a  m2 = b  m3 = c  m4 = d
-    SETZERO       m3
-    SETZERO       m4
-    mova          m1, [buf2 + i]
-    mova          m2, [buf2 + i + 16 * 4]
-%if ARCH_X86_32
-%define ptr1     r0q
-%define ptr2     r1q
-%define win      r2q
-%define j        r3q
-    mov          win, windowm
-    mov         ptr1, synth_bufm
-%if ARCH_X86_32 || notcpuflag(avx)
-    add          win, i
-    add         ptr1, i
-%endif
-%else ; ARCH_X86_64
-%define ptr1     r6q
-%define ptr2     r7q ; must be loaded
-%define win      r8q
-%define j        r9q
-    SETZERO       m9
-    SETZERO      m10
-    mova          m7, [buf2 + i + mmsize]
-    mova          m8, [buf2 + i + mmsize + 16 * 4]
-    lea          win, [windowq + i]
-    lea         ptr1, [synth_bufq + i]
-%endif
-    mov         ptr2, synth_bufmp
-    ; prepare the inner loop counter
-    mov            j, OFFQ
-%if ARCH_X86_32 || notcpuflag(avx)
-    sub         ptr2, i
-%endif
-.loop1:
-    INNER_LOOP  0
-    jge       .loop1
-
-    mov            j, 448 * 4
-    sub            j, OFFQ
-    jz          .end
-    sub         ptr1, j
-    sub         ptr2, j
-    add          win, OFFQ ; now at j-64, so define OFFSET
-    sub            j, 64 * 4
-.loop2:
-    INNER_LOOP  64 * 4
-    jge       .loop2
-
-.end:
-%if ARCH_X86_32
-    mov         buf2, synth_buf2m ; needed for next iteration anyway
-    mov         outq, outmp       ; j, which will be set again during it
-%endif
-    ;~ out[i]      = a * scale;
-    ;~ out[i + 16] = b * scale;
-    mulps         m1, m1, scale
-    mulps         m2, m2, scale
-%if ARCH_X86_64
-    mulps         m7, m7, scale
-    mulps         m8, m8, scale
-%endif
-    ;~ synth_buf2[i]      = c;
-    ;~ synth_buf2[i + 16] = d;
-    mova   [buf2 + i +  0 * 4], m3
-    mova   [buf2 + i + 16 * 4], m4
-%if ARCH_X86_64
-    mova   [buf2 + i +  0 * 4 + mmsize], m9
-    mova   [buf2 + i + 16 * 4 + mmsize], m10
-%endif
-    ;~ out[i]      = a;
-    ;~ out[i + 16] = a;
-    mova   [outq + i +  0 * 4], m1
-    mova   [outq + i + 16 * 4], m2
-%if ARCH_X86_64
-    mova   [outq + i +  0 * 4 + mmsize], m7
-    mova   [outq + i + 16 * 4 + mmsize], m8
-%endif
-%if ARCH_X86_32 || notcpuflag(avx)
-    sub            i, (ARCH_X86_64 + 1) * mmsize
-    jge    .mainloop
-%endif
-    RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_XMM sse
-SYNTH_FILTER
-%endif
-INIT_XMM sse2
-SYNTH_FILTER
-INIT_YMM avx
-SYNTH_FILTER
-INIT_YMM fma3
-SYNTH_FILTER
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 1321dda..c27c045 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -40,54 +40,3 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
         s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
     }
 }
-
-
-#define SYNTH_FILTER_FUNC(opt)                                                 \
-void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
-                                 const float window[512],                      \
-                                 float out[32], intptr_t offset, float scale); \
-static void synth_filter_##opt(FFTContext *imdct,                              \
-                               float *synth_buf_ptr, int *synth_buf_offset,    \
-                               float synth_buf2[32], const float window[512],  \
-                               float out[32], const float in[32], float scale) \
-{                                                                              \
-    float *synth_buf= synth_buf_ptr + *synth_buf_offset;                       \
-                                                                               \
-    imdct->imdct_half(imdct, synth_buf, in);                                   \
-                                                                               \
-    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
-                                out, *synth_buf_offset, scale);                \
-                                                                               \
-    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
-}                                                                              \
-
-#if HAVE_YASM
-#if ARCH_X86_32
-SYNTH_FILTER_FUNC(sse)
-#endif
-SYNTH_FILTER_FUNC(sse2)
-SYNTH_FILTER_FUNC(avx)
-SYNTH_FILTER_FUNC(fma3)
-#endif /* HAVE_YASM */
-
-av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
-{
-#if HAVE_YASM
-    int cpu_flags = av_get_cpu_flags();
-
-#if ARCH_X86_32
-    if (EXTERNAL_SSE(cpu_flags)) {
-        s->synth_filter_float = synth_filter_sse;
-    }
-#endif
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        s->synth_filter_float = synth_filter_sse2;
-    }
-    if (EXTERNAL_AVX_FAST(cpu_flags)) {
-        s->synth_filter_float = synth_filter_avx;
-    }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
-        s->synth_filter_float = synth_filter_fma3;
-    }
-#endif /* HAVE_YASM */
-}
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/synth_filter.asm
similarity index 76%
copy from libavcodec/x86/dcadsp.asm
copy to libavcodec/x86/synth_filter.asm
index 502b70a..bc1a48f 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/synth_filter.asm
@@ -21,107 +21,8 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
-pf_inv16:  times 4 dd 0x3D800000 ; 1/16
-
 SECTION .text
 
-; %1=v0/v1  %2=in1  %3=in2
-%macro FIR_LOOP 2-3
-.loop%1:
-%define va          m1
-%define vb          m2
-%if %1
-%define OFFSET      0
-%else
-%define OFFSET      NUM_COEF*count
-%endif
-; for v0, incrementing and for v1, decrementing
-    mova        va, [cf0q + OFFSET]
-    mova        vb, [cf0q + OFFSET + 4*NUM_COEF]
-%if %0 == 3
-    mova        m4, [cf0q + OFFSET + mmsize]
-    mova        m0, [cf0q + OFFSET + 4*NUM_COEF + mmsize]
-%endif
-    mulps       va, %2
-    mulps       vb, %2
-%if %0 == 3
-%if cpuflag(fma3)
-    fmaddps     va, m4, %3, va
-    fmaddps     vb, m0, %3, vb
-%else
-    mulps       m4, %3
-    mulps       m0, %3
-    addps       va, m4
-    addps       vb, m0
-%endif
-%endif
-    ; va = va1 va2 va3 va4
-    ; vb = vb1 vb2 vb3 vb4
-%if %1
-    SWAP        va, vb
-%endif
-    mova        m4, va
-    unpcklps    va, vb ; va3 vb3 va4 vb4
-    unpckhps    m4, vb ; va1 vb1 va2 vb2
-    addps       m4, va ; va1+3 vb1+3 va2+4 vb2+4
-    movhlps     vb, m4 ; va1+3  vb1+3
-    addps       vb, m4 ; va0..4 vb0..4
-    movlps  [outq + count], vb
-%if %1
-    sub       cf0q, 8*NUM_COEF
-%endif
-    add      count, 8
-    jl   .loop%1
-%endmacro
-
-; void dca_lfe_fir(float *out, float *in, float *coefs)
-%macro DCA_LFE_FIR 1
-cglobal dca_lfe_fir%1, 3,3,6-%1, out, in, cf0
-%define IN1       m3
-%define IN2       m5
-%define count     inq
-%define NUM_COEF  4*(2-%1)
-%define NUM_OUT   32*(%1+1)
-
-    movu     IN1, [inq + 4 - 1*mmsize]
-    shufps   IN1, IN1, q0123
-%if %1 == 0
-    movu     IN2, [inq + 4 - 2*mmsize]
-    shufps   IN2, IN2, q0123
-%endif
-
-    mov    count, -4*NUM_OUT
-    add     cf0q, 4*NUM_COEF*NUM_OUT
-    add     outq, 4*NUM_OUT
-    ; compute v0 first
-%if %1 == 0
-    FIR_LOOP   0, IN1, IN2
-%else
-    FIR_LOOP   0, IN1
-%endif
-    shufps   IN1, IN1, q0123
-    mov    count, -4*NUM_OUT
-    ; cf1 already correctly positioned
-    add     outq, 4*NUM_OUT          ; outq now at out2
-    sub     cf0q, 8*NUM_COEF
-%if %1 == 0
-    shufps   IN2, IN2, q0123
-    FIR_LOOP   1, IN2, IN1
-%else
-    FIR_LOOP   1, IN1
-%endif
-    RET
-%endmacro
-
-INIT_XMM sse
-DCA_LFE_FIR 0
-DCA_LFE_FIR 1
-%if HAVE_FMA3_EXTERNAL
-INIT_XMM fma3
-DCA_LFE_FIR 0
-%endif
-
 %macro SETZERO 1
 %if cpuflag(sse2) && notcpuflag(avx)
     pxor          %1, %1
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/synth_filter_init.c
similarity index 83%
copy from libavcodec/x86/dcadsp_init.c
copy to libavcodec/x86/synth_filter_init.c
index 1321dda..0649ea2 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/synth_filter_init.c
@@ -21,26 +21,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
-#include "libavcodec/dcadsp.h"
-
-void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
-
-av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (EXTERNAL_SSE(cpu_flags)) {
-        s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
-        s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
-    }
-
-    if (EXTERNAL_FMA3(cpu_flags)) {
-        s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
-    }
-}
-
+#include "libavcodec/synth_filter.h"
 
 #define SYNTH_FILTER_FUNC(opt)                                                 \
 void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
-- 
2.7.0



More information about the ffmpeg-devel mailing list