[FFmpeg-devel] [PATCH 2/2] x86: use the new helper macros (EXTERNAL_AVX2_FAST, EXTERNAL_FMA3_FAST) where useful

James Almer jamrial at gmail.com
Fri Feb 12 02:09:23 CET 2016


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/hevcdsp_init.c               | 11 ++++++++++-
 libavcodec/x86/huffyuvencdsp_mmx.c          |  2 +-
 libavcodec/x86/jpeg2000dsp_init.c           |  2 +-
 libavcodec/x86/mlpdsp_init.c                |  2 +-
 libavcodec/x86/synth_filter_init.c          |  2 +-
 libavcodec/x86/v210enc_init.c               |  2 +-
 libavcodec/x86/vp9dsp_init.c                |  2 +-
 libavcodec/x86/vp9dsp_init_16bpp.c          |  2 +-
 libavcodec/x86/vp9dsp_init_16bpp_template.c |  2 +-
 libavutil/x86/float_dsp_init.c              |  2 +-
 libavutil/x86/lls_init.c                    |  2 +-
 libswresample/x86/audio_convert_init.c      |  2 +-
 libswresample/x86/resample_init.c           |  2 +-
 13 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 0de0163..09eb06d 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -753,6 +753,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->transform_add[3]    = ff_hevc_transform_add32_8_avx;
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
+            c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
             if (ARCH_X86_64) {
@@ -897,7 +901,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(10, avx);
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
-
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
             if (ARCH_X86_64) {
@@ -1095,6 +1101,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(12, avx);
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
 
diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
index 0ba4358..9767b21 100644
--- a/libavcodec/x86/huffyuvencdsp_mmx.c
+++ b/libavcodec/x86/huffyuvencdsp_mmx.c
@@ -98,7 +98,7 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
         c->diff_bytes = ff_diff_bytes_sse2;
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_avx2;
     }
 }
diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
index 0dbd2db..baa8138 100644
--- a/libavcodec/x86/jpeg2000dsp_init.c
+++ b/libavcodec/x86/jpeg2000dsp_init.c
@@ -44,7 +44,7 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
         c->mct_decode[FF_DWT97] = ff_ict_float_avx;
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
     }
 }
diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c
index e9d9b1b..7f5e6b1 100644
--- a/libavcodec/x86/mlpdsp_init.c
+++ b/libavcodec/x86/mlpdsp_init.c
@@ -199,6 +199,6 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c)
 #endif
     if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags))
         c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4;
-    if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2)
+    if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2)
         c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2;
 }
diff --git a/libavcodec/x86/synth_filter_init.c b/libavcodec/x86/synth_filter_init.c
index 0649ea2..9ef00cd 100644
--- a/libavcodec/x86/synth_filter_init.c
+++ b/libavcodec/x86/synth_filter_init.c
@@ -67,7 +67,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         s->synth_filter_float = synth_filter_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
         s->synth_filter_float = synth_filter_fma3;
     }
 #endif /* HAVE_YASM */
diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c
index ee48e80..8abb152 100644
--- a/libavcodec/x86/v210enc_init.c
+++ b/libavcodec/x86/v210enc_init.c
@@ -45,7 +45,7 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s)
     if (EXTERNAL_AVX(cpu_flags))
         s->pack_line_8 = ff_v210_planar_pack_8_avx;
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         s->pack_line_8 = ff_v210_planar_pack_8_avx2;
         s->pack_line_10 = ff_v210_planar_pack_10_avx2;
         s->sample_factor = 2;
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 8efb18c..469a661 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -377,7 +377,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
         init_ipred(32, avx, v, VERT);
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         init_fpel_func(1, 1, 32, avg, _8, avx2);
         init_fpel_func(0, 1, 64, avg, _8, avx2);
         if (ARCH_X86_64) {
diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c
index 4ceb4d4..eb67499 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -129,7 +129,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
         init_ipred_funcs(hd, HOR_DOWN, 16, avx);
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         init_fpel_func(2, 1,  32, avg, _16, avx2);
         init_fpel_func(1, 1,  64, avg, _16, avx2);
         init_fpel_func(0, 1, 128, avg, _16, avx2);
diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c
index 90cdcc9..4840b28 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp_template.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -225,7 +225,7 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
         init_lpf_funcs(BPC, avx);
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
 #if HAVE_AVX2_EXTERNAL
         init_subpel3_32_64(0,  put, BPC, avx2);
         init_subpel3_32_64(1,  avg, BPC, avx2);
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index f211f23..c836a78 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -92,7 +92,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
         fdsp->vector_fmul_add    = ff_vector_fmul_add_fma3;
     }
diff --git a/libavutil/x86/lls_init.c b/libavutil/x86/lls_init.c
index 9f0d862..1c5dca4 100644
--- a/libavutil/x86/lls_init.c
+++ b/libavutil/x86/lls_init.c
@@ -39,7 +39,7 @@ av_cold void ff_init_lls_x86(LLSModel *m)
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         m->update_lls = ff_update_lls_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
         m->update_lls = ff_update_lls_fma3;
     }
 }
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index 5e5e91d..bb89cf6 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -174,7 +174,7 @@ MULTI_CAPS_FUNC(SSE2, sse2)
                 ac->simd_f =  ff_pack_8ch_float_to_int32_a_avx;
         }
     }
-    if(EXTERNAL_AVX2(mm_flags)) {
+    if(EXTERNAL_AVX2_FAST(mm_flags)) {
         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
             ac->simd_f =  ff_float_to_int32_a_avx2;
     }
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
index bc444cf..9d7d5cf 100644
--- a/libswresample/x86/resample_init.c
+++ b/libswresample/x86/resample_init.c
@@ -71,7 +71,7 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
             c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                         : ff_resample_common_float_avx;
         }
-        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
+        if (EXTERNAL_FMA3_FAST(mm_flags)) {
             c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                         : ff_resample_common_float_fma3;
         }
-- 
2.7.0



More information about the ffmpeg-devel mailing list