[FFmpeg-cvslog] x86/hevc_idct: add 12bit idct_dc

James Almer git at videolan.org
Sun Jul 27 00:36:13 CEST 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jul 26 15:08:51 2014 -0300| [bfb3b2b7a6ec0e35ba400941183862137b91bf32] | committer: Michael Niedermayer

x86/hevc_idct: add 12bit idct_dc

Signed-off-by: James Almer <jamrial at gmail.com>
Reviewed-by: Mickaël Raulet <mraulet at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfb3b2b7a6ec0e35ba400941183862137b91bf32
---

 libavcodec/x86/hevc_idct.asm  |   16 ++++++++++++++++
 libavcodec/x86/hevcdsp_init.c |   15 ++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index 7a44f08..2ad6e97 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -104,3 +104,19 @@ INIT_YMM avx2
 IDCT_DC    16,  2, 10
 IDCT_DC    32,  8, 10
 %endif ;HAVE_AVX2_EXTERNAL
+
+; 12-bit
+INIT_MMX mmxext
+IDCT_DC_NL  4,     12
+IDCT_DC     8,  2, 12
+
+INIT_XMM sse2
+IDCT_DC_NL  8,     12
+IDCT_DC    16,  4, 12
+IDCT_DC    32, 16, 12
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+IDCT_DC    16,  2, 12
+IDCT_DC    32,  8, 12
+%endif ;HAVE_AVX2_EXTERNAL
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 3357593..3e8704a 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -54,7 +54,8 @@ LFL_FUNCS(uint8_t,  12, ssse3)
 
 #define IDCT_FUNCS(W, opt) \
 void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \
-void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs)
+void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs); \
+void ff_hevc_idct##W##_dc_12_##opt(int16_t *coeffs)
 
 IDCT_FUNCS(4x4,   mmxext);
 IDCT_FUNCS(8x8,   mmxext);
@@ -533,6 +534,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
 
         }
     } else if (bit_depth == 12) {
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext;
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext;
+        }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
@@ -540,6 +545,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
             }
+
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
@@ -556,5 +565,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
         }
+        if (EXTERNAL_AVX2(cpu_flags)) {
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
+        }
     }
 }



More information about the ffmpeg-cvslog mailing list