[FFmpeg-devel] [PATCH 1/3] avcodec/h264: mmxext 4:2:2 chroma intra deblock/loop filter

James Darnley jdarnley at obe.tv
Tue Nov 29 13:52:33 EET 2016


2.1 times faster (401 vs. 194 cycles)
---
 libavcodec/x86/h264_deblock.asm | 14 ++++++++++++++
 libavcodec/x86/h264dsp_init.c   |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 4aabbc0..fe0ab20 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -946,6 +946,20 @@ cglobal deblock_h_chroma_intra_8, 4,6
     TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
     RET
 
+cglobal deblock_h_chroma422_intra_8, 4, 6 ; horizontal 4:2:2 chroma intra deblock: the same load/filter/store pass is run twice, the second one 8*r1 further on
+    CHROMA_H_START ; NOTE(review): macro defined outside this hunk; presumably sets up t5/t6 for PASS8ROWS -- confirm
+    TRANSPOSE4x8_LOAD  bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) ; pass 1: load the rows addressed through t5/r0/r1/t6
+    call ff_chroma_intra_body_mmxext ; shared intra filter body; its label follows this function
+    TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) ; pass 1: store back through the same row addresses
+
+    lea r0, [r0+r1*8] ; r0 += 8*r1 (presumably pix += 8*stride, i.e. the next 8 rows -- confirm against the C prototype)
+    lea t5, [t5+r1*8] ; t5 += 8*r1 so it stays in step with r0; t6 is left unchanged
+
+    TRANSPOSE4x8_LOAD  bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) ; pass 2: identical sequence on the advanced addresses
+    call ff_chroma_intra_body_mmxext ; r2d/r3d are not written in this function between the two calls
+    TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) ; pass 2: store
+RET
+
 ALIGN 16
 ff_chroma_intra_body_mmxext:
     LOAD_MASK r2d, r3d
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index d2452c7..027c1ae 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -130,6 +130,7 @@ LF_FUNCS(uint8_t,   8)
 LF_FUNCS(uint16_t, 10)
 
 void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+LF_IFUNC(h, chroma422_intra, 8, mmxext)
 
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
 LF_FUNC(v8, luma, 8, mmxext)
@@ -249,6 +250,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                 c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
             } else {
                 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
             }
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
             c->h264_v_loop_filter_luma       = deblock_v_luma_8_mmxext;
-- 
2.10.2



More information about the ffmpeg-devel mailing list