[FFmpeg-devel] [PATCH] avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD

Paul B Mahol onemda at gmail.com
Wed Oct 7 11:38:39 CEST 2015


Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 libavfilter/x86/vf_blend.asm    | 62 +++++++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_blend_init.c | 14 ++++++++++
 2 files changed, 76 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 167e72b..7180817 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -27,6 +27,8 @@ SECTION_RODATA
 
 pw_128: times 8 dw 128
 pw_255: times 8 dw 255
+pb_128: times 16 db 128 ; 16 bytes of 0x80; XORed into both compare operands in blend_hardmix ahead of pcmpgtb
+pb_255: times 16 db 255 ; 16 bytes of 0xFF; serves as the constant 255 and as an all-ones XOR mask
 
 SECTION .text
 
@@ -273,6 +275,36 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
     jg .nextrow
 REP_RET
 
+cglobal blend_hardmix, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end ; per byte: dst = (top < 255 - bottom) ? 0 : 255
+    add      topq, widthq                  ; point each pointer past the row so a negative index can count up to 0
+    add   bottomq, widthq
+    add      dstq, widthq
+    sub      endq, startq                  ; end = number of rows to process
+    neg    widthq                          ; width = -width: starting value of the column index
+.nextrow:
+    mov        r9q, widthq                 ; r9 is the single scratch GPR: cglobal above requests 10 GPRs, r0-r8 hold the arguments
+    %define      x  r9q                    ; must stay within r0-r9; registers beyond the declared count are neither reserved nor saved
+
+    .loop:
+        movu            m0, [topq + x]
+        movu            m1, [bottomq + x]
+        mova            m2, [pb_255]
+        psubusb         m2, m1             ; m2 = 255 - bottom
+        pxor            m0, [pb_128]       ; flip the sign bit of both operands so that the signed
+        pxor            m2, [pb_128]       ; pcmpgtb below orders them as unsigned bytes
+        pcmpgtb         m2, m0             ; m2 = 0xFF where top < 255 - bottom, else 0
+        pxor            m2, [pb_255]       ; invert the mask: 0 where top < 255 - bottom, else 255
+        mova    [dstq + x], m2             ; aligned store of mmsize output bytes
+        add            r9q, mmsize
+    jl .loop                               ; continue while the index is still negative
+
+    add          topq, top_linesizeq
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1
+    jg .nextrow                            ; continue while rows remain
+REP_RET
+
 cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
     add   bottomq, widthq
@@ -298,6 +330,36 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
     jg .nextrow
 REP_RET
 
+cglobal blend_phoenix, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end ; per byte: dst = min(top, bottom) - max(top, bottom) + 255
+    add      topq, widthq                  ; point each pointer past the row so a negative index can count up to 0
+    add   bottomq, widthq
+    add      dstq, widthq
+    sub      endq, startq                  ; end = number of rows to process
+    neg    widthq                          ; width = -width: starting value of the column index
+.nextrow:
+    mov        r9q, widthq                 ; r9 is the single scratch GPR: cglobal above requests 10 GPRs, r0-r8 hold the arguments
+    %define      x  r9q                    ; must stay within r0-r9; registers beyond the declared count are neither reserved nor saved
+
+    .loop:
+        movu            m0, [topq + x]
+        movu            m1, [bottomq + x]
+        mova            m2, m0             ; keep a copy of top; m0 is overwritten next
+        pminub          m0, m1             ; m0 = min(top, bottom)
+        pmaxub          m1, m2             ; m1 = max(top, bottom)
+        mova            m2, [pb_255]
+        psubusb         m2, m1             ; m2 = 255 - max
+        paddusb         m2, m0             ; m2 = 255 - max + min; never exceeds 255 because min <= max
+        mova    [dstq + x], m2             ; aligned store of mmsize output bytes
+        add            r9q, mmsize
+    jl .loop                               ; continue while the index is still negative
+
+    add          topq, top_linesizeq
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1
+    jg .nextrow                            ; continue while rows remain
+REP_RET
+
 INIT_XMM ssse3
 cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 61e90f8..454d030 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                                  ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                                  struct FilterParams *param, double *values);
 
+void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                            const uint8_t *bottom, ptrdiff_t bottom_linesize,
                            uint8_t *dst, ptrdiff_t dst_linesize,
@@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                       ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                       struct FilterParams *param, double *values);
 
+void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                             const uint8_t *bottom, ptrdiff_t bottom_linesize,
                             uint8_t *dst, ptrdiff_t dst_linesize,
@@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
         case BLEND_AVERAGE:  param->blend = ff_blend_average_sse2;  break;
         case BLEND_DARKEN:   param->blend = ff_blend_darken_sse2;   break;
         case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_sse2;  break;
         case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_sse2;  break;
         case BLEND_OR:       param->blend = ff_blend_or_sse2;       break;
+        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
         case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
         case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
         }
-- 
1.9.1



More information about the ffmpeg-devel mailing list