[FFmpeg-devel] [PATCH] yadif: restore speed of the C filtering code

James Darnley james.darnley at gmail.com
Fri Mar 1 18:20:19 CET 2013


Always use the special edge filter for the first 3 and last 3 columns, and only for those columns.

The changes made in 64ed397 slowed the C filter to just under 3/4 of its
previous speed.  This commit restores almost all of that speed while
producing identical output.

For reference, on my Athlon64:
1733222 decicycles in old  (before 64ed397)
2358563 decicycles in new  (after 64ed397)
1740014 decicycles in this (with this patch applied)
---
 libavfilter/vf_yadif.c          |   93 +++++++++++++++++++++++---------------
 libavfilter/x86/vf_yadif_init.c |   12 +----
 libavfilter/yadif.h             |    4 +-
 3 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index b7c2d80..3bd0d17 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -34,9 +34,9 @@
 #define PERM_RWP AV_PERM_WRITE | AV_PERM_PRESERVE | AV_PERM_REUSE
 
 #define CHECK(j)\
-    {   int score = FFABS(cur[mrefs + off_left + (j)] - cur[prefs + off_left - (j)])\
+    {   int score = FFABS(cur[mrefs - 1 + (j)] - cur[prefs - 1 - (j)])\
                   + FFABS(cur[mrefs  +(j)] - cur[prefs  -(j)])\
-                  + FFABS(cur[mrefs + off_right + (j)] - cur[prefs + off_right - (j)]);\
+                  + FFABS(cur[mrefs + 1 + (j)] - cur[prefs + 1 - (j)]);\
         if (score < spatial_score) {\
             spatial_score= score;\
             spatial_pred= (cur[mrefs  +(j)] + cur[prefs  -(j)])>>1;\
@@ -51,15 +51,46 @@
         int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e) )>>1; \
         int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
         int spatial_pred = (c+e) >> 1; \
-        int off_right = (x < w - 1) ? 1 : -1;\
-        int off_left  = x ? -1 : 1;\
-        int spatial_score = FFABS(cur[mrefs + off_left]  - cur[prefs + off_left]) + FFABS(c-e) \
-                          + FFABS(cur[mrefs + off_right] - cur[prefs + off_right]) - 1; \
+        int spatial_score = FFABS(cur[mrefs - 1] - cur[prefs - 1]) + FFABS(c-e) \
+                          + FFABS(cur[mrefs + 1] - cur[prefs + 1]) - 1; \
  \
-        if (x > 2 && x < w - 3) {\
-            CHECK(-1) CHECK(-2) }} }} \
-            CHECK( 1) CHECK( 2) }} }} \
-        }\
+        CHECK(-1) CHECK(-2) }} }} \
+        CHECK( 1) CHECK( 2) }} }} \
+ \
+        if (mode < 2) { \
+            int b = (prev2[2 * mrefs] + next2[2 * mrefs])>>1; \
+            int f = (prev2[2 * prefs] + next2[2 * prefs])>>1; \
+            int max = FFMAX3(d - e, d - c, FFMIN(b - c, f - e)); \
+            int min = FFMIN3(d - e, d - c, FFMAX(b - c, f - e)); \
+ \
+            diff = FFMAX3(diff, min, -max); \
+        } \
+ \
+        if (spatial_pred > d + diff) \
+           spatial_pred = d + diff; \
+        else if (spatial_pred < d - diff) \
+           spatial_pred = d - diff; \
+ \
+        dst[0] = spatial_pred; \
+ \
+        dst++; \
+        cur++; \
+        prev++; \
+        next++; \
+        prev2++; \
+        next2++; \
+    }
+
+#define FILTER_EDGES(start, end) \
+    for (x = start;  x < end; x++) { \
+        int c = cur[mrefs]; \
+        int d = (prev2[0] + next2[0])>>1; \
+        int e = cur[prefs]; \
+        int temporal_diff0 = FFABS(prev2[0] - next2[0]); \
+        int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e) )>>1; \
+        int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e) )>>1; \
+        int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
+        int spatial_pred = (c+e) >> 1; \
  \
         if (mode < 2) { \
             int b = (prev2[2 * mrefs] + next2[2 * mrefs])>>1; \
@@ -101,8 +132,7 @@ static void filter_line_c(void *dst1,
 }
 
 static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
-                         int w, int prefs, int mrefs, int parity, int mode,
-                         int l_edge)
+                         int w, int prefs, int mrefs, int parity, int mode)
 {
     uint8_t *dst  = dst1;
     uint8_t *prev = prev1;
@@ -112,7 +142,7 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
     uint8_t *prev2 = parity ? prev : cur ;
     uint8_t *next2 = parity ? cur  : next;
 
-    FILTER(0, l_edge)
+    FILTER_EDGES(0, 3)
 
     dst  = (uint8_t*)dst1  + w - 3;
     prev = (uint8_t*)prev1 + w - 3;
@@ -121,7 +151,7 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
     prev2 = (uint8_t*)(parity ? prev : cur);
     next2 = (uint8_t*)(parity ? cur  : next);
 
-    FILTER(w - 3, w)
+    FILTER_EDGES(w - 3, w)
 }
 
 
@@ -144,8 +174,7 @@ static void filter_line_c_16bit(void *dst1,
 }
 
 static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-                               int w, int prefs, int mrefs, int parity, int mode,
-                               int l_edge)
+                               int w, int prefs, int mrefs, int parity, int mode)
 {
     uint16_t *dst  = dst1;
     uint16_t *prev = prev1;
@@ -155,7 +184,7 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
     uint16_t *prev2 = parity ? prev : cur ;
     uint16_t *next2 = parity ? cur  : next;
 
-    FILTER(0, l_edge)
+    FILTER_EDGES(0, 3)
 
     dst   = (uint16_t*)dst1  + w - 3;
     prev  = (uint16_t*)prev1 + w - 3;
@@ -164,7 +193,7 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
     prev2 = (uint16_t*)(parity ? prev : cur);
     next2 = (uint16_t*)(parity ? cur  : next);
 
-    FILTER(w - 3, w)
+    FILTER_EDGES(w - 3, w)
 }
 
 static void filter(AVFilterContext *ctx, AVFilterBufferRef *dstpic,
@@ -178,7 +207,7 @@ static void filter(AVFilterContext *ctx, AVFilterBufferRef *dstpic,
         int h = dstpic->video->h;
         int refs = yadif->cur->linesize[i];
         int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8;
-        int l_edge, l_edge_pix;
+        int pix_3 = 3 * df;
 
         if (i == 1 || i == 2) {
         /* Why is this not part of the per-plane description thing? */
@@ -189,8 +218,6 @@ static void filter(AVFilterContext *ctx, AVFilterBufferRef *dstpic,
         /* filtering reads 3 pixels to the left/right; to avoid invalid reads,
          * we need to call the c variant which avoids this for border pixels
          */
-        l_edge     = yadif->req_align;
-        l_edge_pix = l_edge / df;
 
         for (y = 0; y < h; y++) {
             if ((y ^ parity) & 1) {
@@ -199,22 +226,14 @@ static void filter(AVFilterContext *ctx, AVFilterBufferRef *dstpic,
                 uint8_t *next = &yadif->next->data[i][y * refs];
                 uint8_t *dst  = &dstpic->data[i][y * dstpic->linesize[i]];
                 int     mode  = y == 1 || y + 2 == h ? 2 : yadif->mode;
-                if (yadif->req_align) {
-                    yadif->filter_line(dst + l_edge, prev + l_edge, cur + l_edge,
-                                       next + l_edge, w - l_edge_pix - 3,
-                                       y + 1 < h ? refs : -refs,
-                                       y ? -refs : refs,
-                                       parity ^ tff, mode);
-                    yadif->filter_edges(dst, prev, cur, next, w,
-                                         y + 1 < h ? refs : -refs,
-                                         y ? -refs : refs,
-                                         parity ^ tff, mode, l_edge_pix);
-                } else {
-                    yadif->filter_line(dst, prev, cur, next + l_edge, w,
-                                       y + 1 < h ? refs : -refs,
-                                       y ? -refs : refs,
-                                       parity ^ tff, mode);
-                }
+                yadif->filter_line(dst + pix_3, prev + pix_3, cur + pix_3, next + pix_3, w - 6,
+                                    y + 1 < h ? refs : -refs,
+                                    y ? -refs : refs,
+                                    parity ^ tff, mode);
+                yadif->filter_edges(dst, prev, cur, next, w,
+                                    y + 1 < h ? refs : -refs,
+                                    y ? -refs : refs,
+                                    parity ^ tff, mode);
             } else {
                 memcpy(&dstpic->data[i][y * dstpic->linesize[i]],
                        &yadif->cur->data[i][y * refs], w * df);
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 2873744..8d5e768 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -42,18 +42,12 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 
 #if HAVE_YASM
 #if ARCH_X86_32
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
+    if (EXTERNAL_MMXEXT(cpu_flags))
         yadif->filter_line = ff_yadif_filter_line_mmxext;
-        yadif->req_align   = 8;
-    }
 #endif /* ARCH_X86_32 */
-    if (EXTERNAL_SSE2(cpu_flags)) {
+    if (EXTERNAL_SSE2(cpu_flags))
         yadif->filter_line = ff_yadif_filter_line_sse2;
-        yadif->req_align   = 16;
-    }
-    if (EXTERNAL_SSSE3(cpu_flags)) {
+    if (EXTERNAL_SSSE3(cpu_flags))
         yadif->filter_line = ff_yadif_filter_line_ssse3;
-        yadif->req_align   = 16;
-    }
 #endif /* HAVE_YASM */
 }
diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h
index 50fc856..2c3f125 100644
--- a/libavfilter/yadif.h
+++ b/libavfilter/yadif.h
@@ -57,13 +57,11 @@ typedef struct YADIFContext {
     /**
      * Required alignment for filter_line
      */
-    int req_align;
     void (*filter_line)(void *dst,
                         void *prev, void *cur, void *next,
                         int w, int prefs, int mrefs, int parity, int mode);
     void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
-                         int w, int prefs, int mrefs, int parity, int mode,
-                         int l_edge);
+                         int w, int prefs, int mrefs, int parity, int mode);
 
     const AVPixFmtDescriptor *csp;
     int eof;
-- 
1.7.9



More information about the ffmpeg-devel mailing list