[FFmpeg-devel] [PATCH] vf_overlay: unroll Y plane output computation in blend_slice()

Michael Niedermayer michaelni at gmx.at
Mon Oct 31 14:46:02 CET 2011


On Mon, Oct 31, 2011 at 12:46:50PM +0100, Stefano Sabatini wrote:
> Faster, as avoids unnecessary comparation instructions.

I suggest something along the lines of the patch below.
It is entirely untested and surely buggy; it is just meant to show what I mean.
It avoids code duplication and lets gcc, by inlining, remove the
unnecessary comparison instructions.
It also fixes the right/bottom blend code, which was wrong for
even width/height.


diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 06967c2..faa8a1f 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -335,6 +335,35 @@ static void start_frame_overlay(AVFilterLink *inlink, AVFilterBufferRef *inpicre
 // apply a fast variant: (X+127)/255 = ((X+127)*257+257)>>16 = ((X+128)*257)>>16
 #define FAST_DIV255(x) ((((x) + 128) * 257) >> 16)

+static void av_always_inline blend_plane(AVFilterBufferRef *dst, AVFilterBufferRef *src, int i,
+                                         uint8_t *dp, uint8_t *sp, uint8_t *ap, int wp, int hp, int hsub, int vsub, int hcheck, int vcheck)
+{
+    int j,k;
+    for (j = 0; j < hp; j++) {
+        uint8_t *d = dp, *s = sp, *a = ap;
+        for (k = 0; k < wp; k++) {
+            // average alpha for color components, improve quality
+            int alpha;
+            if (   (hsub && (!hcheck || k+1<wp))
+                && (vsub && (!vcheck || j+1<hp))) {
+                alpha = (a[0] + a[src->linesize[3]] +
+                         a[1] + a[src->linesize[3]+1]) >> 2;
+            } else if (hsub && (!hcheck || k+1<wp)) {
+                alpha= (a[0] + a[1]) >> 1;
+            } else if (vsub && (!vcheck || j+1<hp)) {
+                alpha= (a[0] + a[src->linesize[3]]) >> 1;
+            } else
+                alpha = a[0];
+            *d = (*d * (0xff - alpha) + *s++ * alpha + 128) >> 8;
+            d++;
+            a += 1 << hsub;
+        }
+        dp += dst->linesize[i];
+        sp += src->linesize[i];
+        ap += (1 << vsub) * src->linesize[3];
+    }
+}
+
 static void blend_slice(AVFilterContext *ctx,
                         AVFilterBufferRef *dst, AVFilterBufferRef *src,
                         int x, int y, int w, int h,
@@ -436,30 +465,13 @@ static void blend_slice(AVFilterContext *ctx,
                 sp += ((slice_y - y) >> vsub) * src->linesize[i];
                 ap += (slice_y - y) * src->linesize[3];
             }
-            for (j = 0; j < hp; j++) {
-                uint8_t *d = dp, *s = sp, *a = ap;
-                for (k = 0; k < wp; k++) {
-                    // average alpha for color components, improve quality
-                    int alpha_v, alpha_h, alpha;
-                    if (hsub && vsub && j+1 < hp && k+1 < wp) {
-                        alpha = (a[0] + a[src->linesize[3]] +
-                                 a[1] + a[src->linesize[3]+1]) >> 2;
-                    } else if (hsub || vsub) {
-                        alpha_h = hsub && k+1 < wp ?
-                            (a[0] + a[1]) >> 1 : a[0];
-                        alpha_v = vsub && j+1 < hp ?
-                            (a[0] + a[src->linesize[3]]) >> 1 : a[0];
-                        alpha = (alpha_v + alpha_h) >> 1;
-                    } else
-                        alpha = a[0];
-                    *d = (*d * (0xff - alpha) + *s++ * alpha + 128) >> 8;
-                    d++;
-                    a += 1 << hsub;
-                }
-                dp += dst->linesize[i];
-                sp += src->linesize[i];
-                ap += (1 << vsub) * src->linesize[3];
-            }
+            if(hsub||vsub){
+                if(wp<<hsub == width && hp<<vsub == height)
+                    blend_plane(dst, src, i, dp, sp, ap, wp, hp, hsub, vsub, 0, 0);
+                else
+                    blend_plane(dst, src, i, dp, sp, ap, wp, hp, hsub, vsub, wp<<hsub != width, hp<<vsub != height);
+            }else
+                blend_plane(dst, src, i, dp, sp, ap, wp, hp, 0, 0, 0, 0);
         }
     }
 }

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I do not agree with what you have to say, but I'll defend to the death your
right to say it. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20111031/db2626dc/attachment.asc>


More information about the ffmpeg-devel mailing list