[FFmpeg-devel] [PATCH] swscale alpha channel support

Wed Mar 11 02:59:51 CET 2009

On Tue, Mar 10, 2009 at 05:05:53PM +0100, C?dric Schieli wrote:
> 2009/3/5 Michael Niedermayer <michaelni at gmx.at>:
> > On Thu, Mar 05, 2009 at 03:09:26PM +0100, C?dric Schieli wrote:
> >> 2009/3/2 Michael Niedermayer <michaelni at gmx.at>:
> >> > On Fri, Feb 27, 2009 at 11:30:25PM +0100, C?dric Schieli wrote:
> 
> [...]
> 
> >> #5 : sws_configure_alpha.patch
> >> add --swscale-alpha configure option
> 
> patch updated to take care of Mans' comment
> 
> >
> > not maintained by me
> >
> >
> >>
> >> #6 : sws_yuva2rgb.patch
> >> updated to have a mmx version even without HAVE_7REGS
> >
> > I would prefer to have simple code over working around gcc bugs
> > with #ifdefs
> 
> ok, patch updated
> 
> [...]
> 
> >> @@ -2144,11 +2293,17 @@
> >> ? ? ?}
> >> ? ? ?else if (srcFormat==PIX_FMT_RGB32)
> >> ? ? ?{
> >> + ? ? ? ?if (isAlpha)
> >> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src+3, srcW, pal);
> >> + ? ? ? ?else
> >> ? ? ? ? ?RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
> >> ? ? ? ? ?src= formatConvBuffer;
> >> ? ? ?}
> >> ? ? ?else if (srcFormat==PIX_FMT_RGB32_1)
> >> ? ? ?{
> >> + ? ? ? ?if (isAlpha)
> >> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src, srcW, pal);
> >> + ? ? ? ?else
> >> ? ? ? ? ?RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
> >> ? ? ? ? ?src= formatConvBuffer;
> >> ? ? ?}
> >> @@ -2169,11 +2324,17 @@
> >> ? ? ?}
> >> ? ? ?else if (srcFormat==PIX_FMT_BGR32)
> >> ? ? ?{
> >> + ? ? ? ?if (isAlpha)
> >> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src+3, srcW, pal);
> >> + ? ? ? ?else
> >> ? ? ? ? ?RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
> >> ? ? ? ? ?src= formatConvBuffer;
> >> ? ? ?}
> >> ? ? ?else if (srcFormat==PIX_FMT_BGR32_1)
> >> ? ? ?{
> >> + ? ? ? ?if (isAlpha)
> >> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src, srcW, pal);
> >> + ? ? ? ?else
> >> ? ? ? ? ?RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
> >> ? ? ? ? ?src= formatConvBuffer;
> >> ? ? ?}
> >
> > This doesnt look like it would work on big endian systems
> 
> I think it will work (but I can't test). Only one byte is read,
> through a uint8_t* pointer.
> Or I misunderstood the way RGB32/RGB32_1 vs. ARGB/BGRA is handled.
> What I understand is that RGB32 is the same as BGRA on disk and in
> memory, while it is the same as ARGB in LE CPUs and BGRA in BE CPUs.
> Am I wrong ?

no, it seems fine, i must have misread it ...

> 
> >
> >
> > [...]
> >
> >>
> >> + ? ?if (CONFIG_SWSCALE_ALPHA && (dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
> >> + ? ? ? ?memset(dst[3], 255, dstStride[3]*(dstY-lastDstY));
> >> +
> >
> > you cant write outside 0..width per line, also stride can be negative
> >
> >
> > [...]
> >
> > #8
> >> Index: ffmpeg/libswscale/swscale.c
> >> ===================================================================
> >> --- ffmpeg.orig/libswscale/swscale.c ?2009-03-03 09:31:07.000000000 +0100
> >> +++ ffmpeg/libswscale/swscale.c ? ? ? 2009-03-03 10:00:28.000000000 +0100
> >> @@ -133,6 +133,7 @@
> >> ? ? ?)
> >> ?#define isSupportedOut(x) ? ( ? ? ? \
> >> ? ? ? ? ? ? (x)==PIX_FMT_YUV420P ? ? \
> >> + ? ? ? ?|| (x)==PIX_FMT_YUVA420P ? ?\
> >> ? ? ? ? ?|| (x)==PIX_FMT_YUYV422 ? ? \
> >> ? ? ? ? ?|| (x)==PIX_FMT_UYVY422 ? ? \
> >> ? ? ? ? ?|| (x)==PIX_FMT_YUV444P ? ? \
> >> @@ -2053,12 +2054,16 @@
> >> ? ? ? ? ? ? ? ? ? ? ? ?int srcSliceH, uint8_t* dst[], int dstStride[])
> >> ?{
> >> ? ? ?int plane;
> >> - ? ?for (plane=0; plane<3; plane++)
> >> + ? ?for (plane=0; plane<4; plane++)
> >> ? ? ?{
> >> - ? ? ? ?int length= plane==0 ? c->srcW ?: -((-c->srcW ?)>>c->chrDstHSubSample);
> >> - ? ? ? ?int y= ? ? ?plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
> >> - ? ? ? ?int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
> >> -
> >> + ? ? ? ?int length= (plane==0 || plane==3) ? c->srcW ?: -((-c->srcW ?)>>c->chrDstHSubSample);
> >> + ? ? ? ?int y= ? ? ?(plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
> >> + ? ? ? ?int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
> >> +
> >
> >> + ? ? ? ?if (((!isALPHA(c->srcFormat)) || (!isALPHA(c->dstFormat))) && plane == 3){
> >
> > !(isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) && plane == 3
> >
> >
> >> + ? ? ? ? ? ?if (isALPHA(c->dstFormat))
> >> + ? ? ? ? ? ? ? ?memset(dst[3], 255, dstStride[3]*height);
> >
> > as said elewhere writing must be limited to 0..width
> >
> 
> Patch updated
> 
> 

> #1 : sws_fix_initMMX2HScaler.patch

ok (assuming tested and improvment confirmed)

> #2 : swscale-example_yuva.patch

see below

> #3 : sws_parametrized_yscaleyuv2packed.patch

ok if no change to striped obj files

> #4 : sws_configure_alpha.patch

not maintained by me

> #5 : sws_yuva2rgb.patch

ok

> #6 : sws_scale_alpha.patch

see below

> #7 : sws_output_yuva420p.patch

see below

> #8 : swscale-example_use_alpha.patch

see below

[...]

>          // avoid stride % bpp != 0
> +        if (srcFormat==PIX_FMT_YUVA420P)
> +            srcStride[i]= refStride[i];
> +        else
>          if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
>              srcStride[i]= srcW*3;
>          else
> @@ -72,7 +75,10 @@ static int doTest(uint8_t *ref[3], int r
>          else
>              dstStride[i]= dstW*4;
>  
> -        src[i]= (uint8_t*) malloc(srcStride[i]*srcH);
> +        if (srcFormat==PIX_FMT_YUVA420P)
> +            src[i]= ref[i];
> +        else
> +            src[i]= (uint8_t*) malloc(srcStride[i]*srcH);
>          dst[i]= (uint8_t*) malloc(dstStride[i]*dstH);
>          out[i]= (uint8_t*) malloc(refStride[i]*h);
>          if (!src[i] || !dst[i] || !out[i]) {
> @@ -84,14 +90,16 @@ static int doTest(uint8_t *ref[3], int r
>      }
>  
>      dstContext = outContext = NULL;
> -    srcContext= sws_getContext(w, h, PIX_FMT_YUV420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL);
> -    if (!srcContext) {
> -        fprintf(stderr, "Failed to get %s ---> %s\n",
> -                sws_format_name(PIX_FMT_YUV420P),
> -                sws_format_name(srcFormat));
> -        res = -1;
> +    if (srcFormat!=PIX_FMT_YUVA420P){
> +        srcContext= sws_getContext(w, h, PIX_FMT_YUVA420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL);
> +        if (!srcContext) {
> +            fprintf(stderr, "Failed to get %s ---> %s\n",
> +                    sws_format_name(PIX_FMT_YUVA420P),
> +                    sws_format_name(srcFormat));
> +            res = -1;

why?

>  
> -        goto end;
> +            goto end;

cosmetic, doesnt belong in functional patches ...

> +        }
>      }
>      dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL);
>      if (!dstContext) {
> @@ -114,7 +122,8 @@ static int doTest(uint8_t *ref[3], int r
>  //    printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
>  //        (int)src[0], (int)src[1], (int)src[2]);
>  
> +    if (srcFormat!=PIX_FMT_YUVA420P)

> -    sws_scale(srcContext, ref, refStride, 0, h   , src, srcStride);
> +        sws_scale(srcContext, ref, refStride, 0, h   , src, srcStride);

same

>      sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
>      sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);
>  
> @@ -136,12 +145,14 @@ static int doTest(uint8_t *ref[3], int r
>  
>      end:
>  
> +    if (srcFormat!=PIX_FMT_YUVA420P)

> -    sws_freeContext(srcContext);
> +        sws_freeContext(srcContext);

same

[...]
> @@ -203,6 +214,11 @@ int main(int argc, char **argv){
>          }
>      }
>      sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
> +    for (y=0; y<H; y++){
> +        for (x=0; x<W; x++){
> +            src[3][ x + y*W]= random();
> +        }
> +    }
>  
>      selfTest(src, stride, W, H);
>  

more "why?" code

[...]
> +            A = 0;\
> +            for (j=0; j<lumFilterSize; j++)\
> +                A += alpSrc[j][i     ] * lumFilter[j];\
> +            A >>=10;\
> +            A += rnd;\
> +            A >>=9;\

this contains a few useless operations
a single shift should be enough,so should be chnaging the initial value of A

[...]
> Index: ffmpeg/libswscale/swscale_template.c
> ===================================================================
> --- ffmpeg.orig/libswscale/swscale_template.c	2009-03-10 16:35:58.000000000 +0100
> +++ ffmpeg/libswscale/swscale_template.c	2009-03-10 16:37:12.000000000 +0100
> @@ -458,11 +458,11 @@
>      "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
>      /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
>  
> -#define REAL_YSCALEYUV2RGB_YA(index, c) \
> -    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
> -    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
> -    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
> -    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
> +#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
> +    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
> +    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
> +    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
> +    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
>      "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
>      "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
>      "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
> @@ -501,11 +501,11 @@
>      "packuswb          %%mm6, %%mm5     \n\t"\
>      "packuswb          %%mm3, %%mm4     \n\t"\
>  
> -#define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c)
> +#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
>  
>  #define YSCALEYUV2RGB(index, c) \
>      REAL_YSCALEYUV2RGB_UV(index, c) \
> -    REAL_YSCALEYUV2RGB_YA(index, c) \
> +    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
>      REAL_YSCALEYUV2RGB_COEFF(c)
>  
>  #define REAL_YSCALEYUV2PACKED1(index, c) \

this looks like it should be a seperate patch

[...]
> @@ -952,16 +966,32 @@
>               dest, uDest, dstW, chrDstW, dstFormat);
>  }
>  
> -static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
> -                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
> +static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, int16_t *alpSrc,
> +                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
>  {
>      int i;
>  #if HAVE_MMX
>      if(!(c->flags & SWS_BITEXACT)){
> +#if CONFIG_SWSCALE_ALPHA
> +        long p= uDest ? 4 : 2;
> +        uint8_t *src2[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
> +        uint8_t *dst2[4]= {aDest, dest, uDest, vDest};
> +        long counter2[4] = {dstW, dstW, chrDstW, chrDstW};
> +        uint8_t **src = src2, **dst = dst2;
> +        long *counter = counter2;
> +
> +        if (!aDest){
> +            src++;
> +            dst++;
> +            counter++;
> +            p--;
> +        }

hmm
looks like a mess

for()
    if(dest[i])
        do filter

[...]
> @@ -2055,12 +2056,22 @@
>                        int srcSliceH, uint8_t* dst[], int dstStride[])
>  {
>      int plane;
> -    for (plane=0; plane<3; plane++)
> +    for (plane=0; plane<4; plane++)
>      {
> -        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
> -        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
> -        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
> +        int length= (plane==0 || plane==3) ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
> +        int y=      (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
> +        int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
>  
> +        if (!(isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) && plane == 3){
> +            if (isALPHA(c->dstFormat)){
> +                int i;
> +                uint8_t *dstPtr= dst[3] + dstStride[3]*y;
> +                for (i=0; i<height; i++){
> +                    memset(dstPtr, 255, length);
> +                    dstPtr+= dstStride[3];
> +                }
> +            }
> +        } else
>          if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
>          {
>              if (!isGray(c->dstFormat))

the memset fill code is duplicated relative to the gray case

[...]
> Index: ffmpeg/libswscale/swscale_template.c
> ===================================================================
> --- ffmpeg.orig/libswscale/swscale_template.c	2009-03-10 16:37:12.000000000 +0100
> +++ ffmpeg/libswscale/swscale_template.c	2009-03-10 16:38:05.000000000 +0100
> @@ -3218,6 +3218,15 @@
>          }
>      }
>  
> +    if (CONFIG_SWSCALE_ALPHA && (dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf){
> +        int i;
> +        uint8_t *dstPtr= dst[3] + dstStride[3]*lastDstY;
> +        for (i=lastDstY; i<dstY; i++){
> +            memset(dstPtr, 255, dstW);
> +            dstPtr+= dstStride[3];
> +        }
> +    }
> +

another duplicate

[...]

> @@ -55,16 +61,14 @@
>      uint8_t *out[4];
>      int srcStride[4], dstStride[4];
>      int i;
> -    uint64_t ssdY, ssdU, ssdV;
> +    uint64_t ssdY, ssdU, ssdV, ssdA;
> +    int needAlpha = (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && isALPHA(dstFormat));
>      struct SwsContext *srcContext, *dstContext, *outContext;
>      int res;

i dont see what sense CONFIG_SWSCALE_ALPHA checks have in the example code.

>  
>      res = 0;
>      for (i=0; i<4; i++){
>          // avoid stride % bpp != 0
> -        if (srcFormat==PIX_FMT_YUVA420P)
> -            srcStride[i]= refStride[i];
> -        else
>          if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
>              srcStride[i]= srcW*3;
>          else

a patch adding code and another removing it again means both are rejected.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I am the wisest man alive, for I know one thing, and that is that I know
nothing. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090311/69b44db7/attachment.pgp>