[FFmpeg-devel] Extend/optimize RGB to RGB conversions funcs into rgb2rgb.c

Tue Sep 11 00:12:27 CEST 2012

I have tested two versions that I found on the net, but the result is
always the same: slower than the original :(

/**
 * Expand packed 3-byte pixels into 4-byte pixels.
 *
 * For every pixel the three source bytes are copied, in the same order, to
 * the first three destination bytes; the fourth destination byte is set to
 * 0xFF so that every output byte has a defined value.
 *
 * The copy is done byte by byte on purpose: going through a uint32_t pointer
 * on these byte buffers would perform misaligned, aliasing-violating accesses
 * (undefined behaviour) and would leak the next pixel's first byte into the
 * fourth byte of the current one.
 *
 * @param rgb    source buffer, at least 3 * count bytes
 * @param rgba   destination buffer, at least 4 * count bytes
 * @param count  number of pixels to convert; values <= 0 convert nothing
 */
void fast_unpack(const uint8_t* rgb, uint8_t* rgba, const int count) {

    int i;

    /* Nothing to do for an empty (or nonsensical negative) pixel count. */
    if (count <= 0)
        return;

    for (i = 0; i < count; ++i, rgb += 3, rgba += 4) {
        rgba[0] = rgb[0];
        rgba[1] = rgb[1];
        rgba[2] = rgb[2];
        rgba[3] = 0xFF;   /* defined filler instead of stray/unwritten data */
    }
}

/*
 * Pack w 3-byte pixels from `in` into w unsigned int words at `out`.
 *
 * For each pixel the first source byte lands in bits 16..23, the second in
 * bits 8..15 and the third in bits 0..7; bits 24..31 are forced to 0xff.
 * `out` is written through an unsigned int pointer, one word per pixel.
 * A w of zero or less writes nothing.
 */
void RGB8ToBGRX8(int w, const void *in, void *out)
{
    const unsigned char *px = (const unsigned char *) in;
    unsigned int *word = (unsigned int *) out;
    int remaining;

    for (remaining = w; remaining > 0; --remaining) {
        unsigned int hi  = px[0];
        unsigned int mid = px[1];
        unsigned int lo  = px[2];

        *word++ = 0xff000000 | (hi << 16) | (mid << 8) | lo;
        px += 3;
    }
}

The relevant part of my test procedure is here:

	/*
	 * Timing excerpt: run the conversion routine under test NB_TESTS times
	 * between two GetTimestamp() readings. Exactly one candidate call is
	 * active inside the loop; the other candidates are kept commented out
	 * so they can be swapped in for comparison.
	 */
	printf("Test new rgb24to32() func : ");
	t2 = GetTimestamp();
	for( i = 0 ; i < NB_TESTS ; i++)
	{
		// rgb24to32_alpha((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS * 3, 255);
		// rgb24to32_uint32((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS * 3);
		// fast_unpack((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS);
		RGB8ToBGRX8(NB_PIXELS, rgbTab, rgbaTab);
	}
	t3 = GetTimestamp();
	/*
	 * Prints the difference of the two readings with an "ms" label.
	 * NOTE(review): the unit and return type of GetTimestamp() are not
	 * visible here; %d assumes the difference is an int -- confirm.
	 */
	printf( "%d ms \n", t3 - t2);

@+
Yannoo

Selon yann.lepetitcorps at free.fr:

> The problem doesn't seem very difficult to solve, because it is only the red
> and blue components that are systematically swapped
>
> But the gain is really too small (and sometimes negative) :(
>
> => I am beginning to think that only MMX/SSE instructions (or vectorized
> equivalents) can handle this faster ...
>
> ==> I will take a look through my old MMX source code to see if I can find this :)
>     (on the other hand, I'm sure that this can be found on the net ...)
>
>
> @+
> Yannoo
>
> Selon yann.lepetitcorps at free.fr:
>
> > Thanks for your contribution
> >
> > I have tested it and found that the conversion isn't valid :(
> >
> > void rgb24to32_uint32(const uint8_t *src, uint8_t *dst, int src_size )
> > {
> >    int nPixels = src_size / 3;
> >    int pixels4 = nPixels >> 2;
> >    int extra = nPixels % 4;
> >    uint32_t * pDst =  (uint32_t*)dst;
> >    uint32_t * pSrc = (uint32_t*)src;
> >    uint8_t* pBytes;
> >    int i;
> >
> >    for ( i = 0; i < pixels4; ++i)    {
> > #if HAVE_BIGENDIAN
> >        pDst[0] = 0xFF000000 | (pSrc[0] >> 8);
> >        pDst[1] = 0xFF000000 | (pSrc[0] << 16) | (pSrc[1] >> 16);
> >        pDst[2] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[2] >> 24);
> >        pDst[3] = 0xFF000000 | pSrc[2];
> > #else
> >        pDst[0] = 0xFF000000 | pSrc[0];
> >        pDst[1] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[0] >> 24);
> >        pDst[2] = 0xFF000000 | (pSrc[2] << 16) | (pSrc[1] >> 16);
> >        pDst[3] = 0xFF000000 | (pSrc[2] >> 8);
> > #endif
> >        pDst +=4;
> >        pSrc +=3;
> >     }
> >
> >     pBytes = (uint8_t*)pSrc;
> >
> >   for ( i = 0; i < extra; i++)    {
> > #if HAVE_BIGENDIAN
> >        *pDst++ = 0xFF000000 | (pBytes[0] << 16) | (pBytes[1] << 8) |
> > (pBytes[2]);
> > #else
> >        *pDst++ = 0xFF000000 | (pBytes[2] << 16) | (pBytes[1] << 8) |
> > (pBytes[0]);
> > #endif
> >        pBytes += 3;
> >    }
> > }
> >
> >
> > Because it gives this in my test procedure:
> >
> > Test original rgb24to32() func : 477 ms
> > Test new rgb24to32() func : 474 ms
> > R components of entry 0 aren't the sames (51 vs 223) :(
> > B components of entry 0 aren't the sames (223 vs 51) :(
> > R components of entry 1 aren't the sames (46 vs 50) :(
> > B components of entry 1 aren't the sames (50 vs 46) :(
> > R components of entry 2 aren't the sames (205 vs 188) :(
> > B components of entry 2 aren't the sames (188 vs 205) :(
> > R components of entry 3 aren't the sames (146 vs 87) :(
> > B components of entry 3 aren't the sames (87 vs 146) :(
> > R components of entry 4 aren't the sames (109 vs 35) :(
> > B components of entry 4 aren't the sames (35 vs 109) :(
> > R components of entry 5 aren't the sames (229 vs 92) :(
> > B components of entry 5 aren't the sames (92 vs 229) :(
> >
> > => we get a very small gain, but the conversion is wrong :(
> >     (my test procedure exits automatically when it finds more than 10
> > errors)
> >     [but each loop iteration tests 3 components, so this makes 3x4 = 12
> > errors before it exits automatically]
> >
> > My test procedure is outside my FFmpeg git repository, so I am attaching
> > the source code of this test procedure
> >
> >
> > @+
> > Yannoo
> >
> >
> > Selon Don Moir <donmoir at comcast.net>:
> >
> > >
> > > ----- Original Message -----
> > > From: "Don Moir" <donmoir at comcast.net>
> > > To: "FFmpeg development discussions and patches"
> <ffmpeg-devel at ffmpeg.org>
> > > Sent: Monday, September 10, 2012 3:48 PM
> > > Subject: Re: [FFmpeg-devel] Extend/optimize RGB to RGB conversions
> > > funcsintorgb2rgb.c
> > >
> > >
> > > >> void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size )
> > > >> {
> > > >>    int i;
> > > >>    uint8_t *psrc = src;
> > > >>
> > > >>    for ( i = 0 ; i < src_size ; i += 3, psrc +=3, dst +=4 )
> > > >>    {
> > > >> #if HAVE_BIGENDIAN
> > > >>        /* RGB24 (= R,G,B) -> BGR32 (= 255,R,G,B) */
> > > >>        dst[0] = 255;
> > > >>        dst[1] = psrc[0];
> > > >>        dst[2] = psrc[1];
> > > >>        dst[3] = psrc[2];
> > > >> #else
> > > >>        dst[0] = psrc[2];
> > > >>        dst[1] = psrc[1];
> > > >>        dst[2] = psrc[0];
> > > >>        dst[3] = 255;
> > > >> #endif
> > > >>    }
> > > >> }
> > > >
> > > > You might try something like this that does 4 pixels within the loop.
> It
> > > > might be interesting to see if performance is better for this. I do it
> > asm
> > > > and don't do it line by line for my own purposes.
> > > >
> > > > Note: somewhat pseudo code. I do it differently so modified here.
> > > >
> > > > void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size )
> > > > {
> > > >    int nPixels = src_size / 3;
> > > >    int pixels4 = nPixels >> 2;
> > > >    int extra = nPixels % 4;
> > > >    uint32_t* pDst (uint32_t*)dst;
> > > >    uint32_t* pSrc (uint32_t*)src;
> > > >
> > > >    for (int i = 0; i < pixels4; ++i)
> > > >    {
> > > > #if HAVE_BIGENDIAN
> > > >        pDst[0] = 0xFF000000 | (pSrc[0] >> 8);
> > > >        pDst[1] = 0xFF000000 | (pSrc[0] << 16) | (pSrc[1] >> 16);
> > > >        pDst[2] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[2] >> 24);
> > > >        pDst[3] = 0xFF000000 | pSrc[2];
> > > > #else
> > > >        pDst[0] = 0xFF000000 | pSrc[0];
> > > >        pDst[1] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[0] >> 24);
> > > >        pDst[2] = 0xFF000000 | ((pSrc[2] << 16) | (pSrc[1] >> 16);
> > > >        pDst[3] = 0xFF000000 | (pSrc[2] >> 8);
> > > > #endif
> > > >        pDst +=4;
> > > >        pSrc +=3;
> > > >    }
> > > >
> > >
> > > Sorry mistake:
> > >
> > > -    uint8_t* pBytes = (uint8_t*)pDst;
> > > +   uint8_t* pBytes = (uint8_t*)pSrc;
> > >
> > > >    for (int i = 0; i < extra; i++)
> > > >    {
> > > > #if HAVE_BIGENDIAN
> > > >        *pDst++ = 0xFF000000 | (pBytes[0] << 16) | (pBytes[1] << 8) |
> > > > (pBytes[2]);
> > > > #else
> > > >        *pDst++ = 0xFF000000 | (pBytes[2] << 16) | (pBytes[1] << 8) |
> > > > (pBytes[0]);
> > > > #endif
> > > >        pBytes += 3;
> > > >    }
> > > > }
> > > > _______________________________________________
> > > > ffmpeg-devel mailing list
> > > > ffmpeg-devel at ffmpeg.org
> > > > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel at ffmpeg.org
> > > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> >
> >
> >
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>