[FFmpeg-devel] Extend/optimize RGB to RGB conversions funcs into rgb2rgb.c
yann.lepetitcorps at free.fr
yann.lepetitcorps at free.fr
Tue Sep 11 00:12:27 CEST 2012
I have tested two versions that I found on the net, but the result is
always the same: both are slower than the original :(
/*
 * Expand `count` packed 3-byte pixels from `rgb` into 4-byte slots in `rgba`.
 *
 * rgb   : source buffer, at least 3 * count bytes.
 * rgba  : destination buffer; 4 * count - 1 bytes are written.
 * count : number of pixels; zero or negative values are a no-op.
 *
 * The fourth byte of each output slot is NOT a meaningful alpha value:
 * for every pixel except the last it receives the first byte of the next
 * source pixel (the loop copies 4 source bytes per pixel), and for the last
 * pixel it is left untouched. Callers that need an opaque alpha must set it
 * themselves.
 *
 * The copy is done byte by byte instead of through a casted uint32_t
 * pointer: neither buffer is guaranteed to be 4-byte aligned (the source
 * advances by 3 bytes per pixel) and type-punning uint8_t storage through
 * uint32_t violates the effective-type rules.
 */
void fast_unpack(const uint8_t* rgb, uint8_t* rgba, const int count) {
    int i, j;

    /* Nothing to do; this also rejects negative counts, which would
     * otherwise make the loop below run far out of bounds. */
    if (count <= 0)
        return;

    /* First count-1 pixels: copy 4 source bytes per pixel. The 4th byte read
     * belongs to the next pixel, so the read stays inside the source. */
    for (i = 1; i < count; ++i, rgba += 4, rgb += 3) {
        for (j = 0; j < 4; ++j) {
            rgba[j] = rgb[j];
        }
    }

    /* Last pixel: only 3 bytes, so we never read past the end of `rgb`. */
    for (j = 0; j < 3; ++j) {
        rgba[j] = rgb[j];
    }
}
/*
 * Convert `w` packed 3-byte pixels read from `in` into `w` unsigned int
 * values written to `out`.
 *
 * For each pixel the first source byte lands in bits 16..23, the second in
 * bits 8..15 and the third in bits 0..7; bits 24..31 are forced to 0xff.
 * A `w` of zero or less writes nothing.
 */
void RGB8ToBGRX8(int w, const void *in, void *out)
{
    const unsigned char *pixel = (const unsigned char *) in;
    unsigned int *word = (unsigned int *) out;
    int n;

    for (n = 0; n < w; ++n) {
        /* Widen each byte before shifting so the shifts happen on
         * unsigned int, as in a plain load-then-shift sequence. */
        const unsigned int hi  = pixel[0];
        const unsigned int mid = pixel[1];
        const unsigned int lo  = pixel[2];

        *word = 0xff000000 | (hi << 16) | (mid << 8) | lo;

        pixel += 3;
        ++word;
    }
}
The relevant part of my test procedure is here:
/* Timing excerpt: runs the conversion under test NB_TESTS times between two
 * GetTimestamp() calls and prints the difference. */
printf("Test new rgb24to32() func : ");
t2 = GetTimestamp();
for( i = 0 ; i < NB_TESTS ; i++)
{
/* Alternative candidates are kept commented out; only one is active per run. */
// rgb24to32_alpha((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS * 3, 255);
// rgb24to32_uint32((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS * 3);
// fast_unpack((uint8_t *)rgbTab, (uint8_t *)rgbaTab, NB_PIXELS);
RGB8ToBGRX8(NB_PIXELS, rgbTab, rgbaTab);
}
t3 = GetTimestamp();
/* NOTE(review): "%d" assumes t3 - t2 has type int, and the "ms" label assumes
 * GetTimestamp() returns milliseconds; neither is visible here - confirm. */
printf( "%d ms \n", t3 - t2);
@+
Yannoo
Selon yann.lepetitcorps at free.fr:
> The problem doesn't seem very difficult to resolve, because only the red and
> blue components are systematically swapped
>
> But the gain is really too small (and sometimes negative) :(
>
> => I am beginning to think that only MMX/SSE instructions (or vectorized
> equivalents) can do this faster ...
>
> ==> I will take a look at my old MMX source code to see if I can find this :)
> (on the other hand, I'm sure that this can be found on the net ...)
>
>
> @+
> Yannoo
>
> Selon yann.lepetitcorps at free.fr:
>
> > Thanks for your contribution
> >
> > I have tested it and found that the conversion isn't valid :(
> >
> > void rgb24to32_uint32(const uint8_t *src, uint8_t *dst, int src_size )
> > {
> > int nPixels = src_size / 3;
> > int pixels4 = nPixels >> 2;
> > int extra = nPixels % 4;
> > uint32_t * pDst = (uint32_t*)dst;
> > uint32_t * pSrc = (uint32_t*)src;
> > uint8_t* pBytes;
> > int i;
> >
> > for ( i = 0; i < pixels4; ++i) {
> > #if HAVE_BIGENDIAN
> > pDst[0] = 0xFF000000 | (pSrc[0] >> 8);
> > pDst[1] = 0xFF000000 | (pSrc[0] << 16) | (pSrc[1] >> 16);
> > pDst[2] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[2] >> 24);
> > pDst[3] = 0xFF000000 | pSrc[2];
> > #else
> > pDst[0] = 0xFF000000 | pSrc[0];
> > pDst[1] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[0] >> 24);
> > pDst[2] = 0xFF000000 | (pSrc[2] << 16) | (pSrc[1] >> 16);
> > pDst[3] = 0xFF000000 | (pSrc[2] >> 8);
> > #endif
> > pDst +=4;
> > pSrc +=3;
> > }
> >
> > pBytes = (uint8_t*)pSrc;
> >
> > for ( i = 0; i < extra; i++) {
> > #if HAVE_BIGENDIAN
> > *pDst++ = 0xFF000000 | (pBytes[0] << 16) | (pBytes[1] << 8) |
> > (pBytes[2]);
> > #else
> > *pDst++ = 0xFF000000 | (pBytes[2] << 16) | (pBytes[1] << 8) |
> > (pBytes[0]);
> > #endif
> > pBytes += 3;
> > }
> > }
> >
> >
> > Because it gives this in my test procedure:
> >
> > Test original rgb24to32() func : 477 ms
> > Test new rgb24to32() func : 474 ms
> > R components of entry 0 aren't the sames (51 vs 223) :(
> > B components of entry 0 aren't the sames (223 vs 51) :(
> > R components of entry 1 aren't the sames (46 vs 50) :(
> > B components of entry 1 aren't the sames (50 vs 46) :(
> > R components of entry 2 aren't the sames (205 vs 188) :(
> > B components of entry 2 aren't the sames (188 vs 205) :(
> > R components of entry 3 aren't the sames (146 vs 87) :(
> > B components of entry 3 aren't the sames (87 vs 146) :(
> > R components of entry 4 aren't the sames (109 vs 35) :(
> > B components of entry 4 aren't the sames (35 vs 109) :(
> > R components of entry 5 aren't the sames (229 vs 92) :(
> > B components of entry 5 aren't the sames (92 vs 229) :(
> >
> > => we have a very small gain but the conversion is wrong :(
> > (my test procedure automatically exits when it finds more than 10 errors)
> > [but each loop iteration tests 3 components, so this makes 3x4 = 12 errors
> > before it automatically exits]
> >
> > My test procedure is outside my FFmpeg git repository, so I have attached
> > the source code of this test procedure
> >
> >
> > @+
> > Yannoo
> >
> >
> > Selon Don Moir <donmoir at comcast.net>:
> >
> > >
> > > ----- Original Message -----
> > > From: "Don Moir" <donmoir at comcast.net>
> > > To: "FFmpeg development discussions and patches"
> <ffmpeg-devel at ffmpeg.org>
> > > Sent: Monday, September 10, 2012 3:48 PM
> > > Subject: Re: [FFmpeg-devel] Extend/optimize RGB to RGB conversions
> > > funcsintorgb2rgb.c
> > >
> > >
> > > >> void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size )
> > > >> {
> > > >> int i;
> > > >> uint8_t *psrc = src;
> > > >>
> > > >> for ( i = 0 ; i < src_size ; i += 3, psrc +=3, dst +=4 )
> > > >> {
> > > >> #if HAVE_BIGENDIAN
> > > >> /* RGB24 (= R,G,B) -> BGR32 (= 255,R,G,B) */
> > > >> dst[0] = 255;
> > > >> dst[1] = psrc[0];
> > > >> dst[2] = psrc[1];
> > > >> dst[3] = psrc[2];
> > > >> #else
> > > >> dst[0] = psrc[2];
> > > >> dst[1] = psrc[1];
> > > >> dst[2] = psrc[0];
> > > >> dst[3] = 255;
> > > >> #endif
> > > >> }
> > > >> }
> > > >
> > > > You might try something like this that does 4 pixels within the loop.
> > > > It might be interesting to see if performance is better for this. I do
> > > > it in asm and don't do it line by line for my own purposes.
> > > >
> > > > Note: somewhat pseudo code. I do it differently so modified here.
> > > >
> > > > void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size )
> > > > {
> > > > int nPixels = src_size / 3;
> > > > int pixels4 = nPixels >> 2;
> > > > int extra = nPixels % 4;
> > > > uint32_t* pDst (uint32_t*)dst;
> > > > uint32_t* pSrc (uint32_t*)src;
> > > >
> > > > for (int i = 0; i < pixels4; ++i)
> > > > {
> > > > #if HAVE_BIGENDIAN
> > > > pDst[0] = 0xFF000000 | (pSrc[0] >> 8);
> > > > pDst[1] = 0xFF000000 | (pSrc[0] << 16) | (pSrc[1] >> 16);
> > > > pDst[2] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[2] >> 24);
> > > > pDst[3] = 0xFF000000 | pSrc[2];
> > > > #else
> > > > pDst[0] = 0xFF000000 | pSrc[0];
> > > > pDst[1] = 0xFF000000 | (pSrc[1] << 8) | (pSrc[0] >> 24);
> > > > pDst[2] = 0xFF000000 | ((pSrc[2] << 16) | (pSrc[1] >> 16);
> > > > pDst[3] = 0xFF000000 | (pSrc[2] >> 8);
> > > > #endif
> > > > pDst +=4;
> > > > pSrc +=3;
> > > > }
> > > >
> > >
> > > Sorry mistake:
> > >
> > > - uint8_t* pBytes = (uint8_t*)pDst;
> > > + uint8_t* pBytes = (uint8_t*)pSrc;
> > >
> > > > for (int i = 0; i < extra; i++)
> > > > {
> > > > #if HAVE_BIGENDIAN
> > > > *pDst++ = 0xFF000000 | (pBytes[0] << 16) | (pBytes[1] << 8) |
> > > > (pBytes[2]);
> > > > #else
> > > > *pDst++ = 0xFF000000 | (pBytes[2] << 16) | (pBytes[1] << 8) |
> > > > (pBytes[0]);
> > > > #endif
> > > > pBytes += 3;
> > > > }
> > > > }
> > > > _______________________________________________
> > > > ffmpeg-devel mailing list
> > > > ffmpeg-devel at ffmpeg.org
> > > > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel at ffmpeg.org
> > > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> >
> >
> >
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel
mailing list