[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support
Kostya
kostya.shishkov
Fri May 15 07:56:28 CEST 2009
On Wed, May 13, 2009 at 01:59:19AM +0200, Michael Niedermayer wrote:
> On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> > On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > > $subj
> > >
> > > Changelog update is missing.
> > >
> > > $nits below
> > >
> > [...]
> > reformatted, splitted some long lines.
> > Since this is diff against libswscale, no Changelog entry (but I
> > remember about it).
>
> [...]
> > +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> > + int srcStride[], int srcSliceY,
> > + int srcSliceH, uint8_t *dst[],
> > + int dstStride[], const int srcLE,
> > + const int dstLE) {
> > + int length = c->srcW,
> > + y = srcSliceY,
> > + height = srcSliceH,
> > + stride_s2 = srcStride[0] / 2,
> > + stride_d2 = dstStride[0] / 2,
> > + i;
> > + uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> > +
> > + for (i = 0; i < height; i++) {
> > + if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> > + else if (!srcLE && dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> > + else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> > + else rgb48togray16row_template(s, d, length, 1, 1);
> > + s += stride_s2;
> > + d += stride_d2;
> > + }
> > + return height;
> > +}
> > +
>
> > +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> > + static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> > + uint8_t *d[], int dS[]) { \
> > + return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> > + }
> > +
> > +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> > +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> > +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> > +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)
>
> bloat, a single function does the trick too in this case
huh? What do you mean "single function"?
That's the only place where low bits of 16-bit variable are used.
> [...]
> > +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> > + const int be, const int g6, const int bgr)
> > +{
>
> > + const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;
>
> unreadable
split
> [...]
> > +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> > +{
> > + long i = 23 - src_size;
> > + uint8_t *d = dst - i;
> > + const uint8_t *s = src - i;
> > +#if HAVE_MMX
> > + __asm__ volatile(
> > + "test %0, %0 \n\t"
> > + "jns 2f \n\t"
> > + PREFETCH" (%1, %0) \n\t"
> > + ASMALIGN(4)
> > + "1: \n\t"
> > + PREFETCH" 32(%1, %0) \n\t"
> > + "movq (%1, %0), %%mm0 \n\t"
> > + "movq 8(%1, %0), %%mm1 \n\t"
> > + "movq 16(%1, %0), %%mm2 \n\t"
> > + "movq %%mm0, %%mm3 \n\t"
> > + "movq %%mm1, %%mm4 \n\t"
> > + "movq %%mm2, %%mm5 \n\t"
> > + "psllw $8, %%mm0 \n\t"
> > + "psllw $8, %%mm1 \n\t"
> > + "psllw $8, %%mm2 \n\t"
> > + "psrlw $8, %%mm3 \n\t"
> > + "psrlw $8, %%mm4 \n\t"
> > + "psrlw $8, %%mm5 \n\t"
> > + "por %%mm3, %%mm0 \n\t"
> > + "por %%mm4, %%mm1 \n\t"
> > + "por %%mm5, %%mm2 \n\t"
> > + "movq %%mm0, (%2, %0) \n\t"
> > + "movq %%mm1, 8(%2, %0) \n\t"
> > + "movq %%mm2, 16(%2, %0) \n\t"
> > + "add $24, %0 \n\t"
> > + "js 1b \n\t"
> > + "2: \n\t"
> > + SFENCE" \n\t"
> > + EMMS" \n\t"
> > + : "+&r"(i)
> > + : "r" (s), "r" (d)
> > + : "memory");
> > +#endif
> > + for (; i < 21; i += 4) {
> > + unsigned int x = *(uint32_t*)&s[i];
> > + *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> > + }
> > + if (i < 23) {
> > + d[i] = s[i+1];
> > + d[i+1] = s[i];
> > + }
>
> is this some duplicate of a gray le <-> be function?
Haven't found that byteswapping function but should be, yes.
> > +}
> > +
> > +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > + uint8_t *d = dst;
> > + const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > + __asm__ volatile(
> > + PREFETCH" (%1) \n\t"
> > + "jmp 2f \n\t"
> > + ASMALIGN(4)
> > + "1: \n\t"
> > + PREFETCH" 32(%1) \n\t"
> > + "movq (%1), %%mm0 \n\t"
> > + "movq 8(%1), %%mm1 \n\t"
> > + "movq 16(%1), %%mm2 \n\t"
> > + "movq 24(%1), %%mm3 \n\t"
> > + "movq 32(%1), %%mm4 \n\t"
> > + "movq 40(%1), %%mm5 \n\t"
> > + "psrlw $8, %%mm0 \n\t"
> > + "psrlw $8, %%mm1 \n\t"
> > + "psrlw $8, %%mm2 \n\t"
> > + "psrlw $8, %%mm3 \n\t"
> > + "psrlw $8, %%mm4 \n\t"
> > + "psrlw $8, %%mm5 \n\t"
> > + "packuswb %%mm1, %%mm0 \n\t"
> > + "packuswb %%mm3, %%mm2 \n\t"
> > + "packuswb %%mm5, %%mm4 \n\t"
> > + "movq %%mm0, (%0) \n\t"
> > + "movq %%mm2, 8(%0) \n\t"
> > + "movq %%mm4, 16(%0) \n\t"
> > + "add $24, %0 \n\t"
> > + "add $48, %1 \n\t"
> > + "2: \n\t"
> > + "cmp %1, %2 \n\t"
> > + "ja 1b \n\t"
> > + SFENCE" \n\t"
> > + EMMS" \n\t"
> > + : "+&r"(d), "+&r"(s)
> > + : "r" (end-47)
> > + : "memory");
> > +#endif
> > + for (; s < end; s += 2, d++)
> > + *d = *(s+1);
> > +}
>
> same?
No. That was byte-swapping, this is byte-shaving.
> > +
> > +static void rgb48beto24(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > + uint8_t *d = dst;
> > + const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > + __asm__ volatile(
> > + PREFETCH" (%1) \n\t"
> > + "pcmpeqw %%mm7, %%mm7 \n\t"
> > + "psrlw $8, %%mm7 \n\t"
> > + "movq %%mm7, %%mm6 \n\t"
> > + "jmp 2f \n\t"
> > + ASMALIGN(4)
> > + "1: \n\t"
> > + PREFETCH" 32(%1) \n\t"
> > + "movq (%1), %%mm0 \n\t"
> > + "movq 8(%1), %%mm1 \n\t"
> > + "movq 16(%1), %%mm2 \n\t"
> > + "movq 24(%1), %%mm3 \n\t"
> > + "movq 32(%1), %%mm4 \n\t"
> > + "movq 40(%1), %%mm5 \n\t"
> > + "pand %%mm6, %%mm0 \n\t"
> > + "pand %%mm7, %%mm1 \n\t"
> > + "pand %%mm6, %%mm2 \n\t"
> > + "pand %%mm7, %%mm3 \n\t"
> > + "pand %%mm6, %%mm4 \n\t"
> > + "pand %%mm7, %%mm5 \n\t"
> > + "packuswb %%mm1, %%mm0 \n\t"
> > + "packuswb %%mm3, %%mm2 \n\t"
> > + "packuswb %%mm5, %%mm4 \n\t"
> > + "movq %%mm0, (%0) \n\t"
> > + "movq %%mm2, 8(%0) \n\t"
> > + "movq %%mm4, 16(%0) \n\t"
> > + "add $24, %0 \n\t"
> > + "add $48, %1 \n\t"
> > + "2: \n\t"
> > + "cmp %1, %2 \n\t"
> > + "ja 1b \n\t"
> > + SFENCE" \n\t"
> > + EMMS" \n\t"
> > + : "+&r"(d), "+&r"(s)
> > + : "r" (end-47)
> > + : "memory");
> > +#endif
> > + for (; s < end; s += 2, d++)
> > + *d = *s;
> > +}
> > +
>
> so seems this ...
same as the previous one
> > +static void rgb24to48(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > + uint8_t *d = dst;
> > + const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > + __asm__ volatile(
> > + PREFETCH" (%1) \n\t"
> > + "jmp 2f \n\t"
> > + ASMALIGN(4)
> > + "1: \n\t"
> > + PREFETCH" 32(%1) \n\t"
> > + "movd (%1), %%mm0 \n\t"
> > + "movd 4(%1), %%mm1 \n\t"
> > + "movd 8(%1), %%mm2 \n\t"
> > + "movd 12(%1), %%mm3 \n\t"
> > + "movd 16(%1), %%mm4 \n\t"
> > + "movd 20(%1), %%mm5 \n\t"
> > + "punpcklbw %%mm0, %%mm0 \n\t"
> > + "punpcklbw %%mm1, %%mm1 \n\t"
> > + "punpcklbw %%mm2, %%mm2 \n\t"
> > + "punpcklbw %%mm3, %%mm3 \n\t"
> > + "punpcklbw %%mm4, %%mm4 \n\t"
> > + "punpcklbw %%mm5, %%mm5 \n\t"
> > + MOVNTQ" %%mm0, (%0) \n\t"
> > + MOVNTQ" %%mm1, 8(%0) \n\t"
> > + MOVNTQ" %%mm2, 16(%0) \n\t"
> > + MOVNTQ" %%mm3, 24(%0) \n\t"
> > + MOVNTQ" %%mm4, 32(%0) \n\t"
> > + MOVNTQ" %%mm5, 40(%0) \n\t"
> > + "add $48, %0 \n\t"
> > + "add $24, %1 \n\t"
> > + "2: \n\t"
> > + "cmp %1, %2 \n\t"
> > + "ja 1b \n\t"
> > + SFENCE" \n\t"
> > + EMMS" \n\t"
> > + : "+&r"(d), "+&r"(s)
> > + : "r" (end-23)
> > + : "memory");
> > +#endif
> > + for (; s < end; s++, d += 2) {
> > + int x = *s;
> > + *(uint16_t*)d = (x << 8) + x;
> > + }
> > +}
> > +
>
> and this
>
>
> > +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c)
> > +{
> > + const enum PixelFormat srcFormat = c->srcFormat;
> > + const enum PixelFormat dstFormat = c->dstFormat;
> > + const int srcCbe = srcFormat==PIX_FMT_RGB48BE; /* components big-endian */
> > + const int dstCbe = dstFormat==PIX_FMT_RGB48BE;
> > + const int srcBpp = (fmt_depth(srcFormat) + 7) >> 3;
> > + const int dstBpp = (fmt_depth(dstFormat) + 7) >> 3;
> > + const int srcId = (fmt_depth(srcFormat) >> 2) - (srcBpp > 4); /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8, 48:11 */
> > + const int dstId = (fmt_depth(dstFormat) >> 2) - (dstBpp > 4);
> > + SwsRGBFunc conv = NULL;
> > +
> > + if ( (isBGR(srcFormat) && isBGR(dstFormat))
> > + || (isRGB(srcFormat) && isRGB(dstFormat))) {
> > + switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> > + case 0x013B: conv = rgb48beto15; break;
> > + case 0x003B: conv = rgb48leto15; break;
> > + case 0x014B: conv = rgb48beto16; break;
> > + case 0x004B: conv = rgb48leto16; break;
> > + case 0x016B: conv = rgb48beto24; break;
> > + case 0x006B: conv = rgb48leto24; break;
> > + case 0x018B: conv = rgb48beto32; break;
> > + case 0x008B: conv = rgb48leto32; break;
> > + case 0x10B3: conv = rgb15to48be; break;
> > + case 0x00B3: conv = rgb15to48le; break;
> > + case 0x10B4: conv = rgb16to48be; break;
> > + case 0x00B4: conv = rgb16to48le; break;
> > + case 0x10B6:
> > + case 0x00B6: conv = rgb24to48; break;
> > + case 0x10B8:
> > + case 0x00B8: conv = rgb32to48; break;
> > + case 0x10BB:
> > + case 0x01BB: conv = rgb48to48; break;
> > + default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > + sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> > + }
> > + } else if ( (isBGR(srcFormat) && isRGB(dstFormat))
> > + || (isRGB(srcFormat) && isBGR(dstFormat))) {
> > + switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> > + case 0x013B: conv = rgb48betobgr15; break;
> > + case 0x003B: conv = rgb48letobgr15; break;
> > + case 0x014B: conv = rgb48betobgr16; break;
> > + case 0x004B: conv = rgb48letobgr16; break;
> > + case 0x016B: conv = rgb48betobgr24; break;
> > + case 0x006B: conv = rgb48letobgr24; break;
> > + case 0x018B: conv = rgb48betobgr32; break;
> > + case 0x008B: conv = rgb48letobgr32; break;
> > + case 0x10B3: conv = bgr15torgb48be; break;
> > + case 0x00B3: conv = bgr15torgb48le; break;
> > + case 0x10B4: conv = bgr16torgb48be; break;
> > + case 0x00B4: conv = bgr16torgb48le; break;
> > + case 0x10B6:
> > + case 0x00B6: conv = bgr24torgb48; break;
> > + case 0x10B8:
> > + case 0x00B8: conv = bgr32torgb48; break;
> > + default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > + sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> > + }
> > + } else {
> > + av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > + sws_format_name(srcFormat), sws_format_name(dstFormat));
> > + }
> > +
> > + return conv;
> > +}
>
> code duplication
hmm?
> [...]
> > Index: hires.h
> > ===================================================================
> > --- hires.h (revision 0)
> > +++ hires.h (revision 0)
> > @@ -0,0 +1,32 @@
> > +/*
> > + * colorspace conversion routines for depths >8 bits
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#ifndef SWSCALE_HIRES_H
> > +#define SWSCALE_HIRES_H
> > +
> > +#include "swscale.h"
> > +#include "swscale_internal.h"
> > +
> > +typedef void (*SwsRGBFunc)(const uint8_t *src, uint8_t *dst, long src_size);
> > +
> > +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c);
> > +SwsFunc sws_hires_gray16rgbConv(SwsContext *c);
> > +
> > +#endif /* SWSCALE_HIRES_H */
>
> I think we dont need a new header for this
>
>
> > Index: swscale_template.c
> > ===================================================================
> > --- swscale_template.c (revision 29274)
> > +++ swscale_template.c (working copy)
> > @@ -2130,6 +2130,90 @@
> > }
> > }
> >
> > +static inline void RENAME(rgb48beToY)(uint8_t *dst, const uint8_t *src, int width)
> > +{
> > + int i;
> > + for (i = 0; i < width; i++) {
> > + int r = src[i*6+0];
> > + int g = src[i*6+2];
> > + int b = src[i*6+4];
> > +
> > + dst[i] = ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT);
> > + }
> > +}
> > +
>
> > +static inline void RENAME(rgb48beToUV)(uint8_t *dstU, uint8_t *dstV,
> > + uint8_t *src1, uint8_t *src2, int width)
> > +{
> > + int i;
> > + assert(src1==src2);
> > + for (i = 0; i < width; i++) {
> > + int r = src1[12*i + 0];
> > + int g = src1[12*i + 2];
> > + int b = src1[12*i + 4];
> > +
> > + dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> > + dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> > + }
> > +}
> [...]
> > +
> > +static inline void RENAME(rgb48leToUV)(uint8_t *dstU, uint8_t *dstV,
> > + uint8_t *src1, uint8_t *src2, int width)
> > +{
> > + int i;
> > + assert(src1==src2);
> > + for (i = 0; i < width; i++) {
> > + int r = src1[12*i + 1];
> > + int g = src1[12*i + 3];
> > + int b = src1[12*i + 5];
> > +
> > + dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> > + dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> > + }
> > +}
>
> we dont need a seperate function to add +1 to one of its arguments
got rid of that
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 48bpp.patch
Type: text/x-diff
Size: 19156 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090515/d3e3a825/attachment.patch>
More information about the ffmpeg-devel
mailing list