[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support

Wed May 13 01:59:19 CEST 2009

On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > $subj
> > 
> > Changelog update is missing.
> > 
> > $nits below
> > 
> [...]
> reformatted, splitted some long lines.
> Since this is diff against libswscale, no Changelog entry (but I
> remember about it).

[...]
> +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> +                                  int srcStride[], int srcSliceY,
> +                                  int srcSliceH, uint8_t *dst[],
> +                                  int dstStride[], const int srcLE,
> +                                  const int dstLE) {
> +    int length      = c->srcW,
> +        y           = srcSliceY,
> +        height      = srcSliceH,
> +        stride_s2   = srcStride[0] / 2,
> +        stride_d2   = dstStride[0] / 2,
> +        i;
> +    uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> +
> +    for (i = 0; i < height; i++) {
> +             if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> +        else if (!srcLE &&  dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> +        else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> +        else                       rgb48togray16row_template(s, d, length, 1, 1);
> +        s += stride_s2;
> +        d += stride_d2;
> +    }
> +    return height;
> +}
> +

> +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> +    static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> +                    uint8_t *d[], int dS[]) { \
> +        return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> +    }
> +
> +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)

bloat, a single function does the trick too in this case

[...]
> +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> +                               const int be, const int g6, const int bgr)
> +{

> +    const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;

unreadable

[...]
> +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> +{
> +    long i = 23 - src_size;
> +    uint8_t *d = dst - i;
> +    const uint8_t *s = src - i;
> +#if HAVE_MMX
> +    __asm__ volatile(
> +    "test           %0,   %0            \n\t"
> +    "jns            2f                  \n\t"
> +    PREFETCH"     (%1,    %0)           \n\t"
> +    ASMALIGN(4)
> +    "1:                                 \n\t"
> +    PREFETCH"    32(%1,   %0)           \n\t"
> +    "movq          (%1,   %0), %%mm0    \n\t"
> +    "movq         8(%1,   %0), %%mm1    \n\t"
> +    "movq        16(%1,   %0), %%mm2    \n\t"
> +    "movq        %%mm0, %%mm3           \n\t"
> +    "movq        %%mm1, %%mm4           \n\t"
> +    "movq        %%mm2, %%mm5           \n\t"
> +    "psllw          $8, %%mm0           \n\t"
> +    "psllw          $8, %%mm1           \n\t"
> +    "psllw          $8, %%mm2           \n\t"
> +    "psrlw          $8, %%mm3           \n\t"
> +    "psrlw          $8, %%mm4           \n\t"
> +    "psrlw          $8, %%mm5           \n\t"
> +    "por         %%mm3, %%mm0           \n\t"
> +    "por         %%mm4, %%mm1           \n\t"
> +    "por         %%mm5, %%mm2           \n\t"
> +    "movq        %%mm0,   (%2,  %0)     \n\t"
> +    "movq        %%mm1,  8(%2,  %0)     \n\t"
> +    "movq        %%mm2, 16(%2,  %0)     \n\t"
> +    "add           $24,    %0           \n\t"
> +    "js 1b                              \n\t"
> +    "2:                                 \n\t"
> +    SFENCE"                             \n\t"
> +    EMMS"                               \n\t"
> +    : "+&r"(i)
> +    : "r" (s), "r" (d)
> +    : "memory");
> +#endif
> +    for (; i < 21; i += 4) {
> +        unsigned int x = *(uint32_t*)&s[i];
> +        *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> +    }
> +    if (i < 23) {
> +        d[i]   = s[i+1];
> +        d[i+1] = s[i];
> +    }

is this some duplicate of a gray le <-> be function?

> +}
> +
> +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> +    uint8_t *d = dst;
> +    const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> +    __asm__ volatile(
> +    PREFETCH"     (%1)          \n\t"
> +    "jmp 2f                     \n\t"
> +    ASMALIGN(4)
> +    "1:                         \n\t"
> +    PREFETCH"   32(%1)          \n\t"
> +    "movq         (%1), %%mm0   \n\t"
> +    "movq        8(%1), %%mm1   \n\t"
> +    "movq       16(%1), %%mm2   \n\t"
> +    "movq       24(%1), %%mm3   \n\t"
> +    "movq       32(%1), %%mm4   \n\t"
> +    "movq       40(%1), %%mm5   \n\t"
> +    "psrlw          $8, %%mm0   \n\t"
> +    "psrlw          $8, %%mm1   \n\t"
> +    "psrlw          $8, %%mm2   \n\t"
> +    "psrlw          $8, %%mm3   \n\t"
> +    "psrlw          $8, %%mm4   \n\t"
> +    "psrlw          $8, %%mm5   \n\t"
> +    "packuswb    %%mm1, %%mm0   \n\t"
> +    "packuswb    %%mm3, %%mm2   \n\t"
> +    "packuswb    %%mm5, %%mm4   \n\t"
> +    "movq        %%mm0,   (%0)  \n\t"
> +    "movq        %%mm2,  8(%0)  \n\t"
> +    "movq        %%mm4, 16(%0)  \n\t"
> +    "add           $24,    %0   \n\t"
> +    "add           $48,    %1   \n\t"
> +    "2:                         \n\t"
> +    "cmp            %1,    %2   \n\t"
> +    "ja 1b                      \n\t"
> +    SFENCE"                     \n\t"
> +    EMMS"                       \n\t"
> +    : "+&r"(d), "+&r"(s)
> +    : "r" (end-47)
> +    : "memory");
> +#endif
> +    for (; s < end; s += 2, d++)
> +        *d = *(s+1);
> +}

same?

> +
> +static void rgb48beto24(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> +    uint8_t *d = dst;
> +    const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> +    __asm__ volatile(
> +    PREFETCH"     (%1)          \n\t"
> +    "pcmpeqw     %%mm7, %%mm7   \n\t"
> +    "psrlw          $8, %%mm7   \n\t"
> +    "movq        %%mm7, %%mm6   \n\t"
> +    "jmp 2f                     \n\t"
> +    ASMALIGN(4)
> +    "1:                         \n\t"
> +    PREFETCH"   32(%1)          \n\t"
> +    "movq         (%1), %%mm0   \n\t"
> +    "movq        8(%1), %%mm1   \n\t"
> +    "movq       16(%1), %%mm2   \n\t"
> +    "movq       24(%1), %%mm3   \n\t"
> +    "movq       32(%1), %%mm4   \n\t"
> +    "movq       40(%1), %%mm5   \n\t"
> +    "pand        %%mm6, %%mm0   \n\t"
> +    "pand        %%mm7, %%mm1   \n\t"
> +    "pand        %%mm6, %%mm2   \n\t"
> +    "pand        %%mm7, %%mm3   \n\t"
> +    "pand        %%mm6, %%mm4   \n\t"
> +    "pand        %%mm7, %%mm5   \n\t"
> +    "packuswb    %%mm1, %%mm0   \n\t"
> +    "packuswb    %%mm3, %%mm2   \n\t"
> +    "packuswb    %%mm5, %%mm4   \n\t"
> +    "movq        %%mm0,   (%0)  \n\t"
> +    "movq        %%mm2,  8(%0)  \n\t"
> +    "movq        %%mm4, 16(%0)  \n\t"
> +    "add           $24,    %0   \n\t"
> +    "add           $48,    %1   \n\t"
> +    "2:                         \n\t"
> +    "cmp            %1,    %2   \n\t"
> +    "ja 1b                      \n\t"
> +    SFENCE"                     \n\t"
> +    EMMS"                       \n\t"
> +    : "+&r"(d), "+&r"(s)
> +    : "r" (end-47)
> +    : "memory");
> +#endif
> +    for (; s < end; s += 2, d++)
> +        *d = *s;
> +}
> +

so seems this ...

> +static void rgb24to48(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> +    uint8_t *d = dst;
> +    const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> +    __asm__ volatile(
> +    PREFETCH"     (%1)          \n\t"
> +    "jmp 2f                     \n\t"
> +    ASMALIGN(4)
> +    "1:                         \n\t"
> +    PREFETCH"   32(%1)          \n\t"
> +    "movd         (%1), %%mm0   \n\t"
> +    "movd        4(%1), %%mm1   \n\t"
> +    "movd        8(%1), %%mm2   \n\t"
> +    "movd       12(%1), %%mm3   \n\t"
> +    "movd       16(%1), %%mm4   \n\t"
> +    "movd       20(%1), %%mm5   \n\t"
> +    "punpcklbw   %%mm0, %%mm0   \n\t"
> +    "punpcklbw   %%mm1, %%mm1   \n\t"
> +    "punpcklbw   %%mm2, %%mm2   \n\t"
> +    "punpcklbw   %%mm3, %%mm3   \n\t"
> +    "punpcklbw   %%mm4, %%mm4   \n\t"
> +    "punpcklbw   %%mm5, %%mm5   \n\t"
> +    MOVNTQ"      %%mm0,   (%0)  \n\t"
> +    MOVNTQ"      %%mm1,  8(%0)  \n\t"
> +    MOVNTQ"      %%mm2, 16(%0)  \n\t"
> +    MOVNTQ"      %%mm3, 24(%0)  \n\t"
> +    MOVNTQ"      %%mm4, 32(%0)  \n\t"
> +    MOVNTQ"      %%mm5, 40(%0)  \n\t"
> +    "add           $48,    %0   \n\t"
> +    "add           $24,    %1   \n\t"
> +    "2:                         \n\t"
> +    "cmp            %1,    %2   \n\t"
> +    "ja 1b                      \n\t"
> +    SFENCE"                     \n\t"
> +    EMMS"                       \n\t"
> +   : "+&r"(d), "+&r"(s)
> +   : "r" (end-23)
> +   : "memory");
> +#endif
> +    for (; s < end; s++, d += 2) {
> +        int x = *s;
> +        *(uint16_t*)d = (x << 8) + x;
> +    }
> +}
> +

and this

> +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c)
> +{
> +    const enum PixelFormat srcFormat = c->srcFormat;
> +    const enum PixelFormat dstFormat = c->dstFormat;
> +    const int srcCbe = srcFormat==PIX_FMT_RGB48BE; /* components big-endian */
> +    const int dstCbe = dstFormat==PIX_FMT_RGB48BE;
> +    const int srcBpp = (fmt_depth(srcFormat) + 7) >> 3;
> +    const int dstBpp = (fmt_depth(dstFormat) + 7) >> 3;
> +    const int srcId = (fmt_depth(srcFormat) >> 2) - (srcBpp > 4); /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8, 48:11 */
> +    const int dstId = (fmt_depth(dstFormat) >> 2) - (dstBpp > 4);
> +    SwsRGBFunc conv = NULL;
> +
> +    if (  (isBGR(srcFormat) && isBGR(dstFormat))
> +       || (isRGB(srcFormat) && isRGB(dstFormat))) {
> +        switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> +        case 0x013B: conv = rgb48beto15; break;
> +        case 0x003B: conv = rgb48leto15; break;
> +        case 0x014B: conv = rgb48beto16; break;
> +        case 0x004B: conv = rgb48leto16; break;
> +        case 0x016B: conv = rgb48beto24; break;
> +        case 0x006B: conv = rgb48leto24; break;
> +        case 0x018B: conv = rgb48beto32; break;
> +        case 0x008B: conv = rgb48leto32; break;
> +        case 0x10B3: conv = rgb15to48be; break;
> +        case 0x00B3: conv = rgb15to48le; break;
> +        case 0x10B4: conv = rgb16to48be; break;
> +        case 0x00B4: conv = rgb16to48le; break;
> +        case 0x10B6:
> +        case 0x00B6: conv = rgb24to48; break;
> +        case 0x10B8:
> +        case 0x00B8: conv = rgb32to48; break;
> +        case 0x10BB:
> +        case 0x01BB: conv = rgb48to48; break;
> +        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> +                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> +        }
> +    } else if (  (isBGR(srcFormat) && isRGB(dstFormat))
> +             || (isRGB(srcFormat) && isBGR(dstFormat))) {
> +        switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> +        case 0x013B: conv = rgb48betobgr15; break;
> +        case 0x003B: conv = rgb48letobgr15; break;
> +        case 0x014B: conv = rgb48betobgr16; break;
> +        case 0x004B: conv = rgb48letobgr16; break;
> +        case 0x016B: conv = rgb48betobgr24; break;
> +        case 0x006B: conv = rgb48letobgr24; break;
> +        case 0x018B: conv = rgb48betobgr32; break;
> +        case 0x008B: conv = rgb48letobgr32; break;
> +        case 0x10B3: conv = bgr15torgb48be; break;
> +        case 0x00B3: conv = bgr15torgb48le; break;
> +        case 0x10B4: conv = bgr16torgb48be; break;
> +        case 0x00B4: conv = bgr16torgb48le; break;
> +        case 0x10B6:
> +        case 0x00B6: conv = bgr24torgb48; break;
> +        case 0x10B8:
> +        case 0x00B8: conv = bgr32torgb48; break;
> +        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> +                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> +        }
> +    } else {
> +        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> +               sws_format_name(srcFormat), sws_format_name(dstFormat));
> +    }
> +
> +    return conv;
> +}

code duplication

[...]
> Index: hires.h
> ===================================================================
> --- hires.h	(revision 0)
> +++ hires.h	(revision 0)
> @@ -0,0 +1,32 @@
> +/*
> + * colorspace conversion routines for depths >8 bits
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef SWSCALE_HIRES_H
> +#define SWSCALE_HIRES_H
> +
> +#include "swscale.h"
> +#include "swscale_internal.h"
> +
> +typedef void (*SwsRGBFunc)(const uint8_t *src, uint8_t *dst, long src_size);
> +
> +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c);
> +SwsFunc sws_hires_gray16rgbConv(SwsContext *c);
> +
> +#endif /* SWSCALE_HIRES_H */

I think we dont need a new header for this

> Index: swscale_template.c
> ===================================================================
> --- swscale_template.c	(revision 29274)
> +++ swscale_template.c	(working copy)
> @@ -2130,6 +2130,90 @@
>      }
>  }
>  
> +static inline void RENAME(rgb48beToY)(uint8_t *dst, const uint8_t *src, int width)
> +{
> +    int i;
> +    for (i = 0; i < width; i++) {
> +        int r = src[i*6+0];
> +        int g = src[i*6+2];
> +        int b = src[i*6+4];
> +
> +        dst[i] = ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT);
> +    }
> +}
> +

> +static inline void RENAME(rgb48beToUV)(uint8_t *dstU, uint8_t *dstV,
> +                                       uint8_t *src1, uint8_t *src2, int width)
> +{
> +    int i;
> +    assert(src1==src2);
> +    for (i = 0; i < width; i++) {
> +        int r = src1[12*i + 0];
> +        int g = src1[12*i + 2];
> +        int b = src1[12*i + 4];
> +
> +        dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> +        dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> +    }
> +}
[...]
> +
> +static inline void RENAME(rgb48leToUV)(uint8_t *dstU, uint8_t *dstV,
> +                                       uint8_t *src1, uint8_t *src2, int width)
> +{
> +    int i;
> +    assert(src1==src2);
> +    for (i = 0; i < width; i++) {
> +        int r = src1[12*i + 1];
> +        int g = src1[12*i + 3];
> +        int b = src1[12*i + 5];
> +
> +        dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> +        dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> +    }
> +}

we dont need a seperate function to add +1 to one of its arguments

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Republics decline into democracies and democracies degenerate into
despotisms. -- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090513/c874092c/attachment.pgp>