[FFmpeg-devel] [PATCH] yuv8->yuv16 vertical scaler.

Thu Aug 13 18:10:10 CEST 2009

On Thu, Aug 13, 2009 at 11:22:08AM -0300, Ramiro Polla wrote:
> On Wed, Aug 12, 2009 at 10:43 PM, Michael Niedermayer<michaelni at gmx.at> wrote:
> > On Wed, Aug 12, 2009 at 09:38:38PM -0300, Ramiro Polla wrote:
> >> $subj
> >
> >>  swscale.c          |   66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  swscale_template.c |   20 ++++++++++++++++
> >>  2 files changed, 86 insertions(+)
> >> e97c6ae96069d3e1b576d547dd8674b278b94efd  0002-yuv8-yuv16-vertical-scaler.patch
> >> From 7bb251a34a9121dcdbb522b8446a3cd6aec4a2b3 Mon Sep 17 00:00:00 2001
> >> From: Ramiro Polla <ramiro.polla at gmail.com>
> >> Date: Wed, 12 Aug 2009 20:10:05 -0300
> >> Subject: [PATCH] yuv8->yuv16 vertical scaler.
> >>
> >> ---
> >>  swscale.c          |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  swscale_template.c |   20 +++++++++++++++
> >>  2 files changed, 86 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/swscale.c b/swscale.c
> >> index 6e0535c..1880bbf 100644
> >> --- a/swscale.c
> >> +++ b/swscale.c
> >> @@ -474,6 +474,72 @@ const char *sws_format_name(enum PixelFormat format)
> >>      }
> >>  }
> >>
> >> +static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> >> +                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> >> +                                 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
> >> +                                 enum PixelFormat dstFormat)
> >> +{
> >> +    //FIXME Optimize (just quickly written not optimized..)
> >> +    int i;
> >> +    for (i=0; i<dstW; i++)
> >> +    {
> >> +        int val=1<<18;
> >> +        int j;
> >> +        for (j=0; j<lumFilterSize; j++)
> >> +            val += lumSrc[j][i] * lumFilter[j];
> >> +
> >> +        if (isBE(dstFormat)) {
> >> +            dest[2*i+0]= av_clip_uint8(val>>19);
> >> +            dest[2*i+1]= 0;
> >> +        } else {
> >> +            dest[2*i+0]= 0;
> >> +            dest[2*i+1]= av_clip_uint8(val>>19);
> >> +        }
> >
> > it's
> >
> > dest[2*i+0]=
> > dest[2*i+1]= av_clip_uint8(val>>19);
> >
> > white-> white 0xFF -> 0xFFFF
> >
> > but you should not throw bits away, thus it really should be writing 16 bits
> 
> Hmm, that felt stupid.
> 
> > also this touches on the "how to handle non native endian formats" issue,
> > checking in the inner loop instead of a separate bswap loop may or may not
> > be wise ...
> 
> Attached patch templates the code for BE and LE. It needs another
> patch for common.h which is also attached.
> 
> Ramiro Polla

>  common.h |   11 +++++++++++
>  1 file changed, 11 insertions(+)
> 1aecd7fd4cd19aabe79e0603cac34b09cb8fd286  av_clip_uint16.diff
> diff --git a/libavutil/common.h b/libavutil/common.h
> index 47666ab..9fac1c0 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h

ok

[...]
>  swscale.c          |  101 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  swscale_template.c |   20 ++++++++++
>  2 files changed, 121 insertions(+)
> b74deb46c8fb7701b24a0624654e5acae91280db  yuv8_yuv16.diff
> diff --git a/swscale.c b/swscale.c
> index 2adf393..0576dde 100644
> --- a/swscale.c
> +++ b/swscale.c
> @@ -74,6 +74,7 @@ untested special converters
>  #include "swscale.h"
>  #include "swscale_internal.h"
>  #include "rgb2rgb.h"
> +#include "libavutil/intreadwrite.h"
>  #include "libavutil/x86_cpu.h"
>  #include "libavutil/bswap.h"
>  
> @@ -474,6 +475,106 @@ const char *sws_format_name(enum PixelFormat format)
>      }
>  }
>  
> +static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> +                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> +                                 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
> +                                 int big_endian)
> +{
> +    //FIXME Optimize (just quickly written not optimized..)
> +    int i;
> +    for (i=0; i<dstW; i++)
> +    {
> +        int val=1<<10;
> +        int j;
> +        for (j=0; j<lumFilterSize; j++)
> +            val += lumSrc[j][i] * lumFilter[j];
> +
> +        if (big_endian) {
> +            AV_WB16(&dest[i], av_clip_uint16(val>>11));
> +        } else {
> +            AV_WL16(&dest[i], av_clip_uint16(val>>11));
> +        }
> +    }
> +
> +    if (uDest)
> +        for (i=0; i<chrDstW; i++)
> +        {
> +            int u=1<<10;
> +            int v=1<<10;
> +            int j;
> +            for (j=0; j<chrFilterSize; j++)
> +            {
> +                u += chrSrc[j][i] * chrFilter[j];
> +                v += chrSrc[j][i + VOFW] * chrFilter[j];
> +            }
> +
> +            if (big_endian) {
> +                AV_WB16(&uDest[i], av_clip_uint16(u>>11));
> +                AV_WB16(&vDest[i], av_clip_uint16(v>>11));
> +            } else {
> +                AV_WL16(&uDest[i], av_clip_uint16(u>>11));
> +                AV_WL16(&vDest[i], av_clip_uint16(v>>11));
> +            }
> +        }
> +
> +    if (CONFIG_SWSCALE_ALPHA && aDest)
> +        for (i=0; i<dstW; i++){
> +            int val=1<<10;
> +            int j;
> +            for (j=0; j<lumFilterSize; j++)
> +                val += alpSrc[j][i] * lumFilter[j];
> +
> +            if (big_endian) {
> +                AV_WB16(&aDest[i], av_clip_uint16(val>>11));
> +            } else {
> +                AV_WL16(&aDest[i], av_clip_uint16(val>>11));
> +            }
> +        }
> +
> +}
> +
> +static inline void yuv2yuvX16BEinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> +                                   const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> +                                   const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW)
> +{
> +        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
> +                               chrFilter, chrSrc, chrFilterSize,
> +                               alpSrc,
> +                               dest, uDest, vDest, aDest,
> +                               dstW, chrDstW, 1);
> +}
> +
> +static inline void yuv2yuvX16LEinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> +                                   const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> +                                   const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW)
> +{
> +        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
> +                               chrFilter, chrSrc, chrFilterSize,
> +                               alpSrc,
> +                               dest, uDest, vDest, aDest,
> +                               dstW, chrDstW, 0);
> +}
> +
> +static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> +                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> +                                 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
> +                                 enum PixelFormat dstFormat)
> +{
> +    if (isBE(dstFormat))
> +        yuv2yuvX16BEinC(lumFilter, lumSrc, lumFilterSize,
> +                        chrFilter, chrSrc, chrFilterSize,
> +                        alpSrc,
> +                        dest, uDest, vDest, aDest,
> +                        dstW, chrDstW);
> +    else
> +        yuv2yuvX16LEinC(lumFilter, lumSrc, lumFilterSize,
> +                        chrFilter, chrSrc, chrFilterSize,
> +                        alpSrc,
> +                        dest, uDest, vDest, aDest,
> +                        dstW, chrDstW);
> +
> +}

Isn't there one layer (yuv2yuvX16BEinC/yuv2yuvX16LEinC) too many?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The greatest way to live with honor in this world is to be what we pretend
to be. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090813/36b7c92c/attachment.pgp>