[FFmpeg-devel] [PATCH] Electronic Arts TGQ decoder

Sat Sep 27 03:23:18 CEST 2008

On Sat, Sep 27, 2008 at 10:12:34AM +1000, Peter Ross wrote:
> Patches enclosed.
> 
> Info: http://wiki.multimedia.cx/index.php?title=Electronic_Arts_TGQ
> Samples: http://samples.mplayerhq.hu/game-formats/ea-tgq-uv/

[...]
> +const uint8_t ea_zigzag_scan[64]={
> +   0,  8,  1,  2,  9, 16, 24, 17,
> +  10,  3,  4, 11, 18, 25, 32, 40,
> +  33, 26, 19, 12,  5,  6, 13, 20,
> +  27, 34, 41, 48, 56, 49, 42, 35,
> +  28, 21, 14,  7, 15, 22, 29, 36,
> +  43, 50, 57, 58, 51, 44, 37, 30,
> +  23, 31, 38, 45, 52, 59, 60, 53,
> +  46, 39, 47, 54, 61, 62, 55, 63,
> +};

Private non-static symbols should have ff_ prefixes.

> +
> +const int ea_base_qtable[64]={
> +   8192,   5906,   6270,   6967,   8192,  10426,  15137,  29692,
> +   5906,   4258,   4520,   5023,   5906,   7517,  10913,  21407,
> +   6270,   4520,   4799,   5332,   6270,   7980,  11585,  22725,
> +   6967,   5023,   5332,   5925,   6967,   8867,  12873,  25251,
> +   8192,   5906,   6270,   6967,   8192,  10426,  15137,  29692,
> +  10426,   7517,   7980,   8867,  10426,  13270,  19266,  37791,
> +  15137,  10913,  11585,  12873,  15137,  19266,  27969,  54864,
> +  29692,  21407,  22725,  25251,  29692,  37791,  54864, 107619,
> +};

duplicate of inv_aanscales

> +
> +const uint8_t tqi_coeff_vlc_tab[113][2]={
> +  {0x02, 2},//EOB
> +  {0x01, 6},//escape
> +  {0x03, 2},
> +  {0x03, 3},
> +  {0x04, 4},{0x05, 4},
> +  {0x05, 5},{0x06, 5},{0x07, 5},
> +  {0x04, 6},{0x05, 6},{0x06, 6},{0x07, 6},
> +  {0x04, 7},{0x05, 7},{0x06, 7},{0x07, 7},
> +  {0x20, 8},{0x21, 8},{0x22, 8},{0x23, 8},{0x24, 8},{0x25, 8},{0x26, 8},{0x27, 8},
> +  {0x08,10},{0x09,10},{0x0A,10},{0x0B,10},{0x0C,10},{0x0D,10},{0x0E,10},{0x0F,10},
> +  {0x10,12},{0x11,12},{0x12,12},{0x13,12},{0x14,12},{0x15,12},{0x16,12},{0x17,12},
> +  {0x18,12},{0x19,12},{0x1A,12},{0x1B,12},{0x1C,12},{0x1D,12},{0x1E,12},{0x1F,12},
> +  {0x10,13},{0x11,13},{0x12,13},{0x13,13},{0x14,13},{0x15,13},{0x16,13},{0x17,13},
> +  {0x18,13},{0x19,13},{0x1A,13},{0x1B,13},{0x1C,13},{0x1D,13},{0x1E,13},{0x1F,13},
> +  {0x10,14},{0x11,14},{0x12,14},{0x13,14},{0x14,14},{0x15,14},{0x16,14},{0x17,14},
> +  {0x18,14},{0x19,14},{0x1A,14},{0x1B,14},{0x1C,14},{0x1D,14},{0x1E,14},{0x1F,14},
> +  {0x10,15},{0x11,15},{0x12,15},{0x13,15},{0x14,15},{0x15,15},{0x16,15},{0x17,15},
> +  {0x18,15},{0x19,15},{0x1A,15},{0x1B,15},{0x1C,15},{0x1D,15},{0x1E,15},{0x1F,15},
> +  {0x10,16},{0x11,16},{0x12,16},{0x13,16},{0x14,16},{0x15,16},{0x16,16},{0x17,16},
> +  {0x18,16},{0x19,16},{0x1A,16},{0x1B,16},{0x1C,16},{0x1D,16},{0x1E,16},{0x1F,16},
> +};

This looks like a duplicate of mpeg1_vlc[].
The other tables and related code are possibly duplicates of mpeg1 as well.

[...]

> +/** Electronic Arts TGQ/TQI/MAD IDCT algorithm */
> +
> +#define A4 1.3065630f
> +#define A2 0.5411961f
> +#define A5 0.3826834f
> +
> +#if 0
> +/* not portable, but retained for bit-for-bit compatibility on x86 */
> +#define DIV_SQRT2(x)  ((((int64_t)(x)*0x5a82799aL)>>32)<<1)
> +static int EA_FTOL(double d) {
> +  int result;
> +  d += 6755399441055744.0f;
> +  memcpy(&result, &d, 4);
> +  return result;
> +}
> +#else
> +#define DIV_SQRT2(x)  ((int)((x)/1.41421356237309514547))
> +#define EA_FTOL(x)    floor(x)
> +#endif
> +
> +#define IDCT_TRANSFORM(dest,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) {\
> +    const int src7add1 = (src)[7] + (src)[1]; \
> +    const int src3add5 = (src)[3] + (src)[5]; \
> +    const int value1 = DIV_SQRT2(src7add1 - src3add5); \
> +    const int value2 = DIV_SQRT2((src)[2] - (src)[6]); \
> +    const int src1sub7 = (src)[1] - (src)[7]; \
> +    const int src5sub3 = (src)[5] - (src)[3]; \
> +    const int result0 = EA_FTOL( src5sub3*A2 + (src5sub3+src1sub7)*A5 ); \
> +    const int result2 = EA_FTOL( src1sub7*A4 - (src5sub3+src1sub7)*A5 ); \
> +    const int b0 = result2 + src3add5 + src7add1; \
> +    const int b1 = result2 + value1; \
> +    const int b2 = result0 + value1; \
> +    const int b3 = result0; \
> +    const int src0add4 = (src)[0] + (src)[4]; \
> +    const int src0sub4 = (src)[0] - (src)[4]; \
> +    const int src26value2 = (src)[2] + (src)[6] + value2; \
> +    const int a0 = src0add4 + src26value2; \
> +    const int a1 = src0sub4 + value2; \
> +    const int a2 = src0sub4 - value2; \
> +    const int a3 = src0add4 - src26value2; \
> +    (dest)[d0] = munge(a0 + b0); \
> +    (dest)[d1] = munge(a1 + b1); \
> +    (dest)[d2] = munge(a2 + b2); \
> +    (dest)[d3] = munge(a3 + b3); \
> +    (dest)[d4] = munge(a3 - b3); \
> +    (dest)[d5] = munge(a2 - b2); \
> +    (dest)[d6] = munge(a1 - b1); \
> +    (dest)[d7] = munge(a0 - b0); \
> +}
> +/* end IDCT_TRANSFORM macro */
> +
> +#define MUNGE_NONE(x) (x)
> +#define IDCT_ROW(dest,src)  IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,MUNGE_NONE,src)
> +
> +#define MUNGE_16(x)   av_clip_uint8((x)>>16)
> +#define IDCT_COL(dest,src)  IDCT_TRANSFORM(dest,0,1, 2, 3, 4, 5, 6, 7,MUNGE_16,src)
> +
> +static inline void idct_row(int *dest, const int *src) {
> +    if ((src[1]|src[2]|src[3]|src[4]|src[5]|src[6]|src[7])==0) {
> +        dest[0] =
> +        dest[8] =
> +        dest[16] =
> +        dest[24] =
> +        dest[32] =
> +        dest[40] =
> +        dest[48] =
> +        dest[56] = src[0];
> +    }else{
> +        IDCT_ROW(dest, src);
> +    }
> +}

If you want to add a new IDCT, that should be done cleanly through dsputil.

> +
> +void ea_idct16_put(uint8_t *dst, int linesize, const int *block) {
> +    int i;
> +    int temp[64];
> +    for (i=0; i<8; i++)
> +        idct_row(&temp[i], &block[i*8]);
> +    for (i=0; i<8; i++)
> +        IDCT_COL( (&dst[i*linesize]), (&temp[8*i]) );
> +}

unused

[...]
> +static void tgq_decode_block(TgqContext *s, int block[64], GetBitContext *gb){
> +    int i,j,value;

> +    block[0] = (get_sbits(gb,8)) * s->qtable[0];

superfluous ()

> +    for(i=1; i<64; ) {
> +        switch(show_bits(gb,3)) {
> +        case 0:
> +            skip_bits(gb,3);
> +            block[ea_zigzag_scan[i++]] = 0;
> +            break;
> +        case 5:  /* see case 1 for skip bits */
> +            block[ea_zigzag_scan[i++]] = 0;
> +        case 1:
> +            skip_bits(gb,3);
> +            value = 2*get_bits(gb,5);
> +            for(j=0; j<value; j++)
> +                block[ea_zigzag_scan[i++]] = 0;
> +            break;
> +        case 2:
> +            skip_bits(gb,3);
> +            block[ea_zigzag_scan[i]] = s->qtable[ea_zigzag_scan[i]];
> +            i++;
> +            break;
> +        case 3: // 011b
> +        case 7: // 111b

> +            skip_bits(gb,2);
> +            if (show_bits(gb,6)==0x3F) {
> +              skip_bits(gb, 6);
> +              block[ea_zigzag_scan[i]] = get_sbits(gb,8) * s->qtable[ea_zigzag_scan[i]];
> +            }else{
> +              block[ea_zigzag_scan[i]] = get_sbits(gb,6) * s->qtable[ea_zigzag_scan[i]];
> +            }

indentation is inconsistent

> +            i++;
> +            break;

> +        case 4:
> +            skip_bits(gb,3);
> +            block[ea_zigzag_scan[i++]] = 0;
> +            block[ea_zigzag_scan[i++]] = 0;
> +            break;

This case can be merged with case 0.
I also think it then shows an interesting symmetry with case 1/5 that
likely allows more simplifications.

> +        case 6:
> +            skip_bits(gb,3);
> +            block[ea_zigzag_scan[i]] = -(s->qtable[ea_zigzag_scan[i]]);
> +            i++;
> +            break;
> +        }
> +    }
> +}
> +
> +static void tgq_idct_put_mb(TgqContext *s, int (*block)[64], int mb_x, int mb_y){
> +    int linesize= s->frame.linesize[0];
> +    uint8_t *dest_y  = s->frame.data[0] + (mb_y * 16* linesize            ) + mb_x * 16;
> +    uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8;
> +    uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8;
> +
> +    ea_idct16_put_128(dest_y                 , linesize, block[0]);
> +    ea_idct16_put_128(dest_y              + 8, linesize, block[1]);
> +    ea_idct16_put_128(dest_y + 8*linesize    , linesize, block[2]);
> +    ea_idct16_put_128(dest_y + 8*linesize + 8, linesize, block[3]);
> +    if(!(s->avctx->flags&CODEC_FLAG_GRAY)){
> +         ea_idct16_put_128(dest_cb, s->frame.linesize[1], block[4]);
> +         ea_idct16_put_128(dest_cr, s->frame.linesize[2], block[5]);
> +    }
> +}
> +
> +static inline void tgq_dconly(TgqContext *s, unsigned char *dst, int dst_stride, int dc){
> +    int j;
> +    for(j=0;j<8;j++)
> +        memset(dst+j*dst_stride, dc, 8);
> +}
> +
> +static inline void tgq_dconly_block(TgqContext *s, int mb_x, int mb_y, int i, int dc_level){
> +    int linesize= s->frame.linesize[0];
> +    uint8_t *dest_y  = s->frame.data[0] + (mb_y * 16* linesize            ) + mb_x * 16;
> +    uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8;
> +    uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8;
> +    int dc = av_clip_uint8(128 + ((dc_level*s->qtable[0]) >> 16));
> +
> +    switch(i) {
> +    case 0: tgq_dconly(s,dest_y                 , linesize, dc); break;
> +    case 1: tgq_dconly(s,dest_y              + 8, linesize, dc); break;
> +    case 2: tgq_dconly(s,dest_y + 8*linesize    , linesize, dc); break;
> +    case 3: tgq_dconly(s,dest_y + 8*linesize + 8, linesize, dc); break;
> +    case 4: if(!(s->avctx->flags&CODEC_FLAG_GRAY))
> +                tgq_dconly(s,dest_cb, s->frame.linesize[1], dc);
> +            break;
> +    case 5: if(!(s->avctx->flags&CODEC_FLAG_GRAY))
> +                tgq_dconly(s,dest_cr, s->frame.linesize[2], dc);
> +            break;
> +    }
> +}
> +
> +static void tgq_decode_mb(TgqContext *s, int mb_y, int mb_x, const int8_t **bs, const int8_t *buf_end){
> +    int mode;
> +    int i; // block counter
> +    int block[6][64];
> +
> +    mode = bytestream_get_byte((const uint8_t**)bs);
> +    if (mode>buf_end-*bs) {
> +        av_log(s->avctx, AV_LOG_ERROR, "truncated macroblock\n");
> +        return;
> +    }
> +    if (mode==3) {
> +        for(i=0; i<4;i++)
> +            tgq_dconly_block(s, mb_x, mb_y, i, (*bs)[0]);
> +        tgq_dconly_block(s, mb_x, mb_y, 4, (*bs)[1]);
> +        tgq_dconly_block(s, mb_x, mb_y, 5, (*bs)[2]);
> +    }else if (mode==6) {
> +        for(i=0; i<6;i++)
> +            tgq_dconly_block(s, mb_x, mb_y, i, (*bs)[i]);
> +    }else if (mode==12) {
> +        for(i=0; i<6;i++)
> +            tgq_dconly_block(s, mb_x, mb_y, i, (*bs)[i*2]);
> +    }else if (mode>12) {
> +        GetBitContext gb;
> +        init_get_bits(&gb, *bs, mode*8);
> +        for(i=0; i<6; i++)
> +            tgq_decode_block(s, block[i], &gb);
> +        tgq_idct_put_mb(s, block, mb_x, mb_y);
> +    }else {
> +        av_log(s->avctx, AV_LOG_ERROR, "unsupported mb mode %i\n", mode);
> +    }
> +    *bs += mode;
> +}

I think it would be better if tgq_dconly_block() were factored
out and replaced by 6 tgq_dconly() calls.
The way it is currently, things would become quite bloated if the compiler
inlined them.

> +
> +static void tgq_calculate_qtable(TgqContext *s, int quant){
> +    int i,j;

> +    const int a =((50*(100-quant))/100 - (22*(100-quant))/100) + 2;
> +    const int b = (22*(100-quant))/100 + 8;

this can be simplified, (22*(100-quant))/100 occurs twice for example.

[...]

> Index: libavcodec/avcodec.h
> ===================================================================
> --- libavcodec/avcodec.h	(revision 15434)
> +++ libavcodec/avcodec.h	(working copy)
> @@ -189,6 +189,7 @@
>      CODEC_ID_CMV,
>      CODEC_ID_MOTIONPIXELS,
>      CODEC_ID_TGV,
> +    CODEC_ID_TGQ,
>  
>      /* various PCM "codecs" */
>      CODEC_ID_PCM_S16LE= 0x10000,

ok

[...]
> Index: libavformat/electronicarts.c
> ===================================================================
> --- libavformat/electronicarts.c	(revision 15434)
> +++ libavformat/electronicarts.c	(working copy)
> @@ -47,6 +47,8 @@
>  #define mTCD_TAG MKTAG('m', 'T', 'C', 'D')    /* MDEC */
>  #define MADk_TAG MKTAG('M', 'A', 'D', 'k')    /* MAD i-frame */
>  #define MPCh_TAG MKTAG('M', 'P', 'C', 'h')    /* MPEG2 */
> +#define TGQs_TAG MKTAG('T', 'G', 'Q', 's')    /* TGQ i-frame (appears in .TGQ files) */
> +#define pQGT_TAG MKTAG('p', 'Q', 'G', 'T')    /* TGQ i-frame (appears in .UV files) */
>  #define MVhd_TAG MKTAG('M', 'V', 'h', 'd')
>  #define MV0K_TAG MKTAG('M', 'V', '0', 'K')
>  #define MV0F_TAG MKTAG('M', 'V', '0', 'F')
> @@ -341,6 +343,11 @@
>                  ea->video_codec = CODEC_ID_MPEG2VIDEO;
>                  break;
>  
> +            case pQGT_TAG:
> +            case TGQs_TAG:
> +                ea->video_codec = CODEC_ID_TGQ;
> +                break;
> +
>              case MVhd_TAG :
>                  err = process_video_header_vp6(s);
>                  break;
> @@ -497,6 +504,8 @@
>  
>          case MVIh_TAG:
>          case kVGT_TAG:
> +        case pQGT_TAG:
> +        case TGQs_TAG:
>              key = PKT_FLAG_KEY;
>          case MVIf_TAG:
>          case fVGT_TAG:

ok

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I am the wisest man alive, for I know one thing, and that is that I know
nothing. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080927/2bcca452/attachment.pgp>