[Ffmpeg-devel] LucasArts SANM (Smush v2) decoder

Wed Jan 17 00:08:47 CET 2007

Hi

On Tue, Jan 16, 2007 at 12:44:54PM -0500, Cyril Zorin wrote:
> >
> >Other than that at a glance I saw that you should fix the license
> >headers, just copy + paste them from one of the other files, e.g. 4xm.c
> >or whatever.  Also make sure you respect alphabetical order in sections
> >that are already sorted.
> >
> 
> Alright, fixed. Here's the new one. Let me know if any other corrections are
> required.

[...]
> Index: libavcodec/avcodec.h
> ===================================================================
> --- libavcodec/avcodec.h	(revision 7545)
> +++ libavcodec/avcodec.h	(working copy)
> @@ -135,6 +135,7 @@
>      CODEC_ID_ZMBV,
>      CODEC_ID_AVS,
>      CODEC_ID_SMACKVIDEO,
> +    CODEC_ID_SANM,
>      CODEC_ID_NUV,

put it at another place so that the values of the existing codecs dont change

[...]
> +#include "common.h"
> +#include "avcodec.h"
> +
> +#define GLYPH_COORD_VECT_SIZE 16
> +static const int glyph4_x[GLYPH_COORD_VECT_SIZE] = { 0, 1, 2, 3, 3, 3, 3, 2, 1, 0, 0, 0, 1, 2, 2, 1 };
> +static const int glyph4_y[GLYPH_COORD_VECT_SIZE] = { 0, 0, 0, 0, 1, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2 };
> +static const int glyph8_x[GLYPH_COORD_VECT_SIZE] = { 0, 2, 5, 7, 7, 7, 7, 7, 7, 5, 2, 0, 0, 0, 0, 0 };
> +static const int glyph8_y[GLYPH_COORD_VECT_SIZE] = { 0, 0, 0, 0, 1, 3, 4, 6, 7, 7, 7, 7, 6, 4, 3, 1 };

these fit in a uint8_t (16*3*4 bytes less space ...)

> +
> +typedef struct point_t
> +{
> +    int x, y;
> +} point_t;

please use a array [2] thingy, this has the tendency to allow code to be
simplified (when the same function is applied to x and y)

> +
> +static const point_t motion_vectors[256] =

should fit in int8_t[256][2] (saves 256*2*3 bytes)

[...]

> +
> +#define NGLYPHS 256
> +static uint16_t p4x4glyphs[NGLYPHS][16];
> +static uint16_t p8x8glyphs[NGLYPHS][64];

this fits in uint8_t (256*(64+16) less space)

> +
> +typedef struct sanm_ctx_t
> +{
> +    const uint8_t* psrc;
> +
> +    int pitch;
> +    int width, height;

duplicate of AVCodecContext.width/height? if so this need some justification

[...]
> +static uint16_t* point_at(uint16_t* pbuf, int x, int y, int width)
> +{
> +    return &pbuf[y * width + x];
> +}

such trivial wraper functions are unacceptable as they make the code hard to
understand _and_ bigger

> +
> +static edge_t which_edge(int x, int y, int edge_size)
> +{
> +    edge_t edge;
> +    const int edge_max = edge_size - 1;
> +
> +    if (!y)
> +    {
> +        edge = bottom_edge;
> +    }
> +    else if (edge_max == y)
> +    {
> +        edge = top_edge;
> +    }
> +    else if (!x)
> +    {
> +        edge = left_edge;
> +    }
> +    else if (edge_max == x)
> +    {
> +        edge = right_edge;
> +    }
> +    else
> +    {
> +        edge = no_edge;
> +    }
> +
> +    return edge;
> +}

if this function needs more 16byte then replace it by a table as this
can be done with 2 simple 8 byte tables indexed by i>>1 where
x= pxvector[i], y = pyvector[i];

> +
> +static dir_t which_direction(edge_t edge0, edge_t edge1)
> +{
> +    dir_t dir = -1;
> +
> +    if ((left_edge == edge0 && right_edge == edge1) ||
> +        (left_edge == edge1 && right_edge == edge0) ||
> +        (bottom_edge == edge0 && top_edge != edge1) ||
> +        (bottom_edge == edge1 && top_edge != edge0))
> +    {
> +        dir = dir_up;
> +    }
> +    else if ((top_edge == edge0 && bottom_edge != edge1) ||
> +             (top_edge == edge1 && bottom_edge != edge0))
> +    {
> +        dir = dir_down;
> +    }
> +    else if ((left_edge == edge0 && right_edge != edge1) ||
> +             (left_edge == edge1 && right_edge != edge0))

the != right_edge check is useless it can never be right_edge

> +    {
> +        dir = dir_left;
> +    }
> +    else if ((top_edge == edge0 && bottom_edge == edge1) ||
> +             (top_edge == edge1 && bottom_edge == edge0) ||
> +             (right_edge == edge0 && left_edge != edge1) ||
> +             (right_edge == edge1 && left_edge != edge0))

this messy if() is equivalent to edge0 != no_edge && edge1 != no_edge

> +    {
> +        dir = dir_right;
> +    }
> +
> +    return dir;
> +}

(x,y) = (y,x) half of the checks are useless, just do a
if(edge0 > edge1) FFSWAP(int, edge0, edge1); and remove half of the checks

also the meaning of top_edge, bottom_edge and dir_down and dir_up doesnt
match top_edge + top_edge -> dir_down but its really toward that edge not
away if i understood the code correctly

also this can be done with a 25byte table so if thats smaller please use
a table, the code above is totally unreadable anyway

> +
> +static int max(int left, int right)
> +{
> +    return left > right ? left : right;
> +}

FFMAX

> +
> +static void interp_point(point_t* ppoint, int x0, int y0, int x1, int y1, int ipoint, int npoints)
> +{
> +    if (npoints)
> +    {
> +        ppoint->x = (x0 * ipoint + x1 * (npoints - ipoint) + npoints / 2) / npoints;
> +        ppoint->y = (y0 * ipoint + y1 * (npoints - ipoint) + npoints / 2) / npoints;
> +    }
> +    else
> +    {
> +        ppoint->x = x0;
> +        ppoint->y = y0;
> +    }
> +}

if(!npoints)
    npoints=1;
for(i=0; i<2; i++)
    ppoint[i]= (c0[i] * ipoint + c1[i] * (npoints - ipoint) + npoints / 2) / npoints;

> +static void make_glyphs(uint16_t* pglyphs, const int* pxvector, const int* pyvector, const int side_length)
> +{
> +    const int glyph_size = side_length * side_length;
> +    uint16_t* pglyph = pglyphs;
> +
> +    int i, j;
> +    for (i = 0; i != GLYPH_COORD_VECT_SIZE; ++i)

id use i<GLYPH_COORD_VECT_SIZE but iam fine with this too it just hmm 
uncommon

> +    {
> +        int x0 = pxvector[i], y0 = pyvector[i];
> +        edge_t edge0 = which_edge(x0, y0, side_length);
> +
> +        for (j = 0; j != GLYPH_COORD_VECT_SIZE; ++j, pglyph += glyph_size)
> +        {
> +            int x1 = pxvector[j], y1 = pyvector[j];
> +            edge_t edge1 = which_edge(x1, y1, side_length);
> +            dir_t dir = which_direction(edge0, edge1);
> +            int ipoint, npoints = max(abs(x1 - x0), abs(y1 - y0));

FFABS

> +
> +            memset(pglyph, 0, glyph_size * sizeof(pglyph[0]));
> +            for (ipoint = 0; ipoint <= npoints; ++ipoint)
> +            {
> +                point_t point;
> +                int irow, icol;
> +
> +                interp_point(&point, x0, y0, x1, y1, ipoint, npoints);
> +
> +                switch (dir)
> +                {
> +                case dir_up:
> +                    for (irow = point.y; irow >= 0; --irow)
> +                    {
> +                        *point_at(pglyph, point.x, irow, side_length) = 1;
> +                    }
> +                    break;
> +
> +                case dir_down:
> +                    for (irow = point.y; irow < side_length; ++irow)
> +                    {
> +                        *point_at(pglyph, point.x, irow, side_length) = 1;
> +                    }
> +                    break;
> +
> +                case dir_left:
> +                    for (icol = point.x; icol >= 0; --icol)
> +                    {
> +                        *point_at(pglyph, icol, point.y, side_length) = 1;
> +                    }
> +                    break;
> +
> +                case dir_right:
> +                    for (icol = point.x; icol < side_length; ++icol)
> +                    {
> +                        *point_at(pglyph, icol, point.y, side_length) = 1;
> +                    }
> +                    break;

what is with the dir= -1 case?

also i think this can be simplified alot ...

[...]
> +    pctx->aligned_height = ((height + 7) >> 3) << 3;

(x+7) & ~7

[...]
> +static void init_buffers(sanm_ctx_t* pctx)
> +{
> +    pctx->pdb0 = av_malloc(pctx->buf_size);
> +    memset(pctx->pdb0, 0, pctx->buf_size);
> +
> +    pctx->pdb1 = av_malloc(pctx->buf_size);
> +    memset(pctx->pdb1, 0, pctx->buf_size);
> +
> +    pctx->pdb2 = av_malloc(pctx->buf_size);
> +    memset(pctx->pdb2, 0, pctx->buf_size);

for(i=0; i<3; i++)
    pctx->pdb[i]= av_mallocz(pctx->buf_size);

and after that change this function is so trivial that it should be
removed and the for loop be placed at the one and onle spot where this
is called

[...]

> +    pav_ctx->has_b_frames = 0;

not needed, its 0 by default

[...]
> +    pheader->width = LE_32(pinput); pinput += 4;
> +    pheader->height = LE_32(pinput); pinput += 4;

please see bytestream.h

> +
> +    if (pheader->width != pctx->width || pheader->height != pctx->height)
> +    {
> +        av_log(0, AV_LOG_ERROR, "sanm decoder: variable size frames are not implemented. video may be garbled until next keyframe.\n");
> +        return -1;
> +    }
> +
> +    pheader->seq_num = LE_16(pinput); pinput += 2;
> +    pheader->codec = *pinput; pinput += 1;
> +    pheader->rotate_code = *pinput; pinput += 1;
> +
> +    pinput += 4; // skip pad
> +
> +    pctx->psmall_codebook = (uint16_t*) pinput; pinput += 8;
> +    pheader->bg_color = LE_16(pinput); pinput += 2;
> +
> +    pinput += 2; // skip pad
> +
> +    pheader->rle_output_size = LE_32(pinput); pinput += 4;
> +    pctx->pcodebook = (uint16_t*) pinput; pinput += 512; // 256 entries in codebook

pcodebook and psmall_codebook should be loaded into arrays with native
endianness instead of repeatedly converting their endianness in the
innermost loops
or store the whole picture in little endian and convert the whole at once
if the cpu is bigendian or add little endian rgb565 support

[...]
> +static void fill_db(uint16_t* pbuf, int buf_size, uint16_t color)
> +{
> +    while (buf_size--)
> +    {
> +        *pbuf++ = color;
> +    }
> +}

code duplication see msmpeg4_memsetw()

> +
> +static void swap(uint16_t** ppleft, uint16_t** ppright)
> +{
> +    uint16_t* ptemp = *ppleft;
> +    *ppleft = *ppright;
> +    *ppright = ptemp;
> +}

FFSWAP

> +
> +static void rotate_bufs(sanm_ctx_t* pctx, int rotate_code)
> +{
> +    if (1 == rotate_code)
> +    {
> +        swap(&pctx->pdb0, &pctx->pdb2);
> +    }
> +    else if (2 == rotate_code)
> +    {
> +        swap(&pctx->pdb1, &pctx->pdb2);
> +        swap(&pctx->pdb2, &pctx->pdb0);
> +    }
> +}

if(2 == rotate_code)
    swap(&pctx->pdb1, &pctx->pdb2);
swap(&pctx->pdb2, &pctx->pdb0);

> +
> +static void codec0(sanm_ctx_t* pctx, const uint8_t* pinput)
> +{
> +    uint16_t* pdb0 = pctx->pdb0;
> +
> +    int x, y;
> +    for (y = 0; y != pctx->height; ++y)
> +    {
> +        for (x = 0; x != pctx->width; ++x)
> +        {
> +            *point_at(pdb0, x, y, pctx->pitch) = LE_16(pinput); pinput += 2;
> +        }
> +    }
> +}
> +
> +static void codec6(sanm_ctx_t* pctx, const uint8_t* pinput)
> +{
> +    int npixels = pctx->npixels;
> +    uint16_t* pdb0 = pctx->pdb0;
> +    uint16_t* pcodebook = pctx->pcodebook;
> +
> +    int index;
> +    while (npixels--)
> +    {
> +        index = *pinput++;
> +        *pdb0++ = LE_16(&pcodebook[index]);
> +    }
> +}

the 2 functions above address the array in fairly different ways why?

> +
> +static void copy_block(uint16_t* pdest, uint16_t* psrc, int block_size, int pitch)
> +{
> +    int y;
> +    for (y = 0; y != block_size; ++y, pdest += pitch, psrc += pitch)
> +    {
> +        memcpy(pdest, psrc, block_size * sizeof(pdest[0]));
> +    }
> +}

code duplication see DSPContext.put_pixels_tab

> +
> +static void fill_block(uint16_t* pdest, uint16_t color, int block_size, int pitch)
> +{
> +    int x, y;
> +    pitch -= block_size;
> +    for (y = 0; y != block_size; ++y, pdest += pitch)
> +    {
> +        for (x = 0; x != block_size; ++x)
> +        {
> +            *pdest++ = color;
> +        }

code duplication see msmpeg4_memsetw() / fill_db()

[...]
> +static void opcode_0xf7(sanm_ctx_t* pctx, int cx, int cy, int block_size, int pitch)
> +{
> +    if (2 == block_size)
> +    {
> +        uint16_t* pcodebook = pctx->pcodebook;
> +        uint16_t* pdest = point_at(pctx->pdb0, cx, cy, pctx->pitch);
> +        uint32_t indices = LE_32(pctx->psrc); pctx->psrc += 4;
> +
> +        pdest[0] = LE_16(&pcodebook[indices & 0xff]); indices >>= 8;
> +        pdest[1] = LE_16(&pcodebook[indices & 0xff]); indices >>= 8;
> +        pdest[pitch] = LE_16(&pcodebook[indices & 0xff]); indices >>= 8;
> +        pdest[pitch + 1] = LE_16(&pcodebook[indices & 0xff]);
> +    }
> +    else
> +    {
> +        uint16_t fgcolor, bgcolor;
> +
> +        int index = *pctx->psrc++;
> +        uint16_t color_indices = LE_16(pctx->psrc); pctx->psrc += 2;
> +        fgcolor = LE_16(&pctx->pcodebook[color_indices >> 8]);
> +        bgcolor = LE_16(&pctx->pcodebook[color_indices & 0xff]);

why not read 8bit twice? instead of reding 16bit and then spliting them?
same applies to a few other spots

[...]
> +static int good_mvec(sanm_ctx_t* pctx, int cx, int cy, int mx, int my, int block_size)
> +{
> +    int good = point_at(pctx->pdb2, cx + mx + block_size - 1, cy + my + block_size - 1, pctx->pitch) < (&pctx->pdb2[pctx->buf_size / 2]);
> +    if (!good)
> +    {
> +        av_log(0, AV_LOG_ERROR, "sanm decoder: ignoring segfault-inducing motion vector (%i, %i)->(%u, %u), block size = %u.\n",
> +               cx + mx, cy + my, cx, cy, block_size);
> +    }

what about vectors outside the left and right borders? and what about
vectors which point to prior of the array?

> +
> +    return good;
> +}
> +
> +static void codec2level(sanm_ctx_t* pctx, int cx, int cy, int block_size)
> +{
> +    int16_t mx, my, index;
> +    uint16_t color;
> +
> +    int opcode = *pctx->psrc++;
> +
> +    switch (opcode)
> +    {
> +    default:
> +        mx = motion_vectors[opcode].x;
> +        my = motion_vectors[opcode].y;
> +
> +        if (good_mvec(pctx, cx, cy, mx, my, block_size))
> +        {
> +            copy_block(point_at(pctx->pdb0, cx, cy, pctx->pitch),
> +                       point_at(pctx->pdb2, cx + mx, cy + my, pctx->pitch),
> +                       block_size, pctx->pitch);
> +        }
> +        break;
> +
> +    case 0xf5:
> +        index = LE_16(pctx->psrc); pctx->psrc += 2;
> +
> +        mx = index % pctx->width;
> +        my = index / pctx->width;
> +
> +        if (good_mvec(pctx, cx, cy, mx, my, block_size))
> +        {
> +            copy_block(point_at(pctx->pdb0, cx, cy, pctx->pitch),
> +                       point_at(pctx->pdb2, cx + mx, cy + my, pctx->pitch),
> +                       block_size, pctx->pitch);
> +        }
> +        break;
> +
> +    case 0xf6:
> +        copy_block(point_at(pctx->pdb0, cx, cy, pctx->pitch),
> +                   point_at(pctx->pdb1, cx, cy, pctx->pitch),
> +                   block_size, pctx->pitch);
> +        break;

mx=my=0;
...
default:
case 0xf5:
    if(opcode == 0xf5){
        index = LE_16(pctx->psrc); pctx->psrc += 2;
        mx = index % pctx->width;
        my = index / pctx->width;
    }else {
        mx = motion_vectors[opcode].x;
        my = motion_vectors[opcode].y;
    }
    if(!good_mvec(pctx, cx, cy, mx, my, block_size))
        return -1;
case 0xf6:
    copy_block(point_at(pctx->pdb0, cx, cy, pctx->pitch),
               point_at(pctx->pdb2, cx + mx, cy + my, pctx->pitch),
               block_size, pctx->pitch);

[...]
> +static void rle_decode(uint8_t* pdest, const uint8_t* pinput, const int out_size)
> +{
> +    int opcode, color, run_len, remaining = out_size;
> +
> +    assert(out_size > 0);
> +
> +    while (remaining)
> +    {
> +        opcode = *pinput++;
> +        run_len = (opcode >> 1) + 1;
> +        assert(run_len <= remaining);

asserts are not accpetable to check bytestream values, a library must never
assert(0)/abort() due to an invalid file

the remaining value is a unchecked value read from the bytestream, which 
makes this an heap overflow too independant of the assert() above

> +
> +        if (opcode & 1) // rle strip
> +        {
> +            color = *pinput++;
> +            memset(pdest, color, run_len);
> +        }
> +        else
> +        {
> +            memcpy(pdest, pinput, run_len);
> +            pinput += run_len;
> +        }
> +
> +        pdest += run_len;
> +        remaining -= run_len;
> +    }
> +}
> +
> +static void codec5(sanm_ctx_t* pctx, const uint8_t* pinput, const int decoded_size)
> +{
> +    uint8_t* pdest = (uint8_t*) pctx->pdb0;
> +    int npixels = pctx->npixels;
> +    uint16_t* pdb0 = pctx->pdb0;
> +
> +    assert(!(decode_size & 1));
> +
> +    rle_decode(pdest, pinput, decoded_size);
> +
> +    while (npixels--)
> +    {
> +        *pdb0 = LE_16(pdb0);
> +        ++pdb0;
> +    }

this does nothing on little endian and as such should not be executed

[...]
> +static void codec8(sanm_ctx_t* pctx, const uint8_t* pinput, const int decoded_size)
> +{
> +    uint16_t* pdest = pctx->pdb0;
> +    uint8_t* pindices = av_malloc(decoded_size);
> +    uint8_t* pidx_cursor = pindices;
> +    long npixels = pctx->npixels;
> +
> +    rle_decode(pindices, pinput, decoded_size);

pindices should be checked for NULL

[...]
> +static void copy_output(sanm_ctx_t* pctx, frame_header_t* pheader)
> +{
> +    uint8_t* poutput = pctx->output.data[0];
> +    const uint8_t* pinput = (uint8_t*) pctx->pdb0;
> +
> +    int height = pctx->height;
> +    long inpitch = pctx->pitch * sizeof(pctx->pdb0[0]);
> +    long outpitch = pctx->output.linesize[0];
> +    while (height--)
> +    {
> +        memcpy(poutput, pinput, inpitch);
> +        pinput += inpitch;
> +        poutput += outpitch;
> +    }

this is unacceptable, either use the buffers provided by get_buffer()
or return your buffer

> +}
> +
> +static int dispatch_codec(sanm_ctx_t* pctx, const uint8_t* pinput)
> +{

why not remove this function and place the code into decode_frame?

> +    frame_header_t header;
> +    if (read_frame_header(pctx, pinput, &header))
> +    {
> +        return -1;
> +    }
> +
> +    if ((pctx->output.key_frame = !header.seq_num))
> +    {
> +        pctx->output.pict_type = FF_I_TYPE;
> +        fill_db(pctx->pdb1, pctx->npixels, header.bg_color);
> +        fill_db(pctx->pdb2, pctx->npixels, header.bg_color);
> +    }
> +    else
> +    {
> +        pctx->output.pict_type = FF_P_TYPE;
> +    }
> +
> +    switch (header.codec)
> +    {
> +    case 0:
> +        codec0(pctx, header.pvstream);
> +        break;
> +
> +    case 2:
> +        codec2(pctx, header.pvstream);
> +        break;

read_frame_header stores the last bytestream pointer in header.pvstream
which gets passed here and then is stored in pctx->psrc why this mess?
put the bytestream pointer in _one_ spot in _one_ context please 

[...]
> +        av_log(0, AV_LOG_ERROR, "sanm decoder: subcodec %u is not implemented. video may be garbled until next keyframe.\n", header.codec);

0 is unacceptable as context for av_log, please pass yor nearest
AVCodecContext into it

[...]
> Property changes on: libavcodec/sanm.c
> ___________________________________________________________________
> Name: svn:keywords
>    + Id URL
> Name: svn:eol-style
>    + native

what is this?!

[...]
> +typedef struct sanm_vinfo_t
> +{
> +    int width, height;

why this duplication of AVCodecContext.width/height?

[...]
> +typedef struct sanm_ainfo_t
> +{
> +    int freq, nchannels;

why this duplication of AVCodecContext.sample_rate/channels?

[...]
> +static int sanm_probe(AVProbeData* p)
> +{
> +    int score = 0;
> +
> +    if (p->buf_size >= 4
> +        && p->buf[0] == 'S' && p->buf[1] == 'A'
> +        && p->buf[2] == 'N' && p->buf[3] == 'M')
> +    {
> +        score = AVPROBE_SCORE_MAX;
> +    }
> +
> +    return score;

the score variable is redundant just do a if() return MAX; return 0;

[...]
> +        case MKBETAG('B', 'l', '1', '6'):
> +            if (size != av_get_packet(pbuf, ppacket, size))
> +            {
> +                return AVERROR_IO;

memleak at EOF?

> +            }
> +
> +            ppacket->stream_index = pctx->vinfo.stream_index;
> +            ppacket->pts = pctx->vinfo.pts;
> +            ++pctx->vinfo.pts;

do NOT set pts unless there is a pts in the packet (pts++ indicates that
you dont have a pts)

> +
> +            done = 1;
> +            break;

instead of the done=1; break stuff everywhere why not return 0; ?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The worst form of inequality is to try to make unequal things equal.
-- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070117/65bbc753/attachment.pgp>