[FFmpeg-devel] [PATCH] encoder for adobe's flash ScreenVideo2 codec

Wed Jul 22 07:23:23 CEST 2009

Joshua Warner wrote:
> Hi all,
> 
> I've developed an encoder for Adobe's Flash ScreenVideo2 format, which is
> stored in flv files.  My encoder currently only supports a large subset of
> the format.  The only player that supports this codec (so far) is Adobe
> Flash Player itself, but ScreenVideo2 makes dramatic improvement in file
> size over ScreenVideo (currently in ffmpeg as flashsv) - and should be very
> useful for uploading screencasts, etc.  Most options (block size, etc) now
> just fall back on defaults because I couldn't find a general algorithm that
> produced consistantly better results than these.  All the code is in place
> to be able to change these parameters dynamically, so future improvements
> there should be easy.  The patch is attached.
> 
> The codec is listed as flashsv2 in ffmpeg.

I'll give a few comments...

> +typedef struct FlashSV2Context {
> +    AVCodecContext *avctx;
> +    uint8_t *current_frame;
> +    uint8_t *key_frame;
> +    AVFrame frame;
> +    uint8_t *encbuffer;
> +    uint8_t *keybuffer;
> +    uint8_t *databuffer;
> +
> +    Block *frame_blocks;
> +    Block *key_blocks;
> +    int frame_size;
> +    int blocks_size;
> +
> +    int use15_7, dist, comp;
> +
> +    int rows, cols;
> +
> +    int last_key_frame;
> +
> +    int image_width, image_height;
> +    int block_width, block_height;
> +    uint8_t flags;
> +    uint8_t use_custom_pallet;
> +    uint8_t pallet_type;        //0=>default, 1=>custom - changed when pallet regenerated.
> +    Pallet pallet;
> +
> +    double tot_blocks;          //blocks encoded since last keyframe
> +    double diff_blocks;         //blocks that were different since last keyframe
> +    double tot_lines;           //total scanlines in image since last keyframe
> +    double diff_lines;          //scanlines that were different since last keyframe
> +    double raw_size;            //size of raw frames since last keyframe
> +    double comp_size;           //size of compressed data since last keyframe
> +    double uncomp_size;         //size of uncompressed data since last keyframe
> +
> +    double total_bits;          //total bits written to stream so far
> +} FlashSV2Context;

If it is preferable to use integers here if possible. It is easier to 
have a bit-exact output across platforms and is faster in systems with 
no FPU.

> +static void cleanup(FlashSV2Context * s)
> +{
> +    if (s->encbuffer)
> +        av_free(s->encbuffer);
> +    if (s->keybuffer)
> +        av_free(s->keybuffer);
> +    if (s->databuffer)
> +        av_free(s->databuffer);
> +    if (s->current_frame)
> +        av_free(s->current_frame);
> +    if (s->key_frame)
> +        av_free(s->key_frame);
> +
> +    if (s->frame_blocks)
> +        av_free(s->frame_blocks);
> +    if (s->key_blocks)
> +        av_free(s->key_blocks);
> +}

Why not simply put this code inside flashsv2_encode_end()?

> +static int generate_default_pallet(Pallet * pallet);

You can reorder the functions to avoid this forward reference.

> +static void init_blocks(FlashSV2Context * s, Block * blocks,
> +                        uint8_t * encbuf, uint8_t * databuf)
> +{
> +    int row, col;
> +    Block *b;
> +    for (col = 0; col < s->cols; col++) {
> +        for (row = 0; row < s->rows; row++) {
> +            b = blocks + (col + row * s->cols);
> +            b->width = (col < s->cols - 1)
> +                || (s->image_width % s->block_width ==
> +                    0) ? s->block_width : s->image_width % s->block_width;

Hmm, isn't

b->width = (col < s->cols - 1) :
                 s->block_width ?
                 s->image_width - col*s->block_width;

simpler?

> +static av_cold int flashsv2_encode_init(AVCodecContext * avctx)
> +{
> +    FlashSV2Context *s = avctx->priv_data;
> +
> +    s->avctx = avctx;
> +
> +    s->comp = avctx->compression_level;
> +    if (s->comp == -1)
> +        s->comp = 9;
> +    if (s->comp < 0 || s->comp > 9) {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Compression level should be 0-9, not %d\n", s->comp);
> +        return -1;
> +    }
> +
> +
> +    if ((avctx->width > 4095) || (avctx->height > 4095)) {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Input dimensions too large, input must be max 4096x4096 !\n");
> +        return -1;
> +    }
> +
> +    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
> +        return -1;
> +    }
> +
> +
> +    s->last_key_frame = 0;
> +
> +    s->image_width = avctx->width;
> +    s->image_height = avctx->height;
> +
> +    s->block_width = (s->image_width / 12) & ~15;
> +    s->block_height = (s->image_height / 12) & ~15;
> +
> +    s->rows = (s->image_height + s->block_height - 1) / s->block_height;
> +    s->cols = (s->image_width + s->block_width - 1) / s->block_width;
> +
> +    s->frame_size = s->image_width * s->image_height * 3;
> +    s->blocks_size = s->rows * s->cols * sizeof(Block);
> +
> +    s->encbuffer = av_mallocz(s->frame_size);
> +    s->keybuffer = av_mallocz(s->frame_size);
> +    s->databuffer = av_mallocz(s->frame_size * 6);
> +    s->current_frame = av_mallocz(s->frame_size);
> +    s->key_frame = av_mallocz(s->frame_size);
> +    s->frame_blocks = av_mallocz(s->blocks_size);
> +    s->key_blocks = av_mallocz(s->blocks_size);

> +    s->pallet.index = av_mallocz(1 << 15);

I think it is simpler to define Pallet as

typedef struct Pallet {
     unsigned colors[128];
     uint8_t index[1 << 15];
} Pallet;

?

> +static int new_key_frame(FlashSV2Context * s)
> +{
> +    int i;
> +    memcpy(s->keybuffer, s->encbuffer, s->frame_size);

Can't this memcpy() be avoided by doing FFSWAP(s->keybuffer, 
s->encbuffer) at some point?

> +    memcpy(s->key_blocks, s->frame_blocks, s->blocks_size);
> +    memcpy(s->key_frame, s->current_frame, s->frame_size);

same for those

> +static int write_block(Block * b, uint8_t * buf, int buf_size)
> +{
> +    int buf_pos = 0;
> +    unsigned block_size = b->data_size;
> +
> +    if (b->flags & HAS_DIFF_BLOCKS)
> +        block_size += 2;
> +    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT)
> +        block_size += 2;
> +    if (block_size > 0)
> +        block_size += 1;
> +    if (buf_size < block_size + 2)
> +        return -1;
> +
> +    buf[buf_pos++] = block_size >> 8;
> +    buf[buf_pos++] = block_size;

See AV_WL16().

> +
> +    if (block_size == 0)
> +        return buf_pos;
> +
> +    buf[buf_pos++] = b->flags;
> +
> +    if (b->flags & HAS_DIFF_BLOCKS) {
> +        buf[buf_pos++] = (uint8_t) (b->start);
> +        buf[buf_pos++] = (uint8_t) (b->len);
> +    }
> +
> +    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT) {
> +        //This feature of the format is poorly understood, and as of now, unused.
> +        buf[buf_pos++] = (uint8_t) (b->col);
> +        buf[buf_pos++] = (uint8_t) (b->row);
> +    }
> +
> +    memcpy(buf + buf_pos, b->data, b->data_size);

Is it really necessary to memcpy() data, or there is a way to write 
directly to the buffer?

> +
> +static int encode_zlib(Block * b, uint8_t * buf, int *buf_size, int comp)
> +{
> +    int res =
> +        compress2(buf, buf_size, b->sl_begin, b->sl_end - b->sl_begin,
> +                  comp);
> +    return res == Z_OK ? 0 : -1;
> +}

I think it would be cleaner to use compress2() directly all over the 
code, for ex. instead of

> +        res = encode_zlib(b, b->data, &b->data_size, comp);
> +        if (res != 0)
> +            return res;

just

if (compress2(b->data, &b->data_size, b->sl_begin,
               b->sl_end - b->sl_begin, comp) != Z_OK)
     return -1;

> +static inline unsigned pixel_bgr(uint8_t * src)
> +{
> +    return (src[2]) | (src[1] << 8) | (src[2] << 16);
> +}

Hm, src[0] is unused?

> +static int generate_default_pallet(Pallet * pallet)
> +{
> +    memcpy(pallet->colors, default_screen_video_v2_palette,
> +           sizeof(default_screen_video_v2_palette));
> +

When using the default palette, it is better to just make pallet->colors 
point to default_screen_video_v2_palette instead of allocating and 
memcpy'ing.

> +static int generate_optimum_pallet(Pallet * pallet, uint8_t * image,
> +                                   int width, int height, int stride)
> +{
> +    //this isn't implemented yet!  Default pallet only!
> +    return -1;
> +}

I think it would be interesting to handle first the case where the 
encoder gets the input already as paletted data.

> +#ifndef FLASHSV2_DUMB
> +    //double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
> +    //double width = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_width;
> +    //int pwidth;
> +    //av_log(s->avctx, AV_LOG_DEBUG, "block width: %g\n", width);
> +    double width;
> +    width = ((double) s->image_width) / 10.0;
> +    pwidth = ((int) width);
> +    pwidth &= ~15;
> +    if (pwidth > 256)
> +        pwidth = 256;
> +    if (pwidth < 16)
> +        pwidth = 16;

Hm, this is the same as

pwidth = FFCLIP((s->image_width/10) & (~15), 16, 256);

no? Avoid floating point is nice.

-Vitor