[FFmpeg-devel] [PATCH] Mimic encoder

Wed May 21 03:43:16 CEST 2008

On Wed, May 21, 2008 at 12:33:35AM +0100, Ramiro Polla wrote:
> Hello,
>
>>> static VlcSymbol vlc_alphabet_gen[16][8] = {
>>> { {  3, 0x00000001,  0, 0x00000000, }, {  4, 0x00000007,  0, 0x00000000, 
>>> },
>>>   {  6, 0x00000027,  0, 0x00000000, }, {  8, 0x000000cf,  0, 0x00000000, 
>>> },
>>>   { 10, 0x0000035f,  0, 0x00000000, }, { 12, 0x00000eff,  0, 0x00000000, 
>>> },
>>>   { 17, 0x0001fd7f,  0, 0x00000000, }, { 17, 0x0001fd01,  0, 0x00000000, 
>>> }, },
>>> { {  5, 0x00000017,  0, 0x00000000, }, {  8, 0x000000e7,  0, 0x00000000, 
>>> },
>>>   {  9, 0x000001d7,  0, 0x00000000, }, { 12, 0x00000f8f,  0, 0x00000000, 
>>> },
>>>   { 15, 0x00007f1f,  0, 0x00000000, }, { 16, 0x0000fe7f,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07fffff9,  7, 0x0000007f, }, { 27, 0x07fffff9,  7, 0x00000001, 
>>> }, },
>>> { {  6, 0x00000037,  0, 0x00000000, }, {  9, 0x000001ef,  0, 0x00000000, 
>>> },
>>>   { 12, 0x00000fd7,  0, 0x00000000, }, { 13, 0x00001fbf,  0, 0x00000000, 
>>> },
>>>   { 25, 0x01ffff7f,  0, 0x00000000, }, { 30, 0x3ffffe3f,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07fffffa,  7, 0x0000007f, }, { 27, 0x07fffffa,  7, 0x00000001, 
>>> }, },
>>> { {  7, 0x00000071,  0, 0x00000000, }, { 10, 0x000003ef,  0, 0x00000000, 
>>> },
>>>   { 17, 0x0001ffdf,  0, 0x00000000, }, { 21, 0x001fffbf,  0, 0x00000000, 
>>> },
>>>   { 26, 0x03ffff1f,  0, 0x00000000, }, { 30, 0x3ffffe7f,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07fffffb,  7, 0x0000007f, }, { 27, 0x07fffffb,  7, 0x00000001, 
>>> }, },
>>> { {  8, 0x000000f1,  0, 0x00000000, }, { 11, 0x000007e3,  0, 0x00000000, 
>>> },
>>>   { 18, 0x0003ffc7,  0, 0x00000000, }, { 22, 0x003fff8f,  0, 0x00000000, 
>>> },
>>>   { 26, 0x03ffff3f,  0, 0x00000000, }, { 30, 0x3ffffebf,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { {  8, 0x000000f3,  0, 0x00000000, }, { 11, 0x000007e7,  0, 0x00000000, 
>>> },
>>>   { 18, 0x0003ffcf,  0, 0x00000000, }, { 22, 0x003fff9f,  0, 0x00000000, 
>>> },
>>>   { 26, 0x03ffff5f,  0, 0x00000000, }, { 30, 0x3ffffeff,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { {  8, 0x000000f5,  0, 0x00000000, }, { 14, 0x00003feb,  0, 0x00000000, 
>>> },
>>>   { 18, 0x0003ffd7,  0, 0x00000000, }, { 22, 0x003fffaf,  0, 0x00000000, 
>>> },
>>>   { 26, 0x03ffff7f,  0, 0x00000000, }, { 31, 0x7ffffe3f,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { {  9, 0x000001f3,  0, 0x00000000, }, { 14, 0x00003fef,  0, 0x00000000, 
>>> },
>>>   { 18, 0x0003ffdf,  0, 0x00000000, }, { 22, 0x003fffbf,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07ffff1f,  0, 0x00000000, }, { 31, 0x7ffffe7f,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { {  9, 0x000001f5,  0, 0x00000000, }, { 15, 0x00007fe3,  0, 0x00000000, 
>>> },
>>>   { 19, 0x0007ffc7,  0, 0x00000000, }, { 23, 0x007fff8f,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07ffff3f,  0, 0x00000000, }, { 31, 0x7ffffebf,  0, 0x00000000, 
>>> },
>>>   { 29, 0x1ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 11, 0x000007f7,  0, 0x00000000, }, { 15, 0x00007fe7,  0, 0x00000000, 
>>> },
>>>   { 19, 0x0007ffcf,  0, 0x00000000, }, { 23, 0x007fff9f,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07ffff5f,  0, 0x00000000, }, { 31, 0x7ffffeff,  0, 0x00000000, 
>>> },
>>>   { 29, 0x1ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 12, 0x00000ff1,  0, 0x00000000, }, { 15, 0x00007feb,  0, 0x00000000, 
>>> },
>>>   { 19, 0x0007ffd7,  0, 0x00000000, }, { 23, 0x007fffaf,  0, 0x00000000, 
>>> },
>>>   { 27, 0x07ffff7f,  0, 0x00000000, }, { 32, 0xfffffe3f,  0, 0x00000000, 
>>> },
>>>   { 29, 0x1ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 12, 0x00000ff3,  0, 0x00000000, }, { 15, 0x00007fef,  0, 0x00000000, 
>>> },
>>>   { 19, 0x0007ffdf,  0, 0x00000000, }, { 23, 0x007fffbf,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0fffff1f,  0, 0x00000000, }, { 32, 0xfffffe7f,  0, 0x00000000, 
>>> },
>>>   { 29, 0x1ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 12, 0x00000ff5,  0, 0x00000000, }, { 16, 0x0000ffe3,  0, 0x00000000, 
>>> },
>>>   { 20, 0x000fffc7,  0, 0x00000000, }, { 24, 0x00ffff8f,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0fffff3f,  0, 0x00000000, }, { 32, 0xfffffebf,  0, 0x00000000, 
>>> },
>>>   { 30, 0x1fff7400,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 12, 0x00000ff7,  0, 0x00000000, }, { 16, 0x0000ffe7,  0, 0x00000000, 
>>> },
>>>   { 20, 0x000fffcf,  0, 0x00000000, }, { 24, 0x00ffff9f,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0fffff5f,  0, 0x00000000, }, { 32, 0xfffffeff,  0, 0x00000000, 
>>> },
>>>   { 30, 0x3ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 13, 0x00001ff1,  0, 0x00000000, }, { 16, 0x0000ffeb,  0, 0x00000000, 
>>> },
>>>   { 20, 0x000fffd7,  0, 0x00000000, }, { 24, 0x00ffffaf,  0, 0x00000000, 
>>> },
>>>   { 28, 0x0fffff7f,  0, 0x00000000, }, { 27, 0x07fffff8,  6, 0x0000003f, 
>>> },
>>>   { 30, 0x3ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> { { 13, 0x00001ff3,  0, 0x00000000, }, {  2, 0x00000003,  0, 0x00000000, 
>>> },
>>>   {  3, 0x00000007,  0, 0x00000000, }, { 31, 0x7ffffffb,  4, 0x0000000f, 
>>> },
>>>   {  5, 0x0000001f,  0, 0x00000000, }, {  6, 0x0000003f,  0, 0x00000000, 
>>> },
>>>   {  7, 0x0000007f,  0, 0x00000000, }, {  0, 0x00000000,  0, 0x00000000, 
>>> }, },
>>> };
>> This looks duplicated from mimic.c
>
> I couldn't find a way to split it further or reuse values from mimic.c. I 
> don't think they can be reused.

The decoder does:
v= get_vlc(huffbits/code)
run= v&15;
num_bits = v>>4;
level= vlcdec_lookup[num_bits][get_bits(num_bits)]

For the encoder it's:
num_bits= vlcenc_lookup[level].num_bits;
bits    = vlcenc_lookup[level].bits;
v= run + (num_bits<<4);
put_bits(huffcode[v], huffbits[v]);
put_bits(bits, num_bits);

>
>> [...]
>>> static int mimic_encode_init(AVCodecContext *avctx)
>>> {
>>>     MimicContext *ctx = avctx->priv_data;
>>>     int i;
>>>
>>>     if(!(avctx->width == 160 && avctx->height == 120) &&
>>>        !(avctx->width == 320 && avctx->height == 240)) {
>>>         av_log(avctx, AV_LOG_ERROR, "size must be 320x240 or 160x120\n");
>>>         return -1;
>>>     }
>>>
>>>     ctx->avctx = avctx;
>>>
>>>     for (i = 0 ; i < 3 ; i++) {
>>>         ctx->num_vblocks[i] = -((-avctx->height) >> (3 + !!i));
>>>         ctx->num_hblocks[i] =     avctx->width   >> (3 + !!i) ;
>>>     }
>>>
>>>     ctx->cur_index = 15;
>>>     ctx->num_coeffs = 28;
>>>
>>>     for (i = 0; i < 16; i++) {
>>>         if(avctx->get_buffer(avctx, &ctx->buf_ptrs[i])) {
>>>             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
>>>             return -1;
>>>         }
>>>         ff_mimic_prepare_avpic(ctx, &ctx->flipped_ptrs[i],
>>>               (AVPicture*) &ctx->buf_ptrs    [i]);
>>>     }
>>>
>>>     /* TODO Add a way to get quality per frame from the context. */
>>>     ctx->quality = ENCODER_QUALITY_DEFAULT;
>>>
>>>     avcodec_get_frame_defaults((AVFrame*)&ctx->picture);
>>>     avctx->coded_frame = (AVFrame*)&ctx->picture;
>> senseless casts
>
> Removed.
>

>>> static void vlc_encode_block(MimicContext *ctx,
>>>                               DCTELEM *idct_block, const DCTELEM 
>>> *dct_block,
>>>                               int num_coeffs, int qscale)
>>> {
>>>     const int qscale_dec = qscale << 2;
>>>     int num_zeroes = 0;
>>>     int value;
>>>     int i;
>>>
>>>     memset(idct_block, 0, sizeof(DCTELEM)*64);
>>>
>>>     value = shift_rnd(dct_block[0], 6);
>>>     idct_block[0] = value << 3;
>>>
>>>     /* The DC value is written out as is. */
>>>     put_bits(&ctx->pb, 8, value);
>>>
>>>     for (i = 1; i < num_coeffs && num_zeroes <= 14; i++) {
>>                                                ^^ ^^
>> If you cannot encode >14 zeros but there are >14 zeros, then you should
>> check whether it's better to encode the element most different from zero
>> as non-zero or to encode the rest of the block as non-zero.
>
> It becomes harder to do that with dct_quantize, since I don't have access 
> to fdct's unquantized output. Do you think this is so important that it'd 
> be worth re-fdct'ing and quantizing again in those cases?

No, we had better concentrate on trellis quantization :)

[...]
>>>     if(match < threshold[is_chroma]) {
>>>         put_bits(&ctx->pb, 1, !is_chroma);
>>>         ret = prev;
>>>     } else if(!is_chroma) {
>>>         int num_backrefs = av_clip(ctx->avctx->frame_number - 1, 0, 15);
>>>         int best_match = threshold[is_chroma];
>>>         int best_index = 0;
>>>
>>>         put_bits(&ctx->pb, 1, 0);
>>>
>>>         for (i = 1; i <= num_backrefs; i++) {
>>>             int backref = (ctx->cur_index + i) & 15;
>>>             uint8_t *backbuf = ctx->flipped_ptrs[backref].data[plane] + 
>>> offset;
>>>
>>>             match = ctx->dsp.sse[1](NULL, backbuf, cur, stride, rows);
>>>
>>>             if(match < best_match) {
>>>                 best_index = i;
>>>                 best_match = match;
>>>             }
>>>         }
>>>
>>>         if(best_index) {
>>>             int backref = (ctx->cur_index + best_index) & 15;
>>>
>>>             ret = ctx->flipped_ptrs[backref].data[plane] + offset;
>>>             put_bits(&ctx->pb, 1, 1);
>>>             put_bits(&ctx->pb, 4, best_index);
>>>         }
>>>     }
>> You should encode the block in all possible choices and select the one
>> which minimizes SSE + lambda2*bitrate, where lambda2 is a constant set
>> based on quality. At least do so when mb_decision == FF_MB_DECISION_RD;
>> if mb_decision is something else, then you can use a heuristic such
>> as the one above ...
>> lambda= AVFrame.quality;
>> lambda2= (lambda*lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
>> and then minimize (SSE<<FF_LAMBDA_SHIFT) + lambda2*bitrate
>> Why is this better than a simple threshold?
>> Well, if you have 2 independent blocks and you choose for each the
>> encoding which minimizes SSE[block_i] + C*bitrate[block_i], then it's
>> obvious that both together will be encoded so that SSE + C*bitrate of
>> both is at its global minimum.
>> Why is this minimum a good choice?
>> Because each such minimum for each specific constant corresponds to an
>> encoding which maximizes the quality (minimizes SSE) for a given bitrate.
>
> Should I test only the backreferences, or the new encoding too? It's always
> 1 bit to copy from previous, 6 bits to copy from backreferences, and a
> bunch of bits (normally at least 30, and up to hundreds) to encode a new
> block.

All possible encodings should be tested.

>
> In SSE + C*bitrate, I didn't really understand what value C should be.

A constant, which would be set based on the wanted quality.

>
>>>     return ret;
>>> }
>>>
>>> static void encode_plane(MimicContext *ctx, int plane, int is_pframe)
>>> {
>>>     const int is_chroma = !!plane;
>>>     const int stride = ctx->flipped_ptrs[ctx->cur_index 
>>> ].linesize[plane];
>>>     uint8_t *cur     = ctx->flipped_ptrs[ctx->cur_index ].data    
>>> [plane];
>>>     uint8_t *prev    = ctx->flipped_ptrs[ctx->prev_index].data    
>>> [plane];
>>>     const int qscale = 
>>> av_clip(10000-ctx->quality,is_chroma?1000:2000,10000);
>>>     int rows_shift = 0;
>>>     int offset = 0;
>>>     int x, y;
>>>
>>>     /* Bleed bottom line for 160x120 videos */
>>>     if(plane && ctx->avctx->height & 15) {
>>>         ctx->dsp.draw_edges(cur, stride,
>>>                             ctx->avctx->width>>1, ctx->avctx->height>>1, 
>>> 4);
>>>         rows_shift = 8;
>>>     }
>> Reflection is better than duplication for "invisible pixels" quality-wise,
>> IIRC.
>
> Reflected.
>

> I also removed the IDCT. The output file sizes were from 30% to 400% bigger 
> than no IDCT, and the quality wasn't so great. I don't think I really need 
> it since it's either copy or encode. There's no motion estimation or 
> prediction of any kind.
>
> I tested with a bunch of inputs, ranging from cif to good webcams and noisy 
> webcams. Using IDCT made the encoder decide to not copy quite frequently, 
> leading to lots of updates on static parts of the image, which is kind of 
> annoying. If I raised the threshold, the quality on moving parts would be 
> crap.

Sounds like "your IDCT" was buggy ...
Having quantization & dequantization not match exactly, or anything else,
could have been the cause ... (or the zigzag tables maybe ...)
I assume you did somehow look at the IDCT-ed reference image that is used,
to check that it looks like the original instead of, let's say, having all
8x8 blocks upside down or something like that ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080521/0f6736a5/attachment.pgp>