[FFmpeg-devel] [PATCH] FFV1 2pass initial range coder states

Michael Niedermayer michaelni
Tue Oct 26 04:19:24 CEST 2010


Hi

The patch below uses 2-pass encoding to find approximately good initial states (not
globally optimal; that can be done too and I might try it).

foreman with very small gops like 1 gop == 1 frame gains about 11% in terms
of compression.

I will apply this (+ any cleanups/bugfixes I stumble across) soon


-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you think the mosad wants you dead since a long time then you are either
wrong or dead since a long time.
-------------- next part --------------
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 3da38dc..fd7375e 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -233,6 +233,7 @@ typedef struct FFV1Context{
     GetBitContext gb;
     PutBitContext pb;
     uint64_t rc_stat[256][2];
+    uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
     int version;
     int width, height;
     int chroma_h_shift, chroma_v_shift;
@@ -246,6 +247,7 @@ typedef struct FFV1Context{
     int16_t quant_tables[MAX_QUANT_TABLES][MAX_CONTEXT_INPUTS][256];
     int context_count[MAX_QUANT_TABLES];
     uint8_t state_transition[256];
+    uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
     int run_index;
     int colorspace;
     int_fast16_t *sample_buffer;
@@ -299,12 +301,15 @@ static inline int get_context(PlaneContext *p, int_fast16_t *src, int_fast16_t *
         return p->quant_table[0][(L-LT) & 0xFF] + p->quant_table[1][(LT-T) & 0xFF] + p->quant_table[2][(T-RT) & 0xFF];
 }
 
-static av_always_inline av_flatten void put_symbol_inline(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2]){
+static av_always_inline av_flatten void put_symbol_inline(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
     int i;
 
 #define put_rac(C,S,B) \
 do{\
+    if(rc_stat){\
     rc_stat[*(S)][B]++;\
+        rc_stat2[(S)-state][B]++;\
+    }\
     put_rac(C,S,B);\
 }while(0)
 
@@ -344,8 +349,7 @@ do{\
 }
 
 static void av_noinline put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
-    uint64_t rc_stat[256][2]; //we dont bother counting header bits.
-    put_symbol_inline(c, state, v, is_signed, rc_stat);
+    put_symbol_inline(c, state, v, is_signed, NULL, NULL);
 }
 
 static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed){
@@ -493,7 +497,11 @@ static av_always_inline int encode_line(FFV1Context *s, int w, int_fast16_t *sam
         diff= fold(diff, bits);
 
         if(s->ac){
-            put_symbol_inline(c, p->state[context], diff, 1, s->rc_stat);
+            if(s->flags & CODEC_FLAG_PASS1){
+                put_symbol_inline(c, p->state[context], diff, 1, s->rc_stat, s->rc_stat2[p->quant_table_index][context]);
+            }else{
+                put_symbol_inline(c, p->state[context], diff, 1, NULL, NULL);
+            }
         }else{
             if(context == 0) run_mode=1;
 
@@ -751,10 +759,13 @@ static av_cold int init_slice_contexts(FFV1Context *f){
 static int write_extra_header(FFV1Context *f){
     RangeCoder * const c= &f->c;
     uint8_t state[CONTEXT_SIZE];
-    int i;
+    int i, j, k;
+    uint8_t state2[32][CONTEXT_SIZE];
+
+    memset(state2, 128, sizeof(state2));
     memset(state, 128, sizeof(state));
 
-    f->avctx->extradata= av_malloc(f->avctx->extradata_size= 10000);
+    f->avctx->extradata= av_malloc(f->avctx->extradata_size= 10000 + (11*11*5*5*5+11*11*11)*32);
     ff_init_range_encoder(c, f->avctx->extradata, f->avctx->extradata_size);
     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
 
@@ -775,8 +786,23 @@ static int write_extra_header(FFV1Context *f){
     put_symbol(c, state, f->num_v_slices-1, 0);
 
     put_symbol(c, state, f->quant_table_count, 0);
-    for(i=0; i<f->quant_table_count; i++)
+    for(i=0; i<f->quant_table_count; i++){
         write_quant_tables(c, f->quant_tables[i]);
+        for(j=0; j<f->context_count[i]*CONTEXT_SIZE; j++)
+            if(f->initial_states[i] && f->initial_states[i][0][j] != 128)
+                break;
+        if(j<f->context_count[i]*CONTEXT_SIZE){
+            put_rac(c, state, 1);
+            for(j=0; j<f->context_count[i]; j++){
+                for(k=0; k<CONTEXT_SIZE; k++){
+                    int pred= j ? f->initial_states[i][j-1][k] : 128;
+                    put_symbol(c, state2[k], (int8_t)(f->initial_states[i][j][k]-pred), 1);
+                }
+            }
+        }else{
+            put_rac(c, state, 0);
+        }
+    }
 
     f->avctx->extradata_size= ff_rac_terminate(c);
 
@@ -829,7 +855,7 @@ static int sort_stt(FFV1Context *s, uint8_t stt[256]){
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
-    int i, j;
+    int i, j, k, m;
 
     common_init(avctx);
 
@@ -875,6 +901,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
         p->context_count= s->context_count[p->quant_table_index];
     }
 
+    for(i=0; i<s->quant_table_count; i++){
+        s->initial_states[i]= av_malloc(CONTEXT_SIZE*s->context_count[i]*sizeof(uint8_t));
+        if(!s->initial_states[i])
+            return AVERROR(ENOMEM);
+        memset(s->initial_states[i], 128, CONTEXT_SIZE*s->context_count[i]);
+    }
+
     avctx->coded_frame= &s->picture;
     switch(avctx->pix_fmt){
     case PIX_FMT_YUV444P16:
@@ -909,6 +942,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     if(avctx->stats_in){
         char *p= avctx->stats_in;
+        uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
+
+        av_assert0(s->version>=2);
+
+        for(i=0; i<s->quant_table_count; i++){
+            rc_stat2[i]= av_mallocz(CONTEXT_SIZE*2*sizeof(uint64_t)*s->context_count[i]);
+        }
 
         for(;;){
             for(j=0; j<256; j++){
@@ -922,10 +962,39 @@ static av_cold int encode_init(AVCodecContext *avctx)
                     p=next;
                 }
             }
+            for(i=0; i<s->quant_table_count; i++){
+                for(j=0; j<s->context_count[i]; j++){
+                    for(k=0; k<32; k++){
+                        for(m=0; m<2; m++){
+                            char *next;
+                            rc_stat2[i][j][k][m]= strtol(p, &next, 0);
+                            if(next==p){
+                                av_log(avctx, AV_LOG_ERROR, "2Pass file invalid at %d %d %d %d [%s]\n", i,j,k,m,p);
+                                return -1;
+                            }
+                            p=next;
+                        }
+                    }
+                }
+            }
             while(*p=='\n' || *p==' ') p++;
             if(p[0]==0) break;
         }
         sort_stt(s, s->state_transition);
+
+        for(i=0; i<s->quant_table_count; i++){
+            for(j=0; j<s->context_count[i]; j++){
+                for(k=0; k<32; k++){
+                    int p= 128;
+                    if(rc_stat2[i][j][k][0]+rc_stat2[i][j][k][1]){
+                        p=256*rc_stat2[i][j][k][1] / (rc_stat2[i][j][k][0]+rc_stat2[i][j][k][1]);
+                    }
+                    p= av_clip(p, 1, 254);
+                    s->initial_states[i][j][k]= p;
+                }
+            }
+            av_free(rc_stat2[i]);
+        }
     }
 
     if(s->version>1){
@@ -939,7 +1008,19 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if(init_slice_state(s) < 0)
         return -1;
 
-    avctx->stats_out= av_mallocz(1024*30);
+    for(i=0; i<s->quant_table_count; i++){
+        for(j=0; j<s->slice_count; j++){
+            FFV1Context *sf= s->slice_context[j];
+            av_assert0(!sf->rc_stat2[i]);
+            sf->rc_stat2[i]= av_mallocz(CONTEXT_SIZE*2*s->context_count[i]*sizeof(uint64_t));
+            if(!sf->rc_stat2[i])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+#define STATS_OUT_SIZE 1024*1024*6
+    if(avctx->flags & CODEC_FLAG_PASS1)
+        avctx->stats_out= av_mallocz(STATS_OUT_SIZE);
 
     return 0;
 }
@@ -957,15 +1038,15 @@ static void clear_state(FFV1Context *f){
             p->interlace_bit_state[0]= 128;
             p->interlace_bit_state[1]= 128;
 
+            if(fs->ac){
+                memcpy(p->state, f->initial_states[p->quant_table_index], CONTEXT_SIZE*p->context_count);
+            }else{
             for(j=0; j<p->context_count; j++){
-                if(fs->ac){
-                    memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE);
-                }else{
                     p->vlc_state[j].drift= 0;
                     p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2);
                     p->vlc_state[j].bias= 0;
                     p->vlc_state[j].count= 1;
-                }
+            }
             }
         }
     }
@@ -1075,9 +1156,14 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     }
 
     if((avctx->flags&CODEC_FLAG_PASS1) && (f->picture_number&31)==0){
-        int j;
+        int j, k, m;
         char *p= avctx->stats_out;
-        char *end= p + 1024*30;
+        char *end= p + STATS_OUT_SIZE;
+        uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
+
+        for(i=0; i<f->quant_table_count; i++){
+            rc_stat2[i]= av_mallocz(CONTEXT_SIZE*2*sizeof(uint64_t)*f->context_count[i]);
+        }
 
         memset(f->rc_stat, 0, sizeof(f->rc_stat));
         for(j=0; j<f->slice_count; j++){
@@ -1086,6 +1172,14 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                 f->rc_stat[i][0] += fs->rc_stat[i][0];
                 f->rc_stat[i][1] += fs->rc_stat[i][1];
             }
+            for(i=0; i<f->quant_table_count; i++){
+                for(k=0; k<f->context_count[i]; k++){
+                    for(m=0; m<32; m++){
+                        rc_stat2[i][k][m][0] += fs->rc_stat2[i][k][m][0];
+                        rc_stat2[i][k][m][1] += fs->rc_stat2[i][k][m][1];
+                    }
+                }
+            }
         }
 
         for(j=0; j<256; j++){
@@ -1093,7 +1187,21 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
             p+= strlen(p);
         }
         snprintf(p, end-p, "\n");
-    } else
+
+        for(i=0; i<f->quant_table_count; i++){
+            for(j=0; j<f->context_count[i]; j++){
+                for(m=0; m<32; m++){
+                    snprintf(p, end-p, "%"PRIu64" %"PRIu64" ", rc_stat2[i][j][m][0], rc_stat2[i][j][m][1]);
+                    p+= strlen(p);
+                }
+            }
+        }
+        snprintf(p, end-p, "\n");
+
+        for(i=0; i<f->quant_table_count; i++){
+            av_free(rc_stat2[i]);
+        }
+    } else if(avctx->flags&CODEC_FLAG_PASS1)
         avctx->stats_out[0] = '\0';
 
     f->picture_number++;
@@ -1117,6 +1225,13 @@ static av_cold int common_end(AVCodecContext *avctx){
     }
 
     av_freep(&avctx->stats_out);
+    for(j=0; j<s->quant_table_count; j++){
+        av_freep(&s->initial_states[j]);
+        for(i=0; i<s->slice_count; i++){
+            FFV1Context *sf= s->slice_context[i];
+            av_freep(&sf->rc_stat2[j]);
+        }
+    }
 
     return 0;
 }
@@ -1328,8 +1443,10 @@ static int read_quant_tables(RangeCoder *c, int16_t quant_table[MAX_CONTEXT_INPU
 static int read_extra_header(FFV1Context *f){
     RangeCoder * const c= &f->c;
     uint8_t state[CONTEXT_SIZE];
-    int i;
+    int i, j, k;
+    uint8_t state2[32][CONTEXT_SIZE];
 
+    memset(state2, 128, sizeof(state2));
     memset(state, 128, sizeof(state));
 
     ff_init_range_decoder(c, f->avctx->extradata, f->avctx->extradata_size);
@@ -1364,6 +1481,20 @@ static int read_extra_header(FFV1Context *f){
             av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
             return -1;
         }
+
+        if(!f->initial_states[i]) f->initial_states[i]= av_malloc(CONTEXT_SIZE*f->context_count[i]*sizeof(uint8_t));
+        if(!f->initial_states[i])
+            return AVERROR(ENOMEM);
+        if(get_rac(c, state)){
+            for(j=0; j<f->context_count[i]; j++){
+                for(k=0; k<CONTEXT_SIZE; k++){
+                    int pred= j ? f->initial_states[i][j-1][k] : 128;
+                    f->initial_states[i][j][k]= (pred+get_symbol(c, state2[k], 1))&0xFF;
+                }
+            }
+        }else{
+            memset(f->initial_states[i], 128, CONTEXT_SIZE*f->context_count[i]*sizeof(uint8_t));
+        }
     }
 
     return 0;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20101026/5a04448f/attachment.pgp>



More information about the ffmpeg-devel mailing list