• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "internal.h"
00029 #include "dsputil.h"
00030 #include "avcodec.h"
00031 #include "mpegvideo.h"
00032 #include "h264.h"
00033 #include "h264data.h"
00034 #include "h264_parser.h"
00035 #include "golomb.h"
00036 #include "mathops.h"
00037 #include "rectangle.h"
00038 #include "vdpau_internal.h"
00039 
00040 #include "cabac.h"
00041 #if ARCH_X86
00042 #include "x86/h264_i386.h"
00043 #endif
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* NOTE(review): DELAYED_PIC_REF is not used in the visible part of this file;
 * presumably a reference-flag value for pictures kept for delayed output --
 * confirm against its users. */
00052 #define DELAYED_PIC_REF 4
00053 
/* VLC descriptors with statically sized table storage (presumably the CAVLC
 * residual-decoding tables -- initialised outside the visible part of this file).
 * The four coeff_token tables share one array whose row count is the sum of
 * the sizes listed in coeff_token_vlc_tables_size. */
00054 static VLC coeff_token_vlc[4];
00055 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
00056 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
00057 
/* For each group below, the *_size constant equals the per-table row count
 * of the matching storage array. */
00058 static VLC chroma_dc_coeff_token_vlc;
00059 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
00060 static const int chroma_dc_coeff_token_vlc_table_size = 256;
00061 
00062 static VLC total_zeros_vlc[15];
00063 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
00064 static const int total_zeros_vlc_tables_size = 512;
00065 
00066 static VLC chroma_dc_total_zeros_vlc[3];
00067 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
00068 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
00069 
00070 static VLC run_vlc[6];
00071 static VLC_TYPE run_vlc_tables[6][8][2];
00072 static const int run_vlc_tables_size = 8;
00073 
00074 static VLC run7_vlc;
00075 static VLC_TYPE run7_vlc_table[96][2];
00076 static const int run7_vlc_table_size = 96;
00077 
/* Forward declarations of static functions whose definitions are not in the
 * visible part of this listing. */
00078 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
00079 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
00080 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00081 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00082 static Picture * remove_long(H264Context *h, int i, int ref_mask);
00083 
00084 static av_always_inline uint32_t pack16to32(int a, int b){
00085 #ifdef WORDS_BIGENDIAN
00086    return (b&0xFFFF) + (a<<16);
00087 #else
00088    return (a&0xFFFF) + (b<<16);
00089 #endif
00090 }
00091 
/* rem6[i] == i % 6 for i in 0..51 (lookup instead of a modulo).
 * NOTE(review): presumably indexed by a quantiser parameter -- confirm at the users. */
00092 static const uint8_t rem6[52]={
00093 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
00094 };
00095 
/* div6[i] == i / 6 for i in 0..51 (lookup instead of a division). */
00096 static const uint8_t div6[52]={
00097 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
00098 };
00099 
/* Block-index remapping tables for the left neighbour(s), selected by
 * fill_caches(): row 0 is the default; row 3 is used when the current
 * macroblock is interlaced and the left pair is not; row 2 - bottom
 * (i.e. row 2 for the top, row 1 for the bottom macroblock of a pair) is
 * used when the current macroblock is not interlaced and the left pair is.
 * Columns 0..3 are used as indices into intra4x4_pred_mode[] and
 * non_zero_count[] and as row offsets into the motion vector arrays;
 * columns 4..7 are used only as non_zero_count[] indices. */
00100 static const uint8_t left_block_options[4][8]={
00101     {0,1,2,3,7,10,8,11},
00102     {2,2,3,3,8,11,8,11},
00103     {0,0,1,1,7,10,7,10},
00104     {0,2,0,2,7,10,7,10}
00105 };
00106 
/* 7 x (1<<LEVEL_TAB_BITS) x 2 lookup table; not const, so it is filled at
 * run time by code outside the visible part of this file.
 * NOTE(review): presumably a CAVLC level-decoding shortcut -- confirm. */
00107 #define LEVEL_TAB_BITS 8
00108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
00109 
/**
 * Load data of the neighbouring macroblocks into the per-macroblock caches
 * of h for the macroblock at h->mb_xy.
 *
 * The function resolves the positions of the top, top-left, top-right and
 * (up to two) left neighbours, adjusting them when FRAME_MBAFF is set,
 * stores the top/left positions in h->top_mb_xy and h->left_mb_xy[], and
 * then fills, in this order:
 *  - for intra macroblocks and only when !for_deblock: the
 *    *_samples_available masks and the border of intra4x4_pred_mode_cache;
 *  - the border of non_zero_count_cache;
 *  - h->top_cbp and h->left_cbp (only when h->pps.cabac);
 *  - for inter or direct macroblocks: the border of mv_cache / ref_cache
 *    and, when h->pps.cabac, of mvd_cache and direct_cache, followed by the
 *    field/frame rescaling of those border entries when FRAME_MBAFF;
 *  - h->neighbor_transform_size.
 *
 * @param h           decoder context, read and updated in place
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when called on behalf of the deblocking filter:
 *                    a neighbour then counts as present when its slice_table
 *                    entry is below 0xFFFF (instead of equal to
 *                    h->slice_num), the top-left/top-right types are forced
 *                    to 0, and the function may return early before writing
 *                    anything to h (see the first check in the body).
 */
00110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
00111     MpegEncContext * const s = &h->s;
00112     const int mb_xy= h->mb_xy;
00113     int topleft_xy, top_xy, topright_xy, left_xy[2];
00114     int topleft_type, top_type, topright_type, left_type[2];
00115     const uint8_t * left_block;
00116     int topleft_partition= -1;
00117     int i;
00118 
          // one macroblock row up, with the stride scaled by 1<<FIELD_PICTURE
00119     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
00120 
00121     //FIXME deblocking could skip the intra and nnz parts.
          // Deblocking shortcut (non-MBAFF only): nothing is loaded when h->slice_num == 1
          // or when the top neighbour has the same slice_table entry as this macroblock.
00122     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
00123         return;
00124 
00125     /* Wow, what a mess, why didn't they simplify the interlacing & intra
00126      * stuff, I can't imagine that these complex rules are worth it. */
00127 
00128     topleft_xy = top_xy - 1;
00129     topright_xy= top_xy + 1;
00130     left_xy[1] = left_xy[0] = mb_xy-1;
00131     left_block = left_block_options[0];
          // MBAFF: neighbour positions depend on whether the current macroblock and the
          // neighbouring pairs are interlaced. pair_xy addresses the macroblock of the
          // current pair that lies on the even row (mb_y with bit 0 cleared).
00132     if(FRAME_MBAFF){
00133         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
00134         const int top_pair_xy      = pair_xy     - s->mb_stride;
00135         const int topleft_pair_xy  = top_pair_xy - 1;
00136         const int topright_pair_xy = top_pair_xy + 1;
00137         const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
00138         const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
00139         const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
00140         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
00141         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
00142         const int bottom = (s->mb_y & 1);
00143         tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
00144 
00145         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
00146             top_xy -= s->mb_stride;
00147         }
00148         if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
00149             topleft_xy -= s->mb_stride;
00150         } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
00151             topleft_xy += s->mb_stride;
00152             // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
00153             topleft_partition = 0;
00154         }
00155         if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
00156             topright_xy -= s->mb_stride;
00157         }
              // Field/frame mismatch with the left pair: both left entries start at the
              // pair's first macroblock and a remapping row of left_block_options is chosen.
00158         if (left_mb_field_flag != curr_mb_field_flag) {
00159             left_xy[1] = left_xy[0] = pair_xy - 1;
00160             if (curr_mb_field_flag) {
00161                 left_xy[1] += s->mb_stride;
00162                 left_block = left_block_options[3];
00163             } else {
00164                 left_block= left_block_options[2 - bottom];
00165             }
00166         }
00167     }
00168 
          // publish the resolved top/left neighbour positions
00169     h->top_mb_xy = top_xy;
00170     h->left_mb_xy[0] = left_xy[0];
00171     h->left_mb_xy[1] = left_xy[1];
          // Neighbour types; a type of 0 means "not available" in everything below.
00172     if(for_deblock){
00173         topleft_type = 0;
00174         topright_type = 0;
00175         top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
00176         left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
00177         left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
00178 
00179         if(MB_MBAFF && !IS_INTRA(mb_type)){
00180             int list;
00181             for(list=0; list<h->list_count; list++){
00182                 //These values were changed for ease of performing MC, we need to change them back
00183                 //FIXME maybe we can make MC and loop filter use the same values or prevent
00184                 //the MC code from changing ref_cache and rather use a temporary array.
00185                 if(USES_LIST(mb_type,list)){
00186                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
00187                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
00188                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00189                     ref += h->b8_stride;
00190                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
00191                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00192                 }
00193             }
00194         }
00195     }else{
              // Decoding: only neighbours whose slice_table entry equals h->slice_num are used.
00196         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
00197         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
00198         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
00199         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
00200         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
00201 
00202     if(IS_INTRA(mb_type)){
              // With constrained_intra_pred a neighbour only passes the mask if it has one of
              // the bits of IS_INTRA(-1) set (presumably the intra type bits -- confirm against
              // the macro); otherwise any nonzero type passes.
00203         int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
              // start from "everything available" and clear bits per missing neighbour
00204         h->topleft_samples_available=
00205         h->top_samples_available=
00206         h->left_samples_available= 0xFFFF;
00207         h->topright_samples_available= 0xEEEA;
00208 
00209         if(!(top_type & type_mask)){
00210             h->topleft_samples_available= 0xB3FF;
00211             h->top_samples_available= 0x33FF;
00212             h->topright_samples_available= 0x26EA;
00213         }
00214         if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
00215             if(IS_INTERLACED(mb_type)){
00216                 if(!(left_type[0] & type_mask)){
00217                     h->topleft_samples_available&= 0xDFFF;
00218                     h->left_samples_available&= 0x5FFF;
00219                 }
00220                 if(!(left_type[1] & type_mask)){
00221                     h->topleft_samples_available&= 0xFF5F;
00222                     h->left_samples_available&= 0xFF5F;
00223                 }
00224             }else{
00225                 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
00226                                 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
00227                 assert(left_xy[0] == left_xy[1]);
00228                 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
00229                     h->topleft_samples_available&= 0xDF5F;
00230                     h->left_samples_available&= 0x5F5F;
00231                 }
00232             }
00233         }else{
00234             if(!(left_type[0] & type_mask)){
00235                 h->topleft_samples_available&= 0xDF5F;
00236                 h->left_samples_available&= 0x5F5F;
00237             }
00238         }
00239 
00240         if(!(topleft_type & type_mask))
00241             h->topleft_samples_available&= 0x7FFF;
00242 
00243         if(!(topright_type & type_mask))
00244             h->topright_samples_available&= 0xFBFF;
00245 
              // Border of the intra4x4 mode cache: copy the neighbour's stored modes when it
              // is intra4x4; otherwise use -1 if it fails the mask above and 2 if it passes.
00246         if(IS_INTRA4x4(mb_type)){
00247             if(IS_INTRA4x4(top_type)){
00248                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
00249                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
00250                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
00251                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
00252             }else{
00253                 int pred;
00254                 if(!(top_type & type_mask))
00255                     pred= -1;
00256                 else{
00257                     pred= 2;
00258                 }
00259                 h->intra4x4_pred_mode_cache[4+8*0]=
00260                 h->intra4x4_pred_mode_cache[5+8*0]=
00261                 h->intra4x4_pred_mode_cache[6+8*0]=
00262                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
00263             }
00264             for(i=0; i<2; i++){
00265                 if(IS_INTRA4x4(left_type[i])){
00266                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
00267                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
00268                 }else{
00269                     int pred;
00270                     if(!(left_type[i] & type_mask))
00271                         pred= -1;
00272                     else{
00273                         pred= 2;
00274                     }
00275                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
00276                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
00277                 }
00278             }
00279         }
00280     }
00281     }
00282 
00283 
00284 /*
00285 0 . T T. T T T T
00286 1 L . .L . . . .
00287 2 L . .L . . . .
00288 3 . T TL . . . .
00289 4 L . .L . . . .
00290 5 L . .. . . . .
00291 */
00292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
          // Border of the non-zero-count cache. A missing neighbour contributes 0 when
          // CABAC is in use and the macroblock is not intra, and 64 otherwise.
00293     if(top_type){
00294         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
00295         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
00296         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
00297         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
00298 
00299         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
00300         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
00301 
00302         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
00303         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
00304 
00305     }else{
00306         h->non_zero_count_cache[4+8*0]=
00307         h->non_zero_count_cache[5+8*0]=
00308         h->non_zero_count_cache[6+8*0]=
00309         h->non_zero_count_cache[7+8*0]=
00310 
00311         h->non_zero_count_cache[1+8*0]=
00312         h->non_zero_count_cache[2+8*0]=
00313 
00314         h->non_zero_count_cache[1+8*3]=
00315         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00316 
00317     }
00318 
00319     for (i=0; i<2; i++) {
00320         if(left_type[i]){
00321             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
00322             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
00323             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
00324             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
00325         }else{
00326             h->non_zero_count_cache[3+8*1 + 2*8*i]=
00327             h->non_zero_count_cache[3+8*2 + 2*8*i]=
00328             h->non_zero_count_cache[0+8*1 +   8*i]=
00329             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00330         }
00331     }
00332 
          // CABAC only: coded-block-pattern of the top and left neighbours. A missing
          // neighbour yields 0x1C0 for an intra macroblock and 0 otherwise.
00333     if( h->pps.cabac ) {
00334         // top_cbp
00335         if(top_type) {
00336             h->top_cbp = h->cbp_table[top_xy];
00337         } else if(IS_INTRA(mb_type)) {
00338             h->top_cbp = 0x1C0;
00339         } else {
00340             h->top_cbp = 0;
00341         }
00342         // left_cbp
00343         if (left_type[0]) {
00344             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
00345         } else if(IS_INTRA(mb_type)) {
00346             h->left_cbp = 0x1C0;
00347         } else {
00348             h->left_cbp = 0;
00349         }
00350         if (left_type[0]) {
00351             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
00352         }
00353         if (left_type[1]) {
00354             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
00355         }
00356     }
00357 
00358 #if 1
          // Motion data: border of mv_cache/ref_cache (and mvd_cache/direct_cache for CABAC).
00359     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
00360         int list;
00361         for(list=0; list<h->list_count; list++){
00362             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
00363                 /*if(!h->mv_cache_clean[list]){
00364                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
00365                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
00366                     h->mv_cache_clean[list]= 1;
00367                 }*/
00368                 continue;
00369             }
00370             h->mv_cache_clean[list]= 0;
00371 
                  // Top row. A neighbour that exists but does not use this list gets
                  // LIST_NOT_USED as reference, a missing one PART_NOT_AVAILABLE; mvs are 0.
00372             if(USES_LIST(top_type, list)){
00373                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00374                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
00375                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
00376                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
00377                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
00378                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
00379                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
00380                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
00381                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
00382                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
00383             }else{
00384                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
00385                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
00386                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
00387                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
00388                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
00389             }
00390 
                  // Left column, two cache rows per left_xy[] entry.
00391             for(i=0; i<2; i++){
00392                 int cache_idx = scan8[0] - 1 + i*2*8;
00393                 if(USES_LIST(left_type[i], list)){
00394                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
00395                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
00396                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
00397                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
00398                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
00399                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
00400                 }else{
00401                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
00402                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
00403                     h->ref_cache[list][cache_idx  ]=
00404                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00405                 }
00406             }
00407 
                  // Everything below (top-left, top-right, mvd, direct, MBAFF rescaling) is
                  // skipped for deblocking, and outside MBAFF also for direct macroblocks
                  // when direct_spatial_mv_pred is unset.
00408             if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
00409                 continue;
00410 
00411             if(USES_LIST(topleft_type, list)){
00412                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
00413                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
00414                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00415                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00416             }else{
00417                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
00418                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00419             }
00420 
00421             if(USES_LIST(topright_type, list)){
00422                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
00423                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
00424                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00425                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00426             }else{
00427                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
00428                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00429             }
00430 
00431             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
00432                 continue;
00433 
00434             h->ref_cache[list][scan8[5 ]+1] =
00435             h->ref_cache[list][scan8[7 ]+1] =
00436             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
00437             h->ref_cache[list][scan8[4 ]] =
00438             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
00439             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
00440             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
00441             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00442             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
00443             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
00444 
00445             if( h->pps.cabac ) {
00446                 /* XXX beurk, Load mvd */
00447                 if(USES_LIST(top_type, list)){
00448                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00449                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
00450                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
00451                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
00452                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
00453                 }else{
00454                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
00455                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
00456                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
00457                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
00458                 }
00459                 if(USES_LIST(left_type[0], list)){
00460                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
00461                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
00462                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
00463                 }else{
00464                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
00465                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
00466                 }
00467                 if(USES_LIST(left_type[1], list)){
00468                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
00469                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
00470                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
00471                 }else{
00472                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
00473                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
00474                 }
00475                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
00476                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
00477                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00478                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
00479                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
00480 
                      // B slices: direct flags of the neighbours (1 for a direct macroblock,
                      // the stored per-8x8 flag for an 8x8-partitioned one, 0 otherwise).
00481                 if(h->slice_type_nos == FF_B_TYPE){
00482                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
00483 
00484                     if(IS_DIRECT(top_type)){
00485                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
00486                     }else if(IS_8X8(top_type)){
00487                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
00488                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
00489                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
00490                     }else{
00491                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
00492                     }
00493 
00494                     if(IS_DIRECT(left_type[0]))
00495                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
00496                     else if(IS_8X8(left_type[0]))
00497                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
00498                     else
00499                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
00500 
00501                     if(IS_DIRECT(left_type[1]))
00502                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
00503                     else if(IS_8X8(left_type[1]))
00504                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
00505                     else
00506                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
00507                 }
00508             }
00509 
                  // MBAFF: convert the cached border entries between frame and field units.
                  // In a field macroblock, entries from non-interlaced neighbours get the
                  // reference index doubled and component [1] of mv/mvd halved; in a frame
                  // macroblock, entries from interlaced neighbours get the inverse treatment.
                  // Entries with a negative reference index are left untouched.
00510             if(FRAME_MBAFF){
00511 #define MAP_MVS\
00512                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
00513                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
00514                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
00515                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
00516                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
00517                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
00518                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
00519                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
00520                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
00521                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
00522                 if(MB_FIELD){
00523 #define MAP_F2F(idx, mb_type)\
00524                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00525                         h->ref_cache[list][idx] <<= 1;\
00526                         h->mv_cache[list][idx][1] /= 2;\
00527                         h->mvd_cache[list][idx][1] /= 2;\
00528                     }
00529                     MAP_MVS
00530 #undef MAP_F2F
00531                 }else{
00532 #define MAP_F2F(idx, mb_type)\
00533                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00534                         h->ref_cache[list][idx] >>= 1;\
00535                         h->mv_cache[list][idx][1] <<= 1;\
00536                         h->mvd_cache[list][idx][1] <<= 1;\
00537                     }
00538                     MAP_MVS
00539 #undef MAP_F2F
00540                 }
00541             }
00542         }
00543     }
00544 #endif
00545 
          // number of neighbours (top, first left) for which IS_8x8DCT() is true: 0, 1 or 2
00546     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
00547 }
00548 
00549 static inline void write_back_intra_pred_mode(H264Context *h){
00550     const int mb_xy= h->mb_xy;
00551 
00552     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
00553     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
00554     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
00555     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
00556     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
00557     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
00558     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
00559 }
00560 
00564 static inline int check_intra4x4_pred_mode(H264Context *h){
00565     MpegEncContext * const s = &h->s;
00566     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00567     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00568     int i;
00569 
00570     if(!(h->top_samples_available&0x8000)){
00571         for(i=0; i<4; i++){
00572             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00573             if(status<0){
00574                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00575                 return -1;
00576             } else if(status){
00577                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00578             }
00579         }
00580     }
00581 
00582     if((h->left_samples_available&0x8888)!=0x8888){
00583         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00584         for(i=0; i<4; i++){
00585             if(!(h->left_samples_available&mask[i])){
00586                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00587                 if(status<0){
00588                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00589                     return -1;
00590                 } else if(status){
00591                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00592                 }
00593             }
00594         }
00595     }
00596 
00597     return 0;
00598 } //FIXME cleanup like next
00599 
00603 static inline int check_intra_pred_mode(H264Context *h, int mode){
00604     MpegEncContext * const s = &h->s;
00605     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
00606     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
00607 
00608     if(mode > 6U) {
00609         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
00610         return -1;
00611     }
00612 
00613     if(!(h->top_samples_available&0x8000)){
00614         mode= top[ mode ];
00615         if(mode<0){
00616             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00617             return -1;
00618         }
00619     }
00620 
00621     if((h->left_samples_available&0x8080) != 0x8080){
00622         mode= left[ mode ];
00623         if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
00624             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
00625         }
00626         if(mode<0){
00627             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00628             return -1;
00629         }
00630     }
00631 
00632     return mode;
00633 }
00634 
00638 static inline int pred_intra_mode(H264Context *h, int n){
00639     const int index8= scan8[n];
00640     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
00641     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
00642     const int min= FFMIN(left, top);
00643 
00644     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
00645 
00646     if(min<0) return DC_PRED;
00647     else      return min;
00648 }
00649 
00650 static inline void write_back_non_zero_count(H264Context *h){
00651     const int mb_xy= h->mb_xy;
00652 
00653     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
00654     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
00655     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
00656     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
00657     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
00658     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
00659     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
00660 
00661     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
00662     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
00663     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
00664 
00665     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
00666     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
00667     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
00668 }
00669 
00674 static inline int pred_non_zero_count(H264Context *h, int n){
00675     const int index8= scan8[n];
00676     const int left= h->non_zero_count_cache[index8 - 1];
00677     const int top = h->non_zero_count_cache[index8 - 8];
00678     int i= left + top;
00679 
00680     if(i<64) i= (i+1)>>1;
00681 
00682     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
00683 
00684     return i&31;
00685 }
00686 
00687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
00688     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
00689     MpegEncContext *s = &h->s;
00690 
00691     /* there is no consistent mapping of mvs to neighboring locations that will
00692      * make mbaff happy, so we can't move all this logic to fill_caches */
00693     if(FRAME_MBAFF){
00694         const uint32_t *mb_types = s->current_picture_ptr->mb_type;
00695         const int16_t *mv;
00696         *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
00697         *C = h->mv_cache[list][scan8[0]-2];
00698 
00699         if(!MB_FIELD
00700            && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
00701             int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
00702             if(IS_INTERLACED(mb_types[topright_xy])){
00703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
00704                 const int x4 = X4, y4 = Y4;\
00705                 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
00706                 if(!USES_LIST(mb_type,list))\
00707                     return LIST_NOT_USED;\
00708                 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
00709                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
00710                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
00711                 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
00712 
00713                 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
00714             }
00715         }
00716         if(topright_ref == PART_NOT_AVAILABLE
00717            && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
00718            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
00719             if(!MB_FIELD
00720                && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
00721                 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
00722             }
00723             if(MB_FIELD
00724                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
00725                && i >= scan8[0]+8){
00726                 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
00727                 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
00728             }
00729         }
00730 #undef SET_DIAG_MV
00731     }
00732 
00733     if(topright_ref != PART_NOT_AVAILABLE){
00734         *C= h->mv_cache[list][ i - 8 + part_width ];
00735         return topright_ref;
00736     }else{
00737         tprintf(s->avctx, "topright MV not available\n");
00738 
00739         *C= h->mv_cache[list][ i - 8 - 1 ];
00740         return h->ref_cache[list][ i - 8 - 1 ];
00741     }
00742 }
00743 
00751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
00752     const int index8= scan8[n];
00753     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
00754     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
00755     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
00756     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
00757     const int16_t * C;
00758     int diagonal_ref, match_count;
00759 
00760     assert(part_width==1 || part_width==2 || part_width==4);
00761 
00762 /* mv_cache
00763   B . . A T T T T
00764   U . . L . . , .
00765   U . . L . . . .
00766   U . . L . . , .
00767   . . . L . . . .
00768 */
00769 
00770     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
00771     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
00772     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
00773     if(match_count > 1){ //most common
00774         *mx= mid_pred(A[0], B[0], C[0]);
00775         *my= mid_pred(A[1], B[1], C[1]);
00776     }else if(match_count==1){
00777         if(left_ref==ref){
00778             *mx= A[0];
00779             *my= A[1];
00780         }else if(top_ref==ref){
00781             *mx= B[0];
00782             *my= B[1];
00783         }else{
00784             *mx= C[0];
00785             *my= C[1];
00786         }
00787     }else{
00788         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
00789             *mx= A[0];
00790             *my= A[1];
00791         }else{
00792             *mx= mid_pred(A[0], B[0], C[0]);
00793             *my= mid_pred(A[1], B[1], C[1]);
00794         }
00795     }
00796 
00797     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
00798 }
00799 
00806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00807     if(n==0){
00808         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
00809         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
00810 
00811         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
00812 
00813         if(top_ref == ref){
00814             *mx= B[0];
00815             *my= B[1];
00816             return;
00817         }
00818     }else{
00819         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
00820         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
00821 
00822         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00823 
00824         if(left_ref == ref){
00825             *mx= A[0];
00826             *my= A[1];
00827             return;
00828         }
00829     }
00830 
00831     //RARE
00832     pred_motion(h, n, 4, list, ref, mx, my);
00833 }
00834 
00841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00842     if(n==0){
00843         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
00844         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
00845 
00846         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00847 
00848         if(left_ref == ref){
00849             *mx= A[0];
00850             *my= A[1];
00851             return;
00852         }
00853     }else{
00854         const int16_t * C;
00855         int diagonal_ref;
00856 
00857         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
00858 
00859         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
00860 
00861         if(diagonal_ref == ref){
00862             *mx= C[0];
00863             *my= C[1];
00864             return;
00865         }
00866     }
00867 
00868     //RARE
00869     pred_motion(h, n, 2, list, ref, mx, my);
00870 }
00871 
00872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
00873     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
00874     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
00875 
00876     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
00877 
00878     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
00879        || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
00880        || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
00881 
00882         *mx = *my = 0;
00883         return;
00884     }
00885 
00886     pred_motion(h, 0, 4, 0, 0, mx, my);
00887 
00888     return;
00889 }
00890 
00891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
00892     int poc0 = h->ref_list[0][i].poc;
00893     int td = av_clip(poc1 - poc0, -128, 127);
00894     if(td == 0 || h->ref_list[0][i].long_ref){
00895         return 256;
00896     }else{
00897         int tb = av_clip(poc - poc0, -128, 127);
00898         int tx = (16384 + (FFABS(td) >> 1)) / td;
00899         return av_clip((tb*tx + 32) >> 6, -1024, 1023);
00900     }
00901 }
00902 
00903 static inline void direct_dist_scale_factor(H264Context * const h){
00904     MpegEncContext * const s = &h->s;
00905     const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
00906     const int poc1 = h->ref_list[1][0].poc;
00907     int i, field;
00908     for(field=0; field<2; field++){
00909         const int poc  = h->s.current_picture_ptr->field_poc[field];
00910         const int poc1 = h->ref_list[1][0].field_poc[field];
00911         for(i=0; i < 2*h->ref_count[0]; i++)
00912             h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
00913     }
00914 
00915     for(i=0; i<h->ref_count[0]; i++){
00916         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
00917     }
00918 }
00919 
00920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
00921     MpegEncContext * const s = &h->s;
00922     Picture * const ref1 = &h->ref_list[1][0];
00923     int j, old_ref, rfield;
00924     int start= mbafi ? 16                      : 0;
00925     int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
00926     int interl= mbafi || s->picture_structure != PICT_FRAME;
00927 
00928     /* bogus; fills in for missing frames */
00929     memset(map[list], 0, sizeof(map[list]));
00930 
00931     for(rfield=0; rfield<2; rfield++){
00932         for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
00933             int poc = ref1->ref_poc[colfield][list][old_ref];
00934 
00935             if     (!interl)
00936                 poc |= 3;
00937             else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
00938                 poc= (poc&~3) + rfield + 1;
00939 
00940             for(j=start; j<end; j++){
00941                 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
00942                     int cur_ref= mbafi ? (j-16)^field : j;
00943                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
00944                     if(rfield == field)
00945                         map[list][old_ref] = cur_ref;
00946                     break;
00947                 }
00948             }
00949         }
00950     }
00951 }
00952 
00953 static inline void direct_ref_list_init(H264Context * const h){
00954     MpegEncContext * const s = &h->s;
00955     Picture * const ref1 = &h->ref_list[1][0];
00956     Picture * const cur = s->current_picture_ptr;
00957     int list, j, field;
00958     int sidx= (s->picture_structure&1)^1;
00959     int ref1sidx= (ref1->reference&1)^1;
00960 
00961     for(list=0; list<2; list++){
00962         cur->ref_count[sidx][list] = h->ref_count[list];
00963         for(j=0; j<h->ref_count[list]; j++)
00964             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
00965     }
00966 
00967     if(s->picture_structure == PICT_FRAME){
00968         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
00969         memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
00970     }
00971 
00972     cur->mbaff= FRAME_MBAFF;
00973 
00974     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
00975         return;
00976 
00977     for(list=0; list<2; list++){
00978         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
00979         for(field=0; field<2; field++)
00980             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
00981     }
00982 }
00983 
00984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
00985     MpegEncContext * const s = &h->s;
00986     int b8_stride = h->b8_stride;
00987     int b4_stride = h->b_stride;
00988     int mb_xy = h->mb_xy;
00989     int mb_type_col[2];
00990     const int16_t (*l1mv0)[2], (*l1mv1)[2];
00991     const int8_t *l1ref0, *l1ref1;
00992     const int is_b8x8 = IS_8X8(*mb_type);
00993     unsigned int sub_mb_type;
00994     int i8, i4;
00995 
00996 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
00997 
00998     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
00999         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
01000             int cur_poc = s->current_picture_ptr->poc;
01001             int *col_poc = h->ref_list[1]->field_poc;
01002             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
01003             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
01004             b8_stride = 0;
01005         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
01006             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
01007             mb_xy += s->mb_stride*fieldoff;
01008         }
01009         goto single_col;
01010     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
01011         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
01012             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
01013             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
01014             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
01015             b8_stride *= 3;
01016             b4_stride *= 6;
01017             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
01018             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
01019                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
01020                 && !is_b8x8){
01021                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01022                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
01023             }else{
01024                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01025                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01026             }
01027         }else{                                           //     AFR/FR    -> AFR/FR
01028 single_col:
01029             mb_type_col[0] =
01030             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
01031             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
01032                 /* FIXME save sub mb types from previous frames (or derive from MVs)
01033                 * so we know exactly what block size to use */
01034                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
01035                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01036             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
01037                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01038                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
01039             }else{
01040                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01041                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01042             }
01043         }
01044     }
01045 
01046     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
01047     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
01048     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
01049     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
01050     if(!b8_stride){
01051         if(s->mb_y&1){
01052             l1ref0 += h->b8_stride;
01053             l1ref1 += h->b8_stride;
01054             l1mv0  +=  2*b4_stride;
01055             l1mv1  +=  2*b4_stride;
01056         }
01057     }
01058 
01059     if(h->direct_spatial_mv_pred){
01060         int ref[2];
01061         int mv[2][2];
01062         int list;
01063 
01064         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
01065 
01066         /* ref = min(neighbors) */
01067         for(list=0; list<2; list++){
01068             int refa = h->ref_cache[list][scan8[0] - 1];
01069             int refb = h->ref_cache[list][scan8[0] - 8];
01070             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
01071             if(refc == PART_NOT_AVAILABLE)
01072                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
01073             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
01074             if(ref[list] < 0)
01075                 ref[list] = -1;
01076         }
01077 
01078         if(ref[0] < 0 && ref[1] < 0){
01079             ref[0] = ref[1] = 0;
01080             mv[0][0] = mv[0][1] =
01081             mv[1][0] = mv[1][1] = 0;
01082         }else{
01083             for(list=0; list<2; list++){
01084                 if(ref[list] >= 0)
01085                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
01086                 else
01087                     mv[list][0] = mv[list][1] = 0;
01088             }
01089         }
01090 
01091         if(ref[1] < 0){
01092             if(!is_b8x8)
01093                 *mb_type &= ~MB_TYPE_L1;
01094             sub_mb_type &= ~MB_TYPE_L1;
01095         }else if(ref[0] < 0){
01096             if(!is_b8x8)
01097                 *mb_type &= ~MB_TYPE_L0;
01098             sub_mb_type &= ~MB_TYPE_L0;
01099         }
01100 
01101         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01102             for(i8=0; i8<4; i8++){
01103                 int x8 = i8&1;
01104                 int y8 = i8>>1;
01105                 int xy8 = x8+y8*b8_stride;
01106                 int xy4 = 3*x8+y8*b4_stride;
01107                 int a=0, b=0;
01108 
01109                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01110                     continue;
01111                 h->sub_mb_type[i8] = sub_mb_type;
01112 
01113                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01114                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01115                 if(!IS_INTRA(mb_type_col[y8])
01116                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
01117                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
01118                     if(ref[0] > 0)
01119                         a= pack16to32(mv[0][0],mv[0][1]);
01120                     if(ref[1] > 0)
01121                         b= pack16to32(mv[1][0],mv[1][1]);
01122                 }else{
01123                     a= pack16to32(mv[0][0],mv[0][1]);
01124                     b= pack16to32(mv[1][0],mv[1][1]);
01125                 }
01126                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
01127                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
01128             }
01129         }else if(IS_16X16(*mb_type)){
01130             int a=0, b=0;
01131 
01132             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
01133             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
01134             if(!IS_INTRA(mb_type_col[0])
01135                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
01136                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
01137                        && (h->x264_build>33 || !h->x264_build)))){
01138                 if(ref[0] > 0)
01139                     a= pack16to32(mv[0][0],mv[0][1]);
01140                 if(ref[1] > 0)
01141                     b= pack16to32(mv[1][0],mv[1][1]);
01142             }else{
01143                 a= pack16to32(mv[0][0],mv[0][1]);
01144                 b= pack16to32(mv[1][0],mv[1][1]);
01145             }
01146             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
01147             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
01148         }else{
01149             for(i8=0; i8<4; i8++){
01150                 const int x8 = i8&1;
01151                 const int y8 = i8>>1;
01152 
01153                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01154                     continue;
01155                 h->sub_mb_type[i8] = sub_mb_type;
01156 
01157                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
01158                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
01159                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01160                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01161 
01162                 /* col_zero_flag */
01163                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
01164                                               || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
01165                                                   && (h->x264_build>33 || !h->x264_build)))){
01166                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
01167                     if(IS_SUB_8X8(sub_mb_type)){
01168                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01169                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01170                             if(ref[0] == 0)
01171                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01172                             if(ref[1] == 0)
01173                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01174                         }
01175                     }else
01176                     for(i4=0; i4<4; i4++){
01177                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01178                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01179                             if(ref[0] == 0)
01180                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
01181                             if(ref[1] == 0)
01182                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
01183                         }
01184                     }
01185                 }
01186             }
01187         }
01188     }else{ /* direct temporal mv pred */
01189         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
01190         const int *dist_scale_factor = h->dist_scale_factor;
01191         int ref_offset= 0;
01192 
01193         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
01194             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
01195             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
01196             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
01197         }
01198         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
01199             ref_offset += 16;
01200 
01201         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01202             /* FIXME assumes direct_8x8_inference == 1 */
01203             int y_shift  = 2*!IS_INTERLACED(*mb_type);
01204 
01205             for(i8=0; i8<4; i8++){
01206                 const int x8 = i8&1;
01207                 const int y8 = i8>>1;
01208                 int ref0, scale;
01209                 const int16_t (*l1mv)[2]= l1mv0;
01210 
01211                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01212                     continue;
01213                 h->sub_mb_type[i8] = sub_mb_type;
01214 
01215                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01216                 if(IS_INTRA(mb_type_col[y8])){
01217                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01218                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01219                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01220                     continue;
01221                 }
01222 
01223                 ref0 = l1ref0[x8 + y8*b8_stride];
01224                 if(ref0 >= 0)
01225                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
01226                 else{
01227                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01228                     l1mv= l1mv1;
01229                 }
01230                 scale = dist_scale_factor[ref0];
01231                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01232 
01233                 {
01234                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
01235                     int my_col = (mv_col[1]<<y_shift)/2;
01236                     int mx = (scale * mv_col[0] + 128) >> 8;
01237                     int my = (scale * my_col + 128) >> 8;
01238                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01239                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
01240                 }
01241             }
01242             return;
01243         }
01244 
01245         /* one-to-one mv scaling */
01246 
01247         if(IS_16X16(*mb_type)){
01248             int ref, mv0, mv1;
01249 
01250             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
01251             if(IS_INTRA(mb_type_col[0])){
01252                 ref=mv0=mv1=0;
01253             }else{
01254                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
01255                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
01256                 const int scale = dist_scale_factor[ref0];
01257                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
01258                 int mv_l0[2];
01259                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01260                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01261                 ref= ref0;
01262                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
01263                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01264             }
01265             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
01266             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
01267             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
01268         }else{
01269             for(i8=0; i8<4; i8++){
01270                 const int x8 = i8&1;
01271                 const int y8 = i8>>1;
01272                 int ref0, scale;
01273                 const int16_t (*l1mv)[2]= l1mv0;
01274 
01275                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01276                     continue;
01277                 h->sub_mb_type[i8] = sub_mb_type;
01278                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01279                 if(IS_INTRA(mb_type_col[0])){
01280                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01281                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01282                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01283                     continue;
01284                 }
01285 
01286                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
01287                 if(ref0 >= 0)
01288                     ref0 = map_col_to_list0[0][ref0];
01289                 else{
01290                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01291                     l1mv= l1mv1;
01292                 }
01293                 scale = dist_scale_factor[ref0];
01294 
01295                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01296                 if(IS_SUB_8X8(sub_mb_type)){
01297                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01298                     int mx = (scale * mv_col[0] + 128) >> 8;
01299                     int my = (scale * mv_col[1] + 128) >> 8;
01300                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01301                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
01302                 }else
01303                 for(i4=0; i4<4; i4++){
01304                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01305                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
01306                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01307                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01308                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
01309                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01310                 }
01311             }
01312         }
01313     }
01314 }
01315 
01316 static inline void write_back_motion(H264Context *h, int mb_type){
01317     MpegEncContext * const s = &h->s;
01318     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
01319     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
01320     int list;
01321 
01322     if(!USES_LIST(mb_type, 0))
01323         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
01324 
01325     for(list=0; list<h->list_count; list++){
01326         int y;
01327         if(!USES_LIST(mb_type, list))
01328             continue;
01329 
01330         for(y=0; y<4; y++){
01331             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
01332             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
01333         }
01334         if( h->pps.cabac ) {
01335             if(IS_SKIP(mb_type))
01336                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
01337             else
01338             for(y=0; y<4; y++){
01339                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
01340                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
01341             }
01342         }
01343 
01344         {
01345             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
01346             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
01347             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
01348             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
01349             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
01350         }
01351     }
01352 
01353     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
01354         if(IS_8X8(mb_type)){
01355             uint8_t *direct_table = &h->direct_table[b8_xy];
01356             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
01357             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
01358             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
01359         }
01360     }
01361 }
01362 
01363 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
01364     int i, si, di;
01365     uint8_t *dst;
01366     int bufidx;
01367 
01368 //    src[0]&0x80;                //forbidden bit
01369     h->nal_ref_idc= src[0]>>5;
01370     h->nal_unit_type= src[0]&0x1F;
01371 
01372     src++; length--;
01373 #if 0
01374     for(i=0; i<length; i++)
01375         printf("%2X ", src[i]);
01376 #endif
01377 
01378 #if HAVE_FAST_UNALIGNED
01379 # if HAVE_FAST_64BIT
01380 #   define RS 7
01381     for(i=0; i+1<length; i+=9){
01382         if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
01383 # else
01384 #   define RS 3
01385     for(i=0; i+1<length; i+=5){
01386         if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
01387 # endif
01388             continue;
01389         if(i>0 && !src[i]) i--;
01390         while(src[i]) i++;
01391 #else
01392 #   define RS 0
01393     for(i=0; i+1<length; i+=2){
01394         if(src[i]) continue;
01395         if(i>0 && src[i-1]==0) i--;
01396 #endif
01397         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
01398             if(src[i+2]!=3){
01399                 /* startcode, so we must be past the end */
01400                 length=i;
01401             }
01402             break;
01403         }
01404         i-= RS;
01405     }
01406 
01407     if(i>=length-1){ //no escaped 0
01408         *dst_length= length;
01409         *consumed= length+1; //+1 for the header
01410         return src;
01411     }
01412 
01413     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
01414     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
01415     dst= h->rbsp_buffer[bufidx];
01416 
01417     if (dst == NULL){
01418         return NULL;
01419     }
01420 
01421 //printf("decoding esc\n");
01422     memcpy(dst, src, i);
01423     si=di=i;
01424     while(si+2<length){
01425         //remove escapes (very rare 1:2^22)
01426         if(src[si+2]>3){
01427             dst[di++]= src[si++];
01428             dst[di++]= src[si++];
01429         }else if(src[si]==0 && src[si+1]==0){
01430             if(src[si+2]==3){ //escape
01431                 dst[di++]= 0;
01432                 dst[di++]= 0;
01433                 si+=3;
01434                 continue;
01435             }else //next start code
01436                 goto nsc;
01437         }
01438 
01439         dst[di++]= src[si++];
01440     }
01441     while(si<length)
01442         dst[di++]= src[si++];
01443 nsc:
01444 
01445     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
01446 
01447     *dst_length= di;
01448     *consumed= si + 1;//+1 for the header
01449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
01450     return dst;
01451 }
01452 
01453 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
01454     int v= *src;
01455     int r;
01456 
01457     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
01458 
01459     for(r=1; r<9; r++){
01460         if(v&1) return r;
01461         v>>=1;
01462     }
01463     return 0;
01464 }
01465 
01470 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01471 #define stride 16
01472     int i;
01473     int temp[16]; //FIXME check if this is a good idea
01474     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
01475     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
01476 
01477 //memset(block, 64, 2*256);
01478 //return;
01479     for(i=0; i<4; i++){
01480         const int offset= y_offset[i];
01481         const int z0= block[offset+stride*0] + block[offset+stride*4];
01482         const int z1= block[offset+stride*0] - block[offset+stride*4];
01483         const int z2= block[offset+stride*1] - block[offset+stride*5];
01484         const int z3= block[offset+stride*1] + block[offset+stride*5];
01485 
01486         temp[4*i+0]= z0+z3;
01487         temp[4*i+1]= z1+z2;
01488         temp[4*i+2]= z1-z2;
01489         temp[4*i+3]= z0-z3;
01490     }
01491 
01492     for(i=0; i<4; i++){
01493         const int offset= x_offset[i];
01494         const int z0= temp[4*0+i] + temp[4*2+i];
01495         const int z1= temp[4*0+i] - temp[4*2+i];
01496         const int z2= temp[4*1+i] - temp[4*3+i];
01497         const int z3= temp[4*1+i] + temp[4*3+i];
01498 
01499         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
01500         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
01501         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
01502         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
01503     }
01504 }
01505 
#if 0

/**
 * Compiled-out (#if 0) two-pass 4x4 butterfly transform of the 16 luma DC
 * values in block; every result is halved (>>1) instead of being scaled.
 *
 * Uses the `stride` macro, which is not defined inside this block.
 *
 * @param block coefficient buffer, modified in place
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: read four values per iteration from block into temp */
    for(i=0; i<4; i++){
        const DCTELEM *in = block + y_offset[i];
        int *out = temp + 4*i;
        const int p0 = in[stride*0];
        const int p1 = in[stride*1];
        const int p4 = in[stride*4];
        const int p5 = in[stride*5];
        const int sum04 = p0 + p4;
        const int dif04 = p0 - p4;
        const int dif15 = p1 - p5;
        const int sum15 = p1 + p5;

        out[0]= sum04 + sum15;
        out[1]= dif04 + dif15;
        out[2]= dif04 - dif15;
        out[3]= sum04 - sum15;
    }

    /* second pass: combine across temp rows, halve and store */
    for(i=0; i<4; i++){
        DCTELEM *out = block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int dif02 = temp[4*0+i] - temp[4*2+i];
        const int dif13 = temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
01545 
01546 #undef xStride
01547 #undef stride
01548 
01549 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01550     const int stride= 16*2;
01551     const int xStride= 16;
01552     int a,b,c,d,e;
01553 
01554     a= block[stride*0 + xStride*0];
01555     b= block[stride*0 + xStride*1];
01556     c= block[stride*1 + xStride*0];
01557     d= block[stride*1 + xStride*1];
01558 
01559     e= a-b;
01560     a= a+b;
01561     b= c-d;
01562     c= c+d;
01563 
01564     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
01565     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
01566     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
01567     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
01568 }
01569 
#if 0
/**
 * Compiled-out (#if 0) 2x2 butterfly transform of the four chroma DC values
 * in block, without any scaling.
 *
 * @param block coefficient buffer, modified in place
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride = 16*2;
    const int xStride= 16;
    const int top_left     = block[stride*0 + xStride*0];
    const int top_right    = block[stride*0 + xStride*1];
    const int bottom_left  = block[stride*1 + xStride*0];
    const int bottom_right = block[stride*1 + xStride*1];
    const int top_sum    = top_left    + top_right;
    const int top_diff   = top_left    - top_right;
    const int bottom_sum = bottom_left + bottom_right;
    const int bottom_diff= bottom_left - bottom_right;

    block[stride*0 + xStride*0]= (top_sum  + bottom_sum );
    block[stride*0 + xStride*1]= (top_diff + bottom_diff);
    block[stride*1 + xStride*0]= (top_sum  - bottom_sum );
    block[stride*1 + xStride*1]= (top_diff - bottom_diff);
}
#endif
01592 
01596 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
01597     return h->pps.chroma_qp_table[t][qscale];
01598 }
01599 
01600 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
01601                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01602                            int src_x_offset, int src_y_offset,
01603                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
01604     MpegEncContext * const s = &h->s;
01605     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
01606     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
01607     const int luma_xy= (mx&3) + ((my&3)<<2);
01608     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
01609     uint8_t * src_cb, * src_cr;
01610     int extra_width= h->emu_edge_width;
01611     int extra_height= h->emu_edge_height;
01612     int emu=0;
01613     const int full_mx= mx>>2;
01614     const int full_my= my>>2;
01615     const int pic_width  = 16*s->mb_width;
01616     const int pic_height = 16*s->mb_height >> MB_FIELD;
01617 
01618     if(mx&7) extra_width -= 3;
01619     if(my&7) extra_height -= 3;
01620 
01621     if(   full_mx < 0-extra_width
01622        || full_my < 0-extra_height
01623        || full_mx + 16/*FIXME*/ > pic_width + extra_width
01624        || full_my + 16/*FIXME*/ > pic_height + extra_height){
01625         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
01626             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
01627         emu=1;
01628     }
01629 
01630     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
01631     if(!square){
01632         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
01633     }
01634 
01635     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
01636 
01637     if(MB_FIELD){
01638         // chroma offset when predicting from a field of opposite parity
01639         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
01640         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
01641     }
01642     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01643     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01644 
01645     if(emu){
01646         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01647             src_cb= s->edge_emu_buffer;
01648     }
01649     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01650 
01651     if(emu){
01652         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01653             src_cr= s->edge_emu_buffer;
01654     }
01655     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01656 }
01657 
01658 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
01659                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01660                            int x_offset, int y_offset,
01661                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01662                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01663                            int list0, int list1){
01664     MpegEncContext * const s = &h->s;
01665     qpel_mc_func *qpix_op=  qpix_put;
01666     h264_chroma_mc_func chroma_op= chroma_put;
01667 
01668     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01669     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01670     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01671     x_offset += 8*s->mb_x;
01672     y_offset += 8*(s->mb_y >> MB_FIELD);
01673 
01674     if(list0){
01675         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
01676         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
01677                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01678                            qpix_op, chroma_op);
01679 
01680         qpix_op=  qpix_avg;
01681         chroma_op= chroma_avg;
01682     }
01683 
01684     if(list1){
01685         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
01686         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
01687                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01688                            qpix_op, chroma_op);
01689     }
01690 }
01691 
01692 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
01693                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01694                            int x_offset, int y_offset,
01695                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01696                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
01697                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
01698                            int list0, int list1){
01699     MpegEncContext * const s = &h->s;
01700 
01701     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01702     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01703     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01704     x_offset += 8*s->mb_x;
01705     y_offset += 8*(s->mb_y >> MB_FIELD);
01706 
01707     if(list0 && list1){
01708         /* don't optimize for luma-only case, since B-frames usually
01709          * use implicit weights => chroma too. */
01710         uint8_t *tmp_cb = s->obmc_scratchpad;
01711         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
01712         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
01713         int refn0 = h->ref_cache[0][ scan8[n] ];
01714         int refn1 = h->ref_cache[1][ scan8[n] ];
01715 
01716         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
01717                     dest_y, dest_cb, dest_cr,
01718                     x_offset, y_offset, qpix_put, chroma_put);
01719         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
01720                     tmp_y, tmp_cb, tmp_cr,
01721                     x_offset, y_offset, qpix_put, chroma_put);
01722 
01723         if(h->use_weight == 2){
01724             int weight0 = h->implicit_weight[refn0][refn1];
01725             int weight1 = 64 - weight0;
01726             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
01727             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
01728             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
01729         }else{
01730             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
01731                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
01732                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
01733             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01734                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
01735                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
01736             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01737                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
01738                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
01739         }
01740     }else{
01741         int list = list1 ? 1 : 0;
01742         int refn = h->ref_cache[list][ scan8[n] ];
01743         Picture *ref= &h->ref_list[list][refn];
01744         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
01745                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
01746                     qpix_put, chroma_put);
01747 
01748         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
01749                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
01750         if(h->use_weight_chroma){
01751             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01752                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
01753             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01754                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
01755         }
01756     }
01757 }
01758 
01759 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
01760                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01761                            int x_offset, int y_offset,
01762                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01763                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01764                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
01765                            int list0, int list1){
01766     if((h->use_weight==2 && list0 && list1
01767         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
01768        || h->use_weight==1)
01769         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01770                          x_offset, y_offset, qpix_put, chroma_put,
01771                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
01772     else
01773         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01774                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
01775 }
01776 
01777 static inline void prefetch_motion(H264Context *h, int list){
01778     /* fetch pixels for estimated mv 4 macroblocks ahead
01779      * optimized for 64byte cache lines */
01780     MpegEncContext * const s = &h->s;
01781     const int refn = h->ref_cache[list][scan8[0]];
01782     if(refn >= 0){
01783         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
01784         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
01785         uint8_t **src= h->ref_list[list][refn].data;
01786         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
01787         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01788         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
01789         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01790     }
01791 }
01792 
01793 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01794                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
01795                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
01796                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
01797     MpegEncContext * const s = &h->s;
01798     const int mb_xy= h->mb_xy;
01799     const int mb_type= s->current_picture.mb_type[mb_xy];
01800 
01801     assert(IS_INTER(mb_type));
01802 
01803     prefetch_motion(h, 0);
01804 
01805     if(IS_16X16(mb_type)){
01806         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
01807                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
01808                 &weight_op[0], &weight_avg[0],
01809                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01810     }else if(IS_16X8(mb_type)){
01811         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
01812                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01813                 &weight_op[1], &weight_avg[1],
01814                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01815         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
01816                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01817                 &weight_op[1], &weight_avg[1],
01818                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01819     }else if(IS_8X16(mb_type)){
01820         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
01821                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01822                 &weight_op[2], &weight_avg[2],
01823                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01824         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
01825                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01826                 &weight_op[2], &weight_avg[2],
01827                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01828     }else{
01829         int i;
01830 
01831         assert(IS_8X8(mb_type));
01832 
01833         for(i=0; i<4; i++){
01834             const int sub_mb_type= h->sub_mb_type[i];
01835             const int n= 4*i;
01836             int x_offset= (i&1)<<2;
01837             int y_offset= (i&2)<<1;
01838 
01839             if(IS_SUB_8X8(sub_mb_type)){
01840                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01841                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01842                     &weight_op[3], &weight_avg[3],
01843                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01844             }else if(IS_SUB_8X4(sub_mb_type)){
01845                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01846                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01847                     &weight_op[4], &weight_avg[4],
01848                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01849                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
01850                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01851                     &weight_op[4], &weight_avg[4],
01852                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01853             }else if(IS_SUB_4X8(sub_mb_type)){
01854                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01855                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01856                     &weight_op[5], &weight_avg[5],
01857                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01858                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
01859                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01860                     &weight_op[5], &weight_avg[5],
01861                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01862             }else{
01863                 int j;
01864                 assert(IS_SUB_4X4(sub_mb_type));
01865                 for(j=0; j<4; j++){
01866                     int sub_x_offset= x_offset + 2*(j&1);
01867                     int sub_y_offset= y_offset +   (j&2);
01868                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
01869                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01870                         &weight_op[6], &weight_avg[6],
01871                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01872                 }
01873             }
01874         }
01875     }
01876 
01877     prefetch_motion(h, 1);
01878 }
01879 
01880 static av_cold void init_cavlc_level_tab(void){
01881     int suffix_length, mask;
01882     unsigned int i;
01883 
01884     for(suffix_length=0; suffix_length<7; suffix_length++){
01885         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
01886             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
01887             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
01888 
01889             mask= -(level_code&1);
01890             level_code= (((2+level_code)>>1) ^ mask) - mask;
01891             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
01892                 cavlc_level_tab[suffix_length][i][0]= level_code;
01893                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
01894             }else if(prefix + 1 <= LEVEL_TAB_BITS){
01895                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
01896                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
01897             }else{
01898                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
01899                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
01900             }
01901         }
01902     }
01903 }
01904 
01905 static av_cold void decode_init_vlc(void){
01906     static int done = 0;
01907 
01908     if (!done) {
01909         int i;
01910         int offset;
01911         done = 1;
01912 
01913         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
01914         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
01915         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
01916                  &chroma_dc_coeff_token_len [0], 1, 1,
01917                  &chroma_dc_coeff_token_bits[0], 1, 1,
01918                  INIT_VLC_USE_NEW_STATIC);
01919 
01920         offset = 0;
01921         for(i=0; i<4; i++){
01922             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
01923             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
01924             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
01925                      &coeff_token_len [i][0], 1, 1,
01926                      &coeff_token_bits[i][0], 1, 1,
01927                      INIT_VLC_USE_NEW_STATIC);
01928             offset += coeff_token_vlc_tables_size[i];
01929         }
01930         /*
01931          * This is a one time safety check to make sure that
01932          * the packed static coeff_token_vlc table sizes
01933          * were initialized correctly.
01934          */
01935         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
01936 
01937         for(i=0; i<3; i++){
01938             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
01939             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
01940             init_vlc(&chroma_dc_total_zeros_vlc[i],
01941                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
01942                      &chroma_dc_total_zeros_len [i][0], 1, 1,
01943                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
01944                      INIT_VLC_USE_NEW_STATIC);
01945         }
01946         for(i=0; i<15; i++){
01947             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
01948             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
01949             init_vlc(&total_zeros_vlc[i],
01950                      TOTAL_ZEROS_VLC_BITS, 16,
01951                      &total_zeros_len [i][0], 1, 1,
01952                      &total_zeros_bits[i][0], 1, 1,
01953                      INIT_VLC_USE_NEW_STATIC);
01954         }
01955 
01956         for(i=0; i<6; i++){
01957             run_vlc[i].table = run_vlc_tables[i];
01958             run_vlc[i].table_allocated = run_vlc_tables_size;
01959             init_vlc(&run_vlc[i],
01960                      RUN_VLC_BITS, 7,
01961                      &run_len [i][0], 1, 1,
01962                      &run_bits[i][0], 1, 1,
01963                      INIT_VLC_USE_NEW_STATIC);
01964         }
01965         run7_vlc.table = run7_vlc_table,
01966         run7_vlc.table_allocated = run7_vlc_table_size;
01967         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
01968                  &run_len [6][0], 1, 1,
01969                  &run_bits[6][0], 1, 1,
01970                  INIT_VLC_USE_NEW_STATIC);
01971 
01972         init_cavlc_level_tab();
01973     }
01974 }
01975 
01976 static void free_tables(H264Context *h){
01977     int i;
01978     H264Context *hx;
01979     av_freep(&h->intra4x4_pred_mode);
01980     av_freep(&h->chroma_pred_mode_table);
01981     av_freep(&h->cbp_table);
01982     av_freep(&h->mvd_table[0]);
01983     av_freep(&h->mvd_table[1]);
01984     av_freep(&h->direct_table);
01985     av_freep(&h->non_zero_count);
01986     av_freep(&h->slice_table_base);
01987     h->slice_table= NULL;
01988 
01989     av_freep(&h->mb2b_xy);
01990     av_freep(&h->mb2b8_xy);
01991 
01992     for(i = 0; i < h->s.avctx->thread_count; i++) {
01993         hx = h->thread_context[i];
01994         if(!hx) continue;
01995         av_freep(&hx->top_borders[1]);
01996         av_freep(&hx->top_borders[0]);
01997         av_freep(&hx->s.obmc_scratchpad);
01998     }
01999 }
02000 
02001 static void init_dequant8_coeff_table(H264Context *h){
02002     int i,q,x;
02003     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
02004     h->dequant8_coeff[0] = h->dequant8_buffer[0];
02005     h->dequant8_coeff[1] = h->dequant8_buffer[1];
02006 
02007     for(i=0; i<2; i++ ){
02008         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
02009             h->dequant8_coeff[1] = h->dequant8_buffer[0];
02010             break;
02011         }
02012 
02013         for(q=0; q<52; q++){
02014             int shift = div6[q];
02015             int idx = rem6[q];
02016             for(x=0; x<64; x++)
02017                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
02018                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
02019                     h->pps.scaling_matrix8[i][x]) << shift;
02020         }
02021     }
02022 }
02023 
02024 static void init_dequant4_coeff_table(H264Context *h){
02025     int i,j,q,x;
02026     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
02027     for(i=0; i<6; i++ ){
02028         h->dequant4_coeff[i] = h->dequant4_buffer[i];
02029         for(j=0; j<i; j++){
02030             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
02031                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
02032                 break;
02033             }
02034         }
02035         if(j<i)
02036             continue;
02037 
02038         for(q=0; q<52; q++){
02039             int shift = div6[q] + 2;
02040             int idx = rem6[q];
02041             for(x=0; x<16; x++)
02042                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
02043                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
02044                     h->pps.scaling_matrix4[i][x]) << shift;
02045         }
02046     }
02047 }
02048 
02049 static void init_dequant_tables(H264Context *h){
02050     int i,x;
02051     init_dequant4_coeff_table(h);
02052     if(h->pps.transform_8x8_mode)
02053         init_dequant8_coeff_table(h);
02054     if(h->sps.transform_bypass){
02055         for(i=0; i<6; i++)
02056             for(x=0; x<16; x++)
02057                 h->dequant4_coeff[i][0][x] = 1<<6;
02058         if(h->pps.transform_8x8_mode)
02059             for(i=0; i<2; i++)
02060                 for(x=0; x<64; x++)
02061                     h->dequant8_coeff[i][0][x] = 1<<6;
02062     }
02063 }
02064 
02065 
02070 static int alloc_tables(H264Context *h){
02071     MpegEncContext * const s = &h->s;
02072     const int big_mb_num= s->mb_stride * (s->mb_height+1);
02073     int x,y;
02074 
02075     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
02076 
02077     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
02078     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
02079     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
02080 
02081     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
02082     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
02083     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
02084     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
02085 
02086     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
02087     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
02088 
02089     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
02090     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
02091     for(y=0; y<s->mb_height; y++){
02092         for(x=0; x<s->mb_width; x++){
02093             const int mb_xy= x + y*s->mb_stride;
02094             const int b_xy = 4*x + 4*y*h->b_stride;
02095             const int b8_xy= 2*x + 2*y*h->b8_stride;
02096 
02097             h->mb2b_xy [mb_xy]= b_xy;
02098             h->mb2b8_xy[mb_xy]= b8_xy;
02099         }
02100     }
02101 
02102     s->obmc_scratchpad = NULL;
02103 
02104     if(!h->dequant4_coeff[0])
02105         init_dequant_tables(h);
02106 
02107     return 0;
02108 fail:
02109     free_tables(h);
02110     return -1;
02111 }
02112 
02116 static void clone_tables(H264Context *dst, H264Context *src){
02117     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
02118     dst->non_zero_count           = src->non_zero_count;
02119     dst->slice_table              = src->slice_table;
02120     dst->cbp_table                = src->cbp_table;
02121     dst->mb2b_xy                  = src->mb2b_xy;
02122     dst->mb2b8_xy                 = src->mb2b8_xy;
02123     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
02124     dst->mvd_table[0]             = src->mvd_table[0];
02125     dst->mvd_table[1]             = src->mvd_table[1];
02126     dst->direct_table             = src->direct_table;
02127 
02128     dst->s.obmc_scratchpad = NULL;
02129     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
02130 }
02131 
02136 static int context_init(H264Context *h){
02137     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02138     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02139 
02140     return 0;
02141 fail:
02142     return -1; // free_tables will clean up for us
02143 }
02144 
02145 static av_cold void common_init(H264Context *h){
02146     MpegEncContext * const s = &h->s;
02147 
02148     s->width = s->avctx->width;
02149     s->height = s->avctx->height;
02150     s->codec_id= s->avctx->codec->id;
02151 
02152     ff_h264_pred_init(&h->hpc, s->codec_id);
02153 
02154     h->dequant_coeff_pps= -1;
02155     s->unrestricted_mv=1;
02156     s->decode=1; //FIXME
02157 
02158     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
02159 
02160     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
02161     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
02162 }
02163 
02169 static void reset_sei(H264Context *h) {
02170     h->sei_recovery_frame_cnt       = -1;
02171     h->sei_dpb_output_delay         =  0;
02172     h->sei_cpb_removal_delay        = -1;
02173     h->sei_buffering_period_present =  0;
02174 }
02175 
02176 static av_cold int decode_init(AVCodecContext *avctx){
02177     H264Context *h= avctx->priv_data;
02178     MpegEncContext * const s = &h->s;
02179 
02180     MPV_decode_defaults(s);
02181 
02182     s->avctx = avctx;
02183     common_init(h);
02184 
02185     s->out_format = FMT_H264;
02186     s->workaround_bugs= avctx->workaround_bugs;
02187 
02188     // set defaults
02189 //    s->decode_mb= ff_h263_decode_mb;
02190     s->quarter_sample = 1;
02191     if(!avctx->has_b_frames)
02192     s->low_delay= 1;
02193 
02194     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
02195         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
02196     else
02197         avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
02198     avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
02199 
02200     decode_init_vlc();
02201 
02202     if(avctx->extradata_size > 0 && avctx->extradata &&
02203        *(char *)avctx->extradata == 1){
02204         h->is_avc = 1;
02205         h->got_avcC = 0;
02206     } else {
02207         h->is_avc = 0;
02208     }
02209 
02210     h->thread_context[0] = h;
02211     h->outputed_poc = INT_MIN;
02212     h->prev_poc_msb= 1<<16;
02213     reset_sei(h);
02214     if(avctx->codec_id == CODEC_ID_H264){
02215         if(avctx->ticks_per_frame == 1){
02216             s->avctx->time_base.den *=2;
02217         }
02218         avctx->ticks_per_frame = 2;
02219     }
02220     return 0;
02221 }
02222 
02223 static int frame_start(H264Context *h){
02224     MpegEncContext * const s = &h->s;
02225     int i;
02226 
02227     if(MPV_frame_start(s, s->avctx) < 0)
02228         return -1;
02229     ff_er_frame_start(s);
02230     /*
02231      * MPV_frame_start uses pict_type to derive key_frame.
02232      * This is incorrect for H.264; IDR markings must be used.
02233      * Zero here; IDR markings per slice in frame or fields are ORed in later.
02234      * See decode_nal_units().
02235      */
02236     s->current_picture_ptr->key_frame= 0;
02237 
02238     assert(s->linesize && s->uvlinesize);
02239 
02240     for(i=0; i<16; i++){
02241         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
02242         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
02243     }
02244     for(i=0; i<4; i++){
02245         h->block_offset[16+i]=
02246         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02247         h->block_offset[24+16+i]=
02248         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02249     }
02250 
02251     /* can't be in alloc_tables because linesize isn't known there.
02252      * FIXME: redo bipred weight to not require extra buffer? */
02253     for(i = 0; i < s->avctx->thread_count; i++)
02254         if(!h->thread_context[i]->s.obmc_scratchpad)
02255             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
02256 
02257     /* some macroblocks will be accessed before they're available */
02258     if(FRAME_MBAFF || s->avctx->thread_count > 1)
02259         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
02260 
02261 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
02262 
02263     // We mark the current picture as non-reference after allocating it, so
02264     // that if we break out due to an error it can be released automatically
02265     // in the next MPV_frame_start().
02266     // SVQ3 as well as most other codecs have only last/next/current and thus
02267     // get released even with set reference, besides SVQ3 and others do not
02268     // mark frames as reference later "naturally".
02269     if(s->codec_id != CODEC_ID_SVQ3)
02270         s->current_picture_ptr->reference= 0;
02271 
02272     s->current_picture_ptr->field_poc[0]=
02273     s->current_picture_ptr->field_poc[1]= INT_MAX;
02274     assert(s->current_picture_ptr->long_ref==0);
02275 
02276     return 0;
02277 }
02278 
/**
 * Saves the last pixel column and the last pixel row of the macroblock at
 * src_y/src_cb/src_cr into h->left_border and h->top_borders[..][s->mb_x].
 * hl_decode_mb_internal() calls this right before the deblocking filter;
 * presumably so that later intra prediction can still use the unfiltered
 * samples -- confirm against xchg_mb_border().
 *
 * Chroma is skipped when CONFIG_GRAY is enabled and CODEC_FLAG_GRAY is set,
 * unless simple is nonzero.
 *
 * @param src_y      top-left luma sample of the macroblock
 * @param src_cb     top-left Cb sample of the macroblock
 * @param src_cr     top-left Cr sample of the macroblock
 * @param linesize   luma line size to step with
 * @param uvlinesize chroma line size to step with
 * @param simple     nonzero disables the MBAFF handling
 */
02279 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
02280     MpegEncContext * const s = &h->s;
02281     int i;
02282     int step    = 1;
02283     int offset  = 1;
02284     int uvoffset= 1;
02285     int top_idx = 1;
02286     int skiplast= 0;
02287 
      // Step back one row, so that row i below addresses macroblock row i-1.
02288     src_y  -=   linesize;
02289     src_cb -= uvlinesize;
02290     src_cr -= uvlinesize;
02291 
02292     if(!simple && FRAME_MBAFF){
02293         if(s->mb_y&1){
              // odd macroblock row
02294             offset  = MB_MBAFF ? 1 : 17;
02295             uvoffset= MB_MBAFF ? 1 : 9;
02296             if(!MB_MBAFF){
                  // additionally store rows 14 (luma) / 6 (chroma) in top_borders[0]
02297                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
02298                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
02299                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02300                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
02301                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
02302                 }
02303             }
02304         }else{
              // even macroblock row
02305             if(!MB_MBAFF){
02306                 h->left_border[0]= h->top_borders[0][s->mb_x][15];
02307                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02308                     h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
02309                     h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
02310                 }
                  // the loops below then stop one row early
02311                 skiplast= 1;
02312             }
02313             offset  =
02314             uvoffset=
02315             top_idx = MB_MBAFF ? 0 : 1;
02316         }
          // with MB_MBAFF only every second left_border entry is written
02317         step= MB_MBAFF ? 2 : 1;
02318     }
02319 
02320     // There are two lines saved, the line above the top macroblock of a pair,
02321     // and the line above the bottom macroblock
      // The first left_border entry is the last luma sample of the previously
      // saved row; it has to be read before that row is overwritten below.
02322     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
02323     for(i=1; i<17 - skiplast; i++){
02324         h->left_border[offset+i*step]= src_y[15+i*  linesize];
02325     }
02326 
      // bottom luma row of the macroblock (16 bytes as two 64-bit copies)
02327     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
02328     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
02329 
02330     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
          // same scheme for chroma: Cb at left_border[34..], Cr at left_border[34+18..]
02331         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
02332         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
02333         for(i=1; i<9 - skiplast; i++){
02334             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
02335             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
02336         }
02337         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
02338         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
02339     }
02340 }
02341 
/**
 * Moves the samples saved in h->left_border / h->top_borders into the
 * picture, into the column left of and the row above the macroblock.
 * Where the xchg argument is applied and nonzero, the picture samples they
 * replace are stored back into the border buffers (a true exchange).
 * hl_decode_mb_internal() calls this with xchg=1 before intra prediction
 * and with xchg=0 afterwards.
 *
 * @param src_y      top-left luma sample of the macroblock
 * @param src_cb     top-left Cb sample of the macroblock
 * @param src_cr     top-left Cr sample of the macroblock
 * @param linesize   luma line size to step with
 * @param uvlinesize chroma line size to step with
 * @param xchg       nonzero: exchange; zero: only copy border -> picture
 * @param simple     nonzero disables the MBAFF handling and forces chroma
 *                   to be processed
 */
02342 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
02343     MpegEncContext * const s = &h->s;
02344     int temp8, i;
02345     uint64_t temp64;
02346     int deblock_left;
02347     int deblock_top;
02348     int mb_xy;
02349     int step    = 1;
02350     int offset  = 1;
02351     int uvoffset= 1;
02352     int top_idx = 1;
02353 
      // same offset/step selection as in backup_mb_border()
02354     if(!simple && FRAME_MBAFF){
02355         if(s->mb_y&1){
02356             offset  = MB_MBAFF ? 1 : 17;
02357             uvoffset= MB_MBAFF ? 1 : 9;
02358         }else{
02359             offset  =
02360             uvoffset=
02361             top_idx = MB_MBAFF ? 0 : 1;
02362         }
02363         step= MB_MBAFF ? 2 : 1;
02364     }
02365 
02366     if(h->deblocking_filter == 2) {
          // mode 2: a neighbour only counts if it has the same slice_table entry
02367         mb_xy = h->mb_xy;
02368         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
02369         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
02370     } else {
          // otherwise only the position inside the picture decides
02371         deblock_left = (s->mb_x > 0);
02372         deblock_top =  (s->mb_y > !!MB_FIELD);
02373     }
02374 
      // Move to the sample above-left of the macroblock.
02375     src_y  -=   linesize + 1;
02376     src_cb -= uvlinesize + 1;
02377     src_cr -= uvlinesize + 1;
02378 
  // b always receives the old value of a; a receives the old value of b
  // only when xchg is nonzero.
02379 #define XCHG(a,b,t,xchg)\
02380 t= a;\
02381 if(xchg)\
02382     a= b;\
02383 b= t;
02384 
02385     if(deblock_left){
          // index 0 is the top-left corner; it is skipped without a top neighbour
02386         for(i = !deblock_top; i<16; i++){
02387             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
02388         }
          // the last entry (i == 16) is always exchanged
02389         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
02390     }
02391 
02392     if(deblock_top){
02393         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
02394         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
          // first 8 luma samples above the macroblock to the right, if there is one
02395         if(s->mb_x+1 < s->mb_width){
02396             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
02397         }
02398     }
02399 
02400     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02401         if(deblock_left){
02402             for(i = !deblock_top; i<8; i++){
02403                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
02404                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
02405             }
02406             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
02407             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
02408         }
02409         if(deblock_top){
02410             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
02411             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
02412         }
02413     }
02414 }
02415 
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: intra prediction or motion compensation, addition of
 * the residual held in h->mb and, if enabled, deblocking.
 *
 * The function is always inlined with a constant simple argument (see
 * hl_decode_mb_simple() / hl_decode_mb_complex()), so the "!simple && ..."
 * and "simple || ..." tests are presumably meant to be resolved at compile
 * time.
 *
 * @param simple nonzero skips the handling of field macroblocks / MBAFF,
 *               intra PCM, transform bypass, gray-only decoding and SVQ3
 */
02416 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
02417     MpegEncContext * const s = &h->s;
02418     const int mb_x= s->mb_x;
02419     const int mb_y= s->mb_y;
02420     const int mb_xy= h->mb_xy;
02421     const int mb_type= s->current_picture.mb_type[mb_xy];
02422     uint8_t  *dest_y, *dest_cb, *dest_cr;
02423     int linesize, uvlinesize /*dct_offset*/;
02424     int i;
02425     int *block_offset = &h->block_offset[0];
      // transform bypass is only active at qscale 0 with the SPS flag set
02426     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
02427     /* is_h264 should always be true if SVQ3 is disabled. */
02428     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
02429     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
02430     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
02431 
      // top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma)
02432     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
02433     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
02434     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
02435 
02436     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
02437     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
02438 
02439     if (!simple && MB_FIELD) {
          // field macroblock: double line size and the second set of block offsets
02440         linesize   = h->mb_linesize   = s->linesize * 2;
02441         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
02442         block_offset = &h->block_offset[24];
02443         if(mb_y&1){ //FIXME move out of this function?
02444             dest_y -= s->linesize*15;
02445             dest_cb-= s->uvlinesize*7;
02446             dest_cr-= s->uvlinesize*7;
02447         }
02448         if(FRAME_MBAFF) {
02449             int list;
              // rewrite the cached reference indices of the lists in use as (16+ref)^(mb_y&1)
02450             for(list=0; list<h->list_count; list++){
02451                 if(!USES_LIST(mb_type, list))
02452                     continue;
02453                 if(IS_16X16(mb_type)){
02454                     int8_t *ref = &h->ref_cache[list][scan8[0]];
02455                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
02456                 }else{
02457                     for(i=0; i<16; i+=4){
02458                         int ref = h->ref_cache[list][scan8[i]];
02459                         if(ref >= 0)
02460                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
02461                     }
02462                 }
02463             }
02464         }
02465     } else {
02466         linesize   = h->mb_linesize   = s->linesize;
02467         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
02468 //        dct_offset = s->linesize * 16;
02469     }
02470 
02471     if (!simple && IS_INTRA_PCM(mb_type)) {
          // PCM: the samples are copied from h->mb without prediction or transform
02472         for (i=0; i<16; i++) {
02473             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
02474         }
02475         for (i=0; i<8; i++) {
02476             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
02477             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
02478         }
02479     } else {
02480         if(IS_INTRA(mb_type)){
              // put the saved border samples into the picture for the duration of
              // intra prediction; undone by the xchg=0 call below
02481             if(h->deblocking_filter)
02482                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
02483 
02484             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02485                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
02486                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
02487             }
02488 
02489             if(IS_INTRA4x4(mb_type)){
02490                 if(simple || !s->encoding){
02491                     if(IS_8x8DCT(mb_type)){
                          // four 8x8 luma blocks: predict, then add the residual
02492                         if(transform_bypass){
02493                             idct_dc_add =
02494                             idct_add    = s->dsp.add_pixels8;
02495                         }else{
02496                             idct_dc_add = s->dsp.h264_idct8_dc_add;
02497                             idct_add    = s->dsp.h264_idct8_add;
02498                         }
02499                         for(i=0; i<16; i+=4){
02500                             uint8_t * const ptr= dest_y + block_offset[i];
02501                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02502                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
02503                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
02504                             }else{
02505                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
02506                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
02507                                                             (h->topright_samples_available<<i)&0x4000, linesize);
02508                                 if(nnz){
                                      // a single nonzero coefficient that is the DC uses the DC-only add
02509                                     if(nnz == 1 && h->mb[i*16])
02510                                         idct_dc_add(ptr, h->mb + i*16, linesize);
02511                                     else
02512                                         idct_add   (ptr, h->mb + i*16, linesize);
02513                                 }
02514                             }
02515                         }
02516                     }else{
                          // sixteen 4x4 luma blocks: predict, then add the residual
02517                         if(transform_bypass){
02518                             idct_dc_add =
02519                             idct_add    = s->dsp.add_pixels4;
02520                         }else{
02521                             idct_dc_add = s->dsp.h264_idct_dc_add;
02522                             idct_add    = s->dsp.h264_idct_add;
02523                         }
02524                         for(i=0; i<16; i++){
02525                             uint8_t * const ptr= dest_y + block_offset[i];
02526                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02527 
02528                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
02529                                 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
02530                             }else{
02531                                 uint8_t *topright;
02532                                 int nnz, tr;
02533                                 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
02534                                     const int topright_avail= (h->topright_samples_available<<i)&0x8000;
02535                                     assert(mb_y || linesize <= block_offset[i]);
02536                                     if(!topright_avail){
                                          // no top-right samples: replicate the last top sample 4 times
02537                                         tr= ptr[3 - linesize]*0x01010101;
02538                                         topright= (uint8_t*) &tr;
02539                                     }else
02540                                         topright= ptr + 4 - linesize;
02541                                 }else
02542                                     topright= NULL;
02543 
02544                                 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
02545                                 nnz = h->non_zero_count_cache[ scan8[i] ];
02546                                 if(nnz){
02547                                     if(is_h264){
02548                                         if(nnz == 1 && h->mb[i*16])
02549                                             idct_dc_add(ptr, h->mb + i*16, linesize);
02550                                         else
02551                                             idct_add   (ptr, h->mb + i*16, linesize);
02552                                     }else
02553                                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
02554                                 }
02555                             }
02556                         }
02557                     }
02558                 }
02559             }else{
                  // intra 16x16: predict the whole luma block; the luma DC values are
                  // dequantized/transformed here, the residual is added further down
02560                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
02561                 if(is_h264){
02562                     if(!transform_bypass)
02563                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
02564                 }else
02565                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
02566             }
              // put the border samples back (xchg=0: only border -> picture)
02567             if(h->deblocking_filter)
02568                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
02569         }else if(is_h264){
              // inter macroblock: motion compensation
02570             hl_motion(h, dest_y, dest_cb, dest_cr,
02571                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
02572                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
02573                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
02574         }
02575 
02576 
          // luma residual for everything except intra 4x4 (already added above)
02577         if(!IS_INTRA4x4(mb_type)){
02578             if(is_h264){
02579                 if(IS_INTRA16x16(mb_type)){
02580                     if(transform_bypass){
02581                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
02582                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
02583                         }else{
02584                             for(i=0; i<16; i++){
02585                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
02586                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
02587                             }
02588                         }
02589                     }else{
02590                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02591                     }
02592                 }else if(h->cbp&15){
                      // some luma bits of the coded block pattern are set
02593                     if(transform_bypass){
02594                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
02595                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
02596                         for(i=0; i<16; i+=di){
02597                             if(h->non_zero_count_cache[ scan8[i] ]){
02598                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02599                             }
02600                         }
02601                     }else{
02602                         if(IS_8x8DCT(mb_type)){
02603                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02604                         }else{
02605                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02606                         }
02607                     }
02608                 }
02609             }else{
                  // SVQ3 path
02610                 for(i=0; i<16; i++){
02611                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
02612                         uint8_t * const ptr= dest_y + block_offset[i];
02613                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
02614                     }
02615                 }
02616             }
02617         }
02618 
          // chroma residual, only if the chroma bits (0x30) of the coded block pattern are set
02619         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
02620             uint8_t *dest[2] = {dest_cb, dest_cr};
02621             if(transform_bypass){
02622                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
02623                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
02624                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
02625                 }else{
02626                     idct_add = s->dsp.add_pixels4;
                      // blocks 16..19 go to Cb, 20..23 to Cr: (i&4)>>2 selects the plane
02627                     for(i=16; i<16+8; i++){
02628                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
02629                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02630                     }
02631                 }
02632             }else{
                  // dequant tables: intra uses lists 1 (Cb) and 2 (Cr), inter lists 4 and 5
02633                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
02634                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
02635                 if(is_h264){
02636                     idct_add = s->dsp.h264_idct_add;
02637                     idct_dc_add = s->dsp.h264_idct_dc_add;
02638                     for(i=16; i<16+8; i++){
02639                         if(h->non_zero_count_cache[ scan8[i] ])
02640                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02641                         else if(h->mb[i*16])
02642                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02643                     }
02644                 }else{
02645                     for(i=16; i<16+8; i++){
02646                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
02647                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
02648                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
02649                         }
02650                     }
02651                 }
02652             }
02653         }
02654     }
      // reset the coefficient buffer if this macroblock may have used it
02655     if(h->cbp || IS_INTRA(mb_type))
02656         s->dsp.clear_blocks(h->mb);
02657 
02658     if(h->deblocking_filter) {
          // save the border samples first, then run the deblocking filter
02659         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
02660         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
02661         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
02662         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
02663         if (!simple && FRAME_MBAFF) {
02664             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02665         } else {
02666             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02667         }
02668     }
02669 }
02670 
/**
 * Reconstructs the current macroblock through hl_decode_mb_internal() with
 * simple=1, i.e. with the handling of the uncommon cases left out.
 */
02674 static void hl_decode_mb_simple(H264Context *h){
02675     hl_decode_mb_internal(h, 1);
02676 }
02677 
/**
 * Reconstructs the current macroblock through hl_decode_mb_internal() with
 * simple=0, i.e. with every case handled. Marked av_noinline so that this
 * instantiation is kept as a separate function.
 */
02681 static void av_noinline hl_decode_mb_complex(H264Context *h){
02682     hl_decode_mb_internal(h, 0);
02683 }
02684 
02685 static void hl_decode_mb(H264Context *h){
02686     MpegEncContext * const s = &h->s;
02687     const int mb_xy= h->mb_xy;
02688     const int mb_type= s->current_picture.mb_type[mb_xy];
02689     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02690 
02691     if (is_complex)
02692         hl_decode_mb_complex(h);
02693     else hl_decode_mb_simple(h);
02694 }
02695 
02696 static void pic_as_field(Picture *pic, const int parity){
02697     int i;
02698     for (i = 0; i < 4; ++i) {
02699         if (parity == PICT_BOTTOM_FIELD)
02700             pic->data[i] += pic->linesize[i];
02701         pic->reference = parity;
02702         pic->linesize[i] *= 2;
02703     }
02704     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
02705 }
02706 
02707 static int split_field_copy(Picture *dest, Picture *src,
02708                             int parity, int id_add){
02709     int match = !!(src->reference & parity);
02710 
02711     if (match) {
02712         *dest = *src;
02713         if(parity != PICT_FRAME){
02714             pic_as_field(dest, parity);
02715             dest->pic_id *= 2;
02716             dest->pic_id += id_add;
02717         }
02718     }
02719 
02720     return match;
02721 }
02722 
02723 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
02724     int i[2]={0};
02725     int index=0;
02726 
02727     while(i[0]<len || i[1]<len){
02728         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
02729             i[0]++;
02730         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
02731             i[1]++;
02732         if(i[0] < len){
02733             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
02734             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
02735         }
02736         if(i[1] < len){
02737             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
02738             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
02739         }
02740     }
02741 
02742     return index;
02743 }
02744 
02745 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
02746     int i, best_poc;
02747     int out_i= 0;
02748 
02749     for(;;){
02750         best_poc= dir ? INT_MIN : INT_MAX;
02751 
02752         for(i=0; i<len; i++){
02753             const int poc= src[i]->poc;
02754             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
02755                 best_poc= poc;
02756                 sorted[out_i]= src[i];
02757             }
02758         }
02759         if(best_poc == (dir ? INT_MIN : INT_MAX))
02760             break;
02761         limit= sorted[out_i++]->poc - dir;
02762     }
02763     return out_i;
02764 }
02765 
02769 static int fill_default_ref_list(H264Context *h){
02770     MpegEncContext * const s = &h->s;
02771     int i, len;
02772 
02773     if(h->slice_type_nos==FF_B_TYPE){
02774         Picture *sorted[32];
02775         int cur_poc, list;
02776         int lens[2];
02777 
02778         if(FIELD_PICTURE)
02779             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
02780         else
02781             cur_poc= s->current_picture_ptr->poc;
02782 
02783         for(list= 0; list<2; list++){
02784             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
02785             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
02786             assert(len<=32);
02787             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
02788             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
02789             assert(len<=32);
02790 
02791             if(len < h->ref_count[list])
02792                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
02793             lens[list]= len;
02794         }
02795 
02796         if(lens[0] == lens[1] && lens[1] > 1){
02797             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
02798             if(i == lens[0])
02799                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
02800         }
02801     }else{
02802         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
02803         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
02804         assert(len <= 32);
02805         if(len < h->ref_count[0])
02806             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
02807     }
02808 #ifdef TRACE
02809     for (i=0; i<h->ref_count[0]; i++) {
02810         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
02811     }
02812     if(h->slice_type_nos==FF_B_TYPE){
02813         for (i=0; i<h->ref_count[1]; i++) {
02814             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
02815         }
02816     }
02817 #endif
02818     return 0;
02819 }
02820 
/* Forward declarations: both debug printers are defined further down in
 * this file but are already called by decode_ref_pic_list_reordering(). */
02821 static void print_short_term(H264Context *h);
02822 static void print_long_term(H264Context *h);
02823 
02834 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
02835     MpegEncContext * const s = &h->s;
02836 
02837     *structure = s->picture_structure;
02838     if(FIELD_PICTURE){
02839         if (!(pic_num & 1))
02840             /* opposite field */
02841             *structure ^= PICT_FRAME;
02842         pic_num >>= 1;
02843     }
02844 
02845     return pic_num;
02846 }
02847 
02848 static int decode_ref_pic_list_reordering(H264Context *h){
02849     MpegEncContext * const s = &h->s;
02850     int list, index, pic_structure;
02851 
02852     print_short_term(h);
02853     print_long_term(h);
02854 
02855     for(list=0; list<h->list_count; list++){
02856         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
02857 
02858         if(get_bits1(&s->gb)){
02859             int pred= h->curr_pic_num;
02860 
02861             for(index=0; ; index++){
02862                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
02863                 unsigned int pic_id;
02864                 int i;
02865                 Picture *ref = NULL;
02866 
02867                 if(reordering_of_pic_nums_idc==3)
02868                     break;
02869 
02870                 if(index >= h->ref_count[list]){
02871                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
02872                     return -1;
02873                 }
02874 
02875                 if(reordering_of_pic_nums_idc<3){
02876                     if(reordering_of_pic_nums_idc<2){
02877                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
02878                         int frame_num;
02879 
02880                         if(abs_diff_pic_num > h->max_pic_num){
02881                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
02882                             return -1;
02883                         }
02884 
02885                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
02886                         else                                pred+= abs_diff_pic_num;
02887                         pred &= h->max_pic_num - 1;
02888 
02889                         frame_num = pic_num_extract(h, pred, &pic_structure);
02890 
02891                         for(i= h->short_ref_count-1; i>=0; i--){
02892                             ref = h->short_ref[i];
02893                             assert(ref->reference);
02894                             assert(!ref->long_ref);
02895                             if(
02896                                    ref->frame_num == frame_num &&
02897                                    (ref->reference & pic_structure)
02898                               )
02899                                 break;
02900                         }
02901                         if(i>=0)
02902                             ref->pic_id= pred;
02903                     }else{
02904                         int long_idx;
02905                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
02906 
02907                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
02908 
02909                         if(long_idx>31){
02910                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
02911                             return -1;
02912                         }
02913                         ref = h->long_ref[long_idx];
02914                         assert(!(ref && !ref->reference));
02915                         if(ref && (ref->reference & pic_structure)){
02916                             ref->pic_id= pic_id;
02917                             assert(ref->long_ref);
02918                             i=0;
02919                         }else{
02920                             i=-1;
02921                         }
02922                     }
02923 
02924                     if (i < 0) {
02925                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
02926                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
02927                     } else {
02928                         for(i=index; i+1<h->ref_count[list]; i++){
02929                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
02930                                 break;
02931                         }
02932                         for(; i > index; i--){
02933                             h->ref_list[list][i]= h->ref_list[list][i-1];
02934                         }
02935                         h->ref_list[list][index]= *ref;
02936                         if (FIELD_PICTURE){
02937                             pic_as_field(&h->ref_list[list][index], pic_structure);
02938                         }
02939                     }
02940                 }else{
02941                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
02942                     return -1;
02943                 }
02944             }
02945         }
02946     }
02947     for(list=0; list<h->list_count; list++){
02948         for(index= 0; index < h->ref_count[list]; index++){
02949             if(!h->ref_list[list][index].data[0]){
02950                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
02951                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
02952             }
02953         }
02954     }
02955 
02956     return 0;
02957 }
02958 
02959 static void fill_mbaff_ref_list(H264Context *h){
02960     int list, i, j;
02961     for(list=0; list<2; list++){ //FIXME try list_count
02962         for(i=0; i<h->ref_count[list]; i++){
02963             Picture *frame = &h->ref_list[list][i];
02964             Picture *field = &h->ref_list[list][16+2*i];
02965             field[0] = *frame;
02966             for(j=0; j<3; j++)
02967                 field[0].linesize[j] <<= 1;
02968             field[0].reference = PICT_TOP_FIELD;
02969             field[0].poc= field[0].field_poc[0];
02970             field[1] = field[0];
02971             for(j=0; j<3; j++)
02972                 field[1].data[j] += frame->linesize[j];
02973             field[1].reference = PICT_BOTTOM_FIELD;
02974             field[1].poc= field[1].field_poc[1];
02975 
02976             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
02977             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
02978             for(j=0; j<2; j++){
02979                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
02980                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
02981             }
02982         }
02983     }
02984     for(j=0; j<h->ref_count[1]; j++){
02985         for(i=0; i<h->ref_count[0]; i++)
02986             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
02987         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
02988         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
02989     }
02990 }
02991 
02992 static int pred_weight_table(H264Context *h){
02993     MpegEncContext * const s = &h->s;
02994     int list, i;
02995     int luma_def, chroma_def;
02996 
02997     h->use_weight= 0;
02998     h->use_weight_chroma= 0;
02999     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
03000     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
03001     luma_def = 1<<h->luma_log2_weight_denom;
03002     chroma_def = 1<<h->chroma_log2_weight_denom;
03003 
03004     for(list=0; list<2; list++){
03005         h->luma_weight_flag[list]   = 0;
03006         h->chroma_weight_flag[list] = 0;
03007         for(i=0; i<h->ref_count[list]; i++){
03008             int luma_weight_flag, chroma_weight_flag;
03009 
03010             luma_weight_flag= get_bits1(&s->gb);
03011             if(luma_weight_flag){
03012                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
03013                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
03014                 if(   h->luma_weight[list][i] != luma_def
03015                    || h->luma_offset[list][i] != 0) {
03016                     h->use_weight= 1;
03017                     h->luma_weight_flag[list]= 1;
03018                 }
03019             }else{
03020                 h->luma_weight[list][i]= luma_def;
03021                 h->luma_offset[list][i]= 0;
03022             }
03023 
03024             if(CHROMA){
03025                 chroma_weight_flag= get_bits1(&s->gb);
03026                 if(chroma_weight_flag){
03027                     int j;
03028                     for(j=0; j<2; j++){
03029                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
03030                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
03031                         if(   h->chroma_weight[list][i][j] != chroma_def
03032                            || h->chroma_offset[list][i][j] != 0) {
03033                             h->use_weight_chroma= 1;
03034                             h->chroma_weight_flag[list]= 1;
03035                         }
03036                     }
03037                 }else{
03038                     int j;
03039                     for(j=0; j<2; j++){
03040                         h->chroma_weight[list][i][j]= chroma_def;
03041                         h->chroma_offset[list][i][j]= 0;
03042                     }
03043                 }
03044             }
03045         }
03046         if(h->slice_type_nos != FF_B_TYPE) break;
03047     }
03048     h->use_weight= h->use_weight || h->use_weight_chroma;
03049     return 0;
03050 }
03051 
03052 static void implicit_weight_table(H264Context *h){
03053     MpegEncContext * const s = &h->s;
03054     int ref0, ref1, i;
03055     int cur_poc = s->current_picture_ptr->poc;
03056 
03057     for (i = 0; i < 2; i++) {
03058         h->luma_weight_flag[i]   = 0;
03059         h->chroma_weight_flag[i] = 0;
03060     }
03061 
03062     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
03063        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
03064         h->use_weight= 0;
03065         h->use_weight_chroma= 0;
03066         return;
03067     }
03068 
03069     h->use_weight= 2;
03070     h->use_weight_chroma= 2;
03071     h->luma_log2_weight_denom= 5;
03072     h->chroma_log2_weight_denom= 5;
03073 
03074     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
03075         int poc0 = h->ref_list[0][ref0].poc;
03076         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
03077             int poc1 = h->ref_list[1][ref1].poc;
03078             int td = av_clip(poc1 - poc0, -128, 127);
03079             if(td){
03080                 int tb = av_clip(cur_poc - poc0, -128, 127);
03081                 int tx = (16384 + (FFABS(td) >> 1)) / td;
03082                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
03083                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
03084                     h->implicit_weight[ref0][ref1] = 32;
03085                 else
03086                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
03087             }else
03088                 h->implicit_weight[ref0][ref1] = 32;
03089         }
03090     }
03091 }
03092 
03104 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
03105     int i;
03106     if (pic->reference &= refmask) {
03107         return 0;
03108     } else {
03109         for(i = 0; h->delayed_pic[i]; i++)
03110             if(pic == h->delayed_pic[i]){
03111                 pic->reference=DELAYED_PIC_REF;
03112                 break;
03113             }
03114         return 1;
03115     }
03116 }
03117 
03121 static void idr(H264Context *h){
03122     int i;
03123 
03124     for(i=0; i<16; i++){
03125         remove_long(h, i, 0);
03126     }
03127     assert(h->long_ref_count==0);
03128 
03129     for(i=0; i<h->short_ref_count; i++){
03130         unreference_pic(h, h->short_ref[i], 0);
03131         h->short_ref[i]= NULL;
03132     }
03133     h->short_ref_count=0;
03134     h->prev_frame_num= 0;
03135     h->prev_frame_num_offset= 0;
03136     h->prev_poc_msb=
03137     h->prev_poc_lsb= 0;
03138 }
03139 
03140 /* forget old pics after a seek */
03141 static void flush_dpb(AVCodecContext *avctx){
03142     H264Context *h= avctx->priv_data;
03143     int i;
03144     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
03145         if(h->delayed_pic[i])
03146             h->delayed_pic[i]->reference= 0;
03147         h->delayed_pic[i]= NULL;
03148     }
03149     h->outputed_poc= INT_MIN;
03150     idr(h);
03151     if(h->s.current_picture_ptr)
03152         h->s.current_picture_ptr->reference= 0;
03153     h->s.first_field= 0;
03154     reset_sei(h);
03155     ff_mpeg_flush(avctx);
03156 }
03157 
03166 static Picture * find_short(H264Context *h, int frame_num, int *idx){
03167     MpegEncContext * const s = &h->s;
03168     int i;
03169 
03170     for(i=0; i<h->short_ref_count; i++){
03171         Picture *pic= h->short_ref[i];
03172         if(s->avctx->debug&FF_DEBUG_MMCO)
03173             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
03174         if(pic->frame_num == frame_num) {
03175             *idx = i;
03176             return pic;
03177         }
03178     }
03179     return NULL;
03180 }
03181 
03188 static void remove_short_at_index(H264Context *h, int i){
03189     assert(i >= 0 && i < h->short_ref_count);
03190     h->short_ref[i]= NULL;
03191     if (--h->short_ref_count)
03192         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
03193 }
03194 
03199 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
03200     MpegEncContext * const s = &h->s;
03201     Picture *pic;
03202     int i;
03203 
03204     if(s->avctx->debug&FF_DEBUG_MMCO)
03205         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
03206 
03207     pic = find_short(h, frame_num, &i);
03208     if (pic){
03209         if(unreference_pic(h, pic, ref_mask))
03210         remove_short_at_index(h, i);
03211     }
03212 
03213     return pic;
03214 }
03215 
03221 static Picture * remove_long(H264Context *h, int i, int ref_mask){
03222     Picture *pic;
03223 
03224     pic= h->long_ref[i];
03225     if (pic){
03226         if(unreference_pic(h, pic, ref_mask)){
03227             assert(h->long_ref[i]->long_ref == 1);
03228             h->long_ref[i]->long_ref= 0;
03229             h->long_ref[i]= NULL;
03230             h->long_ref_count--;
03231         }
03232     }
03233 
03234     return pic;
03235 }
03236 
03240 static void print_short_term(H264Context *h) {
03241     uint32_t i;
03242     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03243         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
03244         for(i=0; i<h->short_ref_count; i++){
03245             Picture *pic= h->short_ref[i];
03246             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03247         }
03248     }
03249 }
03250 
03254 static void print_long_term(H264Context *h) {
03255     uint32_t i;
03256     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03257         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
03258         for(i = 0; i < 16; i++){
03259             Picture *pic= h->long_ref[i];
03260             if (pic) {
03261                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03262             }
03263         }
03264     }
03265 }
03266 
03270 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
03271     MpegEncContext * const s = &h->s;
03272     int i, j;
03273     int current_ref_assigned=0;
03274     Picture *av_uninit(pic);
03275 
03276     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
03277         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
03278 
03279     for(i=0; i<mmco_count; i++){
03280         int structure, av_uninit(frame_num);
03281         if(s->avctx->debug&FF_DEBUG_MMCO)
03282             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
03283 
03284         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
03285            || mmco[i].opcode == MMCO_SHORT2LONG){
03286             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
03287             pic = find_short(h, frame_num, &j);
03288             if(!pic){
03289                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
03290                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
03291                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
03292                 continue;
03293             }
03294         }
03295 
03296         switch(mmco[i].opcode){
03297         case MMCO_SHORT2UNUSED:
03298             if(s->avctx->debug&FF_DEBUG_MMCO)
03299                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
03300             remove_short(h, frame_num, structure ^ PICT_FRAME);
03301             break;
03302         case MMCO_SHORT2LONG:
03303                 if (h->long_ref[mmco[i].long_arg] != pic)
03304                     remove_long(h, mmco[i].long_arg, 0);
03305 
03306                 remove_short_at_index(h, j);
03307                 h->long_ref[ mmco[i].long_arg ]= pic;
03308                 if (h->long_ref[ mmco[i].long_arg ]){
03309                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03310                     h->long_ref_count++;
03311                 }
03312             break;
03313         case MMCO_LONG2UNUSED:
03314             j = pic_num_extract(h, mmco[i].long_arg, &structure);
03315             pic = h->long_ref[j];
03316             if (pic) {
03317                 remove_long(h, j, structure ^ PICT_FRAME);
03318             } else if(s->avctx->debug&FF_DEBUG_MMCO)
03319                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
03320             break;
03321         case MMCO_LONG:
03322                     // Comment below left from previous code as it is an interresting note.
03323                     /* First field in pair is in short term list or
03324                      * at a different long term index.
03325                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
03326                      * Report the problem and keep the pair where it is,
03327                      * and mark this field valid.
03328                      */
03329 
03330             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
03331                 remove_long(h, mmco[i].long_arg, 0);
03332 
03333                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
03334                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03335                 h->long_ref_count++;
03336             }
03337 
03338             s->current_picture_ptr->reference |= s->picture_structure;
03339             current_ref_assigned=1;
03340             break;
03341         case MMCO_SET_MAX_LONG:
03342             assert(mmco[i].long_arg <= 16);
03343             // just remove the long term which index is greater than new max
03344             for(j = mmco[i].long_arg; j<16; j++){
03345                 remove_long(h, j, 0);
03346             }
03347             break;
03348         case MMCO_RESET:
03349             while(h->short_ref_count){
03350                 remove_short(h, h->short_ref[0]->frame_num, 0);
03351             }
03352             for(j = 0; j < 16; j++) {
03353                 remove_long(h, j, 0);
03354             }
03355             s->current_picture_ptr->poc=
03356             s->current_picture_ptr->field_poc[0]=
03357             s->current_picture_ptr->field_poc[1]=
03358             h->poc_lsb=
03359             h->poc_msb=
03360             h->frame_num=
03361             s->current_picture_ptr->frame_num= 0;
03362             break;
03363         default: assert(0);
03364         }
03365     }
03366 
03367     if (!current_ref_assigned) {
03368         /* Second field of complementary field pair; the first field of
03369          * which is already referenced. If short referenced, it
03370          * should be first entry in short_ref. If not, it must exist
03371          * in long_ref; trying to put it on the short list here is an
03372          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
03373          */
03374         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
03375             /* Just mark the second field valid */
03376             s->current_picture_ptr->reference = PICT_FRAME;
03377         } else if (s->current_picture_ptr->long_ref) {
03378             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
03379                                              "assignment for second field "
03380                                              "in complementary field pair "
03381                                              "(first field is long term)\n");
03382         } else {
03383             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
03384             if(pic){
03385                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
03386             }
03387 
03388             if(h->short_ref_count)
03389                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
03390 
03391             h->short_ref[0]= s->current_picture_ptr;
03392             h->short_ref_count++;
03393             s->current_picture_ptr->reference |= s->picture_structure;
03394         }
03395     }
03396 
03397     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
03398 
03399         /* We have too many reference frames, probably due to corrupted
03400          * stream. Need to discard one frame. Prevents overrun of the
03401          * short_ref and long_ref buffers.
03402          */
03403         av_log(h->s.avctx, AV_LOG_ERROR,
03404                "number of reference frames exceeds max (probably "
03405                "corrupt input), discarding one\n");
03406 
03407         if (h->long_ref_count && !h->short_ref_count) {
03408             for (i = 0; i < 16; ++i)
03409                 if (h->long_ref[i])
03410                     break;
03411 
03412             assert(i < 16);
03413             remove_long(h, i, 0);
03414         } else {
03415             pic = h->short_ref[h->short_ref_count - 1];
03416             remove_short(h, pic->frame_num, 0);
03417         }
03418     }
03419 
03420     print_short_term(h);
03421     print_long_term(h);
03422     return 0;
03423 }
03424 
03425 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
03426     MpegEncContext * const s = &h->s;
03427     int i;
03428 
03429     h->mmco_index= 0;
03430     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
03431         s->broken_link= get_bits1(gb) -1;
03432         if(get_bits1(gb)){
03433             h->mmco[0].opcode= MMCO_LONG;
03434             h->mmco[0].long_arg= 0;
03435             h->mmco_index= 1;
03436         }
03437     }else{
03438         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
03439             for(i= 0; i<MAX_MMCO_COUNT; i++) {
03440                 MMCOOpcode opcode= get_ue_golomb_31(gb);
03441 
03442                 h->mmco[i].opcode= opcode;
03443                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
03444                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
03445 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
03446                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
03447                         return -1;
03448                     }*/
03449                 }
03450                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
03451                     unsigned int long_arg= get_ue_golomb_31(gb);
03452                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
03453                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
03454                         return -1;
03455                     }
03456                     h->mmco[i].long_arg= long_arg;
03457                 }
03458 
03459                 if(opcode > (unsigned)MMCO_LONG){
03460                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
03461                     return -1;
03462                 }
03463                 if(opcode == MMCO_END)
03464                     break;
03465             }
03466             h->mmco_index= i;
03467         }else{
03468             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
03469 
03470             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
03471                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
03472                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
03473                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
03474                 h->mmco_index= 1;
03475                 if (FIELD_PICTURE) {
03476                     h->mmco[0].short_pic_num *= 2;
03477                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
03478                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
03479                     h->mmco_index= 2;
03480                 }
03481             }
03482         }
03483     }
03484 
03485     return 0;
03486 }
03487 
03488 static int init_poc(H264Context *h){
03489     MpegEncContext * const s = &h->s;
03490     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
03491     int field_poc[2];
03492     Picture *cur = s->current_picture_ptr;
03493 
03494     h->frame_num_offset= h->prev_frame_num_offset;
03495     if(h->frame_num < h->prev_frame_num)
03496         h->frame_num_offset += max_frame_num;
03497 
03498     if(h->sps.poc_type==0){
03499         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
03500 
03501         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
03502             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
03503         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
03504             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
03505         else
03506             h->poc_msb = h->prev_poc_msb;
03507 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
03508         field_poc[0] =
03509         field_poc[1] = h->poc_msb + h->poc_lsb;
03510         if(s->picture_structure == PICT_FRAME)
03511             field_poc[1] += h->delta_poc_bottom;
03512     }else if(h->sps.poc_type==1){
03513         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
03514         int i;
03515 
03516         if(h->sps.poc_cycle_length != 0)
03517             abs_frame_num = h->frame_num_offset + h->frame_num;
03518         else
03519             abs_frame_num = 0;
03520 
03521         if(h->nal_ref_idc==0 && abs_frame_num > 0)
03522             abs_frame_num--;
03523 
03524         expected_delta_per_poc_cycle = 0;
03525         for(i=0; i < h->sps.poc_cycle_length; i++)
03526             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
03527 
03528         if(abs_frame_num > 0){
03529             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
03530             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
03531 
03532             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
03533             for(i = 0; i <= frame_num_in_poc_cycle; i++)
03534                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
03535         } else
03536             expectedpoc = 0;
03537 
03538         if(h->nal_ref_idc == 0)
03539             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
03540 
03541         field_poc[0] = expectedpoc + h->delta_poc[0];
03542         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
03543 
03544         if(s->picture_structure == PICT_FRAME)
03545             field_poc[1] += h->delta_poc[1];
03546     }else{
03547         int poc= 2*(h->frame_num_offset + h->frame_num);
03548 
03549         if(!h->nal_ref_idc)
03550             poc--;
03551 
03552         field_poc[0]= poc;
03553         field_poc[1]= poc;
03554     }
03555 
03556     if(s->picture_structure != PICT_BOTTOM_FIELD)
03557         s->current_picture_ptr->field_poc[0]= field_poc[0];
03558     if(s->picture_structure != PICT_TOP_FIELD)
03559         s->current_picture_ptr->field_poc[1]= field_poc[1];
03560     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
03561 
03562     return 0;
03563 }
03564 
03565 
03569 static void init_scan_tables(H264Context *h){
03570     MpegEncContext * const s = &h->s;
03571     int i;
03572     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
03573         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
03574         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
03575     }else{
03576         for(i=0; i<16; i++){
03577 #define T(x) (x>>2) | ((x<<2) & 0xF)
03578             h->zigzag_scan[i] = T(zigzag_scan[i]);
03579             h-> field_scan[i] = T( field_scan[i]);
03580 #undef T
03581         }
03582     }
03583     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
03584         memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
03585         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
03586         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
03587         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
03588     }else{
03589         for(i=0; i<64; i++){
03590 #define T(x) (x>>3) | ((x&7)<<3)
03591             h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
03592             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
03593             h->field_scan8x8[i]        = T(field_scan8x8[i]);
03594             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
03595 #undef T
03596         }
03597     }
03598     if(h->sps.transform_bypass){ //FIXME same ugly
03599         h->zigzag_scan_q0          = zigzag_scan;
03600         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
03601         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
03602         h->field_scan_q0           = field_scan;
03603         h->field_scan8x8_q0        = field_scan8x8;
03604         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
03605     }else{
03606         h->zigzag_scan_q0          = h->zigzag_scan;
03607         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
03608         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
03609         h->field_scan_q0           = h->field_scan;
03610         h->field_scan8x8_q0        = h->field_scan8x8;
03611         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
03612     }
03613 }
03614 
03618 static void clone_slice(H264Context *dst, H264Context *src)
03619 {
03620     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
03621     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
03622     dst->s.current_picture      = src->s.current_picture;
03623     dst->s.linesize             = src->s.linesize;
03624     dst->s.uvlinesize           = src->s.uvlinesize;
03625     dst->s.first_field          = src->s.first_field;
03626 
03627     dst->prev_poc_msb           = src->prev_poc_msb;
03628     dst->prev_poc_lsb           = src->prev_poc_lsb;
03629     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
03630     dst->prev_frame_num         = src->prev_frame_num;
03631     dst->short_ref_count        = src->short_ref_count;
03632 
03633     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
03634     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
03635     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
03636     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
03637 
03638     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
03639     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
03640 }
03641 
03651 static int decode_slice_header(H264Context *h, H264Context *h0){
03652     MpegEncContext * const s = &h->s;
03653     MpegEncContext * const s0 = &h0->s;
03654     unsigned int first_mb_in_slice;
03655     unsigned int pps_id;
03656     int num_ref_idx_active_override_flag;
03657     unsigned int slice_type, tmp, i, j;
03658     int default_ref_list_done = 0;
03659     int last_pic_structure;
03660 
03661     s->dropable= h->nal_ref_idc == 0;
03662 
03663     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
03664         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
03665         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
03666     }else{
03667         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
03668         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
03669     }
03670 
03671     first_mb_in_slice= get_ue_golomb(&s->gb);
03672 
03673     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
03674         h0->current_slice = 0;
03675         if (!s0->first_field)
03676             s->current_picture_ptr= NULL;
03677     }
03678 
03679     slice_type= get_ue_golomb_31(&s->gb);
03680     if(slice_type > 9){
03681         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
03682         return -1;
03683     }
03684     if(slice_type > 4){
03685         slice_type -= 5;
03686         h->slice_type_fixed=1;
03687     }else
03688         h->slice_type_fixed=0;
03689 
03690     slice_type= golomb_to_pict_type[ slice_type ];
03691     if (slice_type == FF_I_TYPE
03692         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
03693         default_ref_list_done = 1;
03694     }
03695     h->slice_type= slice_type;
03696     h->slice_type_nos= slice_type & 3;
03697 
03698     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
03699     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
03700         av_log(h->s.avctx, AV_LOG_ERROR,
03701                "B picture before any references, skipping\n");
03702         return -1;
03703     }
03704 
03705     pps_id= get_ue_golomb(&s->gb);
03706     if(pps_id>=MAX_PPS_COUNT){
03707         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
03708         return -1;
03709     }
03710     if(!h0->pps_buffers[pps_id]) {
03711         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
03712         return -1;
03713     }
03714     h->pps= *h0->pps_buffers[pps_id];
03715 
03716     if(!h0->sps_buffers[h->pps.sps_id]) {
03717         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
03718         return -1;
03719     }
03720     h->sps = *h0->sps_buffers[h->pps.sps_id];
03721 
03722     if(h == h0 && h->dequant_coeff_pps != pps_id){
03723         h->dequant_coeff_pps = pps_id;
03724         init_dequant_tables(h);
03725     }
03726 
03727     s->mb_width= h->sps.mb_width;
03728     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
03729 
03730     h->b_stride=  s->mb_width*4;
03731     h->b8_stride= s->mb_width*2;
03732 
03733     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
03734     if(h->sps.frame_mbs_only_flag)
03735         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
03736     else
03737         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
03738 
03739     if (s->context_initialized
03740         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
03741         if(h != h0)
03742             return -1;   // width / height changed during parallelized decoding
03743         free_tables(h);
03744         flush_dpb(s->avctx);
03745         MPV_common_end(s);
03746         h->list_count = 0;
03747     }
03748     if (!s->context_initialized) {
03749         if(h != h0)
03750             return -1;  // we cant (re-)initialize context during parallel decoding
03751         if (MPV_common_init(s) < 0)
03752             return -1;
03753         s->first_field = 0;
03754 
03755         init_scan_tables(h);
03756         alloc_tables(h);
03757 
03758         for(i = 1; i < s->avctx->thread_count; i++) {
03759             H264Context *c;
03760             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
03761             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
03762             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
03763             c->sps = h->sps;
03764             c->pps = h->pps;
03765             init_scan_tables(c);
03766             clone_tables(c, h);
03767         }
03768 
03769         for(i = 0; i < s->avctx->thread_count; i++)
03770             if(context_init(h->thread_context[i]) < 0)
03771                 return -1;
03772 
03773         s->avctx->width = s->width;
03774         s->avctx->height = s->height;
03775         s->avctx->sample_aspect_ratio= h->sps.sar;
03776         if(!s->avctx->sample_aspect_ratio.den)
03777             s->avctx->sample_aspect_ratio.den = 1;
03778 
03779         if(h->sps.timing_info_present_flag){
03780             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
03781             if(h->x264_build > 0 && h->x264_build < 44)
03782                 s->avctx->time_base.den *= 2;
03783             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
03784                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
03785         }
03786     }
03787 
03788     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
03789 
03790     h->mb_mbaff = 0;
03791     h->mb_aff_frame = 0;
03792     last_pic_structure = s0->picture_structure;
03793     if(h->sps.frame_mbs_only_flag){
03794         s->picture_structure= PICT_FRAME;
03795     }else{
03796         if(get_bits1(&s->gb)) { //field_pic_flag
03797             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
03798         } else {
03799             s->picture_structure= PICT_FRAME;
03800             h->mb_aff_frame = h->sps.mb_aff;
03801         }
03802     }
03803     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
03804 
03805     if(h0->current_slice == 0){
03806         while(h->frame_num !=  h->prev_frame_num &&
03807               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
03808             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
03809             if (frame_start(h) < 0)
03810                 return -1;
03811             h->prev_frame_num++;
03812             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
03813             s->current_picture_ptr->frame_num= h->prev_frame_num;
03814             execute_ref_pic_marking(h, NULL, 0);
03815         }
03816 
03817         /* See if we have a decoded first field looking for a pair... */
03818         if (s0->first_field) {
03819             assert(s0->current_picture_ptr);
03820             assert(s0->current_picture_ptr->data[0]);
03821             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
03822 
03823             /* figure out if we have a complementary field pair */
03824             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
03825                 /*
03826                  * Previous field is unmatched. Don't display it, but let it
03827                  * remain for reference if marked as such.
03828                  */
03829                 s0->current_picture_ptr = NULL;
03830                 s0->first_field = FIELD_PICTURE;
03831 
03832             } else {
03833                 if (h->nal_ref_idc &&
03834                         s0->current_picture_ptr->reference &&
03835                         s0->current_picture_ptr->frame_num != h->frame_num) {
03836                     /*
03837                      * This and previous field were reference, but had
03838                      * different frame_nums. Consider this field first in
03839                      * pair. Throw away previous field except for reference
03840                      * purposes.
03841                      */
03842                     s0->first_field = 1;
03843                     s0->current_picture_ptr = NULL;
03844 
03845                 } else {
03846                     /* Second field in complementary pair */
03847                     s0->first_field = 0;
03848                 }
03849             }
03850 
03851         } else {
03852             /* Frame or first field in a potentially complementary pair */
03853             assert(!s0->current_picture_ptr);
03854             s0->first_field = FIELD_PICTURE;
03855         }
03856 
03857         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
03858             s0->first_field = 0;
03859             return -1;
03860         }
03861     }
03862     if(h != h0)
03863         clone_slice(h, h0);
03864 
03865     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
03866 
03867     assert(s->mb_num == s->mb_width * s->mb_height);
03868     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
03869        first_mb_in_slice                    >= s->mb_num){
03870         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03871         return -1;
03872     }
03873     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03874     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03875     if (s->picture_structure == PICT_BOTTOM_FIELD)
03876         s->resync_mb_y = s->mb_y = s->mb_y + 1;
03877     assert(s->mb_y < s->mb_height);
03878 
03879     if(s->picture_structure==PICT_FRAME){
03880         h->curr_pic_num=   h->frame_num;
03881         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03882     }else{
03883         h->curr_pic_num= 2*h->frame_num + 1;
03884         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03885     }
03886 
03887     if(h->nal_unit_type == NAL_IDR_SLICE){
03888         get_ue_golomb(&s->gb); /* idr_pic_id */
03889     }
03890 
03891     if(h->sps.poc_type==0){
03892         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03893 
03894         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03895             h->delta_poc_bottom= get_se_golomb(&s->gb);
03896         }
03897     }
03898 
03899     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03900         h->delta_poc[0]= get_se_golomb(&s->gb);
03901 
03902         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03903             h->delta_poc[1]= get_se_golomb(&s->gb);
03904     }
03905 
03906     init_poc(h);
03907 
03908     if(h->pps.redundant_pic_cnt_present){
03909         h->redundant_pic_count= get_ue_golomb(&s->gb);
03910     }
03911 
03912     //set defaults, might be overridden a few lines later
03913     h->ref_count[0]= h->pps.ref_count[0];
03914     h->ref_count[1]= h->pps.ref_count[1];
03915 
03916     if(h->slice_type_nos != FF_I_TYPE){
03917         if(h->slice_type_nos == FF_B_TYPE){
03918             h->direct_spatial_mv_pred= get_bits1(&s->gb);
03919         }
03920         num_ref_idx_active_override_flag= get_bits1(&s->gb);
03921 
03922         if(num_ref_idx_active_override_flag){
03923             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03924             if(h->slice_type_nos==FF_B_TYPE)
03925                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03926 
03927             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
03928                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03929                 h->ref_count[0]= h->ref_count[1]= 1;
03930                 return -1;
03931             }
03932         }
03933         if(h->slice_type_nos == FF_B_TYPE)
03934             h->list_count= 2;
03935         else
03936             h->list_count= 1;
03937     }else
03938         h->list_count= 0;
03939 
03940     if(!default_ref_list_done){
03941         fill_default_ref_list(h);
03942     }
03943 
03944     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0) {
03945         h->ref_count[1]= h->ref_count[0]= 0;
03946         return -1;
03947     }
03948 
03949     if(h->slice_type_nos!=FF_I_TYPE){
03950         s->last_picture_ptr= &h->ref_list[0][0];
03951         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03952     }
03953     if(h->slice_type_nos==FF_B_TYPE){
03954         s->next_picture_ptr= &h->ref_list[1][0];
03955         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03956     }
03957 
03958     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
03959        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
03960         pred_weight_table(h);
03961     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
03962         implicit_weight_table(h);
03963     else {
03964         h->use_weight = 0;
03965         for (i = 0; i < 2; i++) {
03966             h->luma_weight_flag[i]   = 0;
03967             h->chroma_weight_flag[i] = 0;
03968         }
03969     }
03970 
03971     if(h->nal_ref_idc)
03972         decode_ref_pic_marking(h0, &s->gb);
03973 
03974     if(FRAME_MBAFF)
03975         fill_mbaff_ref_list(h);
03976 
03977     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
03978         direct_dist_scale_factor(h);
03979     direct_ref_list_init(h);
03980 
03981     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
03982         tmp = get_ue_golomb_31(&s->gb);
03983         if(tmp > 2){
03984             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03985             return -1;
03986         }
03987         h->cabac_init_idc= tmp;
03988     }
03989 
03990     h->last_qscale_diff = 0;
03991     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03992     if(tmp>51){
03993         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03994         return -1;
03995     }
03996     s->qscale= tmp;
03997     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03998     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03999     //FIXME qscale / qp ... stuff
04000     if(h->slice_type == FF_SP_TYPE){
04001         get_bits1(&s->gb); /* sp_for_switch_flag */
04002     }
04003     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
04004         get_se_golomb(&s->gb); /* slice_qs_delta */
04005     }
04006 
04007     h->deblocking_filter = 1;
04008     h->slice_alpha_c0_offset = 0;
04009     h->slice_beta_offset = 0;
04010     if( h->pps.deblocking_filter_parameters_present ) {
04011         tmp= get_ue_golomb_31(&s->gb);
04012         if(tmp > 2){
04013             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
04014             return -1;
04015         }
04016         h->deblocking_filter= tmp;
04017         if(h->deblocking_filter < 2)
04018             h->deblocking_filter^= 1; // 1<->0
04019 
04020         if( h->deblocking_filter ) {
04021             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
04022             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
04023         }
04024     }
04025 
04026     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
04027        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
04028        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
04029        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
04030         h->deblocking_filter= 0;
04031 
04032     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
04033         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
04034             /* Cheat slightly for speed:
04035                Do not bother to deblock across slices. */
04036             h->deblocking_filter = 2;
04037         } else {
04038             h0->max_contexts = 1;
04039             if(!h0->single_decode_warning) {
04040                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
04041                 h0->single_decode_warning = 1;
04042             }
04043             if(h != h0)
04044                 return 1; // deblocking switched inside frame
04045         }
04046     }
04047 
04048 #if 0 //FMO
04049     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
04050         slice_group_change_cycle= get_bits(&s->gb, ?);
04051 #endif
04052 
04053     h0->last_slice_type = slice_type;
04054     h->slice_num = ++h0->current_slice;
04055     if(h->slice_num >= MAX_SLICES){
04056         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
04057     }
04058 
04059     for(j=0; j<2; j++){
04060         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
04061         ref2frm[0]=
04062         ref2frm[1]= -1;
04063         for(i=0; i<16; i++)
04064             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
04065                           +(h->ref_list[j][i].reference&3);
04066         ref2frm[18+0]=
04067         ref2frm[18+1]= -1;
04068         for(i=16; i<48; i++)
04069             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
04070                           +(h->ref_list[j][i].reference&3);
04071     }
04072 
04073     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
04074     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
04075 
04076     s->avctx->refs= h->sps.ref_frame_count;
04077 
04078     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
04079         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
04080                h->slice_num,
04081                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
04082                first_mb_in_slice,
04083                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
04084                pps_id, h->frame_num,
04085                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
04086                h->ref_count[0], h->ref_count[1],
04087                s->qscale,
04088                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
04089                h->use_weight,
04090                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
04091                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
04092                );
04093     }
04094 
04095     return 0;
04096 }
04097 
04101 static inline int get_level_prefix(GetBitContext *gb){
04102     unsigned int buf;
04103     int log;
04104 
04105     OPEN_READER(re, gb);
04106     UPDATE_CACHE(re, gb);
04107     buf=GET_CACHE(re, gb);
04108 
04109     log= 32 - av_log2(buf);
04110 #ifdef TRACE
04111     print_bin(buf>>(32-log), log);
04112     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
04113 #endif
04114 
04115     LAST_SKIP_BITS(re, gb, log);
04116     CLOSE_READER(re, gb);
04117 
04118     return log-1;
04119 }
04120 
04121 static inline int get_dct8x8_allowed(H264Context *h){
04122     if(h->sps.direct_8x8_inference_flag)
04123         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
04124     else
04125         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
04126 }
04127 
04135 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
04136     MpegEncContext * const s = &h->s;
04137     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
04138     int level[16];
04139     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
04140 
04141     //FIXME put trailing_onex into the context
04142 
04143     if(n == CHROMA_DC_BLOCK_INDEX){
04144         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
04145         total_coeff= coeff_token>>2;
04146     }else{
04147         if(n == LUMA_DC_BLOCK_INDEX){
04148             total_coeff= pred_non_zero_count(h, 0);
04149             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
04150             total_coeff= coeff_token>>2;
04151         }else{
04152             total_coeff= pred_non_zero_count(h, n);
04153             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
04154             total_coeff= coeff_token>>2;
04155             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
04156         }
04157     }
04158 
04159     //FIXME set last_non_zero?
04160 
04161     if(total_coeff==0)
04162         return 0;
04163     if(total_coeff > (unsigned)max_coeff) {
04164         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
04165         return -1;
04166     }
04167 
04168     trailing_ones= coeff_token&3;
04169     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
04170     assert(total_coeff<=16);
04171 
04172     i = show_bits(gb, 3);
04173     skip_bits(gb, trailing_ones);
04174     level[0] = 1-((i&4)>>1);
04175     level[1] = 1-((i&2)   );
04176     level[2] = 1-((i&1)<<1);
04177 
04178     if(trailing_ones<total_coeff) {
04179         int mask, prefix;
04180         int suffix_length = total_coeff > 10 && trailing_ones < 3;
04181         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
04182         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
04183 
04184         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
04185         if(level_code >= 100){
04186             prefix= level_code - 100;
04187             if(prefix == LEVEL_TAB_BITS)
04188                 prefix += get_level_prefix(gb);
04189 
04190             //first coefficient has suffix_length equal to 0 or 1
04191             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
04192                 if(suffix_length)
04193                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
04194                 else
04195                     level_code= (prefix<<suffix_length); //part
04196             }else if(prefix==14){
04197                 if(suffix_length)
04198                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
04199                 else
04200                     level_code= prefix + get_bits(gb, 4); //part
04201             }else{
04202                 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
04203                 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
04204                 if(prefix>=16)
04205                     level_code += (1<<(prefix-3))-4096;
04206             }
04207 
04208             if(trailing_ones < 3) level_code += 2;
04209 
04210             suffix_length = 2;
04211             mask= -(level_code&1);
04212             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
04213         }else{
04214             if(trailing_ones < 3) level_code += (level_code>>31)|1;
04215 
04216             suffix_length = 1;
04217             if(level_code + 3U > 6U)
04218                 suffix_length++;
04219             level[trailing_ones]= level_code;
04220         }
04221 
04222         //remaining coefficients have suffix_length > 0
04223         for(i=trailing_ones+1;i<total_coeff;i++) {
04224             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
04225             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
04226             level_code= cavlc_level_tab[suffix_length][bitsi][0];
04227 
04228             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
04229             if(level_code >= 100){
04230                 prefix= level_code - 100;
04231                 if(prefix == LEVEL_TAB_BITS){
04232                     prefix += get_level_prefix(gb);
04233                 }
04234                 if(prefix<15){
04235                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
04236                 }else{
04237                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
04238                     if(prefix>=16)
04239                         level_code += (1<<(prefix-3))-4096;
04240                 }
04241                 mask= -(level_code&1);
04242                 level_code= (((2+level_code)>>1) ^ mask) - mask;
04243             }
04244             level[i]= level_code;
04245 
04246             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
04247                 suffix_length++;
04248         }
04249     }
04250 
04251     if(total_coeff == max_coeff)
04252         zeros_left=0;
04253     else{
04254         if(n == CHROMA_DC_BLOCK_INDEX)
04255             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
04256         else
04257             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
04258     }
04259 
04260     coeff_num = zeros_left + total_coeff - 1;
04261     j = scantable[coeff_num];
04262     if(n > 24){
04263         block[j] = level[0];
04264         for(i=1;i<total_coeff;i++) {
04265             if(zeros_left <= 0)
04266                 run_before = 0;
04267             else if(zeros_left < 7){
04268                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
04269             }else{
04270                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
04271             }
04272             zeros_left -= run_before;
04273             coeff_num -= 1 + run_before;
04274             j= scantable[ coeff_num ];
04275 
04276             block[j]= level[i];
04277         }
04278     }else{
04279         block[j] = (level[0] * qmul[j] + 32)>>6;
04280         for(i=1;i<total_coeff;i++) {
04281             if(zeros_left <= 0)
04282                 run_before = 0;
04283             else if(zeros_left < 7){
04284                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
04285             }else{
04286                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
04287             }
04288             zeros_left -= run_before;
04289             coeff_num -= 1 + run_before;
04290             j= scantable[ coeff_num ];
04291 
04292             block[j]= (level[i] * qmul[j] + 32)>>6;
04293         }
04294     }
04295 
04296     if(zeros_left<0){
04297         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
04298         return -1;
04299     }
04300 
04301     return 0;
04302 }
04303 
04304 static void predict_field_decoding_flag(H264Context *h){
04305     MpegEncContext * const s = &h->s;
04306     const int mb_xy= h->mb_xy;
04307     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
04308                 ? s->current_picture.mb_type[mb_xy-1]
04309                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
04310                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
04311                 : 0;
04312     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
04313 }
04314 
04318 static void decode_mb_skip(H264Context *h){
04319     MpegEncContext * const s = &h->s;
04320     const int mb_xy= h->mb_xy;
04321     int mb_type=0;
04322 
04323     memset(h->non_zero_count[mb_xy], 0, 16);
04324     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
04325 
04326     if(MB_FIELD)
04327         mb_type|= MB_TYPE_INTERLACED;
04328 
04329     if( h->slice_type_nos == FF_B_TYPE )
04330     {
04331         // just for fill_caches. pred_direct_motion will set the real mb_type
04332         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
04333 
04334         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
04335         pred_direct_motion(h, &mb_type);
04336         mb_type|= MB_TYPE_SKIP;
04337     }
04338     else
04339     {
04340         int mx, my;
04341         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
04342 
04343         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
04344         pred_pskip_motion(h, &mx, &my);
04345         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
04346         fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
04347     }
04348 
04349     write_back_motion(h, mb_type);
04350     s->current_picture.mb_type[mb_xy]= mb_type;
04351     s->current_picture.qscale_table[mb_xy]= s->qscale;
04352     h->slice_table[ mb_xy ]= h->slice_num;
04353     h->prev_mb_skipped= 1;
04354 }
04355 
04360 static int decode_mb_cavlc(H264Context *h){
04361     MpegEncContext * const s = &h->s;
04362     int mb_xy;
04363     int partition_count;
04364     unsigned int mb_type, cbp;
04365     int dct8x8_allowed= h->pps.transform_8x8_mode;
04366 
04367     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
04368 
04369     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
04370     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
04371                 down the code */
04372     if(h->slice_type_nos != FF_I_TYPE){
04373         if(s->mb_skip_run==-1)
04374             s->mb_skip_run= get_ue_golomb(&s->gb);
04375 
04376         if (s->mb_skip_run--) {
04377             if(FRAME_MBAFF && (s->mb_y&1) == 0){
04378                 if(s->mb_skip_run==0)
04379                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
04380                 else
04381                     predict_field_decoding_flag(h);
04382             }
04383             decode_mb_skip(h);
04384             return 0;
04385         }
04386     }
04387     if(FRAME_MBAFF){
04388         if( (s->mb_y&1) == 0 )
04389             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
04390     }
04391 
04392     h->prev_mb_skipped= 0;
04393 
04394     mb_type= get_ue_golomb(&s->gb);
04395     if(h->slice_type_nos == FF_B_TYPE){
04396         if(mb_type < 23){
04397             partition_count= b_mb_type_info[mb_type].partition_count;
04398             mb_type=         b_mb_type_info[mb_type].type;
04399         }else{
04400             mb_type -= 23;
04401             goto decode_intra_mb;
04402         }
04403     }else if(h->slice_type_nos == FF_P_TYPE){
04404         if(mb_type < 5){
04405             partition_count= p_mb_type_info[mb_type].partition_count;
04406             mb_type=         p_mb_type_info[mb_type].type;
04407         }else{
04408             mb_type -= 5;
04409             goto decode_intra_mb;
04410         }
04411     }else{
04412        assert(h->slice_type_nos == FF_I_TYPE);
04413         if(h->slice_type == FF_SI_TYPE && mb_type)
04414             mb_type--;
04415 decode_intra_mb:
04416         if(mb_type > 25){
04417             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
04418             return -1;
04419         }
04420         partition_count=0;
04421         cbp= i_mb_type_info[mb_type].cbp;
04422         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
04423         mb_type= i_mb_type_info[mb_type].type;
04424     }
04425 
04426     if(MB_FIELD)
04427         mb_type |= MB_TYPE_INTERLACED;
04428 
04429     h->slice_table[ mb_xy ]= h->slice_num;
04430 
04431     if(IS_INTRA_PCM(mb_type)){
04432         unsigned int x;
04433 
04434         // We assume these blocks are very rare so we do not optimize it.
04435         align_get_bits(&s->gb);
04436 
04437         // The pixels are stored in the same order as levels in h->mb array.
04438         for(x=0; x < (CHROMA ? 384 : 256); x++){
04439             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
04440         }
04441 
04442         // In deblocking, the quantizer is 0
04443         s->current_picture.qscale_table[mb_xy]= 0;
04444         // All coeffs are present
04445         memset(h->non_zero_count[mb_xy], 16, 16);
04446 
04447         s->current_picture.mb_type[mb_xy]= mb_type;
04448         return 0;
04449     }
04450 
04451     if(MB_MBAFF){
04452         h->ref_count[0] <<= 1;
04453         h->ref_count[1] <<= 1;
04454     }
04455 
04456     fill_caches(h, mb_type, 0);
04457 
04458     //mb_pred
04459     if(IS_INTRA(mb_type)){
04460         int pred_mode;
04461 //            init_top_left_availability(h);
04462         if(IS_INTRA4x4(mb_type)){
04463             int i;
04464             int di = 1;
04465             if(dct8x8_allowed && get_bits1(&s->gb)){
04466                 mb_type |= MB_TYPE_8x8DCT;
04467                 di = 4;
04468             }
04469 
04470 //                fill_intra4x4_pred_table(h);
04471             for(i=0; i<16; i+=di){
04472                 int mode= pred_intra_mode(h, i);
04473 
04474                 if(!get_bits1(&s->gb)){
04475                     const int rem_mode= get_bits(&s->gb, 3);
04476                     mode = rem_mode + (rem_mode >= mode);
04477                 }
04478 
04479                 if(di==4)
04480                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
04481                 else
04482                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
04483             }
04484             write_back_intra_pred_mode(h);
04485             if( check_intra4x4_pred_mode(h) < 0)
04486                 return -1;
04487         }else{
04488             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
04489             if(h->intra16x16_pred_mode < 0)
04490                 return -1;
04491         }
04492         if(CHROMA){
04493             pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
04494             if(pred_mode < 0)
04495                 return -1;
04496             h->chroma_pred_mode= pred_mode;
04497         }
04498     }else if(partition_count==4){
04499         int i, j, sub_partition_count[4], list, ref[2][4];
04500 
04501         if(h->slice_type_nos == FF_B_TYPE){
04502             for(i=0; i<4; i++){
04503                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
04504                 if(h->sub_mb_type[i] >=13){
04505                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
04506                     return -1;
04507                 }
04508                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
04509                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
04510             }
04511             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
04512                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
04513                 pred_direct_motion(h, &mb_type);
04514                 h->ref_cache[0][scan8[4]] =
04515                 h->ref_cache[1][scan8[4]] =
04516                 h->ref_cache[0][scan8[12]] =
04517                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
04518             }
04519         }else{
04520             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
04521             for(i=0; i<4; i++){
04522                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
04523                 if(h->sub_mb_type[i] >=4){
04524                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
04525                     return -1;
04526                 }
04527                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
04528                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
04529             }
04530         }
04531 
04532         for(list=0; list<h->list_count; list++){
04533             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
04534             for(i=0; i<4; i++){
04535                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
04536                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
04537                     unsigned int tmp;
04538                     if(ref_count == 1){
04539                         tmp= 0;
04540                     }else if(ref_count == 2){
04541                         tmp= get_bits1(&s->gb)^1;
04542                     }else{
04543                         tmp= get_ue_golomb_31(&s->gb);
04544                         if(tmp>=ref_count){
04545                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
04546                             return -1;
04547                         }
04548                     }
04549                     ref[list][i]= tmp;
04550                 }else{
04551                  //FIXME
04552                     ref[list][i] = -1;
04553                 }
04554             }
04555         }
04556 
04557         if(dct8x8_allowed)
04558             dct8x8_allowed = get_dct8x8_allowed(h);
04559 
04560         for(list=0; list<h->list_count; list++){
04561             for(i=0; i<4; i++){
04562                 if(IS_DIRECT(h->sub_mb_type[i])) {
04563                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
04564                     continue;
04565                 }
04566                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
04567                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
04568 
04569                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
04570                     const int sub_mb_type= h->sub_mb_type[i];
04571                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
04572                     for(j=0; j<sub_partition_count[i]; j++){
04573                         int mx, my;
04574                         const int index= 4*i + block_width*j;
04575                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
04576                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
04577                         mx += get_se_golomb(&s->gb);
04578                         my += get_se_golomb(&s->gb);
04579                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04580 
04581                         if(IS_SUB_8X8(sub_mb_type)){
04582                             mv_cache[ 1 ][0]=
04583                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
04584                             mv_cache[ 1 ][1]=
04585                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
04586                         }else if(IS_SUB_8X4(sub_mb_type)){
04587                             mv_cache[ 1 ][0]= mx;
04588                             mv_cache[ 1 ][1]= my;
04589                         }else if(IS_SUB_4X8(sub_mb_type)){
04590                             mv_cache[ 8 ][0]= mx;
04591                             mv_cache[ 8 ][1]= my;
04592                         }
04593                         mv_cache[ 0 ][0]= mx;
04594                         mv_cache[ 0 ][1]= my;
04595                     }
04596                 }else{
04597                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
04598                     p[0] = p[1]=
04599                     p[8] = p[9]= 0;
04600                 }
04601             }
04602         }
04603     }else if(IS_DIRECT(mb_type)){
04604         pred_direct_motion(h, &mb_type);
04605         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
04606     }else{
04607         int list, mx, my, i;
04608          //FIXME we should set ref_idx_l? to 0 if we use that later ...
04609         if(IS_16X16(mb_type)){
04610             for(list=0; list<h->list_count; list++){
04611                     unsigned int val;
04612                     if(IS_DIR(mb_type, 0, list)){
04613                         if(h->ref_count[list]==1){
04614                             val= 0;
04615                         }else if(h->ref_count[list]==2){
04616                             val= get_bits1(&s->gb)^1;
04617                         }else{
04618                             val= get_ue_golomb_31(&s->gb);
04619                             if(val >= h->ref_count[list]){
04620                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04621                                 return -1;
04622                             }
04623                         }
04624                     }else
04625                         val= LIST_NOT_USED&0xFF;
04626                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
04627             }
04628             for(list=0; list<h->list_count; list++){
04629                 unsigned int val;
04630                 if(IS_DIR(mb_type, 0, list)){
04631                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
04632                     mx += get_se_golomb(&s->gb);
04633                     my += get_se_golomb(&s->gb);
04634                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04635 
04636                     val= pack16to32(mx,my);
04637                 }else
04638                     val=0;
04639                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
04640             }
04641         }
04642         else if(IS_16X8(mb_type)){
04643             for(list=0; list<h->list_count; list++){
04644                     for(i=0; i<2; i++){
04645                         unsigned int val;
04646                         if(IS_DIR(mb_type, i, list)){
04647                             if(h->ref_count[list] == 1){
04648                                 val= 0;
04649                             }else if(h->ref_count[list] == 2){
04650                                 val= get_bits1(&s->gb)^1;
04651                             }else{
04652                                 val= get_ue_golomb_31(&s->gb);
04653                                 if(val >= h->ref_count[list]){
04654                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04655                                     return -1;
04656                                 }
04657                             }
04658                         }else
04659                             val= LIST_NOT_USED&0xFF;
04660                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
04661                     }
04662             }
04663             for(list=0; list<h->list_count; list++){
04664                 for(i=0; i<2; i++){
04665                     unsigned int val;
04666                     if(IS_DIR(mb_type, i, list)){
04667                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
04668                         mx += get_se_golomb(&s->gb);
04669                         my += get_se_golomb(&s->gb);
04670                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04671 
04672                         val= pack16to32(mx,my);
04673                     }else
04674                         val=0;
04675                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
04676                 }
04677             }
04678         }else{
04679             assert(IS_8X16(mb_type));
04680             for(list=0; list<h->list_count; list++){
04681                     for(i=0; i<2; i++){
04682                         unsigned int val;
04683                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
04684                             if(h->ref_count[list]==1){
04685                                 val= 0;
04686                             }else if(h->ref_count[list]==2){
04687                                 val= get_bits1(&s->gb)^1;
04688                             }else{
04689                                 val= get_ue_golomb_31(&s->gb);
04690                                 if(val >= h->ref_count[list]){
04691                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04692                                     return -1;
04693                                 }
04694                             }
04695                         }else
04696                             val= LIST_NOT_USED&0xFF;
04697                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
04698                     }
04699             }
04700             for(list=0; list<h->list_count; list++){
04701                 for(i=0; i<2; i++){
04702                     unsigned int val;
04703                     if(IS_DIR(mb_type, i, list)){
04704                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
04705                         mx += get_se_golomb(&s->gb);
04706                         my += get_se_golomb(&s->gb);
04707                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04708 
04709                         val= pack16to32(mx,my);
04710                     }else
04711                         val=0;
04712                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
04713                 }
04714             }
04715         }
04716     }
04717 
04718     if(IS_INTER(mb_type))
04719         write_back_motion(h, mb_type);
04720 
04721     if(!IS_INTRA16x16(mb_type)){
04722         cbp= get_ue_golomb(&s->gb);
04723         if(cbp > 47){
04724             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
04725             return -1;
04726         }
04727 
04728         if(CHROMA){
04729             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
04730             else                     cbp= golomb_to_inter_cbp   [cbp];
04731         }else{
04732             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
04733             else                     cbp= golomb_to_inter_cbp_gray[cbp];
04734         }
04735     }
04736     h->cbp = cbp;
04737 
04738     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
04739         if(get_bits1(&s->gb)){
04740             mb_type |= MB_TYPE_8x8DCT;
04741             h->cbp_table[mb_xy]= cbp;
04742         }
04743     }
04744     s->current_picture.mb_type[mb_xy]= mb_type;
04745 
04746     if(cbp || IS_INTRA16x16(mb_type)){
04747         int i8x8, i4x4, chroma_idx;
04748         int dquant;
04749         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
04750         const uint8_t *scan, *scan8x8, *dc_scan;
04751 
04752 //        fill_non_zero_count_cache(h);
04753 
04754         if(IS_INTERLACED(mb_type)){
04755             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
04756             scan= s->qscale ? h->field_scan : h->field_scan_q0;
04757             dc_scan= luma_dc_field_scan;
04758         }else{
04759             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
04760             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
04761             dc_scan= luma_dc_zigzag_scan;
04762         }
04763 
04764         dquant= get_se_golomb(&s->gb);
04765 
04766         if( dquant > 25 || dquant < -26 ){
04767             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
04768             return -1;
04769         }
04770 
04771         s->qscale += dquant;
04772         if(((unsigned)s->qscale) > 51){
04773             if(s->qscale<0) s->qscale+= 52;
04774             else            s->qscale-= 52;
04775         }
04776 
04777         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
04778         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
04779         if(IS_INTRA16x16(mb_type)){
04780             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
04781                 return -1; //FIXME continue if partitioned and other return -1 too
04782             }
04783 
04784             assert((cbp&15) == 0 || (cbp&15) == 15);
04785 
04786             if(cbp&15){
04787                 for(i8x8=0; i8x8<4; i8x8++){
04788                     for(i4x4=0; i4x4<4; i4x4++){
04789                         const int index= i4x4 + 4*i8x8;
04790                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
04791                             return -1;
04792                         }
04793                     }
04794                 }
04795             }else{
04796                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
04797             }
04798         }else{
04799             for(i8x8=0; i8x8<4; i8x8++){
04800                 if(cbp & (1<<i8x8)){
04801                     if(IS_8x8DCT(mb_type)){
04802                         DCTELEM *buf = &h->mb[64*i8x8];
04803                         uint8_t *nnz;
04804                         for(i4x4=0; i4x4<4; i4x4++){
04805                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
04806                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
04807                                 return -1;
04808                         }
04809                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
04810                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
04811                     }else{
04812                         for(i4x4=0; i4x4<4; i4x4++){
04813                             const int index= i4x4 + 4*i8x8;
04814 
04815                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
04816                                 return -1;
04817                             }
04818                         }
04819                     }
04820                 }else{
04821                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
04822                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
04823                 }
04824             }
04825         }
04826 
04827         if(cbp&0x30){
04828             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
04829                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
04830                     return -1;
04831                 }
04832         }
04833 
04834         if(cbp&0x20){
04835             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
04836                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
04837                 for(i4x4=0; i4x4<4; i4x4++){
04838                     const int index= 16 + 4*chroma_idx + i4x4;
04839                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
04840                         return -1;
04841                     }
04842                 }
04843             }
04844         }else{
04845             uint8_t * const nnz= &h->non_zero_count_cache[0];
04846             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
04847             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
04848         }
04849     }else{
04850         uint8_t * const nnz= &h->non_zero_count_cache[0];
04851         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
04852         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
04853         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
04854     }
04855     s->current_picture.qscale_table[mb_xy]= s->qscale;
04856     write_back_non_zero_count(h);
04857 
04858     if(MB_MBAFF){
04859         h->ref_count[0] >>= 1;
04860         h->ref_count[1] >>= 1;
04861     }
04862 
04863     return 0;
04864 }
04865 
04866 static int decode_cabac_field_decoding_flag(H264Context *h) {
04867     MpegEncContext * const s = &h->s;
04868     const int mb_x = s->mb_x;
04869     const int mb_y = s->mb_y & ~1;
04870     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
04871     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
04872 
04873     unsigned int ctx = 0;
04874 
04875     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
04876         ctx += 1;
04877     }
04878     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
04879         ctx += 1;
04880     }
04881 
04882     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
04883 }
04884 
04885 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
04886     uint8_t *state= &h->cabac_state[ctx_base];
04887     int mb_type;
04888 
04889     if(intra_slice){
04890         MpegEncContext * const s = &h->s;
04891         const int mba_xy = h->left_mb_xy[0];
04892         const int mbb_xy = h->top_mb_xy;
04893         int ctx=0;
04894         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
04895             ctx++;
04896         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
04897             ctx++;
04898         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
04899             return 0;   /* I4x4 */
04900         state += 2;
04901     }else{
04902         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
04903             return 0;   /* I4x4 */
04904     }
04905 
04906     if( get_cabac_terminate( &h->cabac ) )
04907         return 25;  /* PCM */
04908 
04909     mb_type = 1; /* I16x16 */
04910     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
04911     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
04912         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
04913     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
04914     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
04915     return mb_type;
04916 }
04917 
04918 static int decode_cabac_mb_type_b( H264Context *h ) {
04919     MpegEncContext * const s = &h->s;
04920 
04921         const int mba_xy = h->left_mb_xy[0];
04922         const int mbb_xy = h->top_mb_xy;
04923         int ctx = 0;
04924         int bits;
04925         assert(h->slice_type_nos == FF_B_TYPE);
04926 
04927         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
04928             ctx++;
04929         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
04930             ctx++;
04931 
04932         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
04933             return 0; /* B_Direct_16x16 */
04934 
04935         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
04936             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
04937         }
04938 
04939         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
04940         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
04941         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
04942         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
04943         if( bits < 8 )
04944             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
04945         else if( bits == 13 ) {
04946             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
04947         } else if( bits == 14 )
04948             return 11; /* B_L1_L0_8x16 */
04949         else if( bits == 15 )
04950             return 22; /* B_8x8 */
04951 
04952         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
04953         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
04954 }
04955 
04956 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
04957     MpegEncContext * const s = &h->s;
04958     int mba_xy, mbb_xy;
04959     int ctx = 0;
04960 
04961     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
04962         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
04963         mba_xy = mb_xy - 1;
04964         if( (mb_y&1)
04965             && h->slice_table[mba_xy] == h->slice_num
04966             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
04967             mba_xy += s->mb_stride;
04968         if( MB_FIELD ){
04969             mbb_xy = mb_xy - s->mb_stride;
04970             if( !(mb_y&1)
04971                 && h->slice_table[mbb_xy] == h->slice_num
04972                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
04973                 mbb_xy -= s->mb_stride;
04974         }else
04975             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
04976     }else{
04977         int mb_xy = h->mb_xy;
04978         mba_xy = mb_xy - 1;
04979         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
04980     }
04981 
04982     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
04983         ctx++;
04984     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
04985         ctx++;
04986 
04987     if( h->slice_type_nos == FF_B_TYPE )
04988         ctx += 13;
04989     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
04990 }
04991 
04992 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
04993     int mode = 0;
04994 
04995     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
04996         return pred_mode;
04997 
04998     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
04999     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
05000     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
05001 
05002     if( mode >= pred_mode )
05003         return mode + 1;
05004     else
05005         return mode;
05006 }
05007 
05008 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
05009     const int mba_xy = h->left_mb_xy[0];
05010     const int mbb_xy = h->top_mb_xy;
05011 
05012     int ctx = 0;
05013 
05014     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
05015     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
05016         ctx++;
05017 
05018     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
05019         ctx++;
05020 
05021     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
05022         return 0;
05023 
05024     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
05025         return 1;
05026     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
05027         return 2;
05028     else
05029         return 3;
05030 }
05031 
05032 static int decode_cabac_mb_cbp_luma( H264Context *h) {
05033     int cbp_b, cbp_a, ctx, cbp = 0;
05034 
05035     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
05036     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
05037 
05038     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
05039     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
05040     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
05041     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
05042     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
05043     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
05044     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
05045     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
05046     return cbp;
05047 }
05048 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
05049     int ctx;
05050     int cbp_a, cbp_b;
05051 
05052     cbp_a = (h->left_cbp>>4)&0x03;
05053     cbp_b = (h-> top_cbp>>4)&0x03;
05054 
05055     ctx = 0;
05056     if( cbp_a > 0 ) ctx++;
05057     if( cbp_b > 0 ) ctx += 2;
05058     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
05059         return 0;
05060 
05061     ctx = 4;
05062     if( cbp_a == 2 ) ctx++;
05063     if( cbp_b == 2 ) ctx += 2;
05064     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
05065 }
05066 static int decode_cabac_mb_dqp( H264Context *h) {
05067     int   ctx= h->last_qscale_diff != 0;
05068     int   val = 0;
05069 
05070     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
05071         ctx= 2+(ctx>>1);
05072         val++;
05073         if(val > 102) //prevent infinite loop
05074             return INT_MIN;
05075     }
05076 
05077     if( val&0x01 )
05078         return   (val + 1)>>1 ;
05079     else
05080         return -((val + 1)>>1);
05081 }
05082 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
05083     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
05084         return 0;   /* 8x8 */
05085     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
05086         return 1;   /* 8x4 */
05087     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
05088         return 2;   /* 4x8 */
05089     return 3;       /* 4x4 */
05090 }
05091 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
05092     int type;
05093     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
05094         return 0;   /* B_Direct_8x8 */
05095     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
05096         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
05097     type = 3;
05098     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
05099         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
05100             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
05101         type += 4;
05102     }
05103     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
05104     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
05105     return type;
05106 }
05107 
05108 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
05109     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
05110 }
05111 
05112 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
05113     int refa = h->ref_cache[list][scan8[n] - 1];
05114     int refb = h->ref_cache[list][scan8[n] - 8];
05115     int ref  = 0;
05116     int ctx  = 0;
05117 
05118     if( h->slice_type_nos == FF_B_TYPE) {
05119         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
05120             ctx++;
05121         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
05122             ctx += 2;
05123     } else {
05124         if( refa > 0 )
05125             ctx++;
05126         if( refb > 0 )
05127             ctx += 2;
05128     }
05129 
05130     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
05131         ref++;
05132         ctx = (ctx>>2)+4;
05133         if(ref >= 32 /*h->ref_list[list]*/){
05134             return -1;
05135         }
05136     }
05137     return ref;
05138 }
05139 
05140 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
05141     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
05142                abs( h->mvd_cache[list][scan8[n] - 8][l] );
05143     int ctxbase = (l == 0) ? 40 : 47;
05144     int mvd;
05145     int ctx = (amvd>2) + (amvd>32);
05146 
05147     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
05148         return 0;
05149 
05150     mvd= 1;
05151     ctx= 3;
05152     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
05153         mvd++;
05154         if( ctx < 6 )
05155             ctx++;
05156     }
05157 
05158     if( mvd >= 9 ) {
05159         int k = 3;
05160         while( get_cabac_bypass( &h->cabac ) ) {
05161             mvd += 1 << k;
05162             k++;
05163             if(k>24){
05164                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
05165                 return INT_MIN;
05166             }
05167         }
05168         while( k-- ) {
05169             if( get_cabac_bypass( &h->cabac ) )
05170                 mvd += 1 << k;
05171         }
05172     }
05173     return get_cabac_bypass_sign( &h->cabac, -mvd );
05174 }
05175 
05176 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
05177     int nza, nzb;
05178     int ctx = 0;
05179 
05180     if( is_dc ) {
05181         if( cat == 0 ) {
05182             nza = h->left_cbp&0x100;
05183             nzb = h-> top_cbp&0x100;
05184         } else {
05185             nza = (h->left_cbp>>(6+idx))&0x01;
05186             nzb = (h-> top_cbp>>(6+idx))&0x01;
05187         }
05188     } else {
05189         assert(cat == 1 || cat == 2 || cat == 4);
05190         nza = h->non_zero_count_cache[scan8[idx] - 1];
05191         nzb = h->non_zero_count_cache[scan8[idx] - 8];
05192     }
05193 
05194     if( nza > 0 )
05195         ctx++;
05196 
05197     if( nzb > 0 )
05198         ctx += 2;
05199 
05200     return ctx + 4 * cat;
05201 }
05202 
05203 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
05204     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
05205     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
05206     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
05207     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
05208 };
05209 
05210 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
05211     static const int significant_coeff_flag_offset[2][6] = {
05212       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
05213       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
05214     };
05215     static const int last_coeff_flag_offset[2][6] = {
05216       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
05217       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
05218     };
05219     static const int coeff_abs_level_m1_offset[6] = {
05220         227+0, 227+10, 227+20, 227+30, 227+39, 426
05221     };
05222     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
05223       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
05224         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
05225         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
05226        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
05227       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
05228         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
05229         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
05230         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
05231     };
05232     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
05233      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
05234      * map node ctx => cabac ctx for level=1 */
05235     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
05236     /* map node ctx => cabac ctx for level>1 */
05237     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
05238     static const uint8_t coeff_abs_level_transition[2][8] = {
05239     /* update node ctx after decoding a level=1 */
05240         { 1, 2, 3, 3, 4, 5, 6, 7 },
05241     /* update node ctx after decoding a level>1 */
05242         { 4, 4, 4, 4, 5, 6, 7, 7 }
05243     };
05244 
05245     int index[64];
05246 
05247     int av_unused last;
05248     int coeff_count = 0;
05249     int node_ctx = 0;
05250 
05251     uint8_t *significant_coeff_ctx_base;
05252     uint8_t *last_coeff_ctx_base;
05253     uint8_t *abs_level_m1_ctx_base;
05254 
05255 #if !ARCH_X86
05256 #define CABAC_ON_STACK
05257 #endif
05258 #ifdef CABAC_ON_STACK
05259 #define CC &cc
05260     CABACContext cc;
05261     cc.range     = h->cabac.range;
05262     cc.low       = h->cabac.low;
05263     cc.bytestream= h->cabac.bytestream;
05264 #else
05265 #define CC &h->cabac
05266 #endif
05267 
05268 
05269     /* cat: 0-> DC 16x16  n = 0
05270      *      1-> AC 16x16  n = luma4x4idx
05271      *      2-> Luma4x4   n = luma4x4idx
05272      *      3-> DC Chroma n = iCbCr
05273      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
05274      *      5-> Luma8x8   n = 4 * luma8x8idx
05275      */
05276 
05277     /* read coded block flag */
05278     if( is_dc || cat != 5 ) {
05279         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
05280             if( !is_dc )
05281                 h->non_zero_count_cache[scan8[n]] = 0;
05282 
05283 #ifdef CABAC_ON_STACK
05284             h->cabac.range     = cc.range     ;
05285             h->cabac.low       = cc.low       ;
05286             h->cabac.bytestream= cc.bytestream;
05287 #endif
05288             return;
05289         }
05290     }
05291 
05292     significant_coeff_ctx_base = h->cabac_state
05293         + significant_coeff_flag_offset[MB_FIELD][cat];
05294     last_coeff_ctx_base = h->cabac_state
05295         + last_coeff_flag_offset[MB_FIELD][cat];
05296     abs_level_m1_ctx_base = h->cabac_state
05297         + coeff_abs_level_m1_offset[cat];
05298 
05299     if( !is_dc && cat == 5 ) {
05300 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
05301         for(last= 0; last < coefs; last++) { \
05302             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
05303             if( get_cabac( CC, sig_ctx )) { \
05304                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
05305                 index[coeff_count++] = last; \
05306                 if( get_cabac( CC, last_ctx ) ) { \
05307                     last= max_coeff; \
05308                     break; \
05309                 } \
05310             } \
05311         }\
05312         if( last == max_coeff -1 ) {\
05313             index[coeff_count++] = last;\
05314         }
05315         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
05316 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
05317         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
05318     } else {
05319         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
05320 #else
05321         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
05322     } else {
05323         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
05324 #endif
05325     }
05326     assert(coeff_count > 0);
05327 
05328     if( is_dc ) {
05329         if( cat == 0 )
05330             h->cbp_table[h->mb_xy] |= 0x100;
05331         else
05332             h->cbp_table[h->mb_xy] |= 0x40 << n;
05333     } else {
05334         if( cat == 5 )
05335             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
05336         else {
05337             assert( cat == 1 || cat == 2 || cat == 4 );
05338             h->non_zero_count_cache[scan8[n]] = coeff_count;
05339         }
05340     }
05341 
05342     do {
05343         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
05344 
05345         int j= scantable[index[--coeff_count]];
05346 
05347         if( get_cabac( CC, ctx ) == 0 ) {
05348             node_ctx = coeff_abs_level_transition[0][node_ctx];
05349             if( is_dc ) {
05350                 block[j] = get_cabac_bypass_sign( CC, -1);
05351             }else{
05352                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
05353             }
05354         } else {
05355             int coeff_abs = 2;
05356             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
05357             node_ctx = coeff_abs_level_transition[1][node_ctx];
05358 
05359             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
05360                 coeff_abs++;
05361             }
05362 
05363             if( coeff_abs >= 15 ) {
05364                 int j = 0;
05365                 while( get_cabac_bypass( CC ) ) {
05366                     j++;
05367                 }
05368 
05369                 coeff_abs=1;
05370                 while( j-- ) {
05371                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
05372                 }
05373                 coeff_abs+= 14;
05374             }
05375 
05376             if( is_dc ) {
05377                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
05378             }else{
05379                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
05380             }
05381         }
05382     } while( coeff_count );
05383 #ifdef CABAC_ON_STACK
05384             h->cabac.range     = cc.range     ;
05385             h->cabac.low       = cc.low       ;
05386             h->cabac.bytestream= cc.bytestream;
05387 #endif
05388 
05389 }
05390 
05391 #if !CONFIG_SMALL
05392 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05393     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
05394 }
05395 
05396 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05397     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
05398 }
05399 #endif
05400 
05401 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05402 #if CONFIG_SMALL
05403     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
05404 #else
05405     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
05406     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
05407 #endif
05408 }
05409 
05410 static inline void compute_mb_neighbors(H264Context *h)
05411 {
05412     MpegEncContext * const s = &h->s;
05413     const int mb_xy  = h->mb_xy;
05414     h->top_mb_xy     = mb_xy - s->mb_stride;
05415     h->left_mb_xy[0] = mb_xy - 1;
05416     if(FRAME_MBAFF){
05417         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
05418         const int top_pair_xy      = pair_xy     - s->mb_stride;
05419         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
05420         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
05421         const int curr_mb_field_flag = MB_FIELD;
05422         const int bottom = (s->mb_y & 1);
05423 
05424         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
05425             h->top_mb_xy -= s->mb_stride;
05426         }
05427         if (!left_mb_field_flag == curr_mb_field_flag) {
05428             h->left_mb_xy[0] = pair_xy - 1;
05429         }
05430     } else if (FIELD_PICTURE) {
05431         h->top_mb_xy -= s->mb_stride;
05432     }
05433     return;
05434 }
05435 
05440 static int decode_mb_cabac(H264Context *h) {
05441     MpegEncContext * const s = &h->s;
05442     int mb_xy;
05443     int mb_type, partition_count, cbp = 0;
05444     int dct8x8_allowed= h->pps.transform_8x8_mode;
05445 
05446     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
05447 
05448     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
05449     if( h->slice_type_nos != FF_I_TYPE ) {
05450         int skip;
05451         /* a skipped mb needs the aff flag from the following mb */
05452         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
05453             predict_field_decoding_flag(h);
05454         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
05455             skip = h->next_mb_skipped;
05456         else
05457             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
05458         /* read skip flags */
05459         if( skip ) {
05460             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
05461                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
05462                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
05463                 if(!h->next_mb_skipped)
05464                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
05465             }
05466 
05467             decode_mb_skip(h);
05468 
05469             h->cbp_table[mb_xy] = 0;
05470             h->chroma_pred_mode_table[mb_xy] = 0;
05471             h->last_qscale_diff = 0;
05472 
05473             return 0;
05474 
05475         }
05476     }
05477     if(FRAME_MBAFF){
05478         if( (s->mb_y&1) == 0 )
05479             h->mb_mbaff =
05480             h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
05481     }
05482 
05483     h->prev_mb_skipped = 0;
05484 
05485     compute_mb_neighbors(h);
05486 
05487     if( h->slice_type_nos == FF_B_TYPE ) {
05488         mb_type = decode_cabac_mb_type_b( h );
05489         if( mb_type < 23 ){
05490             partition_count= b_mb_type_info[mb_type].partition_count;
05491             mb_type=         b_mb_type_info[mb_type].type;
05492         }else{
05493             mb_type -= 23;
05494             goto decode_intra_mb;
05495         }
05496     } else if( h->slice_type_nos == FF_P_TYPE ) {
05497         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
05498             /* P-type */
05499             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
05500                 /* P_L0_D16x16, P_8x8 */
05501                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
05502             } else {
05503                 /* P_L0_D8x16, P_L0_D16x8 */
05504                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
05505             }
05506             partition_count= p_mb_type_info[mb_type].partition_count;
05507             mb_type=         p_mb_type_info[mb_type].type;
05508         } else {
05509             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
05510             goto decode_intra_mb;
05511         }
05512     } else {
05513         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
05514         if(h->slice_type == FF_SI_TYPE && mb_type)
05515             mb_type--;
05516         assert(h->slice_type_nos == FF_I_TYPE);
05517 decode_intra_mb:
05518         partition_count = 0;
05519         cbp= i_mb_type_info[mb_type].cbp;
05520         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
05521         mb_type= i_mb_type_info[mb_type].type;
05522     }
05523     if(MB_FIELD)
05524         mb_type |= MB_TYPE_INTERLACED;
05525 
05526     h->slice_table[ mb_xy ]= h->slice_num;
05527 
05528     if(IS_INTRA_PCM(mb_type)) {
05529         const uint8_t *ptr;
05530 
05531         // We assume these blocks are very rare so we do not optimize it.
05532         // FIXME The two following lines get the bitstream position in the cabac
05533         // decode, I think it should be done by a function in cabac.h (or cabac.c).
05534         ptr= h->cabac.bytestream;
05535         if(h->cabac.low&0x1) ptr--;
05536         if(CABAC_BITS==16){
05537             if(h->cabac.low&0x1FF) ptr--;
05538         }
05539 
05540         // The pixels are stored in the same order as levels in h->mb array.
05541         memcpy(h->mb, ptr, 256); ptr+=256;
05542         if(CHROMA){
05543             memcpy(h->mb+128, ptr, 128); ptr+=128;
05544         }
05545 
05546         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
05547 
05548         // All blocks are present
05549         h->cbp_table[mb_xy] = 0x1ef;
05550         h->chroma_pred_mode_table[mb_xy] = 0;
05551         // In deblocking, the quantizer is 0
05552         s->current_picture.qscale_table[mb_xy]= 0;
05553         // All coeffs are present
05554         memset(h->non_zero_count[mb_xy], 16, 16);
05555         s->current_picture.mb_type[mb_xy]= mb_type;
05556         h->last_qscale_diff = 0;
05557         return 0;
05558     }
05559 
05560     if(MB_MBAFF){
05561         h->ref_count[0] <<= 1;
05562         h->ref_count[1] <<= 1;
05563     }
05564 
05565     fill_caches(h, mb_type, 0);
05566 
05567     if( IS_INTRA( mb_type ) ) {
05568         int i, pred_mode;
05569         if( IS_INTRA4x4( mb_type ) ) {
05570             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
05571                 mb_type |= MB_TYPE_8x8DCT;
05572                 for( i = 0; i < 16; i+=4 ) {
05573                     int pred = pred_intra_mode( h, i );
05574                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
05575                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
05576                 }
05577             } else {
05578                 for( i = 0; i < 16; i++ ) {
05579                     int pred = pred_intra_mode( h, i );
05580                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
05581 
05582                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
05583                 }
05584             }
05585             write_back_intra_pred_mode(h);
05586             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
05587         } else {
05588             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
05589             if( h->intra16x16_pred_mode < 0 ) return -1;
05590         }
05591         if(CHROMA){
05592             h->chroma_pred_mode_table[mb_xy] =
05593             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
05594 
05595             pred_mode= check_intra_pred_mode( h, pred_mode );
05596             if( pred_mode < 0 ) return -1;
05597             h->chroma_pred_mode= pred_mode;
05598         }
05599     } else if( partition_count == 4 ) {
05600         int i, j, sub_partition_count[4], list, ref[2][4];
05601 
05602         if( h->slice_type_nos == FF_B_TYPE ) {
05603             for( i = 0; i < 4; i++ ) {
05604                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
05605                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
05606                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
05607             }
05608             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
05609                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
05610                 pred_direct_motion(h, &mb_type);
05611                 h->ref_cache[0][scan8[4]] =
05612                 h->ref_cache[1][scan8[4]] =
05613                 h->ref_cache[0][scan8[12]] =
05614                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
05615                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
05616                     for( i = 0; i < 4; i++ )
05617                         if( IS_DIRECT(h->sub_mb_type[i]) )
05618                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
05619                 }
05620             }
05621         } else {
05622             for( i = 0; i < 4; i++ ) {
05623                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
05624                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
05625                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
05626             }
05627         }
05628 
05629         for( list = 0; list < h->list_count; list++ ) {
05630                 for( i = 0; i < 4; i++ ) {
05631                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
05632                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
05633                         if( h->ref_count[list] > 1 ){
05634                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
05635                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
05636                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
05637                                 return -1;
05638                             }
05639                         }else
05640                             ref[list][i] = 0;
05641                     } else {
05642                         ref[list][i] = -1;
05643                     }
05644                                                        h->ref_cache[list][ scan8[4*i]+1 ]=
05645                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
05646                 }
05647         }
05648 
05649         if(dct8x8_allowed)
05650             dct8x8_allowed = get_dct8x8_allowed(h);
05651 
05652         for(list=0; list<h->list_count; list++){
05653             for(i=0; i<4; i++){
05654                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
05655                 if(IS_DIRECT(h->sub_mb_type[i])){
05656                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
05657                     continue;
05658                 }
05659 
05660                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
05661                     const int sub_mb_type= h->sub_mb_type[i];
05662                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
05663                     for(j=0; j<sub_partition_count[i]; j++){
05664                         int mpx, mpy;
05665                         int mx, my;
05666                         const int index= 4*i + block_width*j;
05667                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
05668                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
05669                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
05670 
05671                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
05672                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
05673                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05674 
05675                         if(IS_SUB_8X8(sub_mb_type)){
05676                             mv_cache[ 1 ][0]=
05677                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
05678                             mv_cache[ 1 ][1]=
05679                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
05680 
05681                             mvd_cache[ 1 ][0]=
05682                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
05683                             mvd_cache[ 1 ][1]=
05684                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
05685                         }else if(IS_SUB_8X4(sub_mb_type)){
05686                             mv_cache[ 1 ][0]= mx;
05687                             mv_cache[ 1 ][1]= my;
05688 
05689                             mvd_cache[ 1 ][0]= mx - mpx;
05690                             mvd_cache[ 1 ][1]= my - mpy;
05691                         }else if(IS_SUB_4X8(sub_mb_type)){
05692                             mv_cache[ 8 ][0]= mx;
05693                             mv_cache[ 8 ][1]= my;
05694 
05695                             mvd_cache[ 8 ][0]= mx - mpx;
05696                             mvd_cache[ 8 ][1]= my - mpy;
05697                         }
05698                         mv_cache[ 0 ][0]= mx;
05699                         mv_cache[ 0 ][1]= my;
05700 
05701                         mvd_cache[ 0 ][0]= mx - mpx;
05702                         mvd_cache[ 0 ][1]= my - mpy;
05703                     }
05704                 }else{
05705                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
05706                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
05707                     p[0] = p[1] = p[8] = p[9] = 0;
05708                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
05709                 }
05710             }
05711         }
05712     } else if( IS_DIRECT(mb_type) ) {
05713         pred_direct_motion(h, &mb_type);
05714         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
05715         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
05716         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
05717     } else {
05718         int list, mx, my, i, mpx, mpy;
05719         if(IS_16X16(mb_type)){
05720             for(list=0; list<h->list_count; list++){
05721                 if(IS_DIR(mb_type, 0, list)){
05722                     int ref;
05723                     if(h->ref_count[list] > 1){
05724                         ref= decode_cabac_mb_ref(h, list, 0);
05725                         if(ref >= (unsigned)h->ref_count[list]){
05726                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05727                             return -1;
05728                         }
05729                     }else
05730                         ref=0;
05731                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
05732                 }else
05733                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
05734             }
05735             for(list=0; list<h->list_count; list++){
05736                 if(IS_DIR(mb_type, 0, list)){
05737                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
05738 
05739                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
05740                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
05741                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05742 
05743                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
05744                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
05745                 }else
05746                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
05747             }
05748         }
05749         else if(IS_16X8(mb_type)){
05750             for(list=0; list<h->list_count; list++){
05751                     for(i=0; i<2; i++){
05752                         if(IS_DIR(mb_type, i, list)){
05753                             int ref;
05754                             if(h->ref_count[list] > 1){
05755                                 ref= decode_cabac_mb_ref( h, list, 8*i );
05756                                 if(ref >= (unsigned)h->ref_count[list]){
05757                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05758                                     return -1;
05759                                 }
05760                             }else
05761                                 ref=0;
05762                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
05763                         }else
05764                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
05765                     }
05766             }
05767             for(list=0; list<h->list_count; list++){
05768                 for(i=0; i<2; i++){
05769                     if(IS_DIR(mb_type, i, list)){
05770                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
05771                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
05772                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
05773                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05774 
05775                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
05776                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
05777                     }else{
05778                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
05779                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
05780                     }
05781                 }
05782             }
05783         }else{
05784             assert(IS_8X16(mb_type));
05785             for(list=0; list<h->list_count; list++){
05786                     for(i=0; i<2; i++){
05787                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
05788                             int ref;
05789                             if(h->ref_count[list] > 1){
05790                                 ref= decode_cabac_mb_ref( h, list, 4*i );
05791                                 if(ref >= (unsigned)h->ref_count[list]){
05792                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05793                                     return -1;
05794                                 }
05795                             }else
05796                                 ref=0;
05797                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
05798                         }else
05799                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
05800                     }
05801             }
05802             for(list=0; list<h->list_count; list++){
05803                 for(i=0; i<2; i++){
05804                     if(IS_DIR(mb_type, i, list)){
05805                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
05806                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
05807                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
05808 
05809                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05810                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
05811                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
05812                     }else{
05813                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
05814                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
05815                     }
05816                 }
05817             }
05818         }
05819     }
05820 
05821    if( IS_INTER( mb_type ) ) {
05822         h->chroma_pred_mode_table[mb_xy] = 0;
05823         write_back_motion( h, mb_type );
05824    }
05825 
05826     if( !IS_INTRA16x16( mb_type ) ) {
05827         cbp  = decode_cabac_mb_cbp_luma( h );
05828         if(CHROMA)
05829             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
05830     }
05831 
05832     h->cbp_table[mb_xy] = h->cbp = cbp;
05833 
05834     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
05835         if( decode_cabac_mb_transform_size( h ) )
05836             mb_type |= MB_TYPE_8x8DCT;
05837     }
05838     s->current_picture.mb_type[mb_xy]= mb_type;
05839 
05840     if( cbp || IS_INTRA16x16( mb_type ) ) {
05841         const uint8_t *scan, *scan8x8, *dc_scan;
05842         const uint32_t *qmul;
05843         int dqp;
05844 
05845         if(IS_INTERLACED(mb_type)){
05846             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
05847             scan= s->qscale ? h->field_scan : h->field_scan_q0;
05848             dc_scan= luma_dc_field_scan;
05849         }else{
05850             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
05851             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
05852             dc_scan= luma_dc_zigzag_scan;
05853         }
05854 
05855         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
05856         if( dqp == INT_MIN ){
05857             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
05858             return -1;
05859         }
05860         s->qscale += dqp;
05861         if(((unsigned)s->qscale) > 51){
05862             if(s->qscale<0) s->qscale+= 52;
05863             else            s->qscale-= 52;
05864         }
05865         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
05866         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
05867 
05868         if( IS_INTRA16x16( mb_type ) ) {
05869             int i;
05870             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
05871             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
05872 
05873             if( cbp&15 ) {
05874                 qmul = h->dequant4_coeff[0][s->qscale];
05875                 for( i = 0; i < 16; i++ ) {
05876                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
05877                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
05878                 }
05879             } else {
05880                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
05881             }
05882         } else {
05883             int i8x8, i4x4;
05884             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
05885                 if( cbp & (1<<i8x8) ) {
05886                     if( IS_8x8DCT(mb_type) ) {
05887                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
05888                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
05889                     } else {
05890                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
05891                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
05892                             const int index = 4*i8x8 + i4x4;
05893                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
05894 //START_TIMER
05895                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
05896 //STOP_TIMER("decode_residual")
05897                         }
05898                     }
05899                 } else {
05900                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
05901                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
05902                 }
05903             }
05904         }
05905 
05906         if( cbp&0x30 ){
05907             int c;
05908             for( c = 0; c < 2; c++ ) {
05909                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
05910                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
05911             }
05912         }
05913 
05914         if( cbp&0x20 ) {
05915             int c, i;
05916             for( c = 0; c < 2; c++ ) {
05917                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
05918                 for( i = 0; i < 4; i++ ) {
05919                     const int index = 16 + 4 * c + i;
05920                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
05921                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
05922                 }
05923             }
05924         } else {
05925             uint8_t * const nnz= &h->non_zero_count_cache[0];
05926             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
05927             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
05928         }
05929     } else {
05930         uint8_t * const nnz= &h->non_zero_count_cache[0];
05931         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
05932         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
05933         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
05934         h->last_qscale_diff = 0;
05935     }
05936 
05937     s->current_picture.qscale_table[mb_xy]= s->qscale;
05938     write_back_non_zero_count(h);
05939 
05940     if(MB_MBAFF){
05941         h->ref_count[0] >>= 1;
05942         h->ref_count[1] >>= 1;
05943     }
05944 
05945     return 0;
05946 }
05947 
05948 
05949 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
05950     const int index_a = qp + h->slice_alpha_c0_offset;
05951     const int alpha = (alpha_table+52)[index_a];
05952     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
05953 
05954     if( bS[0] < 4 ) {
05955         int8_t tc[4];
05956         tc[0] = (tc0_table+52)[index_a][bS[0]];
05957         tc[1] = (tc0_table+52)[index_a][bS[1]];
05958         tc[2] = (tc0_table+52)[index_a][bS[2]];
05959         tc[3] = (tc0_table+52)[index_a][bS[3]];
05960         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
05961     } else {
05962         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
05963     }
05964 }
05965 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
05966     const int index_a = qp + h->slice_alpha_c0_offset;
05967     const int alpha = (alpha_table+52)[index_a];
05968     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
05969 
05970     if( bS[0] < 4 ) {
05971         int8_t tc[4];
05972         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
05973         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
05974         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
05975         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
05976         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
05977     } else {
05978         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
05979     }
05980 }
05981 
05982 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
05983     int i;
05984     for( i = 0; i < 16; i++, pix += stride) {
05985         int index_a;
05986         int alpha;
05987         int beta;
05988 
05989         int qp_index;
05990         int bS_index = (i >> 1);
05991         if (!MB_FIELD) {
05992             bS_index &= ~1;
05993             bS_index |= (i & 1);
05994         }
05995 
05996         if( bS[bS_index] == 0 ) {
05997             continue;
05998         }
05999 
06000         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
06001         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
06002         alpha = (alpha_table+52)[index_a];
06003         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
06004 
06005         if( bS[bS_index] < 4 ) {
06006             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
06007             const int p0 = pix[-1];
06008             const int p1 = pix[-2];
06009             const int p2 = pix[-3];
06010             const int q0 = pix[0];
06011             const int q1 = pix[1];
06012             const int q2 = pix[2];
06013 
06014             if( FFABS( p0 - q0 ) < alpha &&
06015                 FFABS( p1 - p0 ) < beta &&
06016                 FFABS( q1 - q0 ) < beta ) {
06017                 int tc = tc0;
06018                 int i_delta;
06019 
06020                 if( FFABS( p2 - p0 ) < beta ) {
06021                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
06022                     tc++;
06023                 }
06024                 if( FFABS( q2 - q0 ) < beta ) {
06025                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
06026                     tc++;
06027                 }
06028 
06029                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
06030                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
06031                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
06032                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
06033             }
06034         }else{
06035             const int p0 = pix[-1];
06036             const int p1 = pix[-2];
06037             const int p2 = pix[-3];
06038 
06039             const int q0 = pix[0];
06040             const int q1 = pix[1];
06041             const int q2 = pix[2];
06042 
06043             if( FFABS( p0 - q0 ) < alpha &&
06044                 FFABS( p1 - p0 ) < beta &&
06045                 FFABS( q1 - q0 ) < beta ) {
06046 
06047                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
06048                     if( FFABS( p2 - p0 ) < beta)
06049                     {
06050                         const int p3 = pix[-4];
06051                         /* p0', p1', p2' */
06052                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
06053                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
06054                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
06055                     } else {
06056                         /* p0' */
06057                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
06058                     }
06059                     if( FFABS( q2 - q0 ) < beta)
06060                     {
06061                         const int q3 = pix[3];
06062                         /* q0', q1', q2' */
06063                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
06064                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
06065                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
06066                     } else {
06067                         /* q0' */
06068                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
06069                     }
06070                 }else{
06071                     /* p0', q0' */
06072                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
06073                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
06074                 }
06075                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
06076             }
06077         }
06078     }
06079 }
06080 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
06081     int i;
06082     for( i = 0; i < 8; i++, pix += stride) {
06083         int index_a;
06084         int alpha;
06085         int beta;
06086 
06087         int qp_index;
06088         int bS_index = i;
06089 
06090         if( bS[bS_index] == 0 ) {
06091             continue;
06092         }
06093 
06094         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
06095         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
06096         alpha = (alpha_table+52)[index_a];
06097         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
06098 
06099         if( bS[bS_index] < 4 ) {
06100             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
06101             const int p0 = pix[-1];
06102             const int p1 = pix[-2];
06103             const int q0 = pix[0];
06104             const int q1 = pix[1];
06105 
06106             if( FFABS( p0 - q0 ) < alpha &&
06107                 FFABS( p1 - p0 ) < beta &&
06108                 FFABS( q1 - q0 ) < beta ) {
06109                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
06110 
06111                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
06112                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
06113                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
06114             }
06115         }else{
06116             const int p0 = pix[-1];
06117             const int p1 = pix[-2];
06118             const int q0 = pix[0];
06119             const int q1 = pix[1];
06120 
06121             if( FFABS( p0 - q0 ) < alpha &&
06122                 FFABS( p1 - p0 ) < beta &&
06123                 FFABS( q1 - q0 ) < beta ) {
06124 
06125                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
06126                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
06127                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
06128             }
06129         }
06130     }
06131 }
06132 
06133 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
06134     const int index_a = qp + h->slice_alpha_c0_offset;
06135     const int alpha = (alpha_table+52)[index_a];
06136     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
06137 
06138     if( bS[0] < 4 ) {
06139         int8_t tc[4];
06140         tc[0] = (tc0_table+52)[index_a][bS[0]];
06141         tc[1] = (tc0_table+52)[index_a][bS[1]];
06142         tc[2] = (tc0_table+52)[index_a][bS[2]];
06143         tc[3] = (tc0_table+52)[index_a][bS[3]];
06144         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
06145     } else {
06146         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
06147     }
06148 }
06149 
06150 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
06151     const int index_a = qp + h->slice_alpha_c0_offset;
06152     const int alpha = (alpha_table+52)[index_a];
06153     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
06154 
06155     if( bS[0] < 4 ) {
06156         int8_t tc[4];
06157         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
06158         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
06159         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
06160         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
06161         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
06162     } else {
06163         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
06164     }
06165 }
06166 
06167 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
06168     MpegEncContext * const s = &h->s;
06169     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
06170     int mb_xy, mb_type;
06171     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
06172 
06173     mb_xy = h->mb_xy;
06174 
06175     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
06176         !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
06177        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
06178                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
06179         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
06180         return;
06181     }
06182     assert(!FRAME_MBAFF);
06183 
06184     mb_type = s->current_picture.mb_type[mb_xy];
06185     qp = s->current_picture.qscale_table[mb_xy];
06186     qp0 = s->current_picture.qscale_table[mb_xy-1];
06187     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
06188     qpc = get_chroma_qp( h, 0, qp );
06189     qpc0 = get_chroma_qp( h, 0, qp0 );
06190     qpc1 = get_chroma_qp( h, 0, qp1 );
06191     qp0 = (qp + qp0 + 1) >> 1;
06192     qp1 = (qp + qp1 + 1) >> 1;
06193     qpc0 = (qpc + qpc0 + 1) >> 1;
06194     qpc1 = (qpc + qpc1 + 1) >> 1;
06195     qp_thresh = 15 - h->slice_alpha_c0_offset;
06196     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
06197        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
06198         return;
06199 
06200     if( IS_INTRA(mb_type) ) {
06201         int16_t bS4[4] = {4,4,4,4};
06202         int16_t bS3[4] = {3,3,3,3};
06203         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
06204         if( IS_8x8DCT(mb_type) ) {
06205             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
06206             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
06207             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
06208             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
06209         } else {
06210             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
06211             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
06212             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
06213             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
06214             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
06215             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
06216             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
06217             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
06218         }
06219         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
06220         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
06221         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
06222         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
06223         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
06224         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
06225         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
06226         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
06227         return;
06228     } else {
06229         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
06230         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
06231         int edges;
06232         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
06233             edges = 4;
06234             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
06235         } else {
06236             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
06237                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
06238             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
06239                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
06240                              ? 3 : 0;
06241             int step = IS_8x8DCT(mb_type) ? 2 : 1;
06242             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
06243             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
06244                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
06245         }
06246         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
06247             bSv[0][0] = 0x0004000400040004ULL;
06248         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
06249             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
06250 
06251 #define FILTER(hv,dir,edge)\
06252         if(bSv[dir][edge]) {\
06253             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
06254             if(!(edge&1)) {\
06255                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
06256                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
06257             }\
06258         }
06259         if( edges == 1 ) {
06260             FILTER(v,0,0);
06261             FILTER(h,1,0);
06262         } else if( IS_8x8DCT(mb_type) ) {
06263             FILTER(v,0,0);
06264             FILTER(v,0,2);
06265             FILTER(h,1,0);
06266             FILTER(h,1,2);
06267         } else {
06268             FILTER(v,0,0);
06269             FILTER(v,0,1);
06270             FILTER(v,0,2);
06271             FILTER(v,0,3);
06272             FILTER(h,1,0);
06273             FILTER(h,1,1);
06274             FILTER(h,1,2);
06275             FILTER(h,1,3);
06276         }
06277 #undef FILTER
06278     }
06279 }
06280 
06281 
06282 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
06283     MpegEncContext * const s = &h->s;
06284     int edge;
06285     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
06286     const int mbm_type = s->current_picture.mb_type[mbm_xy];
06287     int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
06288     int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
06289     int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
06290 
06291     const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
06292                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
06293     // how often to recheck mv-based bS when iterating between edges
06294     const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
06295                           (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
06296     // how often to recheck mv-based bS when iterating along each edge
06297     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
06298 
06299     if (first_vertical_edge_done) {
06300         start = 1;
06301     }
06302 
06303     if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
06304         start = 1;
06305 
06306     if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
06307         && !IS_INTERLACED(mb_type)
06308         && IS_INTERLACED(mbm_type)
06309         ) {
06310         // This is a special case in the norm where the filtering must
06311         // be done twice (one each of the field) even if we are in a
06312         // frame macroblock.
06313         //
06314         static const int nnz_idx[4] = {4,5,6,3};
06315         unsigned int tmp_linesize   = 2 *   linesize;
06316         unsigned int tmp_uvlinesize = 2 * uvlinesize;
06317         int mbn_xy = mb_xy - 2 * s->mb_stride;
06318         int qp;
06319         int i, j;
06320         int16_t bS[4];
06321 
06322         for(j=0; j<2; j++, mbn_xy += s->mb_stride){
06323             if( IS_INTRA(mb_type) ||
06324                 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
06325                 bS[0] = bS[1] = bS[2] = bS[3] = 3;
06326             } else {
06327                 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
06328                 for( i = 0; i < 4; i++ ) {
06329                     if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
06330                         mbn_nnz[nnz_idx[i]] != 0 )
06331                         bS[i] = 2;
06332                     else
06333                         bS[i] = 1;
06334                 }
06335             }
06336             // Do not use s->qscale as luma quantizer because it has not the same
06337             // value in IPCM macroblocks.
06338             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
06339             tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
06340             { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
06341             filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
06342             filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
06343                               ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06344             filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
06345                               ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06346         }
06347 
06348         start = 1;
06349     }
06350 
06351     /* Calculate bS */
06352     for( edge = start; edge < edges; edge++ ) {
06353         /* mbn_xy: neighbor macroblock */
06354         const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
06355         const int mbn_type = s->current_picture.mb_type[mbn_xy];
06356         int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
06357         int16_t bS[4];
06358         int qp;
06359 
06360         if( (edge&1) && IS_8x8DCT(mb_type) )
06361             continue;
06362 
06363         if( IS_INTRA(mb_type) ||
06364             IS_INTRA(mbn_type) ) {
06365             int value;
06366             if (edge == 0) {
06367                 if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
06368                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
06369                 ) {
06370                     value = 4;
06371                 } else {
06372                     value = 3;
06373                 }
06374             } else {
06375                 value = 3;
06376             }
06377             bS[0] = bS[1] = bS[2] = bS[3] = value;
06378         } else {
06379             int i, l;
06380             int mv_done;
06381 
06382             if( edge & mask_edge ) {
06383                 bS[0] = bS[1] = bS[2] = bS[3] = 0;
06384                 mv_done = 1;
06385             }
06386             else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
06387                 bS[0] = bS[1] = bS[2] = bS[3] = 1;
06388                 mv_done = 1;
06389             }
06390             else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
06391                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
06392                 int bn_idx= b_idx - (dir ? 8:1);
06393                 int v = 0;
06394 
06395                 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
06396                     v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
06397                          FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
06398                          FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
06399                 }
06400 
06401                 if(h->slice_type_nos == FF_B_TYPE && v){
06402                     v=0;
06403                     for( l = 0; !v && l < 2; l++ ) {
06404                         int ln= 1-l;
06405                         v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
06406                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
06407                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
06408                     }
06409                 }
06410 
06411                 bS[0] = bS[1] = bS[2] = bS[3] = v;
06412                 mv_done = 1;
06413             }
06414             else
06415                 mv_done = 0;
06416 
06417             for( i = 0; i < 4; i++ ) {
06418                 int x = dir == 0 ? edge : i;
06419                 int y = dir == 0 ? i    : edge;
06420                 int b_idx= 8 + 4 + x + 8*y;
06421                 int bn_idx= b_idx - (dir ? 8:1);
06422 
06423                 if( h->non_zero_count_cache[b_idx] |
06424                     h->non_zero_count_cache[bn_idx] ) {
06425                     bS[i] = 2;
06426                 }
06427                 else if(!mv_done)
06428                 {
06429                     bS[i] = 0;
06430                     for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
06431                         if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
06432                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
06433                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
06434                             bS[i] = 1;
06435                             break;
06436                         }
06437                     }
06438 
06439                     if(h->slice_type_nos == FF_B_TYPE && bS[i]){
06440                         bS[i] = 0;
06441                         for( l = 0; l < 2; l++ ) {
06442                             int ln= 1-l;
06443                             if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
06444                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
06445                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
06446                                 bS[i] = 1;
06447                                 break;
06448                             }
06449                         }
06450                     }
06451                 }
06452             }
06453 
06454             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
06455                 continue;
06456         }
06457 
06458         /* Filter edge */
06459         // Do not use s->qscale as luma quantizer because it has not the same
0