00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include "libavutil/imgutils.h"
00031 #include "libavutil/opt.h"
00032 #include "internal.h"
00033 #include "dsputil.h"
00034 #include "avcodec.h"
00035 #include "mpegvideo.h"
00036 #include "h264.h"
00037 #include "h264data.h"
00038 #include "h264_mvpred.h"
00039 #include "golomb.h"
00040 #include "mathops.h"
00041 #include "rectangle.h"
00042 #include "thread.h"
00043 #include "vdpau_internal.h"
00044 #include "libavutil/avassert.h"
00045
00046 #include "cabac.h"
00047
00048
00049 #include <assert.h>
00050
/* Lookup table for qp % 6, one entry per QP value (0..QP_MAX_NUM); avoids a
 * runtime modulo in the dequant table initialization below. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00054
/* Lookup table for qp / 6, companion to rem6[] above; used to derive the
 * dequant shift for each QP. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00058
/* Candidate pixel formats for hardware-accelerated decoding of full-range
 * (JPEG-range) 4:2:0 H.264, tried in order; PIX_FMT_NONE terminates the list. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_VDA_VLD,
    PIX_FMT_YUVJ420P,  /* software fallback */
    PIX_FMT_NONE
};
00066
/**
 * Check the cached intra4x4 prediction modes against neighbour availability.
 *
 * For each 4x4 block along the top row (and, per-row, the left column) of the
 * current macroblock whose neighbour samples are unavailable, the cached mode
 * is remapped through the top[]/left[] tables: a negative entry means the mode
 * is illegal without that neighbour (error), a nonzero entry is the DC-variant
 * substitute written back into the cache.
 *
 * @return 0 on success, -1 if a requested mode needs an unavailable neighbour.
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* indexed by the cached pred mode; -1 = illegal, >0 = substitute mode,
     * 0 = mode is fine as-is */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){ /* samples above the MB missing */
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                /* replace with a DC variant that ignores the missing samples */
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){
        /* one availability bit per 4x4 row of the left edge */
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
}
00105
/**
 * Validate an intra prediction mode (16x16 luma or 8x8 chroma) against the
 * availability of neighbouring samples, substituting DC fallback modes.
 *
 * @param mode      requested mode; compared as unsigned against 6U so that
 *                  negative values are rejected by the same test
 * @param is_chroma nonzero when validating a chroma prediction mode
 * @return the (possibly substituted) mode, or -1 on error.
 */
static int check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* remap tables: -1 = illegal without that neighbour, other values are
     * the substitute mode to use instead */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(is_chroma && (h->left_samples_available&0x8080)){
            /* only half of the left edge is present: pick the special partial
             * DC mode matching which half is available */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00137
00141 int ff_h264_check_intra16x16_pred_mode(H264Context *h, int mode)
00142 {
00143 return check_intra_pred_mode(h, mode, 0);
00144 }
00145
00149 int ff_h264_check_intra_chroma_pred_mode(H264Context *h, int mode)
00150 {
00151 return check_intra_pred_mode(h, mode, 1);
00152 }
00153
00154
00155 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
00156 int i, si, di;
00157 uint8_t *dst;
00158 int bufidx;
00159
00160
00161 h->nal_ref_idc= src[0]>>5;
00162 h->nal_unit_type= src[0]&0x1F;
00163
00164 src++; length--;
00165
00166 #if HAVE_FAST_UNALIGNED
00167 # if HAVE_FAST_64BIT
00168 # define RS 7
00169 for(i=0; i+1<length; i+=9){
00170 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
00171 # else
00172 # define RS 3
00173 for(i=0; i+1<length; i+=5){
00174 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
00175 # endif
00176 continue;
00177 if(i>0 && !src[i]) i--;
00178 while(src[i]) i++;
00179 #else
00180 # define RS 0
00181 for(i=0; i+1<length; i+=2){
00182 if(src[i]) continue;
00183 if(i>0 && src[i-1]==0) i--;
00184 #endif
00185 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
00186 if(src[i+2]!=3){
00187
00188 length=i;
00189 }
00190 break;
00191 }
00192 i-= RS;
00193 }
00194
00195 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
00196 si=h->rbsp_buffer_size[bufidx];
00197 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00198 dst= h->rbsp_buffer[bufidx];
00199 if(si != h->rbsp_buffer_size[bufidx])
00200 memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00201
00202 if (dst == NULL){
00203 return NULL;
00204 }
00205
00206 if(i>=length-1){
00207 *dst_length= length;
00208 *consumed= length+1;
00209 if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
00210 return src;
00211 }else{
00212 memcpy(dst, src, length);
00213 return dst;
00214 }
00215 }
00216
00217
00218 memcpy(dst, src, i);
00219 si=di=i;
00220 while(si+2<length){
00221
00222 if(src[si+2]>3){
00223 dst[di++]= src[si++];
00224 dst[di++]= src[si++];
00225 }else if(src[si]==0 && src[si+1]==0){
00226 if(src[si+2]==3){
00227 dst[di++]= 0;
00228 dst[di++]= 0;
00229 si+=3;
00230 continue;
00231 }else
00232 goto nsc;
00233 }
00234
00235 dst[di++]= src[si++];
00236 }
00237 while(si<length)
00238 dst[di++]= src[si++];
00239 nsc:
00240
00241 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
00242
00243 *dst_length= di;
00244 *consumed= si + 1;
00245
00246 return dst;
00247 }
00248
00253 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00254 int v= *src;
00255 int r;
00256
00257 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00258
00259 for(r=1; r<9; r++){
00260 if(v&1) return r;
00261 v>>=1;
00262 }
00263 return 0;
00264 }
00265
00266 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00267 int y_offset, int list){
00268 int raw_my= h->mv_cache[list][ scan8[n] ][1];
00269 int filter_height= (raw_my&3) ? 2 : 0;
00270 int full_my= (raw_my>>2) + y_offset;
00271 int top = full_my - filter_height, bottom = full_my + height + filter_height;
00272
00273 return FFMAX(abs(top), bottom);
00274 }
00275
/* Record, per reference picture, the lowest row that partition n's motion
 * compensation will read (for frame-thread synchronization). refs[list][ref]
 * holds the max row needed from that reference (-1 = unused); nrefs[list]
 * counts distinct references in use. */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                               int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    int my;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    if(list0){
        int ref_n = h->ref_cache[0][ scan8[n] ];
        Picture *ref= &h->ref_list[0][ref_n];

        /* Error resilience puts the current picture in the ref list.
         * Don't try to wait on these as it will cause a deadlock.
         * Fields can wait on each other, though. */
        if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
            (ref->f.reference & 3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0) nrefs[0] += 1; /* first use of this ref */
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if(list1){
        int ref_n = h->ref_cache[1][ scan8[n] ];
        Picture *ref= &h->ref_list[1][ref_n];

        if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
            (ref->f.reference & 3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0) nrefs[1] += 1;
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}
00310
/**
 * Wait until all reference frames are available for MC operations.
 *
 * Walks the current macroblock's partition tree, collects the lowest row
 * needed from each reference picture (refs[][]/nrefs[] via
 * get_lowest_part_y()), then blocks on ff_thread_await_progress() until each
 * reference has decoded far enough. Handles frame/field mismatches between
 * the current picture and its references by translating row indices.
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));

    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* wait on each referenced picture; stop early once all tracked refs
     * for a list have been handled (nrefs hits 0) */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->f.reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF; /* MBAFF MBs cover two rows */
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ /* frame referencing two fields */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ /* field referencing one field of a frame */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00400
#if 0
/**
 * DCT-transform the 16 luma DC values (forward 4x4 Hadamard-style pass).
 * NOTE(review): dead code, compiled out via #if 0 and kept for reference
 * only — it references a `stride` that is not defined in this scope.
 */
static void h264_luma_dc_dct_c(DCTELEM *block){
//FIXME try int temp instead of block
    int i;
    int temp[16];
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00440
#undef xStride
#undef stride

#if 0
/* 2x2 chroma DC forward transform. NOTE(review): dead code, compiled out
 * via #if 0 and kept for reference only. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00466
/**
 * Motion-compensate one partition in one direction (one reference list):
 * quarter-pel luma interpolation plus chroma, with edge emulation when the
 * motion vector points (partly) outside the reference picture.
 *
 * @param square      nonzero if the partition is square (single qpix_op call)
 * @param delta       offset of the second half for non-square partitions
 * @param pixel_shift 1 for 16-bit (high bit depth) pixels, else 0
 * @param chroma_idc  1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4
 */
static av_always_inline void
mc_dir_part(H264Context *h, Picture *pic, int n, int square,
            int height, int delta, int list,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int src_x_offset, int src_y_offset,
            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
            int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);  /* qpel fraction selects the MC function */
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->f.data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;
    int ysh;

    /* subpel positions need 3 extra interpolation rows/cols of margin */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        /* MV reaches outside the padded picture: interpolate from an
         * edge-replicated copy in the emu buffer instead */
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma_idc == 3 /* yuv444 */){
        /* 4:4:4: chroma planes have luma resolution, reuse the luma qpel MC */
        src_cb = pic->f.data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->f.data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    /* vertical chroma subsample shift: 3 for 4:2:0, 2 for 4:2:2 */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
}
00564
/**
 * Standard (unweighted) motion compensation for one partition: predict from
 * list0 and/or list1; when both are used, the second prediction is averaged
 * in by switching from the put to the avg operator set.
 */
static av_always_inline void
mc_part_std(H264Context *h, int n, int square, int height, int delta,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int x_offset, int y_offset,
            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
            int list0, int list1, int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance dest pointers to this partition; chroma stride/subsampling
     * depends on chroma_idc (3=4:4:4, 2=4:2:2, else 4:2:0) */
    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op, pixel_shift, chroma_idc);

        /* second prediction (if any) is averaged into the first */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op, pixel_shift, chroma_idc);
    }
}
00608
/**
 * Weighted motion compensation for one partition: explicit weighted
 * prediction (use_weight == 1) or implicit bi-prediction weights
 * (use_weight == 2). Bi-predicted blocks render list1 into a scratchpad and
 * blend; uni-predicted blocks apply the per-reference weight in place.
 */
static av_always_inline void
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                 int x_offset, int y_offset,
                 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                 int list0, int list1, int pixel_shift, int chroma_idc){
    MpegEncContext * const s = &h->s;
    int chroma_height;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        /* chroma is full resolution: use the luma weight ops on it */
        chroma_height = height;
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        chroma_height = height >> 1;
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list0 directly into dest, list1 into the scratchpad; then blend */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if(h->use_weight == 2){
            /* implicit weights: complementary pair summing to 64 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg( dest_y, tmp_y, h-> mb_linesize,
                             height,        5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        }else{
            /* explicit per-reference weights and offsets */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* uni-prediction: predict then weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00695
/**
 * Dispatch partition MC to the weighted or standard path.
 *
 * The weighted path is taken for explicit weighted prediction
 * (use_weight == 1), or for implicit weights (use_weight == 2) on
 * bi-predicted blocks whose weight differs from 32 — a weight of exactly 32
 * is a plain average, which the cheaper standard avg path already computes.
 */
static av_always_inline void
mc_part(H264Context *h, int n, int square, int height, int delta,
        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
        int x_offset, int y_offset,
        qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
        qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
        h264_weight_func *weight_op, h264_biweight_func *weight_avg,
        int list0, int list1, int pixel_shift, int chroma_idc)
{
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[1], weight_avg[0],
                         weight_avg[1], list0, list1, pixel_shift, chroma_idc);
    else
        mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
                    chroma_avg, list0, list1, pixel_shift, chroma_idc);
}
00717
/* Prefetch the reference-picture data this MB's first MV will read, to warm
 * the cache ahead of the actual motion compensation. Best-effort only. */
static av_always_inline void
prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
{
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src = h->ref_list[list][refn].f.data;
        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if (chroma_idc == 3 /* yuv444 */) {
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            /* subsampled chroma: both planes are usually adjacent, prefetch
             * them with one stride */
            off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}
00740
/**
 * Motion-compensate the whole (inter) macroblock: walk its partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and call mc_part() for
 * each piece with the qpel/chroma function set matching the block size.
 * With frame threading, first waits for the referenced rows to be decoded.
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma_idc);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma_idc);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
00842
00843 static av_always_inline void
00844 hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00845 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00846 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00847 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00848 int pixel_shift)
00849 {
00850 hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00851 qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
00852 }
00853
00854 static av_always_inline void
00855 hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00856 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00857 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00858 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00859 int pixel_shift)
00860 {
00861 hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00862 qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
00863 }
00864
/**
 * Free the per-context decoding tables and (optionally) the RBSP buffers.
 *
 * @param free_rbsp nonzero to also release the NAL unescape buffers; keeping
 *                  them avoids reallocation when the context is reused.
 *
 * Note: thread_context[0] aliases h itself, hence the `if (i)` guard before
 * freeing the context pointer.
 */
static void free_tables(H264Context *h, int free_rbsp){
    int i;
    H264Context *hx;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL; /* points into slice_table_base, just freed */
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        if (free_rbsp){
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        if (i) av_freep(&h->thread_context[i]); /* slot 0 is h itself */
    }
}
00898
/**
 * Build the per-QP 8x8 dequantization tables from the PPS scaling matrices.
 * Identical scaling matrices share one table (the j<i dedup loop); the shift
 * and base coefficients come from the div6[]/rem6[] QP decomposition.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* reuse an earlier buffer if its scaling matrix is identical */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                /* (x>>3)|((x&7)<<3) transposes the 8x8 index into the
                 * storage order the dequant code expects */
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00924
00925 static void init_dequant4_coeff_table(H264Context *h){
00926 int i,j,q,x;
00927 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
00928 for(i=0; i<6; i++ ){
00929 h->dequant4_coeff[i] = h->dequant4_buffer[i];
00930 for(j=0; j<i; j++){
00931 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
00932 h->dequant4_coeff[i] = h->dequant4_buffer[j];
00933 break;
00934 }
00935 }
00936 if(j<i)
00937 continue;
00938
00939 for(q=0; q<max_qp+1; q++){
00940 int shift = div6[q] + 2;
00941 int idx = rem6[q];
00942 for(x=0; x<16; x++)
00943 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
00944 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
00945 h->pps.scaling_matrix4[i][x]) << shift;
00946 }
00947 }
00948 }
00949
00950 static void init_dequant_tables(H264Context *h){
00951 int i,x;
00952 init_dequant4_coeff_table(h);
00953 if(h->pps.transform_8x8_mode)
00954 init_dequant8_coeff_table(h);
00955 if(h->sps.transform_bypass){
00956 for(i=0; i<6; i++)
00957 for(x=0; x<16; x++)
00958 h->dequant4_coeff[i][0][x] = 1<<6;
00959 if(h->pps.transform_8x8_mode)
00960 for(i=0; i<6; i++)
00961 for(x=0; x<64; x++)
00962 h->dequant8_coeff[i][0][x] = 1<<6;
00963 }
00964 }
00965
00966
/**
 * Allocate all shared per-stream tables sized from the current
 * mb_width/mb_height/mb_stride. On any allocation failure everything
 * allocated so far is released via free_tables() and -1 is returned.
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra mb row so edge macroblocks have an "above" neighbor slot */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* two mb rows per slice thread for the row-based tables */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table is offset so [-stride] accesses are valid */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            /* mb index -> 4x4 block index; br table wraps every two rows
             * unless FMO is in use */
            h->mb2b_xy [mb_xy]= b_xy;
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* lazily (re)allocated once linesize is known, see ff_h264_frame_start() */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
01010
01014 static void clone_tables(H264Context *dst, H264Context *src, int i){
01015 MpegEncContext * const s = &src->s;
01016 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
01017 dst->non_zero_count = src->non_zero_count;
01018 dst->slice_table = src->slice_table;
01019 dst->cbp_table = src->cbp_table;
01020 dst->mb2b_xy = src->mb2b_xy;
01021 dst->mb2br_xy = src->mb2br_xy;
01022 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
01023 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
01024 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
01025 dst->direct_table = src->direct_table;
01026 dst->list_counts = src->list_counts;
01027
01028 dst->s.obmc_scratchpad = NULL;
01029 ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
01030 }
01031
/**
 * Per-context init that needs allocation: the two saved top-border line
 * buffers (3 planes of 16 pixels per mb column, doubled for possible
 * 16-bit samples). Also marks the ref_cache entries to the right of the
 * 8x8 partitions, which never hold valid references.
 *
 * @return 0 on success, -1 on allocation failure.
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1;
}
01047
01048 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01049
/**
 * Initialization shared by the H.264 and SVQ3 decoders: copies basic
 * stream parameters from the AVCodecContext, sets 8-bit/4:2:0 defaults
 * (re-done later if the SPS says otherwise) and initializes the DSP and
 * prediction function tables.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    /* defaults until the first SPS is parsed */
    s->avctx->bits_per_raw_sample = 8;
    h->cur_chroma_format_idc = 1;

    ff_h264dsp_init(&h->h264dsp,
                    s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);
    ff_h264_pred_init(&h->hpc, s->codec_id,
                      s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);

    h->dequant_coeff_pps= -1;   /* force dequant table rebuild on first PPS */
    s->unrestricted_mv=1;

    /* must be set before dsputil_init() so it picks 16-bit DCT routines */
    s->dsp.dct_bits = 16;
    dsputil_init(&s->dsp, s->avctx);

    /* flat default scaling matrices (value 16 == unity weight) */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01074
01075 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
01076 {
01077 AVCodecContext *avctx = h->s.avctx;
01078
01079 if(!buf || size <= 0)
01080 return -1;
01081
01082 if(buf[0] == 1){
01083 int i, cnt, nalsize;
01084 const unsigned char *p = buf;
01085
01086 h->is_avc = 1;
01087
01088 if(size < 7) {
01089 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01090 return -1;
01091 }
01092
01093
01094 h->nal_length_size = 2;
01095
01096 cnt = *(p+5) & 0x1f;
01097 p += 6;
01098 for (i = 0; i < cnt; i++) {
01099 nalsize = AV_RB16(p) + 2;
01100 if(nalsize > size - (p-buf))
01101 return -1;
01102 if(decode_nal_units(h, p, nalsize) < 0) {
01103 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01104 return -1;
01105 }
01106 p += nalsize;
01107 }
01108
01109 cnt = *(p++);
01110 for (i = 0; i < cnt; i++) {
01111 nalsize = AV_RB16(p) + 2;
01112 if(nalsize > size - (p-buf))
01113 return -1;
01114 if (decode_nal_units(h, p, nalsize) < 0) {
01115 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01116 return -1;
01117 }
01118 p += nalsize;
01119 }
01120
01121 h->nal_length_size = (buf[4] & 0x03) + 1;
01122 } else {
01123 h->is_avc = 0;
01124 if(decode_nal_units(h, buf, size) < 0)
01125 return -1;
01126 }
01127 return 0;
01128 }
01129
/**
 * AVCodec init callback: set up MpegEncContext defaults, H.264-specific
 * state, VLC tables, and parse any out-of-band extradata.
 *
 * @return 0 on success, -1 if the extradata fails to decode.
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;
    int i;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    /* H.264 motion vectors have quarter-pel precision */
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
        s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* start out assuming 8-bit samples; updated when an SPS arrives */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->prev_frame_num= -1;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 time_base counts fields: two ticks per frame */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size))
        return -1;

    /* honor the reorder depth declared in the SPS VUI, if any */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01182
01183 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
01184 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
01185 {
01186 int i;
01187
01188 for (i=0; i<count; i++){
01189 assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
01190 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
01191 !from[i]));
01192 to[i] = REBASE_PICTURE(from[i], new_base, old_base);
01193 }
01194 }
01195
/**
 * Synchronize an array of heap-allocated parameter sets (SPS/PPS) from
 * one thread context to another: free slots that vanished, allocate
 * slots that appeared, and copy the payload of every live slot.
 *
 * Fix vs. previous revision: the av_malloc() result was not checked, so
 * an allocation failure led to memcpy() into a NULL pointer. On OOM the
 * destination slot is now simply left empty (callers already treat a
 * NULL slot as "parameter set not present").
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i=0; i<count; i++){
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue;   /* OOM: leave the slot empty instead of crashing */
        }

        if (from[i]) memcpy(to[i], from[i], size);
    }
}
01207
01208 static int decode_init_thread_copy(AVCodecContext *avctx){
01209 H264Context *h= avctx->priv_data;
01210
01211 if (!avctx->internal->is_copy)
01212 return 0;
01213 memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01214 memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01215
01216 return 0;
01217 }
01218
01219 #define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading context update: copy all decoding state that the next
 * frame thread needs from src to dst. Relies on the exact H264Context
 * field layout (copy_fields spans and the memcpy past the embedded
 * MpegEncContext), so field order in the struct must not change.
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    /* first call on this copy: bulk-copy everything after the embedded
     * MpegEncContext, then fix up all pointers that must be per-thread */
    if(!inited){
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        /* copies the pointer tables too — fixed up below */
        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext));
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        /* NOTE(review): av_malloc() result is not checked here — a later
         * NULL scratchpad would crash; confirm against upstream fix */
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    /* frame-mt only supports identical avcC/Annex-B framing on both sides */
    h->is_avc = h1->is_avc;

    /* SPS/PPS: deep-copy the stored sets and the active ones */
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps = h1->pps;

    /* dequant tables: copy buffers, then rebase the aliasing coeff pointers */
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    /* POC state */
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    /* reference list handling */
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list, intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers copied above point into h1 — rebase them into h */
    copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
    copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;
    h->sync            = h1->sync;

    if(!s->current_picture_ptr) return 0;

    /* apply the source frame's reference marking so our DPB state matches */
    if(!s->dropable) {
        err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb = h->poc_msb;
        h->prev_poc_lsb = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num = h->frame_num;
    h->outputed_poc = h->next_outputed_poc;

    return err;
}
01312
/**
 * Per-frame setup: start the MPV/error-resilience frame, reset the
 * current Picture's flags, precompute the per-block destination offsets
 * and make sure every thread has an obmc scratchpad.
 *
 * @return 0 on success, -1 if MPV_frame_start() fails.
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);

    /* cleared here; set later once the slice headers are known */
    s->current_picture_ptr->f.key_frame = 0;
    s->current_picture_ptr->sync = 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* block_offset: pixel offset of every 4x4 block inside the mb;
     * entries 0-47 are for frame mb, 48+ for the field (doubled stride) case */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* NOTE(review): av_malloc() result not checked — confirm against upstream */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* mark every mb as belonging to no slice yet */
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

    /* cleared here so errored frames are not kept as references;
     * SVQ3 manages references itself */
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->f.reference = 0;

    /* INT_MAX marks "POC not yet decoded" — see decode_postinit() */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01374
/**
 * Run after the first slice header of a picture has been fully decoded:
 * derive interlacing/repeat flags from SEI pic_struct, insert the frame
 * into the delayed-picture reorder buffer and pick the next picture to
 * output (h->next_output_pic).
 *
 * @param setup_finished when nonzero, signal the next frame thread that
 *                       setup is complete (ff_thread_finish_setup).
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->f.pict_type = s->pict_type;

    if (h->next_output_pic) return;

    /* a field picture whose second field is still missing (POC still at
     * the INT_MAX sentinel from ff_h264_frame_start): wait for it */
    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        return;
    }

    cur->f.interlaced_frame = 0;
    cur->f.repeat_pict = 0;

    /* derive display flags from SEI picture timing, if present */
    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->f.interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->f.interlaced_frame = 1;
            else
                /* progressive coding but interlaced display intent:
                 * keep the previous frame's value */
                cur->f.interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            /* one extra field to display */
            cur->f.repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            cur->f.repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->f.repeat_pict = 4;
            break;
        }

        /* ct_type overrides: bit 1 set means interlaced source */
        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
    }else{
        /* no SEI: fall back to the coding mode */
        cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->f.interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* field order follows POC order */
        cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
            /* equal POCs: use pic_struct if it says top-first */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->f.top_field_first = 1;
            else
                cur->f.top_field_first = 0;
        }else{
            /* progressive: field order is irrelevant */
            cur->f.top_field_first = 0;
        }
    }

    cur->mmco_reset = h->mmco_reset;
    h->mmco_reset = 0;

    /* grow the reorder buffer to what the VUI promises */
    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    /* strict compliance without VUI info: assume worst-case reordering */
    if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
        s->low_delay= 0;
    }

    /* insert cur->poc into the sorted last_pocs window to estimate how
     * much reordering the stream actually uses */
    for (i = 0; 1; i++) {
        if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
            if(i)
                h->last_pocs[i-1] = cur->poc;
            break;
        } else if(i) {
            h->last_pocs[i-1]= h->last_pocs[i];
        }
    }
    out_of_order = MAX_DELAYED_PIC_COUNT - i;
    if( cur->f.pict_type == AV_PICTURE_TYPE_B
       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
        out_of_order = FFMAX(out_of_order, 1);
    if(s->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
        av_log(s->avctx, AV_LOG_WARNING, "Increasing reorder buffer to %d\n", out_of_order);
        s->avctx->has_b_frames = out_of_order;
        s->low_delay = 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);

    /* keep the picture referenced while it sits in the reorder buffer */
    h->delayed_pic[pics++] = cur;
    if (cur->f.reference == 0)
        cur->f.reference = DELAYED_PIC_REF;

    /* pick the lowest-POC delayed picture, stopping at keyframes/resets */
    out = h->delayed_pic[0];
    out_idx = 0;
    for (i = 1; h->delayed_pic[i] && !h->delayed_pic[i]->f.key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if (s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->f.reference &= ~DELAYED_PIC_REF;
        out->owner2 = s; /* for frame threading, the owner must be the second field's thread
                          * or else the first thread can release the picture while it is still being used */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : "");
    }

    if (h->next_output_pic && h->next_output_pic->sync) {
        h->sync |= 2;
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01547
/**
 * Save the bottom row(s) of the current macroblock into h->top_borders
 * so the macroblock below can use them as its top neighbor after
 * in-place deblocking. Byte offsets inside top_border: luma at 0,
 * chroma after it, doubled when pixel_shift (16-bit samples) is set.
 */
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                                              uint8_t *src_cb, uint8_t *src_cr,
                                              int linesize, int uvlinesize, int simple)
{
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;
    int chroma444 = CHROMA444;
    int chroma422 = CHROMA422;

    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom mb of a pair: when not MBAFF-coded, save the line
             * above the bottom mb into slot 0 */
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else if(chroma422){
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY64(top_border+24, src_cr + 15*uvlinesize);
                        }
                    } else {
                        /* 4:2:0: chroma is 8 lines tall */
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    /* There are two lines saved: the line above the top macroblock of a
     * pair, and the line above the bottom macroblock. */
    top_border = h->top_borders[top_idx][s->mb_x];

    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* 4:4:4: uvlinesize == linesize, chroma is full height */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else if(chroma422) {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01643
/**
 * Swap (or copy, depending on xchg) the saved top-border lines with the
 * current macroblock's top pixel rows, so intra prediction sees the
 * pre-deblocking values of the neighbors while deblocking can still run
 * in place. Called once with xchg=1 before prediction and once with
 * xchg=0 afterwards to restore.
 */
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                            uint8_t *src_cb, uint8_t *src_cr,
                                            int linesize, int uvlinesize,
                                            int xchg, int chroma444,
                                            int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    /* deblocking_filter==2: filter only within the current slice */
    if(h->deblocking_filter == 2) {
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* swap 8 pixels (16 bytes when pixel_shift) between border buffer and image */
#define XCHG(a,b,xchg)\
if (pixel_shift) {\
    if (xchg) {\
        AV_SWAP64(b+0,a+0);\
        AV_SWAP64(b+8,a+8);\
    } else {\
        AV_COPY128(b,a); \
    }\
} else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01728
01729 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01730 if (high_bit_depth) {
01731 return AV_RN32A(((int32_t*)mb) + index);
01732 } else
01733 return AV_RN16A(mb + index);
01734 }
01735
01736 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01737 if (high_bit_depth) {
01738 AV_WN32A(((int32_t*)mb) + index, value);
01739 } else
01740 AV_WN16A(mb + index, value);
01741 }
01742
/**
 * Intra prediction + residual add for one luma (or, with p>0 in 4:4:4,
 * chroma-as-luma) plane of the current macroblock: 4x4/8x8 intra modes
 * per block, or a single 16x16 prediction plus DC dequant.
 *
 * @param p plane index: 0 = luma, 1/2 = Cb/Cr in 4:4:4 mode
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                /* four 8x8 blocks, one intra mode each */
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    /* profile 244 (High 4:4:4) lossless: special
                     * prediction+add in one step for V/H modes */
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                               (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* DC-only shortcut when a single coeff is set */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                /* sixteen 4x4 blocks, one intra mode each */
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* replicate the last available top pixel */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101u;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else if(CONFIG_SVQ3_DECODER)
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* intra 16x16: single whole-mb prediction, then DC transform */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* bypass: scatter the 4x4 DC values to each block's
                     * DC position (raster -> block scan order) */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else if(CONFIG_SVQ3_DECODER)
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01845
/**
 * Residual (IDCT + add) pass for one luma-layout plane of a macroblock
 * that was NOT coded as intra 4x4 — the intra-4x4 case already added the
 * residual during prediction in hl_decode_mb_predict_luma().
 *
 * @param p plane index: 0 = luma, 1/2 = Cb/Cr in 4:4:4 mode
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    /* lossless V/H 16x16 intra already merged pred+add */
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            /* DC may be nonzero even when nnz cache is 0 */
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){ /* inter/other mb with coded luma residual */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else if(CONFIG_SVQ3_DECODER) {
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01895
/**
 * Decode (reconstruct) one macroblock for 4:2:0 / 4:2:2 / monochrome content:
 * prediction (intra or motion compensation), residual add, and chroma handling.
 *
 * @param simple      non-zero selects the fast path with interlacing, PCM and
 *                    gray-flag checks compiled out (constant-folded)
 * @param pixel_shift 1 for >8-bit content (16-bit coefficients/pixels), else 0
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize ;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 is 1 unless the SVQ3 decoder is compiled in and active */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    const int block_h = 16 >> s->chroma_y_shift;   /* chroma rows per MB: 8 (4:2:0) or 16 (4:2:2) */
    const int chroma422 = CHROMA422;

    /* destination pointers for this macroblock in the current picture */
    dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the strides and use the field block offsets */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            /* bottom field MB: step back to the first bottom-field row */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize * (block_h - 1);
            dest_cr-= s->uvlinesize * (block_h - 1);
        }
        if(FRAME_MBAFF) {
            int list;
            /* MBAFF: fold the field parity into the cached reference indices so
             * that later motion/deblock code addresses the correct field */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM: raw samples were stored in h->mb by the entropy decoder;
         * copy them straight to the picture */
        const int bit_depth = h->sps.bit_depth_luma;
        if (pixel_shift) {
            /* >8 bit: samples are packed as bit_depth-wide bitstream words */
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: fill chroma with mid-gray */
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            /* 8-bit: bytes already laid out row-wise in h->mb */
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: mid-gray (block_h is 8 here since chroma_y_shift==1) */
                    for (i=0; i<8; i++) {
                        memset(dest_cb+ i*uvlinesize, 1 << (bit_depth - 1), 8);
                        memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8);
                    }
                } else {
                    for (i=0; i<block_h; i++) {
                        memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* intra prediction needs the (pre-deblock) neighbor samples:
             * temporarily swap in the saved top/left border */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* inter macroblock: motion compensation, specialized per chroma subsampling */
            if (chroma422) {
                hl_motion_422(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            } else {
                hl_motion_420(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            }
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* chroma residual, only when chroma is decoded and any chroma block is coded */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                        if (chroma422) {
                            /* 4:2:2 has a second set of 4 chroma blocks per plane */
                            for(i=j*16+4; i<j*16+8; i++){
                                if(h->non_zero_count_cache[ scan8[i+4] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                    idct_add   (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
                            }
                        }
                    }
                }
            }else{
                if(is_h264){
                    int qp[2];
                    if (chroma422) {
                        qp[0] = h->chroma_qp[0] + 3;
                        qp[1] = h->chroma_qp[1] + 3;
                    } else {
                        qp[0] = h->chroma_qp[0];
                        qp[1] = h->chroma_qp[1];
                    }
                    /* dequantize the chroma DC blocks first, then add all chroma residuals */
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }
#if CONFIG_SVQ3_DECODER
                else{
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
#endif
            }
        }
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02103
/**
 * Decode (reconstruct) one macroblock for 4:4:4 content: all three planes are
 * full-resolution and are processed with the luma code paths (prediction,
 * motion compensation and IDCT run once per plane).
 *
 * @param simple      non-zero selects the fast path with interlacing, PCM and
 *                    gray-flag checks compiled out
 * @param pixel_shift 1 for >8-bit content, else 0
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only the luma plane is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.f.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double stride, field block offsets */
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* MBAFF: fold field parity into the cached reference indices */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM: copy raw samples stored in h->mb to all decoded planes */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* swap in the saved (pre-deblock) borders, predict each plane
             * with the luma predictor, then swap back */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            hl_motion(h, dest[0], dest[1], dest[2],
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,
                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02199
/**
 * Generate the "simple" (fast-path) macroblock decoders:
 * hl_decode_mb_simple_8 (8-bit, sh=0) and hl_decode_mb_simple_16
 * (>8-bit, sh=1). Both call hl_decode_mb_internal with simple=1 so the
 * interlacing/PCM/gray branches are compiled out.
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
02209
/** Slow-path macroblock decoder: all checks enabled (simple=0). */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02216
/** Slow-path 4:4:4 macroblock decoder: all checks enabled (simple=0). */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02220
/** Fast-path 4:4:4 macroblock decoder: 8-bit only (pixel_shift fixed to 0). */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02224
02225 void ff_h264_hl_decode_mb(H264Context *h){
02226 MpegEncContext * const s = &h->s;
02227 const int mb_xy= h->mb_xy;
02228 const int mb_type = s->current_picture.f.mb_type[mb_xy];
02229 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02230
02231 if (CHROMA444) {
02232 if(is_complex || h->pixel_shift)
02233 hl_decode_mb_444_complex(h);
02234 else
02235 hl_decode_mb_444_simple(h);
02236 } else if (is_complex) {
02237 hl_decode_mb_complex(h);
02238 } else if (h->pixel_shift) {
02239 hl_decode_mb_simple_16(h);
02240 } else
02241 hl_decode_mb_simple_8(h);
02242 }
02243
02244 static int pred_weight_table(H264Context *h){
02245 MpegEncContext * const s = &h->s;
02246 int list, i;
02247 int luma_def, chroma_def;
02248
02249 h->use_weight= 0;
02250 h->use_weight_chroma= 0;
02251 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02252 if(h->sps.chroma_format_idc)
02253 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02254 luma_def = 1<<h->luma_log2_weight_denom;
02255 chroma_def = 1<<h->chroma_log2_weight_denom;
02256
02257 for(list=0; list<2; list++){
02258 h->luma_weight_flag[list] = 0;
02259 h->chroma_weight_flag[list] = 0;
02260 for(i=0; i<h->ref_count[list]; i++){
02261 int luma_weight_flag, chroma_weight_flag;
02262
02263 luma_weight_flag= get_bits1(&s->gb);
02264 if(luma_weight_flag){
02265 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02266 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02267 if( h->luma_weight[i][list][0] != luma_def
02268 || h->luma_weight[i][list][1] != 0) {
02269 h->use_weight= 1;
02270 h->luma_weight_flag[list]= 1;
02271 }
02272 }else{
02273 h->luma_weight[i][list][0]= luma_def;
02274 h->luma_weight[i][list][1]= 0;
02275 }
02276
02277 if(h->sps.chroma_format_idc){
02278 chroma_weight_flag= get_bits1(&s->gb);
02279 if(chroma_weight_flag){
02280 int j;
02281 for(j=0; j<2; j++){
02282 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02283 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02284 if( h->chroma_weight[i][list][j][0] != chroma_def
02285 || h->chroma_weight[i][list][j][1] != 0) {
02286 h->use_weight_chroma= 1;
02287 h->chroma_weight_flag[list]= 1;
02288 }
02289 }
02290 }else{
02291 int j;
02292 for(j=0; j<2; j++){
02293 h->chroma_weight[i][list][j][0]= chroma_def;
02294 h->chroma_weight[i][list][j][1]= 0;
02295 }
02296 }
02297 }
02298 }
02299 if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02300 }
02301 h->use_weight= h->use_weight || h->use_weight_chroma;
02302 return 0;
02303 }
02304
/**
 * Initialize the implicit weighted-prediction table (B slices with
 * weighted_bipred_idc==2): weights are derived from the POC distances of
 * the two references, not read from the bitstream.
 *
 * @param field <0 for frame (or first call); 0/1 to fill the per-field
 *              half of the table for MBAFF (field references live at
 *              ref_cache indices 16..)
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
        /* common shortcut: single reference each way, equidistant in POC ->
         * implicit weighting degenerates to a plain average, disable it */
        if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
           && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
            h->use_weight= 0;
            h->use_weight_chroma= 0;
            return;
        }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        /* field half of the table: field references start at index 16,
         * two field refs per frame ref */
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* use_weight==2 marks implicit (as opposed to explicit) weighting */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;   /* default: equal weight (32/64) */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                /* POC-distance based weight derivation (H.264 8.4.2.3.1):
                 * td = dist between the two refs, tb = dist to ref0 */
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                /* frame call: both field entries share the same weight */
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02370
02374 static void idr(H264Context *h){
02375 int i;
02376 ff_h264_remove_all_refs(h);
02377 h->prev_frame_num= 0;
02378 h->prev_frame_num_offset= 0;
02379 h->prev_poc_msb= 1<<16;
02380 h->prev_poc_lsb= 0;
02381 for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
02382 h->last_pocs[i] = INT_MIN;
02383 }
02384
02385
/**
 * AVCodec.flush callback: empty the decoded picture buffer and reset all
 * inter-frame decoder state (used on seek / stream discontinuity).
 */
static void flush_dpb(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    int i;
    /* NOTE(review): "<=" walks one past MAX_DELAYED_PIC_COUNT — assumes
     * delayed_pic[] is declared with at least MAX_DELAYED_PIC_COUNT+1
     * entries; confirm against the H264Context definition */
    for(i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
        if(h->delayed_pic[i])
            h->delayed_pic[i]->f.reference= 0;
        h->delayed_pic[i]= NULL;
    }
    h->outputed_poc=h->next_outputed_poc= INT_MIN;
    h->prev_interlaced_frame = 1;
    idr(h);   /* also clears all reference pictures and POC state */
    h->prev_frame_num= -1;
    if(h->s.current_picture_ptr)
        h->s.current_picture_ptr->f.reference= 0;
    h->s.first_field= 0;
    ff_h264_reset_sei(h);
    ff_mpeg_flush(avctx);
    h->recovery_frame= -1;
    h->sync= 0;
}
02406
/**
 * Derive the picture order count of the current picture from the slice
 * header fields, per H.264 section 8.2.1 (one branch per pic_order_cnt_type).
 * Stores the per-field POCs and their minimum into the current Picture.
 *
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped -> advance the offset by one wrap period */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        /* type 0: poc_lsb is coded; reconstruct the MSB by detecting
         * lsb wrap-around relative to the previous picture */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* type 1: POC follows a pattern declared in the SPS
         * (offset_for_ref_frame cycle) plus coded deltas */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* type 2: POC is derived directly from frame_num;
         * non-reference pictures get the odd values */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* only store the POC of the field(s) actually being decoded */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02483
02484
02488 static void init_scan_tables(H264Context *h){
02489 int i;
02490 for(i=0; i<16; i++){
02491 #define T(x) (x>>2) | ((x<<2) & 0xF)
02492 h->zigzag_scan[i] = T(zigzag_scan[i]);
02493 h-> field_scan[i] = T( field_scan[i]);
02494 #undef T
02495 }
02496 for(i=0; i<64; i++){
02497 #define T(x) (x>>3) | ((x&7)<<3)
02498 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
02499 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02500 h->field_scan8x8[i] = T(field_scan8x8[i]);
02501 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
02502 #undef T
02503 }
02504 if(h->sps.transform_bypass){
02505 h->zigzag_scan_q0 = zigzag_scan;
02506 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
02507 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02508 h->field_scan_q0 = field_scan;
02509 h->field_scan8x8_q0 = field_scan8x8;
02510 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
02511 }else{
02512 h->zigzag_scan_q0 = h->zigzag_scan;
02513 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
02514 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02515 h->field_scan_q0 = h->field_scan;
02516 h->field_scan8x8_q0 = h->field_scan8x8;
02517 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
02518 }
02519 }
02520
/**
 * Finish decoding the current field (or frame): report progress to waiting
 * frame threads, run reference picture marking, notify hwaccel/VDPAU, and
 * close out the MpegEncContext frame.
 *
 * @param in_setup non-zero when called from slice-header setup (second field
 *                 start) rather than at true end-of-picture; suppresses the
 *                 progress report and forces the ref-marking branch
 * @return error code from ref pic marking, 0 on success
 */
static int field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int err = 0;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * Error concealment is only run once the whole frame is available, so
     * for field pictures (where the second field may still be pending) it
     * is skipped here; presumably it is triggered elsewhere once both
     * fields have been decoded — confirm against the field-pairing logic.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;

    return err;
}
02574
/**
 * Copy the per-picture decoding state from src into dst so dst can decode
 * slices of the same picture (presumably used for slice threading — the
 * copied set is exactly what slice decoding reads: current picture pointers,
 * strides, reference lists and dequant tables). Entropy-coder and
 * per-macroblock state is intentionally NOT copied.
 */
static void clone_slice(H264Context *dst, H264Context *src)
{
    memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
    dst->s.current_picture      = src->s.current_picture;
    dst->s.linesize             = src->s.linesize;
    dst->s.uvlinesize           = src->s.uvlinesize;
    dst->s.first_field          = src->s.first_field;

    dst->prev_poc_msb           = src->prev_poc_msb;
    dst->prev_poc_lsb           = src->prev_poc_lsb;
    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
    dst->prev_frame_num         = src->prev_frame_num;
    dst->short_ref_count        = src->short_ref_count;

    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));

    memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
    memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
}
02601
02609 int ff_h264_get_profile(SPS *sps)
02610 {
02611 int profile = sps->profile_idc;
02612
02613 switch(sps->profile_idc) {
02614 case FF_PROFILE_H264_BASELINE:
02615
02616 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02617 break;
02618 case FF_PROFILE_H264_HIGH_10:
02619 case FF_PROFILE_H264_HIGH_422:
02620 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02621
02622 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02623 break;
02624 }
02625
02626 return profile;
02627 }
02628
02638 static int decode_slice_header(H264Context *h, H264Context *h0){
02639 MpegEncContext * const s = &h->s;
02640 MpegEncContext * const s0 = &h0->s;
02641 unsigned int first_mb_in_slice;
02642 unsigned int pps_id;
02643 int num_ref_idx_active_override_flag;
02644 unsigned int slice_type, tmp, i, j;
02645 int default_ref_list_done = 0;
02646 int last_pic_structure;
02647
02648 s->dropable= h->nal_ref_idc == 0;
02649
02650
02651 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02652 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02653 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02654 }else{
02655 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02656 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02657 }
02658
02659 first_mb_in_slice= get_ue_golomb_long(&s->gb);
02660
02661 if(first_mb_in_slice == 0){
02662 if(h0->current_slice && FIELD_PICTURE){
02663 field_end(h, 1);
02664 }
02665
02666 h0->current_slice = 0;
02667 if (!s0->first_field)
02668 s->current_picture_ptr= NULL;
02669 }
02670
02671 slice_type= get_ue_golomb_31(&s->gb);
02672 if(slice_type > 9){
02673 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02674 return -1;
02675 }
02676 if(slice_type > 4){
02677 slice_type -= 5;
02678 h->slice_type_fixed=1;
02679 }else
02680 h->slice_type_fixed=0;
02681
02682 slice_type= golomb_to_pict_type[ slice_type ];
02683 if (slice_type == AV_PICTURE_TYPE_I
02684 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02685 default_ref_list_done = 1;
02686 }
02687 h->slice_type= slice_type;
02688 h->slice_type_nos= slice_type & 3;
02689
02690 s->pict_type= h->slice_type;
02691
02692 pps_id= get_ue_golomb(&s->gb);
02693 if(pps_id>=MAX_PPS_COUNT){
02694 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02695 return -1;
02696 }
02697 if(!h0->pps_buffers[pps_id]) {
02698 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02699 return -1;
02700 }
02701 h->pps= *h0->pps_buffers[pps_id];
02702
02703 if(!h0->sps_buffers[h->pps.sps_id]) {
02704 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02705 return -1;
02706 }
02707 h->sps = *h0->sps_buffers[h->pps.sps_id];
02708
02709 s->avctx->profile = ff_h264_get_profile(&h->sps);
02710 s->avctx->level = h->sps.level_idc;
02711 s->avctx->refs = h->sps.ref_frame_count;
02712
02713 if(h == h0 && h->dequant_coeff_pps != pps_id){
02714 h->dequant_coeff_pps = pps_id;
02715 init_dequant_tables(h);
02716 }
02717
02718 s->mb_width= h->sps.mb_width;
02719 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02720
02721 h->b_stride= s->mb_width*4;
02722
02723 s->chroma_y_shift = h->sps.chroma_format_idc <= 1;
02724
02725 s->width = 16*s->mb_width;
02726 s->height= 16*s->mb_height;
02727
02728 if (s->context_initialized
02729 && ( s->width != s->avctx->coded_width || s->height != s->avctx->coded_height
02730 || s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
02731 || h->cur_chroma_format_idc != h->sps.chroma_format_idc
02732 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02733 if(h != h0) {
02734 av_log_missing_feature(s->avctx, "Width/height/bit depth/chroma idc changing with threads is", 0);
02735 return -1;
02736 }
02737 free_tables(h, 0);
02738 flush_dpb(s->avctx);
02739 MPV_common_end(s);
02740 h->list_count = 0;
02741 }
02742 if (!s->context_initialized) {
02743 if (h != h0) {
02744 av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02745 return -1;
02746 }
02747 avcodec_set_dimensions(s->avctx, s->width, s->height);
02748 s->avctx->width -= (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02749 s->avctx->height -= (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1) * (2 - h->sps.frame_mbs_only_flag);
02750 s->avctx->sample_aspect_ratio= h->sps.sar;
02751 av_assert0(s->avctx->sample_aspect_ratio.den);
02752
02753 if (s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
02754 h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
02755 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10 &&
02756 (h->sps.bit_depth_luma != 9 || !CHROMA422)) {
02757 s->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
02758 h->cur_chroma_format_idc = h->sps.chroma_format_idc;
02759 h->pixel_shift = h->sps.bit_depth_luma > 8;
02760
02761 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
02762 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
02763 s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
02764 dsputil_init(&s->dsp, s->avctx);
02765 } else {
02766 av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d chroma_idc: %d\n",
02767 h->sps.bit_depth_luma, h->sps.chroma_format_idc);
02768 return -1;
02769 }
02770 }
02771
02772 if(h->sps.video_signal_type_present_flag){
02773 s->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02774 if(h->sps.colour_description_present_flag){
02775 s->avctx->color_primaries = h->sps.color_primaries;
02776 s->avctx->color_trc = h->sps.color_trc;
02777 s->avctx->colorspace = h->sps.colorspace;
02778 }
02779 }
02780
02781 if(h->sps.timing_info_present_flag){
02782 int64_t den= h->sps.time_scale;
02783 if(h->x264_build < 44U)
02784 den *= 2;
02785 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02786 h->sps.num_units_in_tick, den, 1<<30);
02787 }
02788
02789 switch (h->sps.bit_depth_luma) {
02790 case 9 :
02791 if (CHROMA444) {
02792 if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02793 s->avctx->pix_fmt = PIX_FMT_GBRP9;
02794 } else
02795 s->avctx->pix_fmt = PIX_FMT_YUV444P9;
02796 } else if (CHROMA422)
02797 s->avctx->pix_fmt = PIX_FMT_YUV422P9;
02798 else
02799 s->avctx->pix_fmt = PIX_FMT_YUV420P9;
02800 break;
02801 case 10 :
02802 if (CHROMA444) {
02803 if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02804 s->avctx->pix_fmt = PIX_FMT_GBRP10;
02805 } else
02806 s->avctx->pix_fmt = PIX_FMT_YUV444P10;
02807 } else if (CHROMA422)
02808 s->avctx->pix_fmt = PIX_FMT_YUV422P10;
02809 else
02810 s->avctx->pix_fmt = PIX_FMT_YUV420P10;
02811 break;
02812 default:
02813 if (CHROMA444){
02814 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02815 if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02816 s->avctx->pix_fmt = PIX_FMT_GBR24P;
02817 av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
02818 } else if (s->avctx->colorspace == AVCOL_SPC_YCGCO) {
02819 av_log(h->s.avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n");
02820 }
02821 } else if (CHROMA422) {
02822 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
02823 }else{
02824 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02825 s->avctx->codec->pix_fmts ?
02826 s->avctx->codec->pix_fmts :
02827 s->avctx->color_range == AVCOL_RANGE_JPEG ?
02828 hwaccel_pixfmt_list_h264_jpeg_420 :
02829 ff_hwaccel_pixfmt_list_420);
02830 }
02831 }
02832
02833 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02834
02835 if (MPV_common_init(s) < 0) {
02836 av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02837 return -1;
02838 }
02839 s->first_field = 0;
02840 h->prev_interlaced_frame = 1;
02841
02842 init_scan_tables(h);
02843 if (ff_h264_alloc_tables(h) < 0) {
02844 av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02845 return AVERROR(ENOMEM);
02846 }
02847
02848 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02849 if (context_init(h) < 0) {
02850 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02851 return -1;
02852 }
02853 } else {
02854 for(i = 1; i < s->avctx->thread_count; i++) {
02855 H264Context *c;
02856 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02857 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02858 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02859 c->h264dsp = h->h264dsp;
02860 c->sps = h->sps;
02861 c->pps = h->pps;
02862 c->pixel_shift = h->pixel_shift;
02863 c->cur_chroma_format_idc = h->cur_chroma_format_idc;
02864 init_scan_tables(c);
02865 clone_tables(c, h, i);
02866 }
02867
02868 for(i = 0; i < s->avctx->thread_count; i++)
02869 if (context_init(h->thread_context[i]) < 0) {
02870 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02871 return -1;
02872 }
02873 }
02874 }
02875
02876 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02877
02878 h->mb_mbaff = 0;
02879 h->mb_aff_frame = 0;
02880 last_pic_structure = s0->picture_structure;
02881 if(h->sps.frame_mbs_only_flag){
02882 s->picture_structure= PICT_FRAME;
02883 }else{
02884 if(!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B){
02885 av_log(h->s.avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n");
02886 return -1;
02887 }
02888 if(get_bits1(&s->gb)) {
02889 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb);
02890 } else {
02891 s->picture_structure= PICT_FRAME;
02892 h->mb_aff_frame = h->sps.mb_aff;
02893 }
02894 }
02895 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02896
02897 if(h0->current_slice == 0){
02898
02899 if(h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0) {
02900 int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02901
02902 if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02903
02904 if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02905 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02906 if (unwrap_prev_frame_num < 0)
02907 unwrap_prev_frame_num += max_frame_num;
02908
02909 h->prev_frame_num = unwrap_prev_frame_num;
02910 }
02911 }
02912
02913 while(h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0 &&
02914 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02915 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02916 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02917 if (ff_h264_frame_start(h) < 0)
02918 return -1;
02919 h->prev_frame_num++;
02920 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02921 s->current_picture_ptr->frame_num= h->prev_frame_num;
02922 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02923 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02924 ff_generate_sliding_window_mmcos(h);
02925 if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
02926 (s->avctx->err_recognition & AV_EF_EXPLODE))
02927 return AVERROR_INVALIDDATA;
02928
02929
02930
02931
02932
02933
02934 if (h->short_ref_count) {
02935 if (prev) {
02936 av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
02937 (const uint8_t**)prev->f.data, prev->f.linesize,
02938 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02939 h->short_ref[0]->poc = prev->poc+2;
02940 }
02941 h->short_ref[0]->frame_num = h->prev_frame_num;
02942 }
02943 }
02944
02945
02946 if (s0->first_field) {
02947 assert(s0->current_picture_ptr);
02948 assert(s0->current_picture_ptr->f.data[0]);
02949 assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
02950
02951
02952 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02953
02954
02955
02956
02957 s0->current_picture_ptr = NULL;
02958 s0->first_field = FIELD_PICTURE;
02959
02960 } else {
02961 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02962
02963
02964
02965
02966
02967
02968 s0->first_field = 1;
02969 s0->current_picture_ptr = NULL;
02970
02971 } else {
02972
02973 s0->first_field = 0;
02974 }
02975 }
02976
02977 } else {
02978
02979 assert(!s0->current_picture_ptr);
02980 s0->first_field = FIELD_PICTURE;
02981 }
02982
02983 if(!FIELD_PICTURE || s0->first_field) {
02984 if (ff_h264_frame_start(h) < 0) {
02985 s0->first_field = 0;
02986 return -1;
02987 }
02988 } else {
02989 ff_release_unused_pictures(s, 0);
02990 }
02991 }
02992 if(h != h0)
02993 clone_slice(h, h0);
02994
02995 s->current_picture_ptr->frame_num= h->frame_num;
02996
02997 assert(s->mb_num == s->mb_width * s->mb_height);
02998 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02999 first_mb_in_slice >= s->mb_num){
03000 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03001 return -1;
03002 }
03003 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03004 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03005 if (s->picture_structure == PICT_BOTTOM_FIELD)
03006 s->resync_mb_y = s->mb_y = s->mb_y + 1;
03007 assert(s->mb_y < s->mb_height);
03008
03009 if(s->picture_structure==PICT_FRAME){
03010 h->curr_pic_num= h->frame_num;
03011 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03012 }else{
03013 h->curr_pic_num= 2*h->frame_num + 1;
03014 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03015 }
03016
03017 if(h->nal_unit_type == NAL_IDR_SLICE){
03018 get_ue_golomb(&s->gb);
03019 }
03020
03021 if(h->sps.poc_type==0){
03022 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03023
03024 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03025 h->delta_poc_bottom= get_se_golomb(&s->gb);
03026 }
03027 }
03028
03029 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03030 h->delta_poc[0]= get_se_golomb(&s->gb);
03031
03032 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03033 h->delta_poc[1]= get_se_golomb(&s->gb);
03034 }
03035
03036 init_poc(h);
03037
03038 if(h->pps.redundant_pic_cnt_present){
03039 h->redundant_pic_count= get_ue_golomb(&s->gb);
03040 }
03041
03042
03043 h->ref_count[0]= h->pps.ref_count[0];
03044 h->ref_count[1]= h->pps.ref_count[1];
03045
03046 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
03047 unsigned max= (16<<(s->picture_structure != PICT_FRAME))-1;
03048 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
03049 h->direct_spatial_mv_pred= get_bits1(&s->gb);
03050 }
03051 num_ref_idx_active_override_flag= get_bits1(&s->gb);
03052
03053 if(num_ref_idx_active_override_flag){
03054 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03055 if(h->slice_type_nos==AV_PICTURE_TYPE_B)
03056 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03057
03058 }
03059 if(h->ref_count[0]-1 > max || h->ref_count[1]-1 > max){
03060 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03061 h->ref_count[0]= h->ref_count[1]= 1;
03062 return -1;
03063 }
03064 if(h->slice_type_nos == AV_PICTURE_TYPE_B)
03065 h->list_count= 2;
03066 else
03067 h->list_count= 1;
03068 }else
03069 h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
03070
03071 if(!default_ref_list_done){
03072 ff_h264_fill_default_ref_list(h);
03073 }
03074
03075 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
03076 h->ref_count[1]= h->ref_count[0]= 0;
03077 return -1;
03078 }
03079
03080 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
03081 s->last_picture_ptr= &h->ref_list[0][0];
03082 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03083 }
03084 if(h->slice_type_nos==AV_PICTURE_TYPE_B){
03085 s->next_picture_ptr= &h->ref_list[1][0];
03086 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03087 }
03088
03089 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P )
03090 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
03091 pred_weight_table(h);
03092 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03093 implicit_weight_table(h, -1);
03094 }else {
03095 h->use_weight = 0;
03096 for (i = 0; i < 2; i++) {
03097 h->luma_weight_flag[i] = 0;
03098 h->chroma_weight_flag[i] = 0;
03099 }
03100 }
03101
03102 if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
03103 (s->avctx->err_recognition & AV_EF_EXPLODE))
03104 return AVERROR_INVALIDDATA;
03105
03106 if(FRAME_MBAFF){
03107 ff_h264_fill_mbaff_ref_list(h);
03108
03109 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03110 implicit_weight_table(h, 0);
03111 implicit_weight_table(h, 1);
03112 }
03113 }
03114
03115 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
03116 ff_h264_direct_dist_scale_factor(h);
03117 ff_h264_direct_ref_list_init(h);
03118
03119 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
03120 tmp = get_ue_golomb_31(&s->gb);
03121 if(tmp > 2){
03122 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03123 return -1;
03124 }
03125 h->cabac_init_idc= tmp;
03126 }
03127
03128 h->last_qscale_diff = 0;
03129 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03130 if(tmp>51+6*(h->sps.bit_depth_luma-8)){
03131 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03132 return -1;
03133 }
03134 s->qscale= tmp;
03135 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03136 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03137
03138 if(h->slice_type == AV_PICTURE_TYPE_SP){
03139 get_bits1(&s->gb);
03140 }
03141 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
03142 get_se_golomb(&s->gb);
03143 }
03144
03145 h->deblocking_filter = 1;
03146 h->slice_alpha_c0_offset = 52;
03147 h->slice_beta_offset = 52;
03148 if( h->pps.deblocking_filter_parameters_present ) {
03149 tmp= get_ue_golomb_31(&s->gb);
03150 if(tmp > 2){
03151 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03152 return -1;
03153 }
03154 h->deblocking_filter= tmp;
03155 if(h->deblocking_filter < 2)
03156 h->deblocking_filter^= 1;
03157
03158 if( h->deblocking_filter ) {
03159 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03160 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
03161 if( h->slice_alpha_c0_offset > 104U
03162 || h->slice_beta_offset > 104U){
03163 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03164 return -1;
03165 }
03166 }
03167 }
03168
03169 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
03170 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03171 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B)
03172 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03173 h->deblocking_filter= 0;
03174
03175 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03176 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03177
03178
03179 h->deblocking_filter = 2;
03180 } else {
03181 h0->max_contexts = 1;
03182 if(!h0->single_decode_warning) {
03183 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03184 h0->single_decode_warning = 1;
03185 }
03186 if (h != h0) {
03187 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03188 return 1;
03189 }
03190 }
03191 }
03192 h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
03193 - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
03194 + 6 * (h->sps.bit_depth_luma - 8);
03195
03196 #if 0 //FMO
03197 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03198 slice_group_change_cycle= get_bits(&s->gb, ?);
03199 #endif
03200
03201 h0->last_slice_type = slice_type;
03202 h->slice_num = ++h0->current_slice;
03203
03204 if(h->slice_num)
03205 h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= s->resync_mb_y;
03206 if ( h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= s->resync_mb_y
03207 && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= s->resync_mb_y
03208 && h->slice_num >= MAX_SLICES) {
03209
03210 av_log(s->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES);
03211 }
03212
03213 for(j=0; j<2; j++){
03214 int id_list[16];
03215 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03216 for(i=0; i<16; i++){
03217 id_list[i]= 60;
03218 if (h->ref_list[j][i].f.data[0]) {
03219 int k;
03220 uint8_t *base = h->ref_list[j][i].f.base[0];
03221 for(k=0; k<h->short_ref_count; k++)
03222 if (h->short_ref[k]->f.base[0] == base) {
03223 id_list[i]= k;
03224 break;
03225 }
03226 for(k=0; k<h->long_ref_count; k++)
03227 if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
03228 id_list[i]= h->short_ref_count + k;
03229 break;
03230 }
03231 }
03232 }
03233
03234 ref2frm[0]=
03235 ref2frm[1]= -1;
03236 for(i=0; i<16; i++)
03237 ref2frm[i+2]= 4*id_list[i]
03238 + (h->ref_list[j][i].f.reference & 3);
03239 ref2frm[18+0]=
03240 ref2frm[18+1]= -1;
03241 for(i=16; i<48; i++)
03242 ref2frm[i+4]= 4*id_list[(i-16)>>1]
03243 + (h->ref_list[j][i].f.reference & 3);
03244 }
03245
03246
03247 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03248 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03249
03250 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03251 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03252 h->slice_num,
03253 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03254 first_mb_in_slice,
03255 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03256 pps_id, h->frame_num,
03257 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03258 h->ref_count[0], h->ref_count[1],
03259 s->qscale,
03260 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03261 h->use_weight,
03262 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03263 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03264 );
03265 }
03266
03267 return 0;
03268 }
03269
03270 int ff_h264_get_slice_type(const H264Context *h)
03271 {
03272 switch (h->slice_type) {
03273 case AV_PICTURE_TYPE_P: return 0;
03274 case AV_PICTURE_TYPE_B: return 1;
03275 case AV_PICTURE_TYPE_I: return 2;
03276 case AV_PICTURE_TYPE_SP: return 3;
03277 case AV_PICTURE_TYPE_SI: return 4;
03278 default: return -1;
03279 }
03280 }
03281
/**
 * Fill the motion-vector and reference caches for one prediction list of the
 * current macroblock, as needed by the loop filter.
 *
 * Copies the bottom row of MVs/refs from the top neighbour and the right
 * column from the left neighbour into the cache (rows/columns at offset -1),
 * then fills the 4x4 interior from the current picture. Reference indices are
 * remapped through ref2frm so that refs from different slices compare
 * consistently.
 *
 * @param h         H264 decoding context
 * @param s         the embedded MpegEncContext (h->s)
 * @param mb_type   mb_type of the current macroblock
 * @param top_xy    mb index of the top neighbour
 * @param left_xy   mb indices of the left neighbours (LTOP/LBOT for MBAFF)
 * @param top_type  mb_type of the top neighbour (0 if unavailable)
 * @param left_type mb_types of the left neighbours (0 if unavailable)
 * @param mb_xy     mb index of the current macroblock
 * @param list      prediction list to fill (0 or 1)
 */
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
{
    int b_stride = h->b_stride;
    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        if(USES_LIST(top_type, list)){
            /* bottom row of the top neighbour's MVs (3*b_stride) and its
             * bottom two 8x8 ref indices */
            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
            const int b8_xy= 4*top_xy + 2;
            /* remap through the top neighbour's slice-local ref2frm table;
             * +20 vs +2 selects the MBAFF variant of the table */
            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
            AV_COPY128(mv_dst - 1*8, s->current_picture.f.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1*8]=
            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1*8]=
            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
        }else{
            /* top neighbour doesn't use this list: zero MVs, mark refs unused */
            AV_ZERO128(mv_dst - 1*8);
            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        }

        /* left neighbour is only usable directly when its field/frame coding
         * matches the current mb (same IS_INTERLACED flag) */
        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
            if(USES_LIST(left_type[LTOP], list)){
                /* rightmost column of the left neighbour's MVs and its right
                 * two 8x8 ref indices */
                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
                const int b8_xy= 4*left_xy[LTOP] + 1;
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY32(mv_dst - 1 + 0, s->current_picture.f.motion_val[list][b_xy + b_stride*0]);
                AV_COPY32(mv_dst - 1 + 8, s->current_picture.f.motion_val[list][b_xy + b_stride*1]);
                AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride*2]);
                AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride*3]);
                ref_cache[-1 + 0]=
                ref_cache[-1 + 8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*0]];
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*1]];
            }else{
                AV_ZERO32(mv_dst - 1 + 0);
                AV_ZERO32(mv_dst - 1 + 8);
                AV_ZERO32(mv_dst - 1 +16);
                AV_ZERO32(mv_dst - 1 +24);
                ref_cache[-1 + 0]=
                ref_cache[-1 + 8]=
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= LIST_NOT_USED;
            }
        }
    }

    if(!USES_LIST(mb_type, list)){
        /* current mb itself doesn't use this list: clear the 4x4 interior */
        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        return;
    }

    {
        /* interior refs: each of the four 8x8 ref indices is replicated to
         * a 2x2 group of cache entries via the packed 32-bit writes */
        int8_t *ref = &s->current_picture.f.ref_index[list][4*mb_xy];
        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
        AV_WN32A(&ref_cache[0*8], ref01);
        AV_WN32A(&ref_cache[1*8], ref01);
        AV_WN32A(&ref_cache[2*8], ref23);
        AV_WN32A(&ref_cache[3*8], ref23);
    }

    {
        /* interior MVs: copy all four 4-MV rows of the current macroblock */
        int16_t (*mv_src)[2] = &s->current_picture.f.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
    }
}
03357
/**
 * Prepare all per-macroblock caches (neighbour indices/types, non-zero-count
 * cache, MV/ref caches) needed by the loop filter for the current mb.
 *
 * @param h       H264 decoding context (h->mb_xy selects the macroblock)
 * @param mb_type mb_type of the current macroblock
 * @return 0 if the mb needs filtering, 1 if filtering can be skipped
 *         (all relevant QPs are at or below the precomputed threshold)
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[LEFT_MBS];
    int top_type, left_type[LEFT_MBS];
    uint8_t *nnz;
    uint8_t *nnz_cache;

    /* in a field-coded mb pair the vertical neighbour is one pair (two mb
     * rows) above */
    top_xy = mb_xy - (s->mb_stride << MB_FIELD);

    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
    if(FRAME_MBAFF){
        /* adjust neighbour indices when the left pair's field flag differs
         * from the current pair's */
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[LTOP] -= s->mb_stride;
            }
        }else{
            if(curr_mb_field_flag){
                /* if the top pair is frame coded, use its bottom mb instead */
                top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[LBOT] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[LTOP] = left_xy[LTOP];
    h->left_mb_xy[LBOT] = left_xy[LBOT];
    {
        /* early-skip: if the current and neighbour QPs are all at or below
         * qp_thresh the deblocking filter cannot modify anything */
        int qp_thresh = h->qp_thresh;
        int qp = s->current_picture.f.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh)
           && (top_xy < 0 || ((qp + s->current_picture.f.qscale_table[top_xy ] + 1) >> 1) <= qp_thresh)) {
            if(!FRAME_MBAFF)
                return 1;
            /* NOTE(review): the MBAFF re-check guards on left_xy[LTOP] but
             * indexes with left_xy[LBOT] — this matches upstream, but looks
             * asymmetric; confirm against the reference decoder. */
            if ((left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LBOT] ] + 1) >> 1) <= qp_thresh) &&
                (top_xy < s->mb_stride || ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
                return 1;
        }
    }

    top_type = s->current_picture.f.mb_type[top_xy];
    left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
    left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
    if(h->deblocking_filter == 2){
        /* deblocking mode 2: neighbours from other slices are treated as
         * unavailable */
        if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
    }else{
        /* otherwise only neighbours outside any slice are unavailable */
        if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
    }
    h->top_type = top_type;
    h->left_type[LTOP]= left_type[LTOP];
    h->left_type[LBOT]= left_type[LBOT];

    /* intra macroblocks need no MV/ref caches for filtering */
    if(IS_INTRA(mb_type))
        return 0;

    fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
    if(h->list_count == 2)
        fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);

    /* non-zero-count cache: interior rows from the current mb... */
    nnz = h->non_zero_count[mb_xy];
    nnz_cache = h->non_zero_count_cache;
    AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
    AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
    AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
    AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
    h->cbp= h->cbp_table[mb_xy];

    /* ...top row from the top neighbour... */
    if(top_type){
        nnz = h->non_zero_count[top_xy];
        AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
    }

    /* ...and left column from the left neighbour */
    if(left_type[LTOP]){
        nnz = h->non_zero_count[left_xy[LTOP]];
        nnz_cache[3+8*1]= nnz[3+0*4];
        nnz_cache[3+8*2]= nnz[3+1*4];
        nnz_cache[3+8*3]= nnz[3+2*4];
        nnz_cache[3+8*4]= nnz[3+3*4];
    }

    /* CAVLC + 8x8 transform: nnz values don't reflect coded 8x8 blocks, so
     * derive them from the cbp bits instead */
    if(!CABAC && h->pps.transform_8x8_mode){
        if(IS_8x8DCT(top_type)){
            nnz_cache[4+8*0]=
            nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            nnz_cache[6+8*0]=
            nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[LTOP])){
            nnz_cache[3+8*1]=
            nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12;
        }
        if(IS_8x8DCT(left_type[LBOT])){
            nnz_cache[3+8*3]=
            nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12;
        }

        if(IS_8x8DCT(mb_type)){
            nnz_cache[scan8[0 ]]= nnz_cache[scan8[1 ]]=
            nnz_cache[scan8[2 ]]= nnz_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;

            nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
            nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
            nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
            nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    return 0;
}
03488
/**
 * Run the deblocking loop filter over macroblock columns [start_x, end_x)
 * of the current row (both rows of a pair when FRAME_MBAFF).
 *
 * Temporarily overwrites per-mb state (slice_num, list_count, chroma QPs,
 * mb_x/mb_y) from the stored tables and restores slice-level values at the
 * end.
 *
 * @param h       H264 decoding context
 * @param start_x first macroblock column to filter (inclusive)
 * @param end_x   first macroblock column NOT to filter (exclusive)
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;
    const int pixel_shift = h->pixel_shift;
    const int block_h = 16 >> s->chroma_y_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF, filter both macroblocks of the vertical pair */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                h->slice_num= h->slice_table[mb_xy];
                mb_type = s->current_picture.f.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* pixel pointers for the mb; chroma width doubles for 4:4:4 */
                dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;

                if (MB_FIELD) {
                    /* field mb: double strides; the bottom field starts one
                     * line down, so rewind to the field's first line */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize * (block_h - 1);
                        dest_cr-= s->uvlinesize * (block_h - 1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                /* fill_filter_caches() returns nonzero when the mb can be
                 * skipped entirely */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore slice-level state clobbered above */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03549
03550 static void predict_field_decoding_flag(H264Context *h){
03551 MpegEncContext * const s = &h->s;
03552 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03553 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03554 ? s->current_picture.f.mb_type[mb_xy - 1]
03555 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03556 ? s->current_picture.f.mb_type[mb_xy - s->mb_stride]
03557 : 0;
03558 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03559 }
03560
03564 static void decode_finish_row(H264Context *h){
03565 MpegEncContext * const s = &h->s;
03566 int top = 16*(s->mb_y >> FIELD_PICTURE);
03567 int height = 16 << FRAME_MBAFF;
03568 int deblock_border = (16 + 4) << FRAME_MBAFF;
03569 int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03570
03571 if (h->deblocking_filter) {
03572 if((top + height) >= pic_height)
03573 height += deblock_border;
03574
03575 top -= deblock_border;
03576 }
03577
03578 if (top >= pic_height || (top + height) < h->emu_edge_height)
03579 return;
03580
03581 height = FFMIN(height, pic_height - top);
03582 if (top < h->emu_edge_height) {
03583 height = top+height;
03584 top = 0;
03585 }
03586
03587 ff_draw_horiz_band(s, top, height);
03588
03589 if (s->dropable) return;
03590
03591 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03592 s->picture_structure==PICT_BOTTOM_FIELD);
03593 }
03594
/**
 * Decode all macroblocks of one slice (CABAC or CAVLC entropy coding).
 *
 * Runs as an avctx->execute() job: @p arg points to an H264Context pointer.
 * Loop-filters and reports each completed row as it goes.
 *
 * @return 0 on success (end of slice reached), -1 on decode error
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    /* with data partitioning only AC errors/ends are recorded here */
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x;

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* CABAC data is byte-aligned after the slice header */
        align_get_bits( &s->gb );

        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        for(;;){
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* MBAFF: decode the bottom mb of the pair before advancing */
            if( ret >= 0 && FRAME_MBAFF ) {
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* some encoders truncate the stream; treat slight overread as
             * end-of-slice when the workaround is enabled */
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            /* advance; at end of row, filter it and move to the next row
             * (two rows at a time for field/MBAFF pictures) */
            if( ++s->mb_x >= s->mb_width ) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            /* CABAC signals end of slice explicitly via the terminate bin */
            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* MBAFF: decode the bottom mb of the pair before advancing */
            if(ret>=0 && FRAME_MBAFF){
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* accept trailing bits unless error recognition is
                     * aggressive and the bit position doesn't match exactly */
                    if(   get_bits_count(&s->gb) == s->gb.size_in_bits
                       || get_bits_count(&s->gb) <  s->gb.size_in_bits && s->avctx->error_recognition < FF_ER_AGGRESSIVE) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            /* CAVLC has no explicit terminator: end of slice is running out
             * of bits with no pending skip run */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    /* overread: report the whole remainder as errored */
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
    }
}
03729
03736 static int execute_decode_slices(H264Context *h, int context_count){
03737 MpegEncContext * const s = &h->s;
03738 AVCodecContext * const avctx= s->avctx;
03739 H264Context *hx;
03740 int i;
03741
03742 if (s->avctx->hwaccel || s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03743 return 0;
03744 if(context_count == 1) {
03745 return decode_slice(avctx, &h);
03746 } else {
03747 for(i = 1; i < context_count; i++) {
03748 hx = h->thread_context[i];
03749 hx->s.error_recognition = avctx->error_recognition;
03750 hx->s.error_count = 0;
03751 hx->x264_build= h->x264_build;
03752 }
03753
03754 avctx->execute(avctx, (void *)decode_slice,
03755 h->thread_context, NULL, context_count, sizeof(void*));
03756
03757
03758 hx = h->thread_context[context_count - 1];
03759 s->mb_x = hx->s.mb_x;
03760 s->mb_y = hx->s.mb_y;
03761 s->dropable = hx->s.dropable;
03762 s->picture_structure = hx->s.picture_structure;
03763 for(i = 1; i < context_count; i++)
03764 h->s.error_count += h->thread_context[i]->s.error_count;
03765 }
03766
03767 return 0;
03768 }
03769
03770
/**
 * Split the input buffer into NAL units and decode each of them.
 *
 * Supports both Annex-B byte streams (00 00 01 start codes) and AVC
 * length-prefixed streams (h->is_avc, h->nal_length_size size field).
 *
 * With frame threading an extra scanning pass (pass 0) runs first to work
 * out how many NALs must be decoded before the frame can be handed over to
 * the next thread; pass 1 performs the actual decoding.
 *
 * @return number of bytes consumed from buf, or -1 on error
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx; /* context of the thread currently being filled */
    int buf_index;
    int context_count;
    int next_avc;    /* offset of the end of the current AVC NAL, or buf_size in Annex-B mode */
    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); /* skip pass 0 unless frame threading */
    int nals_needed=0; /* NALs required before the frame may be passed to the next thread */
    int nal_index;

    h->nal_unit_type= 0;

    /* one context per slice thread, otherwise a single context */
    h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
        h->current_slice = 0;
        if (!s->first_field)
            s->current_picture_ptr= NULL;
        ff_h264_reset_sei(h);
    }

    for(;pass <= 1;pass++){
        buf_index = 0;
        context_count = 0;
        next_avc = h->is_avc ? 0 : buf_size;
        nal_index = 0;
        for(;;){
            int consumed;
            int dst_length;
            int bit_length;
            const uint8_t *ptr;
            int i, nalsize = 0;
            int err;

            if(buf_index >= next_avc) {
                /* AVC mode: read the big-endian nal_length_size-byte size field */
                if (buf_index >= buf_size - h->nal_length_size) break;
                nalsize = 0;
                for(i = 0; i < h->nal_length_size; i++)
                    nalsize = (nalsize << 8) | buf[buf_index++];
                if(nalsize <= 0 || nalsize > buf_size - buf_index){
                    av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                    break;
                }
                next_avc= buf_index + nalsize;
            } else {
                /* Annex-B mode: scan for the next 00 00 01 start code */
                for(; buf_index + 3 < next_avc; buf_index++){
                    if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                        break;
                }

                if(buf_index+3 >= buf_size) break;

                buf_index+=3;
                if(buf_index >= next_avc) continue;
            }

            hx = h->thread_context[context_count];

            /* unescape the NAL (remove emulation-prevention bytes) */
            ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
            if (ptr==NULL || dst_length < 0){
                return -1;
            }
            i= buf_index + consumed;
            /* detect streams where an MPEG start code follows immediately,
             * which indicates truncated NAL units */
            if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
               buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
                s->workaround_bugs |= FF_BUG_TRUNCATED;

            if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
                /* strip trailing zero bytes before locating the rbsp stop bit */
                while(dst_length > 0 && ptr[dst_length - 1] == 0)
                    dst_length--;
            }
            bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));

            if(s->avctx->debug&FF_DEBUG_STARTCODE){
                av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass);
            }

            if (h->is_avc && (nalsize != consumed) && nalsize){
                av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
            }

            buf_index += consumed;
            nal_index++;

            if(pass == 0) {
                /* scanning pass: a frame handover must wait for parameter
                 * sets and for the first slice of the picture */
                switch (hx->nal_unit_type) {
                case NAL_SPS:
                case NAL_PPS:
                    nals_needed = nal_index;
                    break;
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    init_get_bits(&hx->s.gb, ptr, bit_length);
                    /* first_mb_in_slice == 0 means a new picture starts here */
                    if (!get_ue_golomb(&hx->s.gb))
                        nals_needed = nal_index;
                }
                continue;
            }

            if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
                continue;

          again:
            err = 0;
            switch(hx->nal_unit_type){
            case NAL_IDR_SLICE:
                if (h->nal_unit_type != NAL_IDR_SLICE) {
                    av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
                    return -1;
                }
                idr(h); /* IDR: clear the reference lists */
                /* fallthrough */
            case NAL_SLICE:
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr=
                hx->inter_gb_ptr= &hx->s.gb;
                hx->s.data_partitioning = 0;

                if((err = decode_slice_header(hx, h)))
                   break;

                /* track the frame at which an SEI recovery point takes effect */
                if ( h->sei_recovery_frame_cnt >= 0
                     && ( h->recovery_frame<0
                          || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt)) {
                    h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) %
                                        (1 << h->sps.log2_max_frame_num);
                }

                s->current_picture_ptr->f.key_frame |=
                        (hx->nal_unit_type == NAL_IDR_SLICE);

                if (h->recovery_frame == h->frame_num) {
                    s->current_picture_ptr->sync |= 1;
                    h->recovery_frame = -1;
                }

                h->sync |= !!s->current_picture_ptr->f.key_frame;
                h->sync |= 3*!!(s->flags2 & CODEC_FLAG2_SHOW_ALL);
                s->current_picture_ptr->sync |= h->sync;

                if (h->current_slice == 1) {
                    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
                        decode_postinit(h, nal_index >= nals_needed);
                    }

                    if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
                        return -1;
                    if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                        ff_vdpau_h264_picture_start(s);
                }

                /* decide whether this slice is actually decoded or skipped */
                if(hx->redundant_pic_count==0
                   && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                   && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                   && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
                   && avctx->skip_frame < AVDISCARD_ALL){
                    if(avctx->hwaccel) {
                        if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
                            return -1;
                    }else
                    if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
                        static const uint8_t start_code[] = {0x00, 0x00, 0x01};
                        ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
                        ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
                    }else
                        context_count++;
                }
                break;
            case NAL_DPA: /* data partition A: slice header + partitioning setup */
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr=
                hx->inter_gb_ptr= NULL;

                if ((err = decode_slice_header(hx, h)) < 0)
                    break;

                hx->s.data_partitioning = 1;

                break;
            case NAL_DPB: /* data partition B: intra residual */
                init_get_bits(&hx->intra_gb, ptr, bit_length);
                hx->intra_gb_ptr= &hx->intra_gb;
                break;
            case NAL_DPC: /* data partition C: inter residual; completes the slice */
                init_get_bits(&hx->inter_gb, ptr, bit_length);
                hx->inter_gb_ptr= &hx->inter_gb;

                if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
                   && s->context_initialized
                   && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                   && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                   && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
                   && avctx->skip_frame < AVDISCARD_ALL)
                    context_count++;
                break;
            case NAL_SEI:
                init_get_bits(&s->gb, ptr, bit_length);
                ff_h264_decode_sei(h);
                break;
            case NAL_SPS:
                init_get_bits(&s->gb, ptr, bit_length);
                if(ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? (nalsize != consumed) && nalsize : 1)){
                    /* retry with the un-unescaped bytes; some encoders store
                     * the SPS without emulation-prevention escaping */
                    av_log(h->s.avctx, AV_LOG_DEBUG, "SPS decoding failure, trying alternative mode\n");
                    if(h->is_avc) av_assert0(next_avc - buf_index + consumed == nalsize);
                    init_get_bits(&s->gb, &buf[buf_index + 1 - consumed], 8*(next_avc - buf_index + consumed));
                    ff_h264_decode_seq_parameter_set(h);
                }

                if (s->flags& CODEC_FLAG_LOW_DELAY ||
                    (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
                    s->low_delay=1;

                if(avctx->has_b_frames < 2)
                    avctx->has_b_frames= !s->low_delay;
                break;
            case NAL_PPS:
                init_get_bits(&s->gb, ptr, bit_length);

                ff_h264_decode_picture_parameter_set(h, bit_length);

                break;
            case NAL_AUD:
            case NAL_END_SEQUENCE:
            case NAL_END_STREAM:
            case NAL_FILLER_DATA:
            case NAL_SPS_EXT:
            case NAL_AUXILIARY_SLICE:
                break;
            default:
                av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
            }

            /* all slice-thread contexts filled: run them */
            if(context_count == h->max_contexts) {
                execute_decode_slices(h, context_count);
                context_count = 0;
            }

            if (err < 0)
                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            else if(err == 1) {
                /* slice started a new field/frame: re-decode this NAL in the
                 * main context */
                h->nal_unit_type = hx->nal_unit_type;
                h->nal_ref_idc   = hx->nal_ref_idc;
                hx = h;
                goto again;
            }
        }
    }
    if(context_count)
        execute_decode_slices(h, context_count);
    return buf_index;
}
04031
04035 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
04036 if(pos==0) pos=1;
04037 if(pos+10>buf_size) pos=buf_size;
04038
04039 return pos;
04040 }
04041
04042 static int decode_frame(AVCodecContext *avctx,
04043 void *data, int *data_size,
04044 AVPacket *avpkt)
04045 {
04046 const uint8_t *buf = avpkt->data;
04047 int buf_size = avpkt->size;
04048 H264Context *h = avctx->priv_data;
04049 MpegEncContext *s = &h->s;
04050 AVFrame *pict = data;
04051 int buf_index;
04052 Picture *out;
04053 int i, out_idx;
04054
04055 s->flags= avctx->flags;
04056 s->flags2= avctx->flags2;
04057
04058
04059 if (buf_size == 0) {
04060 out:
04061
04062 s->current_picture_ptr = NULL;
04063
04064
04065 out = h->delayed_pic[0];
04066 out_idx = 0;
04067 for (i = 1; h->delayed_pic[i] && !h->delayed_pic[i]->f.key_frame && !h->delayed_pic[i]->mmco_reset; i++)
04068 if(h->delayed_pic[i]->poc < out->poc){
04069 out = h->delayed_pic[i];
04070 out_idx = i;
04071 }
04072
04073 for(i=out_idx; h->delayed_pic[i]; i++)
04074 h->delayed_pic[i] = h->delayed_pic[i+1];
04075
04076 if(out){
04077 *data_size = sizeof(AVFrame);
04078 *pict= *(AVFrame*)out;
04079 }
04080
04081 return buf_size;
04082 }
04083 if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
04084 int cnt= buf[5]&0x1f;
04085 uint8_t *p= buf+6;
04086 while(cnt--){
04087 int nalsize= AV_RB16(p) + 2;
04088 if(nalsize > buf_size - (p-buf) || p[2]!=0x67)
04089 goto not_extra;
04090 p += nalsize;
04091 }
04092 cnt = *(p++);
04093 if(!cnt)
04094 goto not_extra;
04095 while(cnt--){
04096 int nalsize= AV_RB16(p) + 2;
04097 if(nalsize > buf_size - (p-buf) || p[2]!=0x68)
04098 goto not_extra;
04099 p += nalsize;
04100 }
04101
04102 return ff_h264_decode_extradata(h, buf, buf_size);
04103 }
04104 not_extra:
04105
04106 buf_index=decode_nal_units(h, buf, buf_size);
04107 if(buf_index < 0)
04108 return -1;
04109
04110 if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
04111 av_assert0(buf_index <= buf_size);
04112 buf_size = buf_index;
04113 goto out;
04114 }
04115
04116 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
04117 if (avctx->skip_frame >= AVDISCARD_NONREF ||
04118 buf_size >= 4 && !memcmp("Q264", buf, 4))
04119 return buf_size;
04120 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
04121 return -1;
04122 }
04123
04124 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
04125
04126 if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);
04127
04128 field_end(h, 0);
04129
04130 *data_size = 0;
04131 if (h->next_output_pic && (h->next_output_pic->sync || h->sync>1)) {
04132 *data_size = sizeof(AVFrame);
04133 *pict = *(AVFrame*)h->next_output_pic;
04134 }
04135 }
04136
04137 assert(pict->data[0] || !*data_size);
04138 ff_print_debug_info(s, pict);
04139
04140
04141 return get_consumed_bytes(s, buf_index, buf_size);
04142 }
#if 0
/*
 * NOTE(review): dead code, compiled out with #if 0 and unused elsewhere in
 * this file; kept for reference only.
 *
 * Fills h->mb_avail[] with availability flags for the neighbours of the
 * current macroblock (top-left, top, top-right, left), where a neighbour is
 * available only if it lies inside the picture and belongs to the same
 * slice (h->slice_num) according to h->slice_table[].
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num; /* top-left */
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num; /* top */
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num; /* top-right */
    }else{
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0; /* first row: no neighbours above */
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; /* left */
    h->mb_avail[4]= 1; /* current MB itself */
    h->mb_avail[5]= 0;
}
#endif
04162
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
extern AVCodec ff_h264_decoder;
/**
 * Standalone self-test / micro-benchmark for the Exp-Golomb bitstream
 * routines: writes COUNT codes with set_ue_golomb()/set_se_golomb(), reads
 * them back with get_ue_golomb()/get_se_golomb(), reports any mismatch and
 * per-call timings via the START_TIMER/STOP_TIMER macros.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;

    DSPContext dsp;
    AVCodecContext avctx;

    avcodec_get_context_defaults3(&avctx, &ff_h264_decoder);

    dsputil_init(&dsp, &avctx);

    /* round-trip unsigned Exp-Golomb codes 0..COUNT-1 */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24); /* peek at the raw bits for the error message */

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        }
        STOP_TIMER("get_ue_golomb");
    }

    /* round-trip signed Exp-Golomb codes -COUNT/2..COUNT/2-1 */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("Testing RBSP\n");

    return 0;
}
#endif
04237
04238
04239 av_cold void ff_h264_free_context(H264Context *h)
04240 {
04241 int i;
04242
04243 free_tables(h, 1);
04244
04245 for(i = 0; i < MAX_SPS_COUNT; i++)
04246 av_freep(h->sps_buffers + i);
04247
04248 for(i = 0; i < MAX_PPS_COUNT; i++)
04249 av_freep(h->pps_buffers + i);
04250 }
04251
04252 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04253 {
04254 H264Context *h = avctx->priv_data;
04255 MpegEncContext *s = &h->s;
04256
04257 ff_h264_remove_all_refs(h);
04258 ff_h264_free_context(h);
04259
04260 MPV_common_end(s);
04261
04262
04263
04264 return 0;
04265 }
04266
/* Human-readable names for the H.264 profiles this decoder can report;
 * terminated by the FF_PROFILE_UNKNOWN sentinel. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04283
/* Private decoder options: allow callers to force AVC (length-prefixed)
 * NAL parsing and to set the size of the NAL length field (0..4 bytes). */
static const AVOption h264_options[] = {
    {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, 0},
    {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 4, 0},
    {NULL}
};
04289
/* AVClass for the software decoder; exposes h264_options via AVOption. */
static const AVClass h264_class = {
    "H264 Decoder",
    av_default_item_name,
    h264_options,
    LIBAVUTIL_VERSION_INT,
};
04296
/* AVClass for the VDPAU-accelerated decoder; shares h264_options. */
static const AVClass h264_vdpau_class = {
    "H264 VDPAU Decoder",
    av_default_item_name,
    h264_options,
    LIBAVUTIL_VERSION_INT,
};
04303
/* Software H.264 / AVC decoder registration. */
AVCodec ff_h264_decoder = {
    .name           = "h264",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    /* CODEC_CAP_DELAY: pictures are reordered, so output lags input */
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                      CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class = &h264_class,
};
04321
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated H.264 decoder registration; bitstream is forwarded to
 * the hardware via the PIX_FMT_VDPAU_H264 pixel format. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class = &h264_vdpau_class,
};
#endif