FFmpeg: libavcodec/snow.c Source File

00001 /*
00002  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "libavutil/intmath.h"
00022 #include "libavutil/log.h"
00023 #include "libavutil/opt.h"
00024 #include "avcodec.h"
00025 #include "dsputil.h"
00026 #include "dwt.h"
00027 #include "internal.h"
00028 #include "snow.h"
00029 #include "snowdata.h"
00030 
00031 #include "rangecoder.h"
00032 #include "mathops.h"
00033 #include "h263.h"
00034 
00035 
00036 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
00037                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
00038     int y, x;
00039     IDWTELEM * dst;
00040     for(y=0; y<b_h; y++){
00041         //FIXME ugly misuse of obmc_stride
00042         const uint8_t *obmc1= obmc + y*obmc_stride;
00043         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
00044         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
00045         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
00046         dst = slice_buffer_get_line(sb, src_y + y);
00047         for(x=0; x<b_w; x++){
00048             int v=   obmc1[x] * block[3][x + y*src_stride]
00049                     +obmc2[x] * block[2][x + y*src_stride]
00050                     +obmc3[x] * block[1][x + y*src_stride]
00051                     +obmc4[x] * block[0][x + y*src_stride];
00052 
00053             v <<= 8 - LOG2_OBMC_MAX;
00054             if(FRAC_BITS != 8){
00055                 v >>= 8 - FRAC_BITS;
00056             }
00057             if(add){
00058                 v += dst[x + src_x];
00059                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
00060                 if(v&(~255)) v= ~(v>>31);
00061                 dst8[x + y*src_stride] = v;
00062             }else{
00063                 dst[x + src_x] -= v;
00064             }
00065         }
00066     }
00067 }
00068 
00069 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
00070     int plane_index, level, orientation;
00071 
00072     for(plane_index=0; plane_index<3; plane_index++){
00073         for(level=0; level<MAX_DECOMPOSITIONS; level++){
00074             for(orientation=level ? 1:0; orientation<4; orientation++){
00075                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
00076             }
00077         }
00078     }
00079     memset(s->header_state, MID_STATE, sizeof(s->header_state));
00080     memset(s->block_state, MID_STATE, sizeof(s->block_state));
00081 }
00082 
00083 int ff_snow_alloc_blocks(SnowContext *s){
00084     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
00085     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
00086 
00087     s->b_width = w;
00088     s->b_height= h;
00089 
00090     av_free(s->block);
00091     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
00092     return 0;
00093 }
00094 
00095 static void init_qexp(void){
00096     int i;
00097     double v=128;
00098 
00099     for(i=0; i<QROOT; i++){
00100         ff_qexp[i]= lrintf(v);
00101         v *= pow(2, 1.0 / QROOT);
00102     }
00103 }
00104 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
00105     static const uint8_t weight[64]={
00106     8,7,6,5,4,3,2,1,
00107     7,7,0,0,0,0,0,1,
00108     6,0,6,0,0,0,2,0,
00109     5,0,0,5,0,3,0,0,
00110     4,0,0,0,4,0,0,0,
00111     3,0,0,5,0,3,0,0,
00112     2,0,6,0,0,0,2,0,
00113     1,7,0,0,0,0,0,1,
00114     };
00115 
00116     static const uint8_t brane[256]={
00117     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00118     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
00119     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
00120     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
00121     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
00122     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
00123     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
00124     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
00125     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
00126     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
00127     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
00128     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
00129     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
00130     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
00131     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
00132     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
00133     };
00134 
00135     static const uint8_t needs[16]={
00136     0,1,0,0,
00137     2,4,2,0,
00138     0,1,0,0,
00139     15
00140     };
00141 
00142     int x, y, b, r, l;
00143     int16_t tmpIt   [64*(32+HTAPS_MAX)];
00144     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
00145     int16_t *tmpI= tmpIt;
00146     uint8_t *tmp2= tmp2t[0];
00147     const uint8_t *hpel[11];
00148     av_assert2(dx<16 && dy<16);
00149     r= brane[dx + 16*dy]&15;
00150     l= brane[dx + 16*dy]>>4;
00151 
00152     b= needs[l] | needs[r];
00153     if(p && !p->diag_mc)
00154         b= 15;
00155 
00156     if(b&5){
00157         for(y=0; y < b_h+HTAPS_MAX-1; y++){
00158             for(x=0; x < b_w; x++){
00159                 int a_1=src[x + HTAPS_MAX/2-4];
00160                 int a0= src[x + HTAPS_MAX/2-3];
00161                 int a1= src[x + HTAPS_MAX/2-2];
00162                 int a2= src[x + HTAPS_MAX/2-1];
00163                 int a3= src[x + HTAPS_MAX/2+0];
00164                 int a4= src[x + HTAPS_MAX/2+1];
00165                 int a5= src[x + HTAPS_MAX/2+2];
00166                 int a6= src[x + HTAPS_MAX/2+3];
00167                 int am=0;
00168                 if(!p || p->fast_mc){
00169                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
00170                     tmpI[x]= am;
00171                     am= (am+16)>>5;
00172                 }else{
00173                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
00174                     tmpI[x]= am;
00175                     am= (am+32)>>6;
00176                 }
00177 
00178                 if(am&(~255)) am= ~(am>>31);
00179                 tmp2[x]= am;
00180             }
00181             tmpI+= 64;
00182             tmp2+= 64;
00183             src += stride;
00184         }
00185         src -= stride*y;
00186     }
00187     src += HTAPS_MAX/2 - 1;
00188     tmp2= tmp2t[1];
00189 
00190     if(b&2){
00191         for(y=0; y < b_h; y++){
00192             for(x=0; x < b_w+1; x++){
00193                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
00194                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
00195                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
00196                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
00197                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
00198                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
00199                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
00200                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
00201                 int am=0;
00202                 if(!p || p->fast_mc)
00203                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
00204                 else
00205                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
00206 
00207                 if(am&(~255)) am= ~(am>>31);
00208                 tmp2[x]= am;
00209             }
00210             src += stride;
00211             tmp2+= 64;
00212         }
00213         src -= stride*y;
00214     }
00215     src += stride*(HTAPS_MAX/2 - 1);
00216     tmp2= tmp2t[2];
00217     tmpI= tmpIt;
00218     if(b&4){
00219         for(y=0; y < b_h; y++){
00220             for(x=0; x < b_w; x++){
00221                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
00222                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
00223                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
00224                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
00225                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
00226                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
00227                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
00228                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
00229                 int am=0;
00230                 if(!p || p->fast_mc)
00231                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
00232                 else
00233                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
00234                 if(am&(~255)) am= ~(am>>31);
00235                 tmp2[x]= am;
00236             }
00237             tmpI+= 64;
00238             tmp2+= 64;
00239         }
00240     }
00241 
00242     hpel[ 0]= src;
00243     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
00244     hpel[ 2]= src + 1;
00245 
00246     hpel[ 4]= tmp2t[1];
00247     hpel[ 5]= tmp2t[2];
00248     hpel[ 6]= tmp2t[1] + 1;
00249 
00250     hpel[ 8]= src + stride;
00251     hpel[ 9]= hpel[1] + 64;
00252     hpel[10]= hpel[8] + 1;
00253 
00254 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
00255 
00256     if(b==15){
00257         int dxy = dx / 8 + dy / 8 * 4;
00258         const uint8_t *src1 = hpel[dxy    ];
00259         const uint8_t *src2 = hpel[dxy + 1];
00260         const uint8_t *src3 = hpel[dxy + 4];
00261         const uint8_t *src4 = hpel[dxy + 5];
00262         int stride1 = MC_STRIDE(dxy);
00263         int stride2 = MC_STRIDE(dxy + 1);
00264         int stride3 = MC_STRIDE(dxy + 4);
00265         int stride4 = MC_STRIDE(dxy + 5);
00266         dx&=7;
00267         dy&=7;
00268         for(y=0; y < b_h; y++){
00269             for(x=0; x < b_w; x++){
00270                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
00271                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
00272             }
00273             src1+=stride1;
00274             src2+=stride2;
00275             src3+=stride3;
00276             src4+=stride4;
00277             dst +=stride;
00278         }
00279     }else{
00280         const uint8_t *src1= hpel[l];
00281         const uint8_t *src2= hpel[r];
00282         int stride1 = MC_STRIDE(l);
00283         int stride2 = MC_STRIDE(r);
00284         int a= weight[((dx&7) + (8*(dy&7)))];
00285         int b= 8-a;
00286         for(y=0; y < b_h; y++){
00287             for(x=0; x < b_w; x++){
00288                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
00289             }
00290             src1+=stride1;
00291             src2+=stride2;
00292             dst +=stride;
00293         }
00294     }
00295 }
00296 
00297 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
00298     if(block->type & BLOCK_INTRA){
00299         int x, y;
00300         const unsigned color  = block->color[plane_index];
00301         const unsigned color4 = color*0x01010101;
00302         if(b_w==32){
00303             for(y=0; y < b_h; y++){
00304                 *(uint32_t*)&dst[0 + y*stride]= color4;
00305                 *(uint32_t*)&dst[4 + y*stride]= color4;
00306                 *(uint32_t*)&dst[8 + y*stride]= color4;
00307                 *(uint32_t*)&dst[12+ y*stride]= color4;
00308                 *(uint32_t*)&dst[16+ y*stride]= color4;
00309                 *(uint32_t*)&dst[20+ y*stride]= color4;
00310                 *(uint32_t*)&dst[24+ y*stride]= color4;
00311                 *(uint32_t*)&dst[28+ y*stride]= color4;
00312             }
00313         }else if(b_w==16){
00314             for(y=0; y < b_h; y++){
00315                 *(uint32_t*)&dst[0 + y*stride]= color4;
00316                 *(uint32_t*)&dst[4 + y*stride]= color4;
00317                 *(uint32_t*)&dst[8 + y*stride]= color4;
00318                 *(uint32_t*)&dst[12+ y*stride]= color4;
00319             }
00320         }else if(b_w==8){
00321             for(y=0; y < b_h; y++){
00322                 *(uint32_t*)&dst[0 + y*stride]= color4;
00323                 *(uint32_t*)&dst[4 + y*stride]= color4;
00324             }
00325         }else if(b_w==4){
00326             for(y=0; y < b_h; y++){
00327                 *(uint32_t*)&dst[0 + y*stride]= color4;
00328             }
00329         }else{
00330             for(y=0; y < b_h; y++){
00331                 for(x=0; x < b_w; x++){
00332                     dst[x + y*stride]= color;
00333                 }
00334             }
00335         }
00336     }else{
00337         uint8_t *src= s->last_picture[block->ref].data[plane_index];
00338         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
00339         int mx= block->mx*scale;
00340         int my= block->my*scale;
00341         const int dx= mx&15;
00342         const int dy= my&15;
00343         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
00344         sx += (mx>>4) - (HTAPS_MAX/2-1);
00345         sy += (my>>4) - (HTAPS_MAX/2-1);
00346         src += sx + sy*stride;
00347         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
00348            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
00349             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
00350             src= tmp + MB_SIZE;
00351         }
00352 
00353         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
00354 
00355 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
00356 //        assert(!(b_w&(b_w-1)));
00357         av_assert2(b_w>1 && b_h>1);
00358         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
00359         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
00360             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
00361         else if(b_w==32){
00362             int y;
00363             for(y=0; y<b_h; y+=16){
00364                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
00365                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
00366             }
00367         }else if(b_w==b_h)
00368             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
00369         else if(b_w==2*b_h){
00370             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
00371             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
00372         }else{
00373             av_assert2(2*b_w==b_h);
00374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
00375             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
00376         }
00377     }
00378 }
00379 
00380 #define mca(dx,dy,b_w)\
00381 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
00382     av_assert2(h==b_w);\
00383     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
00384 }
00385 
00386 mca( 0, 0,16)
00387 mca( 8, 0,16)
00388 mca( 0, 8,16)
00389 mca( 8, 8,16)
00390 mca( 0, 0,8)
00391 mca( 8, 0,8)
00392 mca( 0, 8,8)
00393 mca( 8, 8,8)
00394 
00395 av_cold int ff_snow_common_init(AVCodecContext *avctx){
00396     SnowContext *s = avctx->priv_data;
00397     int width, height;
00398     int i, j;
00399 
00400     s->avctx= avctx;
00401     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
00402 
00403     ff_dsputil_init(&s->dsp, avctx);
00404     ff_dwt_init(&s->dwt);
00405 
00406 #define mcf(dx,dy)\
00407     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
00408     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
00409         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
00410     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
00411     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
00412         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
00413 
00414     mcf( 0, 0)
00415     mcf( 4, 0)
00416     mcf( 8, 0)
00417     mcf(12, 0)
00418     mcf( 0, 4)
00419     mcf( 4, 4)
00420     mcf( 8, 4)
00421     mcf(12, 4)
00422     mcf( 0, 8)
00423     mcf( 4, 8)
00424     mcf( 8, 8)
00425     mcf(12, 8)
00426     mcf( 0,12)
00427     mcf( 4,12)
00428     mcf( 8,12)
00429     mcf(12,12)
00430 
00431 #define mcfh(dx,dy)\
00432     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
00433     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
00434         mc_block_hpel ## dx ## dy ## 16;\
00435     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
00436     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
00437         mc_block_hpel ## dx ## dy ## 8;
00438 
00439     mcfh(0, 0)
00440     mcfh(8, 0)
00441     mcfh(0, 8)
00442     mcfh(8, 8)
00443 
00444     init_qexp();
00445 
00446 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
00447 
00448     width= s->avctx->width;
00449     height= s->avctx->height;
00450 
00451     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
00452     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
00453     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
00454     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
00455     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
00456 
00457     for(i=0; i<MAX_REF_FRAMES; i++)
00458         for(j=0; j<MAX_REF_FRAMES; j++)
00459             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
00460 
00461     return 0;
00462 fail:
00463     return AVERROR(ENOMEM);
00464 }
00465 
00466 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
00467     SnowContext *s = avctx->priv_data;
00468     int plane_index, level, orientation;
00469     int ret, emu_buf_size;
00470 
00471     if(!s->scratchbuf) {
00472         if ((ret = ff_get_buffer(s->avctx, &s->mconly_picture)) < 0) {
00473             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00474             return ret;
00475         }
00476         FF_ALLOCZ_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256)*7*MB_SIZE, fail);
00477         emu_buf_size = FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
00478         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
00479     }
00480 
00481     if(s->mconly_picture.format != avctx->pix_fmt) {
00482         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
00483         return AVERROR_INVALIDDATA;
00484     }
00485 
00486     for(plane_index=0; plane_index<3; plane_index++){
00487         int w= s->avctx->width;
00488         int h= s->avctx->height;
00489 
00490         if(plane_index){
00491             w>>= s->chroma_h_shift;
00492             h>>= s->chroma_v_shift;
00493         }
00494         s->plane[plane_index].width = w;
00495         s->plane[plane_index].height= h;
00496 
00497         for(level=s->spatial_decomposition_count-1; level>=0; level--){
00498             for(orientation=level ? 1 : 0; orientation<4; orientation++){
00499                 SubBand *b= &s->plane[plane_index].band[level][orientation];
00500 
00501                 b->buf= s->spatial_dwt_buffer;
00502                 b->level= level;
00503                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
00504                 b->width = (w + !(orientation&1))>>1;
00505                 b->height= (h + !(orientation>1))>>1;
00506 
00507                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
00508                 b->buf_x_offset = 0;
00509                 b->buf_y_offset = 0;
00510 
00511                 if(orientation&1){
00512                     b->buf += (w+1)>>1;
00513                     b->buf_x_offset = (w+1)>>1;
00514                 }
00515                 if(orientation>1){
00516                     b->buf += b->stride>>1;
00517                     b->buf_y_offset = b->stride_line >> 1;
00518                 }
00519                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
00520 
00521                 if(level)
00522                     b->parent= &s->plane[plane_index].band[level-1][orientation];
00523                 //FIXME avoid this realloc
00524                 av_freep(&b->x_coeff);
00525                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
00526             }
00527             w= (w+1)>>1;
00528             h= (h+1)>>1;
00529         }
00530     }
00531 
00532     return 0;
00533 fail:
00534     return AVERROR(ENOMEM);
00535 }
00536 
00537 #define USE_HALFPEL_PLANE 0
00538 
00539 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
00540     int p,x,y;
00541 
00542     for(p=0; p<3; p++){
00543         int is_chroma= !!p;
00544         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
00545         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
00546         int ls= frame->linesize[p];
00547         uint8_t *src= frame->data[p];
00548 
00549         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
00550         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
00551         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
00552 
00553         halfpel[0][p]= src;
00554         for(y=0; y<h; y++){
00555             for(x=0; x<w; x++){
00556                 int i= y*ls + x;
00557 
00558                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
00559             }
00560         }
00561         for(y=0; y<h; y++){
00562             for(x=0; x<w; x++){
00563                 int i= y*ls + x;
00564 
00565                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
00566             }
00567         }
00568         src= halfpel[1][p];
00569         for(y=0; y<h; y++){
00570             for(x=0; x<w; x++){
00571                 int i= y*ls + x;
00572 
00573                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
00574             }
00575         }
00576 
00577 //FIXME border!
00578     }
00579 }
00580 
00581 void ff_snow_release_buffer(AVCodecContext *avctx)
00582 {
00583     SnowContext *s = avctx->priv_data;
00584     int i;
00585 
00586     if(s->last_picture[s->max_ref_frames-1].data[0]){
00587         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
00588         for(i=0; i<9; i++)
00589             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
00590                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
00591     }
00592 }
00593 
00594 int ff_snow_frame_start(SnowContext *s){
00595    AVFrame tmp;
00596    int w= s->avctx->width; //FIXME round up to x16 ?
00597    int h= s->avctx->height;
00598 
00599     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
00600         s->dsp.draw_edges(s->current_picture.data[0],
00601                           s->current_picture.linesize[0], w   , h   ,
00602                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
00603         s->dsp.draw_edges(s->current_picture.data[1],
00604                           s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
00605                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
00606         s->dsp.draw_edges(s->current_picture.data[2],
00607                           s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
00608                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
00609     }
00610 
00611     ff_snow_release_buffer(s->avctx);
00612 
00613     tmp= s->last_picture[s->max_ref_frames-1];
00614     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
00615     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
00616     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
00617         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
00618     s->last_picture[0]= s->current_picture;
00619     s->current_picture= tmp;
00620 
00621     if(s->keyframe){
00622         s->ref_frames= 0;
00623     }else{
00624         int i;
00625         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
00626             if(i && s->last_picture[i-1].key_frame)
00627                 break;
00628         s->ref_frames= i;
00629         if(s->ref_frames==0){
00630             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
00631             return -1;
00632         }
00633     }
00634 
00635     s->current_picture.reference= 3;
00636     if(ff_get_buffer(s->avctx, &s->current_picture) < 0){
00637         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00638         return -1;
00639     }
00640 
00641     s->current_picture.key_frame= s->keyframe;
00642 
00643     return 0;
00644 }
00645 
00646 av_cold void ff_snow_common_end(SnowContext *s)
00647 {
00648     int plane_index, level, orientation, i;
00649 
00650     av_freep(&s->spatial_dwt_buffer);
00651     av_freep(&s->temp_dwt_buffer);
00652     av_freep(&s->spatial_idwt_buffer);
00653     av_freep(&s->temp_idwt_buffer);
00654     av_freep(&s->run_buffer);
00655 
00656     s->m.me.temp= NULL;
00657     av_freep(&s->m.me.scratchpad);
00658     av_freep(&s->m.me.map);
00659     av_freep(&s->m.me.score_map);
00660     av_freep(&s->m.obmc_scratchpad);
00661 
00662     av_freep(&s->block);
00663     av_freep(&s->scratchbuf);
00664     av_freep(&s->emu_edge_buffer);
00665 
00666     for(i=0; i<MAX_REF_FRAMES; i++){
00667         av_freep(&s->ref_mvs[i]);
00668         av_freep(&s->ref_scores[i]);
00669         if(s->last_picture[i].data[0]) {
00670             av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
00671             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
00672         }
00673     }
00674 
00675     for(plane_index=0; plane_index<3; plane_index++){
00676         for(level=s->spatial_decomposition_count-1; level>=0; level--){
00677             for(orientation=level ? 1 : 0; orientation<4; orientation++){
00678                 SubBand *b= &s->plane[plane_index].band[level][orientation];
00679 
00680                 av_freep(&b->x_coeff);
00681             }
00682         }
00683     }
00684     if (s->mconly_picture.data[0])
00685         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
00686     if (s->current_picture.data[0])
00687         s->avctx->release_buffer(s->avctx, &s->current_picture);
00688 }