00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "avcodec.h"
00029 #include "mpegvideo.h"
00030 #include "h264pred.h"
00031
/**
 * 4x4 vertical intra prediction.
 * Replicates the four pixels directly above the block into each of the
 * block's four rows.
 *
 * @param src      top-left pixel of the block; rows are accessed through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t above = *(const uint32_t*)(src - stride);
    int row;

    for(row = 0; row < 4; row++)
        *(uint32_t*)(src + row*stride) = above;
}
00039
/**
 * 4x4 horizontal intra prediction.
 * Fills every row of the block with the pixel immediately to its left.
 *
 * @param src      top-left pixel of the block; rows are written through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int row;

    for(row = 0; row < 4; row++){
        uint8_t *line = src + row*stride;
        /* Unsigned multiply: the pixel is promoted to int, and an int product
         * with 0x01010101 would overflow for pixel values >= 128. */
        *(uint32_t*)line = line[-1] * 0x01010101U;
    }
}
00046
/**
 * 4x4 DC intra prediction from both the top and the left neighbours.
 * The block is filled with the rounded mean of the four pixels above and the
 * four pixels to the left.
 *
 * @param src      top-left pixel of the block; rows are written through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum = 4; /* rounding term for the division by 8 */
    int i;
    uint32_t fill;

    for(i = 0; i < 4; i++)
        sum += src[i - stride] + src[-1 + i*stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = (sum >> 3) * 0x01010101U;

    for(i = 0; i < 4; i++)
        *(uint32_t*)(src + i*stride) = fill;
}
00056
/**
 * 4x4 DC intra prediction from the left neighbours only.
 * The block is filled with the rounded mean of the four pixels to its left.
 *
 * @param src      top-left pixel of the block; rows are written through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum = 2; /* rounding term for the division by 4 */
    int i;
    uint32_t fill;

    for(i = 0; i < 4; i++)
        sum += src[-1 + i*stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = (sum >> 2) * 0x01010101U;

    for(i = 0; i < 4; i++)
        *(uint32_t*)(src + i*stride) = fill;
}
00065
/**
 * 4x4 DC intra prediction from the top neighbours only.
 * The block is filled with the rounded mean of the four pixels above it.
 *
 * @param src      top-left pixel of the block; rows are written through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum = 2; /* rounding term for the division by 4 */
    int i;
    uint32_t fill;

    for(i = 0; i < 4; i++)
        sum += src[i - stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = (sum >> 2) * 0x01010101U;

    for(i = 0; i < 4; i++)
        *(uint32_t*)(src + i*stride) = fill;
}
00074
/**
 * 4x4 DC intra prediction without any neighbours.
 * Fills the whole block with the constant value 128.
 *
 * @param src      top-left pixel of the block; rows are written through
 *                 uint32_t pointers
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t grey = 0x80808080U; /* four pixels of value 128 */
    int row;

    for(row = 0; row < 4; row++)
        *(uint32_t*)(src + row*stride) = grey;
}
00081
00082
/* Edge-loading helpers for the 4x4 predictors below. Each macro expands to
 * four local "const int" declarations and relies on identifiers (src, stride,
 * topright) being in scope at the point of use. av_unused is defined outside
 * this file; presumably it silences unused-variable warnings - TODO confirm. */

/* Declares t4..t7 from topright[0..3]. */
00083 #define LOAD_TOP_RIGHT_EDGE\
00084 const int av_unused t4= topright[0];\
00085 const int av_unused t5= topright[1];\
00086 const int av_unused t6= topright[2];\
00087 const int av_unused t7= topright[3];\
00088
/* Declares l4..l7 from the column left of the block, rows 4..7 (i.e. the
 * four pixels below the block's own left edge). */
00089 #define LOAD_DOWN_LEFT_EDGE\
00090 const int av_unused l4= src[-1+4*stride];\
00091 const int av_unused l5= src[-1+5*stride];\
00092 const int av_unused l6= src[-1+6*stride];\
00093 const int av_unused l7= src[-1+7*stride];\
00094
/* Declares l0..l3 from the column left of the block, rows 0..3. */
00095 #define LOAD_LEFT_EDGE\
00096 const int av_unused l0= src[-1+0*stride];\
00097 const int av_unused l1= src[-1+1*stride];\
00098 const int av_unused l2= src[-1+2*stride];\
00099 const int av_unused l3= src[-1+3*stride];\
00100
/* Declares t0..t3 from the row above the block, columns 0..3. */
00101 #define LOAD_TOP_EDGE\
00102 const int av_unused t0= src[ 0-1*stride];\
00103 const int av_unused t1= src[ 1-1*stride];\
00104 const int av_unused t2= src[ 2-1*stride];\
00105 const int av_unused t3= src[ 3-1*stride];\
00106
/**
 * 4x4 diagonal down-right intra prediction.
 * Every pixel on a given top-left to bottom-right diagonal receives the same
 * 3-tap (1,2,1) filtered value of the neighbouring edge pixels.
 *
 * @param src      top-left pixel of the block
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* edge[0..3] = left column bottom-to-top, edge[4] = top-left corner,
     * edge[5..8] = top row left-to-right */
    int edge[9];
    int x, y, k;

    for(k = 0; k < 4; k++){
        edge[3 - k] = src[-1 + k*stride];
        edge[5 + k] = src[ k - stride];
    }
    edge[4] = src[-1 - stride];

    for(y = 0; y < 4; y++){
        for(x = 0; x < 4; x++){
            const int d = x - y + 3; /* diagonal index, 0 = bottom-left */
            src[x + y*stride] = (edge[d] + 2*edge[d+1] + edge[d+2] + 2) >> 2;
        }
    }
}
00129
/**
 * 4x4 diagonal down-left intra prediction.
 * Pixels on the same bottom-left to top-right diagonal share one value, a
 * 3-tap (1,2,1) filter of the top and top-right neighbours.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int top[8];  /* top[0..3] above the block, top[4..7] from topright */
    int diag[7]; /* one predicted value per anti-diagonal */
    int x, y, k;

    for(k = 0; k < 4; k++){
        top[k]     = src[k - stride];
        top[4 + k] = topright[k];
    }

    for(k = 0; k < 6; k++)
        diag[k] = (top[k] + 2*top[k+1] + top[k+2] + 2) >> 2;
    /* the last diagonal has no pixel beyond top[7], so top[7] is weighted 3 */
    diag[6] = (top[6] + 3*top[7] + 2) >> 2;

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = diag[x + y];
}
00152
/**
 * SVQ3 variant of the 4x4 diagonal down-left intra prediction.
 * Only three distinct values are produced: one for the top-left pixel, one
 * for its two direct neighbours on the next anti-diagonal, and one for the
 * rest of the block. Each is the truncated mean of one left and one top pixel.
 *
 * @param src      top-left pixel of the block
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
    const int first  = (src[-1 + 1*stride] + src[1 - stride]) >> 1;
    const int second = (src[-1 + 2*stride] + src[2 - stride]) >> 1;
    const int rest   = (src[-1 + 3*stride] + src[3 - stride]) >> 1;
    int x, y;

    for(y = 0; y < 4; y++){
        for(x = 0; x < 4; x++){
            const int d = x + y; /* anti-diagonal index */
            src[x + y*stride] = d == 0 ? first : d == 1 ? second : rest;
        }
    }
}
00176
/**
 * RV40 variant of the 4x4 diagonal down-left intra prediction.
 * Each anti-diagonal value averages a (1,2,1) filter of the top/top-right row
 * with the same filter applied to the left/down-left column.
 *
 * @param src      top-left pixel of the block; the eight pixels left of rows
 *                 0..7 are read
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_down_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){
    int top[8], left[8], diag[7];
    int x, y, k;

    for(k = 0; k < 4; k++){
        top[k]     = src[k - stride];
        top[4 + k] = topright[k];
    }
    for(k = 0; k < 8; k++)
        left[k] = src[-1 + k*stride];

    for(k = 0; k < 6; k++)
        diag[k] = (  top[k]  + top[k+2]  + 2*top[k+1]  + 2
                   + left[k] + left[k+2] + 2*left[k+1] + 2) >> 3;
    diag[6] = (top[6] + top[7] + 1 + left[6] + left[7] + 1) >> 2;

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = diag[x + y];
}
00200
/**
 * RV40 4x4 diagonal down-left intra prediction without down-left neighbours.
 * Same computation as the full RV40 variant, except that the pixels below the
 * block's left edge are not read; the last left pixel stands in for them.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
    int top[8], left[8], diag[7];
    int x, y, k;

    for(k = 0; k < 4; k++){
        top[k]     = src[k - stride];
        top[4 + k] = topright[k];
        left[k]    = src[-1 + k*stride];
    }
    /* substitute the last available left pixel for the missing rows 4..7 */
    for(k = 4; k < 8; k++)
        left[k] = left[3];

    for(k = 0; k < 6; k++)
        diag[k] = (  top[k]  + top[k+2]  + 2*top[k+1]  + 2
                   + left[k] + left[k+2] + 2*left[k+1] + 2) >> 3;
    diag[6] = (top[6] + top[7] + 1 + left[6] + left[7] + 1) >> 2;

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = diag[x + y];
}
00223
/**
 * 4x4 vertical-right intra prediction.
 * Rows 0 and 2 use 2-tap averages of the top edge, rows 1 and 3 use 3-tap
 * (1,2,1) filtered values; rows 2 and 3 are the rows above shifted right by
 * one pixel, with the first column derived from the left edge.
 *
 * @param src      top-left pixel of the block
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt = src[-1 - stride];
    const int t0 = src[0 - stride], t1 = src[1 - stride];
    const int t2 = src[2 - stride], t3 = src[3 - stride];
    const int l0 = src[-1], l1 = src[-1 + stride], l2 = src[-1 + 2*stride];

    /* 2-tap averages along the top edge */
    const int avg0 = (lt + t0 + 1) >> 1;
    const int avg1 = (t0 + t1 + 1) >> 1;
    const int avg2 = (t1 + t2 + 1) >> 1;
    const int avg3 = (t2 + t3 + 1) >> 1;
    /* 3-tap filtered values along the top edge */
    const int flt0 = (l0 + 2*lt + t0 + 2) >> 2;
    const int flt1 = (lt + 2*t0 + t1 + 2) >> 2;
    const int flt2 = (t0 + 2*t1 + t2 + 2) >> 2;
    const int flt3 = (t1 + 2*t2 + t3 + 2) >> 2;
    /* first column of the two lower rows comes from the left edge */
    const int lft2 = (lt + 2*l0 + l1 + 2) >> 2;
    const int lft3 = (l0 + 2*l1 + l2 + 2) >> 2;

    uint8_t *row0 = src;
    uint8_t *row1 = src + stride;
    uint8_t *row2 = src + 2*stride;
    uint8_t *row3 = src + 3*stride;

    row0[0] = avg0; row0[1] = avg1; row0[2] = avg2; row0[3] = avg3;
    row1[0] = flt0; row1[1] = flt1; row1[2] = flt2; row1[3] = flt3;
    row2[0] = lft2; row2[1] = avg0; row2[2] = avg1; row2[3] = avg2;
    row3[0] = lft3; row3[1] = flt0; row3[2] = flt1; row3[3] = flt2;
}
00246
/**
 * 4x4 vertical-left intra prediction.
 * Even rows hold 2-tap averages of the top/top-right row, odd rows hold
 * 3-tap (1,2,1) filtered values; each pair of rows is shifted one pixel
 * further along the top edge than the pair above it.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    int top[7];
    int avg[5], flt[5];
    int x, y, k;

    for(k = 0; k < 4; k++)
        top[k] = src[k - stride];
    for(k = 4; k < 7; k++)
        top[k] = topright[k - 4];

    for(k = 0; k < 5; k++){
        avg[k] = (top[k] +   top[k+1]            + 1) >> 1;
        flt[k] = (top[k] + 2*top[k+1] + top[k+2] + 2) >> 2;
    }

    for(y = 0; y < 4; y++){
        const int *line = (y & 1) ? flt : avg;
        for(x = 0; x < 4; x++)
            src[x + y*stride] = line[x + (y >> 1)];
    }
}
00268
/**
 * Shared worker for the RV40 4x4 vertical-left intra predictors.
 * Identical to the plain vertical-left prediction except for the first pixel
 * of rows 0 and 1, which additionally blend in the left-edge values passed by
 * the caller.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 * @param l0       left-edge value for row 0 (not used by the computation)
 * @param l1       left-edge value for row 1
 * @param l2       left-edge value for row 2
 * @param l3       left-edge value for row 3
 * @param l4       left-edge value for row 4, or the caller's substitute for it
 */
static void pred4x4_vertical_left_rv40(uint8_t *src, uint8_t *topright, int stride,
                                       const int l0, const int l1, const int l2, const int l3, const int l4){
    int top[7];
    int avg[5], flt[5];
    int x, y, k;

    for(k = 0; k < 4; k++)
        top[k] = src[k - stride];
    for(k = 4; k < 7; k++)
        top[k] = topright[k - 4];

    for(k = 1; k < 5; k++){
        avg[k] = (top[k] +   top[k+1]            + 1) >> 1;
        flt[k] = (top[k] + 2*top[k+1] + top[k+2] + 2) >> 2;
    }
    /* entry 0 of each table feeds only pixel (0,0) resp. (0,1); these two
     * pixels mix the top edge with the left edge */
    avg[0] = (2*top[0] + 2*top[1]          + l1 + 2*l2 + l3 + 4) >> 3;
    flt[0] = (  top[0] + 2*top[1] + top[2] + l2 + 2*l3 + l4 + 4) >> 3;

    for(y = 0; y < 4; y++){
        const int *line = (y & 1) ? flt : avg;
        for(x = 0; x < 4; x++)
            src[x + y*stride] = line[x + (y >> 1)];
    }
}
00291
/**
 * RV40 4x4 vertical-left intra prediction with down-left neighbours available.
 * Reads the left-edge pixels of rows 0..4 and hands them to the shared worker.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_vertical_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){
    const uint8_t *left = src - 1; /* column immediately left of the block */

    pred4x4_vertical_left_rv40(src, topright, stride,
                               left[0*stride], left[1*stride], left[2*stride],
                               left[3*stride], left[4*stride]);
}
00298
/**
 * RV40 4x4 vertical-left intra prediction without down-left neighbours.
 * Reads the left-edge pixels of rows 0..3 and passes the row-3 pixel a second
 * time in place of the row-4 pixel.
 *
 * @param src      top-left pixel of the block
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
    const uint8_t *left = src - 1; /* column immediately left of the block */
    const int last = left[3*stride];

    pred4x4_vertical_left_rv40(src, topright, stride,
                               left[0*stride], left[1*stride], left[2*stride],
                               last, last);
}
00304
/**
 * 4x4 horizontal-up intra prediction.
 * Built only from the four left neighbours: values alternate between 2-tap
 * averages and 3-tap (1,2,1) filters while walking down the left edge, and
 * everything past the end of the edge is the last left pixel.
 *
 * @param src      top-left pixel of the block
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0 = src[-1];
    const int l1 = src[-1 +   stride];
    const int l2 = src[-1 + 2*stride];
    const int l3 = src[-1 + 3*stride];
    int seq[10]; /* pixel (x,y) takes seq[x + 2*y] */
    int x, y, k;

    seq[0] = (l0 +   l1      + 1) >> 1;
    seq[1] = (l0 + 2*l1 + l2 + 2) >> 2;
    seq[2] = (l1 +   l2      + 1) >> 1;
    seq[3] = (l1 + 2*l2 + l3 + 2) >> 2;
    seq[4] = (l2 +   l3      + 1) >> 1;
    seq[5] = (l2 + 2*l3 + l3 + 2) >> 2;
    for(k = 6; k < 10; k++)
        seq[k] = l3;

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = seq[x + 2*y];
}
00325
/**
 * RV40 variant of the 4x4 horizontal-up intra prediction, with down-left
 * neighbours available. The first six values blend filtered top/top-right
 * pixels with filtered left pixels; the remaining ones lean on the left and
 * down-left column.
 *
 * @param src      top-left pixel of the block; left pixels of rows 0..6 are read
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_horizontal_up_rv40_c(uint8_t *src, uint8_t *topright, int stride){
    int t[8], l[7];
    int seq[10]; /* pixel (x,y) takes seq[x + 2*y] */
    int x, y, k;

    for(k = 0; k < 4; k++){
        t[k]     = src[k - stride];
        t[4 + k] = topright[k];
    }
    for(k = 0; k < 7; k++)
        l[k] = src[-1 + k*stride];

    seq[0] = (t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1]        + 4) >> 3;
    seq[1] = (t[2] + 2*t[3] + t[4] +   l[0] + 2*l[1] + l[2] + 4) >> 3;
    seq[2] = (t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2]        + 4) >> 3;
    seq[3] = (t[4] + 2*t[5] + t[6] +   l[1] + 2*l[2] + l[3] + 4) >> 3;
    seq[4] = (t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3]        + 4) >> 3;
    seq[5] = (t[6] + 3*t[7]        +   l[2] + 3*l[3]        + 4) >> 3;
    seq[6] = (t[6] + t[7] + l[3] + l[4] + 2) >> 2;
    seq[7] = (l[3] + 2*l[4] + l[5] + 2) >> 2;
    seq[8] = (l[4] +   l[5]        + 1) >> 1;
    seq[9] = (l[4] + 2*l[5] + l[6] + 2) >> 2;

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = seq[x + 2*y];
}
00349
/**
 * RV40 variant of the 4x4 horizontal-up intra prediction without down-left
 * neighbours. The first six values match the full RV40 variant; the remaining
 * ones use the last left pixel instead of the pixels below the left edge.
 *
 * @param src      top-left pixel of the block; left pixels of rows 0..3 are read
 * @param topright pointer to the four pixels above and to the right of the block
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
    int t[8], l[4];
    int seq[10]; /* pixel (x,y) takes seq[x + 2*y] */
    int x, y, k;

    for(k = 0; k < 4; k++){
        t[k]     = src[k - stride];
        t[4 + k] = topright[k];
        l[k]     = src[-1 + k*stride];
    }

    seq[0] = (t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1]        + 4) >> 3;
    seq[1] = (t[2] + 2*t[3] + t[4] +   l[0] + 2*l[1] + l[2] + 4) >> 3;
    seq[2] = (t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2]        + 4) >> 3;
    seq[3] = (t[4] + 2*t[5] + t[6] +   l[1] + 2*l[2] + l[3] + 4) >> 3;
    seq[4] = (t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3]        + 4) >> 3;
    seq[5] = (t[6] + 3*t[7]        +   l[2] + 3*l[3]        + 4) >> 3;
    seq[6] = (t[6] + t[7] + 2*l[3] + 2) >> 2;
    seq[7] = l[3];
    seq[8] = l[3];
    seq[9] = l[3];

    for(y = 0; y < 4; y++)
        for(x = 0; x < 4; x++)
            src[x + y*stride] = seq[x + 2*y];
}
00372
/**
 * 4x4 horizontal-down intra prediction.
 * Each row starts with a 2-tap average / 3-tap filter pair taken from the
 * left edge and continues with the first two pixels of the row above; the
 * tail of row 0 comes from the top edge.
 *
 * @param src      top-left pixel of the block
 * @param topright not used by this predictor
 * @param stride   byte offset between vertically adjacent pixels
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt = src[-1 - stride];
    const int t0 = src[0 - stride], t1 = src[1 - stride], t2 = src[2 - stride];
    const int l0 = src[-1];
    const int l1 = src[-1 +   stride];
    const int l2 = src[-1 + 2*stride];
    const int l3 = src[-1 + 3*stride];

    /* leading (average, filtered) pair of each row */
    const int a0 = (lt + l0 + 1) >> 1, f0 = (l0 + 2*lt + t0 + 2) >> 2;
    const int a1 = (l0 + l1 + 1) >> 1, f1 = (lt + 2*l0 + l1 + 2) >> 2;
    const int a2 = (l1 + l2 + 1) >> 1, f2 = (l0 + 2*l1 + l2 + 2) >> 2;
    const int a3 = (l2 + l3 + 1) >> 1, f3 = (l1 + 2*l2 + l3 + 2) >> 2;
    /* last two pixels of the first row, from the top edge */
    const int u2 = (lt + 2*t0 + t1 + 2) >> 2;
    const int u3 = (t0 + 2*t1 + t2 + 2) >> 2;

    uint8_t *row0 = src;
    uint8_t *row1 = src + stride;
    uint8_t *row2 = src + 2*stride;
    uint8_t *row3 = src + 3*stride;

    row0[0] = a0; row0[1] = f0; row0[2] = u2; row0[3] = u3;
    row1[0] = a1; row1[1] = f1; row1[2] = a0; row1[3] = f0;
    row2[0] = a2; row2[1] = f2; row2[2] = a1; row2[3] = f1;
    row3[0] = a3; row3[1] = f3; row3[2] = a2; row3[3] = f2;
}
00395
/**
 * 16x16 vertical intra prediction.
 * Copies the sixteen pixels above the block into each of its sixteen rows.
 *
 * @param src    top-left pixel of the block; rows are accessed through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *above = (const uint32_t*)(src - stride);
    const uint32_t w0 = above[0];
    const uint32_t w1 = above[1];
    const uint32_t w2 = above[2];
    const uint32_t w3 = above[3];
    int row;

    for(row = 0; row < 16; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = w0;
        dst[1] = w1;
        dst[2] = w2;
        dst[3] = w3;
    }
}
00410
/**
 * 16x16 horizontal intra prediction.
 * Fills every row of the block with the pixel immediately to its left.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int row;

    for(row = 0; row < 16; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        /* Unsigned multiply: an int product overflows for pixels >= 128. */
        const uint32_t fill = src[-1 + row*stride] * 0x01010101U;

        dst[0] = fill;
        dst[1] = fill;
        dst[2] = fill;
        dst[3] = fill;
    }
}
00421
/**
 * 16x16 DC intra prediction from both the top and the left neighbours.
 * The block is filled with the rounded mean of the sixteen pixels above and
 * the sixteen pixels to the left.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 16; i++)
        sum += src[-1 + i*stride] + src[i - stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 16) >> 5) * 0x01010101U;

    for(i = 0; i < 16; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = dst[2] = dst[3] = fill;
    }
}
00442
/**
 * 16x16 DC intra prediction from the left neighbours only.
 * The block is filled with the rounded mean of the sixteen pixels to its left.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 16; i++)
        sum += src[-1 + i*stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 8) >> 4) * 0x01010101U;

    for(i = 0; i < 16; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = dst[2] = dst[3] = fill;
    }
}
00459
/**
 * 16x16 DC intra prediction from the top neighbours only.
 * The block is filled with the rounded mean of the sixteen pixels above it.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 16; i++)
        sum += src[i - stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 8) >> 4) * 0x01010101U;

    for(i = 0; i < 16; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = dst[2] = dst[3] = fill;
    }
}
00475
/**
 * 16x16 DC intra prediction without any neighbours.
 * Fills the whole block with the constant value 128.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U; /* four pixels of value 128 */
    int row, word;

    for(row = 0; row < 16; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        for(word = 0; word < 4; word++)
            dst[word] = grey;
    }
}
00486
00487 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){
00488 int i, j, k;
00489 int a;
00490 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00491 const uint8_t * const src0 = src+7-stride;
00492 const uint8_t *src1 = src+8*stride-1;
00493 const uint8_t *src2 = src1-2*stride;
00494 int H = src0[1] - src0[-1];
00495 int V = src1[0] - src2[ 0];
00496 for(k=2; k<=8; ++k) {
00497 src1 += stride; src2 -= stride;
00498 H += k*(src0[k] - src0[-k]);
00499 V += k*(src1[0] - src2[ 0]);
00500 }
00501 if(svq3){
00502 H = ( 5*(H/4) ) / 16;
00503 V = ( 5*(V/4) ) / 16;
00504
00505
00506 i = H; H = V; V = i;
00507 }else if(rv40){
00508 H = ( H + (H>>2) ) >> 4;
00509 V = ( V + (V>>2) ) >> 4;
00510 }else{
00511 H = ( 5*H+32 ) >> 6;
00512 V = ( 5*V+32 ) >> 6;
00513 }
00514
00515 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
00516 for(j=16; j>0; --j) {
00517 int b = a;
00518 a += V;
00519 for(i=-16; i<0; i+=4) {
00520 src[16+i] = cm[ (b ) >> 5 ];
00521 src[17+i] = cm[ (b+ H) >> 5 ];
00522 src[18+i] = cm[ (b+2*H) >> 5 ];
00523 src[19+i] = cm[ (b+3*H) >> 5 ];
00524 b += 4*H;
00525 }
00526 src += stride;
00527 }
00528 }
00529
/**
 * 16x16 plane intra prediction, default variant: calls the shared worker with
 * both the SVQ3 and the RV40 flag cleared.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    const int svq3 = 0, rv40 = 0;

    pred16x16_plane_compat_c(src, stride, svq3, rv40);
}
00533
/**
 * 16x16 plane intra prediction, SVQ3 variant: calls the shared worker with
 * the SVQ3 flag set.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
    const int svq3 = 1, rv40 = 0;

    pred16x16_plane_compat_c(src, stride, svq3, rv40);
}
00537
/**
 * 16x16 plane intra prediction, RV40 variant: calls the shared worker with
 * the RV40 flag set.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
    const int svq3 = 0, rv40 = 1;

    pred16x16_plane_compat_c(src, stride, svq3, rv40);
}
00541
/**
 * 8x8 vertical intra prediction.
 * Copies the eight pixels above the block into each of its eight rows.
 *
 * @param src    top-left pixel of the block; rows are accessed through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t *above = (const uint32_t*)(src - stride);
    const uint32_t lo = above[0];
    const uint32_t hi = above[1];
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = lo;
        dst[1] = hi;
    }
}
00552
/**
 * 8x8 horizontal intra prediction.
 * Fills every row of the block with the pixel immediately to its left.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        /* Unsigned multiply: an int product overflows for pixels >= 128. */
        const uint32_t fill = src[-1 + row*stride] * 0x01010101U;

        dst[0] = fill;
        dst[1] = fill;
    }
}
00561
/**
 * 8x8 DC intra prediction without any neighbours.
 * Fills the whole block with the constant value 128.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U; /* four pixels of value 128 */
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = grey;
        dst[1] = grey;
    }
}
00570
/**
 * 8x8 DC intra prediction from the left neighbours only.
 * The upper four rows are filled with the rounded mean of the left pixels of
 * rows 0..3, the lower four rows with the rounded mean of rows 4..7.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum_upper = 0, sum_lower = 0;
    uint32_t fill_upper, fill_lower;

    for(i = 0; i < 4; i++){
        sum_upper += src[-1 + i*stride];
        sum_lower += src[-1 + (i+4)*stride];
    }
    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill_upper = ((sum_upper + 2) >> 2) * 0x01010101U;
    fill_lower = ((sum_lower + 2) >> 2) * 0x01010101U;

    for(i = 0; i < 8; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = i < 4 ? fill_upper : fill_lower;
    }
}
00592
/**
 * RV40 8x8 DC intra prediction from the left neighbours only.
 * The whole block is filled with the rounded mean of all eight left pixels.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 8; i++)
        sum += src[-1 + i*stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 4) >> 3) * 0x01010101U;

    for(i = 0; i < 8; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = fill;
    }
}
00607
/**
 * 8x8 DC intra prediction from the top neighbours only.
 * The left four columns are filled with the rounded mean of the pixels above
 * columns 0..3, the right four columns with the rounded mean of columns 4..7.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum_left = 0, sum_right = 0;
    uint32_t fill_left, fill_right;

    for(i = 0; i < 4; i++){
        sum_left  += src[    i - stride];
        sum_right += src[4 + i - stride];
    }
    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill_left  = ((sum_left  + 2) >> 2) * 0x01010101U;
    fill_right = ((sum_right + 2) >> 2) * 0x01010101U;

    for(i = 0; i < 8; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = fill_left;
        dst[1] = fill_right;
    }
}
00629
/**
 * RV40 8x8 DC intra prediction from the top neighbours only.
 * The whole block is filled with the rounded mean of all eight pixels above it.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 8; i++)
        sum += src[i - stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 4) >> 3) * 0x01010101U;

    for(i = 0; i < 8; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = dst[1] = fill;
    }
}
00644
00645
/**
 * 8x8 DC intra prediction from both the top and the left neighbours, computed
 * separately for each 4x4 quadrant:
 *  - top-left:     mean of the four pixels above and the four pixels left of it
 *  - top-right:    mean of the four pixels above it
 *  - bottom-left:  mean of the four pixels left of it
 *  - bottom-right: mean of the pixels above the right half and left of the
 *                  lower half
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum_tl = 0, sum_top_right = 0, sum_left_lower = 0;
    uint32_t fill_tl, fill_tr, fill_bl, fill_br;

    for(i = 0; i < 4; i++){
        sum_tl         += src[-1 + i*stride] + src[i - stride];
        sum_top_right  += src[4 + i - stride];
        sum_left_lower += src[-1 + (i+4)*stride];
    }
    /* Unsigned multiplies: an int product overflows for averages >= 128. */
    fill_tl = ((sum_tl + 4) >> 3)                         * 0x01010101U;
    fill_tr = ((sum_top_right + 2) >> 2)                  * 0x01010101U;
    fill_bl = ((sum_left_lower + 2) >> 2)                 * 0x01010101U;
    fill_br = ((sum_top_right + sum_left_lower + 4) >> 3) * 0x01010101U;

    for(i = 0; i < 4; i++){
        uint32_t *upper = (uint32_t*)(src + i*stride);
        uint32_t *lower = (uint32_t*)(src + (i+4)*stride);
        upper[0] = fill_tl;
        upper[1] = fill_tr;
        lower[0] = fill_bl;
        lower[1] = fill_br;
    }
}
00670
00671
/**
 * 8x8 DC prediction composed of two steps: a top-only DC prediction of the
 * whole 8x8 block, after which the top-left 4x4 quadrant is overwritten with
 * a 4x4 top+left DC prediction.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
    /* whole block first ... */
    pred8x8_top_dc_c(src, stride);
    /* ... then redo the top-left quadrant */
    pred4x4_dc_c(src, NULL, stride);
}
00676
/**
 * 8x8 DC prediction composed of two steps: a regular 8x8 DC prediction of the
 * whole block, after which the top-left 4x4 quadrant is overwritten with a
 * 4x4 top-only DC prediction.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
    /* whole block first ... */
    pred8x8_dc_c(src, stride);
    /* ... then redo the top-left quadrant */
    pred4x4_top_dc_c(src, NULL, stride);
}
00681
/**
 * 8x8 DC prediction composed of two steps: a left-only DC prediction of the
 * whole block, after which both lower 4x4 quadrants are overwritten with the
 * constant 128.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
    uint8_t *lower_half = src + 4*stride;

    pred8x8_left_dc_c(src, stride);
    pred4x4_128_dc_c(lower_half,     NULL, stride);
    pred4x4_128_dc_c(lower_half + 4, NULL, stride);
}
00687
/**
 * 8x8 DC prediction composed of two steps: a left-only DC prediction of the
 * whole block, after which both upper 4x4 quadrants are overwritten with the
 * constant 128.
 *
 * @param src    top-left pixel of the block
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
    uint8_t *upper_half = src;

    pred8x8_left_dc_c(src, stride);
    pred4x4_128_dc_c(upper_half,     NULL, stride);
    pred4x4_128_dc_c(upper_half + 4, NULL, stride);
}
00693
/**
 * RV40 8x8 DC intra prediction from both the top and the left neighbours.
 * The whole block is filled with the rounded mean of the eight pixels above
 * and the eight pixels to the left.
 *
 * @param src    top-left pixel of the block; rows are written through
 *               uint32_t pointers
 * @param stride byte offset between vertically adjacent pixels
 */
static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
    int i, sum = 0;
    uint32_t fill;

    for(i = 0; i < 8; i++)
        sum += src[-1 + i*stride] + src[i - stride];

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 8) >> 4) * 0x01010101U;

    for(i = 0; i < 8; i++){
        uint32_t *dst = (uint32_t*)(src + i*stride);
        dst[0] = fill;
        dst[1] = fill;
    }
}
00714
00715 static void pred8x8_plane_c(uint8_t *src, int stride){
00716 int j, k;
00717 int a;
00718 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00719 const uint8_t * const src0 = src+3-stride;
00720 const uint8_t *src1 = src+4*stride-1;
00721 const uint8_t *src2 = src1-2*stride;
00722 int H = src0[1] - src0[-1];
00723 int V = src1[0] - src2[ 0];
00724 for(k=2; k<=4; ++k) {
00725 src1 += stride; src2 -= stride;
00726 H += k*(src0[k] - src0[-k]);
00727 V += k*(src1[0] - src2[ 0]);
00728 }
00729 H = ( 17*H+16 ) >> 5;
00730 V = ( 17*V+16 ) >> 5;
00731
00732 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
00733 for(j=8; j>0; --j) {
00734 int b = a;
00735 a += V;
00736 src[0] = cm[ (b ) >> 5 ];
00737 src[1] = cm[ (b+ H) >> 5 ];
00738 src[2] = cm[ (b+2*H) >> 5 ];
00739 src[3] = cm[ (b+3*H) >> 5 ];
00740 src[4] = cm[ (b+4*H) >> 5 ];
00741 src[5] = cm[ (b+5*H) >> 5 ];
00742 src[6] = cm[ (b+6*H) >> 5 ];
00743 src[7] = cm[ (b+7*H) >> 5 ];
00744 src += stride;
00745 }
00746 }
00747
/* Helper macros for the pred8x8l_* functions below. They rely on src, stride,
 * has_topleft and has_topright being in scope where they are expanded. */

/* Pixel at column x, row y relative to the block's top-left pixel. */
00748 #define SRC(x,y) src[(x)+(y)*stride]
/* Declares l<y>: (1,2,1)-filtered left neighbour of row y. */
00749 #define PL(y) \
00750 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Declares the filtered left edge l0..l7. l0 uses the top-left corner only
 * when has_topleft is set (otherwise the row-0 pixel is used twice); l7 has
 * no pixel below it, so the row-7 pixel is weighted 3. */
00751 #define PREDICT_8x8_LOAD_LEFT \
00752 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
00753 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
00754 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
00755 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
00756
/* Declares t<x>: (1,2,1)-filtered top neighbour of column x. */
00757 #define PT(x) \
00758 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares the filtered top edge t0..t7. t0 uses the top-left corner only
 * when has_topleft is set; t7 uses the pixel above column 8 only when
 * has_topright is set (otherwise the column-7 pixel stands in for it). */
00759 #define PREDICT_8x8_LOAD_TOP \
00760 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
00761 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
00762 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
00763 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
00764 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
00765
/* Assigns (does not declare) t<x> with the (1,2,1)-filtered top neighbour. */
00766 #define PTR(x) \
00767 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares t8..t15. With has_topright they are the filtered pixels above
 * columns 8..15; without it they are all set to the unfiltered pixel above
 * column 7. */
00768 #define PREDICT_8x8_LOAD_TOPRIGHT \
00769 int t8, t9, t10, t11, t12, t13, t14, t15; \
00770 if(has_topright) { \
00771 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
00772 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
00773 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
00774
/* Declares lt: the (1,2,1)-filtered top-left corner pixel. The corner is read
 * unconditionally, has_topleft is not consulted here. */
00775 #define PREDICT_8x8_LOAD_TOPLEFT \
00776 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
00777
/* Fills the 8x8 block with the 32-bit pattern v (two words per row).
 * Declares a local y and advances src past the block as a side effect. */
00778 #define PREDICT_8x8_DC(v) \
00779 int y; \
00780 for( y = 0; y < 8; y++ ) { \
00781 ((uint32_t*)src)[0] = \
00782 ((uint32_t*)src)[1] = v; \
00783 src += stride; \
00784 }
00785
/**
 * 8x8 luma DC intra prediction without any neighbours.
 * Fills the whole block with the constant value 128.
 *
 * @param src          top-left pixel of the block; rows are written through
 *                     uint32_t pointers
 * @param has_topleft  not used by this predictor
 * @param has_topright not used by this predictor
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = 0x80808080;
        dst[1] = 0x80808080;
    }
}
/**
 * 8x8 luma DC intra prediction from the left neighbours only.
 * The left column is smoothed with a (1,2,1) filter and the block is filled
 * with the rounded mean of the eight filtered values.
 *
 * @param src          top-left pixel of the block; rows are written through
 *                     uint32_t pointers
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright not used by this predictor
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *col = src - 1; /* column immediately left of the block */
    int sum, row;
    uint32_t fill;

    /* filtered left edge: row 0 uses the corner only when it is available,
     * row 7 has no pixel below it */
    sum = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(row = 1; row < 7; row++)
        sum += (col[(row-1)*stride] + 2*col[row*stride] + col[(row+1)*stride] + 2) >> 2;
    sum += (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 4) >> 3) * 0x01010101U;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = fill;
        dst[1] = fill;
    }
}
/**
 * 8x8 luma DC intra prediction from the top neighbours only.
 * The top row is smoothed with a (1,2,1) filter and the block is filled with
 * the rounded mean of the eight filtered values.
 *
 * @param src          top-left pixel of the block; rows are written through
 *                     uint32_t pointers
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright non-zero if the pixel above column 8 may be read
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    int sum, k, row;
    uint32_t fill;

    /* filtered top edge: the end pixels fall back to their own value when the
     * corner resp. top-right neighbour is unavailable */
    sum = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        sum += (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    sum += ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 4) >> 3) * 0x01010101U;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = fill;
        dst[1] = fill;
    }
}
/**
 * 8x8 luma DC intra prediction from both the top and the left neighbours.
 * Both edges are smoothed with a (1,2,1) filter and the block is filled with
 * the rounded mean of the sixteen filtered values.
 *
 * @param src          top-left pixel of the block; rows are written through
 *                     uint32_t pointers
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright non-zero if the pixel above column 8 may be read
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    const uint8_t *col   = src - 1;      /* column immediately left of it   */
    int sum, k;
    uint32_t fill;

    /* filtered left edge */
    sum = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(k = 1; k < 7; k++)
        sum += (col[(k-1)*stride] + 2*col[k*stride] + col[(k+1)*stride] + 2) >> 2;
    sum += (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    /* filtered top edge */
    sum += ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        sum += (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    sum += ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;

    /* Unsigned multiply: an int product overflows for averages >= 128. */
    fill = ((sum + 8) >> 4) * 0x01010101U;

    for(k = 0; k < 8; k++){
        uint32_t *dst = (uint32_t*)(src + k*stride);
        dst[0] = fill;
        dst[1] = fill;
    }
}
/**
 * 8x8 luma horizontal intra prediction.
 * The left column is smoothed with a (1,2,1) filter and every row is filled
 * with its filtered left neighbour.
 *
 * @param src          top-left pixel of the block; rows are written through
 *                     uint32_t pointers
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright not used by this predictor
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *col = src - 1; /* column immediately left of the block */
    int left[8];
    int row;

    left[0] = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(row = 1; row < 7; row++)
        left[row] = (col[(row-1)*stride] + 2*col[row*stride] + col[(row+1)*stride] + 2) >> 2;
    left[7] = (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        /* Unsigned multiply: an int product overflows for values >= 128. */
        const uint32_t fill = left[row] * 0x01010101U;

        dst[0] = fill;
        dst[1] = fill;
    }
}
/**
 * 8x8 luma vertical intra prediction.
 * The top row is smoothed with a (1,2,1) filter and every row of the block
 * becomes a copy of the filtered top row.
 *
 * @param src          top-left pixel of the block
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright non-zero if the pixel above column 8 may be read
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    int top[8];
    int x, y;

    top[0] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(x = 1; x < 7; x++)
        top[x] = (above[x-1] + 2*above[x] + above[x+1] + 2) >> 2;
    top[7] = ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;

    for(y = 0; y < 8; y++)
        for(x = 0; x < 8; x++)
            src[x + y*stride] = top[x];
}
/**
 * 8x8 luma diagonal down-left intra prediction.
 * Works on the (1,2,1)-filtered top and top-right edge; all pixels on one
 * bottom-left to top-right diagonal share a second (1,2,1) filter of three
 * consecutive edge values.
 *
 * @param src          top-left pixel of the block
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright non-zero if the eight pixels above and to the right of
 *                     the block may be read; otherwise the pixel above
 *                     column 7 replaces them
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    int top[16];
    int x, y, k;

    top[0] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        top[k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    top[7] = ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;
    if(has_topright){
        for(k = 8; k < 15; k++)
            top[k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
        top[15] = (above[14] + 3*above[15] + 2) >> 2;
    }else{
        for(k = 8; k < 16; k++)
            top[k] = above[7];
    }

    for(y = 0; y < 8; y++){
        for(x = 0; x < 8; x++){
            const int d = x + y; /* anti-diagonal index */
            /* the bottom-right pixel has no value beyond top[15] */
            src[x + y*stride] = d == 14 ? (top[14] + 3*top[15] + 2) >> 2
                                        : (top[d] + 2*top[d+1] + top[d+2] + 2) >> 2;
        }
    }
}
/**
 * 8x8 luma diagonal down-right intra prediction.
 * Works on the (1,2,1)-filtered left edge, corner and top edge; all pixels on
 * one top-left to bottom-right diagonal share a second (1,2,1) filter of three
 * consecutive edge values.
 *
 * @param src          top-left pixel of the block; the top-left corner pixel
 *                     is always read
 * @param has_topleft  selects whether the corner contributes to the first
 *                     filtered left and top values
 * @param has_topright non-zero if the pixel above column 8 may be read
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    const uint8_t *col   = src - 1;      /* column immediately left of it   */
    /* edge[0..7] = filtered left column bottom-to-top, edge[8] = filtered
     * corner, edge[9..16] = filtered top row left-to-right */
    int edge[17];
    int x, y, k;

    edge[9] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[9 + k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    edge[16] = ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;

    edge[7] = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[7 - k] = (col[(k-1)*stride] + 2*col[k*stride] + col[(k+1)*stride] + 2) >> 2;
    edge[0] = (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    edge[8] = (col[0] + 2*above[-1] + above[0] + 2) >> 2;

    for(y = 0; y < 8; y++){
        for(x = 0; x < 8; x++){
            const int d = x - y + 7; /* diagonal index, 0 = bottom-left */
            src[x + y*stride] = (edge[d] + 2*edge[d+1] + edge[d+2] + 2) >> 2;
        }
    }
}
/**
 * 8x8 luma vertical-right intra prediction.
 * Works on the (1,2,1)-filtered left edge, corner and top edge. Pixels are
 * classified by z = 2*x - y: below -1 they take a 3-tap filter along the left
 * edge; otherwise even rows take a 2-tap average and odd rows a 3-tap filter
 * along the corner/top edge.
 *
 * @param src          top-left pixel of the block; the top-left corner pixel
 *                     is always read
 * @param has_topleft  selects whether the corner contributes to the first
 *                     filtered left and top values
 * @param has_topright non-zero if the pixel above column 8 may be read
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    const uint8_t *col   = src - 1;      /* column immediately left of it   */
    /* edge[0..7] = filtered left column bottom-to-top, edge[8] = filtered
     * corner, edge[9..16] = filtered top row left-to-right */
    int edge[17];
    int x, y, k;

    edge[9] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[9 + k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    edge[16] = ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;

    edge[7] = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[7 - k] = (col[(k-1)*stride] + 2*col[k*stride] + col[(k+1)*stride] + 2) >> 2;
    edge[0] = (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    edge[8] = (col[0] + 2*above[-1] + above[0] + 2) >> 2;

    for(y = 0; y < 8; y++){
        for(x = 0; x < 8; x++){
            const int z = 2*x - y;
            const int c = x - (y >> 1);
            int v;

            if(z < -1){
                const int m = -z; /* how far down the left edge to look */
                v = (edge[8-m] + 2*edge[9-m] + edge[10-m] + 2) >> 2;
            }else if(y & 1){
                v = (edge[7+c] + 2*edge[8+c] + edge[9+c] + 2) >> 2;
            }else{
                v = (edge[8+c] + edge[9+c] + 1) >> 1;
            }
            src[x + y*stride] = v;
        }
    }
}
/**
 * 8x8 luma horizontal-down intra prediction.
 * Works on the (1,2,1)-filtered left edge, corner and top edge. Pixels are
 * classified by z = 2*y - x: below -1 they take a 3-tap filter along the top
 * edge; otherwise even columns take a 2-tap average and odd columns a 3-tap
 * filter along the corner/left edge.
 *
 * @param src          top-left pixel of the block; the top-left corner pixel
 *                     is always read
 * @param has_topleft  selects whether the corner contributes to the first
 *                     filtered left and top values
 * @param has_topright not needed for the result (only the filtered top values
 *                     of columns 0..6 are used)
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    const uint8_t *col   = src - 1;      /* column immediately left of it   */
    /* edge[0..7] = filtered left column bottom-to-top, edge[8] = filtered
     * corner, edge[9..15] = filtered top row, columns 0..6 */
    int edge[16];
    int x, y, k;

    edge[9] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[9 + k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;

    edge[7] = ((has_topleft ? col[-stride] : col[0]) + 2*col[0] + col[stride] + 2) >> 2;
    for(k = 1; k < 7; k++)
        edge[7 - k] = (col[(k-1)*stride] + 2*col[k*stride] + col[(k+1)*stride] + 2) >> 2;
    edge[0] = (col[6*stride] + 3*col[7*stride] + 2) >> 2;

    edge[8] = (col[0] + 2*above[-1] + above[0] + 2) >> 2;

    for(y = 0; y < 8; y++){
        for(x = 0; x < 8; x++){
            const int z = 2*y - x;
            const int c = y - (x >> 1);
            int v;

            if(z < -1){
                const int m = -z; /* how far along the top edge to look */
                v = (edge[8+m] + 2*edge[7+m] + edge[6+m] + 2) >> 2;
            }else if(x & 1){
                v = (edge[9-c] + 2*edge[8-c] + edge[7-c] + 2) >> 2;
            }else{
                v = (edge[8-c] + edge[7-c] + 1) >> 1;
            }
            src[x + y*stride] = v;
        }
    }
}
/**
 * 8x8 luma vertical-left intra prediction.
 * Works on the (1,2,1)-filtered top and top-right edge: even rows hold 2-tap
 * averages, odd rows hold 3-tap (1,2,1) filters, and each pair of rows starts
 * one edge value further to the right than the pair above it.
 *
 * @param src          top-left pixel of the block
 * @param has_topleft  non-zero if the top-left corner pixel may be read
 * @param has_topright non-zero if the eight pixels above and to the right of
 *                     the block may be read; otherwise the pixel above
 *                     column 7 replaces them
 * @param stride       byte offset between vertically adjacent pixels
 */
static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint8_t *above = src - stride; /* row immediately above the block */
    int top[16];
    int x, y, k;

    top[0] = ((has_topleft ? above[-1] : above[0]) + 2*above[0] + above[1] + 2) >> 2;
    for(k = 1; k < 7; k++)
        top[k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
    top[7] = ((has_topright ? above[8] : above[7]) + 2*above[7] + above[6] + 2) >> 2;
    if(has_topright){
        for(k = 8; k < 15; k++)
            top[k] = (above[k-1] + 2*above[k] + above[k+1] + 2) >> 2;
        top[15] = (above[14] + 3*above[15] + 2) >> 2;
    }else{
        for(k = 8; k < 16; k++)
            top[k] = above[7];
    }

    for(y = 0; y < 8; y++){
        for(x = 0; x < 8; x++){
            const int p = x + (y >> 1); /* first edge value used */
            if(y & 1)
                src[x + y*stride] = (top[p] + 2*top[p+1] + top[p+2] + 2) >> 2;
            else
                src[x + y*stride] = (top[p] + top[p+1] + 1) >> 1;
        }
    }
}
00958 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
00959 {
00960 PREDICT_8x8_LOAD_LEFT;
00961 SRC(0,0)= (l0 + l1 + 1) >> 1;
00962 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
00963 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
00964 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
00965 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
00966 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
00967 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
00968 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
00969 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
00970 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
00971 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
00972 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
00973 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
00974 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
00975 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
00976 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
00977 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
00978 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
00979 }
00980 #undef PREDICT_8x8_LOAD_LEFT
00981 #undef PREDICT_8x8_LOAD_TOP
00982 #undef PREDICT_8x8_LOAD_TOPLEFT
00983 #undef PREDICT_8x8_LOAD_TOPRIGHT
00984 #undef PREDICT_8x8_DC
00985 #undef PTR
00986 #undef PT
00987 #undef PL
00988 #undef SRC
00989
00990 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
00991 int i;
00992 pix -= stride;
00993 for(i=0; i<4; i++){
00994 uint8_t v = pix[0];
00995 pix[1*stride]= v += block[0];
00996 pix[2*stride]= v += block[4];
00997 pix[3*stride]= v += block[8];
00998 pix[4*stride]= v += block[12];
00999 pix++;
01000 block++;
01001 }
01002 }
01003
01004 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
01005 int i;
01006 for(i=0; i<4; i++){
01007 uint8_t v = pix[-1];
01008 pix[0]= v += block[0];
01009 pix[1]= v += block[1];
01010 pix[2]= v += block[2];
01011 pix[3]= v += block[3];
01012 pix+= stride;
01013 block+= 4;
01014 }
01015 }
01016
01017 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
01018 int i;
01019 pix -= stride;
01020 for(i=0; i<8; i++){
01021 uint8_t v = pix[0];
01022 pix[1*stride]= v += block[0];
01023 pix[2*stride]= v += block[8];
01024 pix[3*stride]= v += block[16];
01025 pix[4*stride]= v += block[24];
01026 pix[5*stride]= v += block[32];
01027 pix[6*stride]= v += block[40];
01028 pix[7*stride]= v += block[48];
01029 pix[8*stride]= v += block[56];
01030 pix++;
01031 block++;
01032 }
01033 }
01034
01035 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
01036 int i;
01037 for(i=0; i<8; i++){
01038 uint8_t v = pix[-1];
01039 pix[0]= v += block[0];
01040 pix[1]= v += block[1];
01041 pix[2]= v += block[2];
01042 pix[3]= v += block[3];
01043 pix[4]= v += block[4];
01044 pix[5]= v += block[5];
01045 pix[6]= v += block[6];
01046 pix[7]= v += block[7];
01047 pix+= stride;
01048 block+= 8;
01049 }
01050 }
01051
01052 static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01053 int i;
01054 for(i=0; i<16; i++)
01055 pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
01056 }
01057
01058 static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01059 int i;
01060 for(i=0; i<16; i++)
01061 pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
01062 }
01063
01064 static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01065 int i;
01066 for(i=0; i<4; i++)
01067 pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
01068 }
01069
01070 static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01071 int i;
01072 for(i=0; i<4; i++)
01073 pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
01074 }
01075
01076
01080 void ff_h264_pred_init(H264PredContext *h, int codec_id){
01081
01082
01083 if(codec_id != CODEC_ID_RV40){
01084 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
01085 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
01086 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
01087 if(codec_id == CODEC_ID_SVQ3)
01088 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
01089 else
01090 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
01091 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
01092 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
01093 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
01094 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
01095 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
01096 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
01097 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
01098 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
01099 }else{
01100 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
01101 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
01102 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
01103 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
01104 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
01105 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
01106 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
01107 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_rv40_c;
01108 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_rv40_c;
01109 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
01110 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
01111 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
01112 h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c;
01113 h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
01114 h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
01115 }
01116
01117 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
01118 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
01119 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
01120 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
01121 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
01122 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
01123 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
01124 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
01125 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
01126 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
01127 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
01128 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
01129
01130 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
01131 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
01132 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
01133 if(codec_id != CODEC_ID_RV40){
01134 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
01135 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
01136 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
01137 h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t;
01138 h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
01139 h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
01140 h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
01141 }else{
01142 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_rv40_c;
01143 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
01144 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
01145 }
01146 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
01147
01148 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
01149 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
01150 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
01151 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
01152 switch(codec_id){
01153 case CODEC_ID_SVQ3:
01154 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_svq3_c;
01155 break;
01156 case CODEC_ID_RV40:
01157 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_rv40_c;
01158 break;
01159 default:
01160 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
01161 }
01162 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
01163 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
01164 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
01165
01166
01167 h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c;
01168 h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c;
01169 h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c;
01170 h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c;
01171 h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c;
01172 h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
01173 h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
01174 h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
01175 }