00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042
/* Clipping lookup table, padded by MAX_NEG_CROP entries on each side so that
 * slightly out-of-range indices are safe; zeroed here and presumably filled
 * by init code elsewhere (not in view) — TODO confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square lookup table used by the sum-of-squares routines below (indexed
 * through `ff_squareTbl + 256`, i.e. valid for offsets in [-256, 255]);
 * zeroed here, presumably filled at init time elsewhere — TODO confirm. */
uint32_t ff_squareTbl[512] = {0, };
00045
00046 #define BIT_DEPTH 9
00047 #include "dsputil_template.c"
00048 #undef BIT_DEPTH
00049
00050 #define BIT_DEPTH 10
00051 #include "dsputil_template.c"
00052 #undef BIT_DEPTH
00053
00054 #define BIT_DEPTH 8
00055 #include "dsputil_template.c"
00056
00057
/* Word-sized per-byte splat constants: ~0UL/255 is 0x0101...01 (one byte set
 * per byte lane), so multiplying by a byte value replicates it into every
 * byte of an unsigned long. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Standard 8x8 zigzag scan order: maps scan position -> raster coefficient
 * index (diagonal traversal from DC at top-left to position 63). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Alternate zigzag scan for interlaced (2-4-8) coded blocks: maps scan
 * position -> raster coefficient index. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
/* 16-byte-aligned inverse zigzag table; zero here, presumably filled
 * elsewhere (not in view) with position-of-coefficient data — TODO confirm. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (scan position -> raster index). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (scan position -> raster index). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation required by the MMX simple IDCT
 * (used by FF_SIMPLE_IDCT_PERM in ff_init_scantable_permutation). */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
/* Column permutation within each row used by the SSE2 IDCT
 * (see FF_SSE2_IDCT_PERM in ff_init_scantable_permutation). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125 int i;
00126 int end;
00127
00128 st->scantable= src_scantable;
00129
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = src_scantable[i];
00133 st->permutated[i] = permutation[j];
00134 #if ARCH_PPC
00135 st->inverse[j] = i;
00136 #endif
00137 }
00138
00139 end=-1;
00140 for(i=0; i<64; i++){
00141 int j;
00142 j = st->permutated[i];
00143 if(j>end) end=j;
00144 st->raster_end[i]= end;
00145 }
00146 }
00147
/**
 * Fill idct_permutation[] with the coefficient reordering required by the
 * selected IDCT implementation (raster index -> permuted index), so other
 * code can feed coefficients to the IDCT in the order it expects.
 */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity: coefficients stay in natural raster order */
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        /* keep the row bits (0x38), rotate the three column bits */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        /* table-driven order used by the MMX simple IDCT */
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        /* full 8x8 transpose: swap row and column */
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        /* partial transpose: swap only the low two bits of row and column */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        /* keep rows, permute columns per the SSE2 IDCT row order */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
00182
/* Sum of all 256 pixel values of a 16x16 block whose rows are
 * line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204
/* Sum of squares of all pixels of a 16x16 block (no mean removal), using the
 * ff_squareTbl lookup (offset by +256 so signed differences elsewhere also
 * index it safely).
 * NOTE(review): the fast paths read 8 pixels at a time through uint64_t /
 * uint32_t loads — this assumes pix is suitably aligned and type-puns the
 * byte buffer; verify on strict-alignment / strict-aliasing platforms. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* reference byte-at-a-time version, kept disabled */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            /* load 8 pixels as one 64-bit word and square each byte lane */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* two 32-bit loads cover the same 8 pixels */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
00252
/* Byte-swap w 32-bit words from src into dst (in-place use is fine:
 * each word is read before it is written). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    /* bulk: eight words per iteration */
    while (n + 8 <= w) {
        int k;
        for (k = 0; k < 8; k++)
            dst[n + k] = av_bswap32(src[n + k]);
        n += 8;
    }
    /* tail: remaining words one by one */
    while (n < w) {
        dst[n] = av_bswap32(src[n]);
        n++;
    }
}
00270
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276
00277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00278 {
00279 int s, i;
00280 uint32_t *sq = ff_squareTbl + 256;
00281
00282 s = 0;
00283 for (i = 0; i < h; i++) {
00284 s += sq[pix1[0] - pix2[0]];
00285 s += sq[pix1[1] - pix2[1]];
00286 s += sq[pix1[2] - pix2[2]];
00287 s += sq[pix1[3] - pix2[3]];
00288 pix1 += line_size;
00289 pix2 += line_size;
00290 }
00291 return s;
00292 }
00293
00294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00295 {
00296 int s, i;
00297 uint32_t *sq = ff_squareTbl + 256;
00298
00299 s = 0;
00300 for (i = 0; i < h; i++) {
00301 s += sq[pix1[0] - pix2[0]];
00302 s += sq[pix1[1] - pix2[1]];
00303 s += sq[pix1[2] - pix2[2]];
00304 s += sq[pix1[3] - pix2[3]];
00305 s += sq[pix1[4] - pix2[4]];
00306 s += sq[pix1[5] - pix2[5]];
00307 s += sq[pix1[6] - pix2[6]];
00308 s += sq[pix1[7] - pix2[7]];
00309 pix1 += line_size;
00310 pix2 += line_size;
00311 }
00312 return s;
00313 }
00314
00315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00316 {
00317 int s, i;
00318 uint32_t *sq = ff_squareTbl + 256;
00319
00320 s = 0;
00321 for (i = 0; i < h; i++) {
00322 s += sq[pix1[ 0] - pix2[ 0]];
00323 s += sq[pix1[ 1] - pix2[ 1]];
00324 s += sq[pix1[ 2] - pix2[ 2]];
00325 s += sq[pix1[ 3] - pix2[ 3]];
00326 s += sq[pix1[ 4] - pix2[ 4]];
00327 s += sq[pix1[ 5] - pix2[ 5]];
00328 s += sq[pix1[ 6] - pix2[ 6]];
00329 s += sq[pix1[ 7] - pix2[ 7]];
00330 s += sq[pix1[ 8] - pix2[ 8]];
00331 s += sq[pix1[ 9] - pix2[ 9]];
00332 s += sq[pix1[10] - pix2[10]];
00333 s += sq[pix1[11] - pix2[11]];
00334 s += sq[pix1[12] - pix2[12]];
00335 s += sq[pix1[13] - pix2[13]];
00336 s += sq[pix1[14] - pix2[14]];
00337 s += sq[pix1[15] - pix2[15]];
00338
00339 pix1 += line_size;
00340 pix2 += line_size;
00341 }
00342 return s;
00343 }
00344
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346 const uint8_t *s2, int stride){
00347 int i;
00348
00349
00350 for(i=0;i<8;i++) {
00351 block[0] = s1[0] - s2[0];
00352 block[1] = s1[1] - s2[1];
00353 block[2] = s1[2] - s2[2];
00354 block[3] = s1[3] - s2[3];
00355 block[4] = s1[4] - s2[4];
00356 block[5] = s1[5] - s2[5];
00357 block[6] = s1[6] - s2[6];
00358 block[7] = s1[7] - s2[7];
00359 s1 += stride;
00360 s2 += stride;
00361 block += 8;
00362 }
00363 }
00364
00365
00366 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00367 int line_size)
00368 {
00369 int i;
00370
00371
00372 for(i=0;i<8;i++) {
00373 pixels[0] = av_clip_uint8(block[0]);
00374 pixels[1] = av_clip_uint8(block[1]);
00375 pixels[2] = av_clip_uint8(block[2]);
00376 pixels[3] = av_clip_uint8(block[3]);
00377 pixels[4] = av_clip_uint8(block[4]);
00378 pixels[5] = av_clip_uint8(block[5]);
00379 pixels[6] = av_clip_uint8(block[6]);
00380 pixels[7] = av_clip_uint8(block[7]);
00381
00382 pixels += line_size;
00383 block += 8;
00384 }
00385 }
00386
00387 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00388 int line_size)
00389 {
00390 int i;
00391
00392
00393 for(i=0;i<4;i++) {
00394 pixels[0] = av_clip_uint8(block[0]);
00395 pixels[1] = av_clip_uint8(block[1]);
00396 pixels[2] = av_clip_uint8(block[2]);
00397 pixels[3] = av_clip_uint8(block[3]);
00398
00399 pixels += line_size;
00400 block += 8;
00401 }
00402 }
00403
00404 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00405 int line_size)
00406 {
00407 int i;
00408
00409
00410 for(i=0;i<2;i++) {
00411 pixels[0] = av_clip_uint8(block[0]);
00412 pixels[1] = av_clip_uint8(block[1]);
00413
00414 pixels += line_size;
00415 block += 8;
00416 }
00417 }
00418
00419 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00420 uint8_t *restrict pixels,
00421 int line_size)
00422 {
00423 int i, j;
00424
00425 for (i = 0; i < 8; i++) {
00426 for (j = 0; j < 8; j++) {
00427 if (*block < -128)
00428 *pixels = 0;
00429 else if (*block > 127)
00430 *pixels = 255;
00431 else
00432 *pixels = (uint8_t)(*block + 128);
00433 block++;
00434 pixels++;
00435 }
00436 pixels += (line_size - 8);
00437 }
00438 }
00439
00440 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00441 int line_size)
00442 {
00443 int i;
00444
00445
00446 for(i=0;i<8;i++) {
00447 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00448 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00449 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00450 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00451 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00452 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00453 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00454 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00455 pixels += line_size;
00456 block += 8;
00457 }
00458 }
00459
00460 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00461 int line_size)
00462 {
00463 int i;
00464
00465
00466 for(i=0;i<4;i++) {
00467 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00468 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00469 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00470 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00471 pixels += line_size;
00472 block += 8;
00473 }
00474 }
00475
00476 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00477 int line_size)
00478 {
00479 int i;
00480
00481
00482 for(i=0;i<2;i++) {
00483 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00484 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00485 pixels += line_size;
00486 block += 8;
00487 }
00488 }
00489
00490 static int sum_abs_dctelem_c(DCTELEM *block)
00491 {
00492 int sum=0, i;
00493 for(i=0; i<64; i++)
00494 sum+= FFABS(block[i]);
00495 return sum;
00496 }
00497
/* Fill h rows of 16 bytes each with `value`; rows are line_size apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    while (h-- > 0) {
        memset(row, value, 16);
        row += line_size;
    }
}
00507
/* Fill h rows of 8 bytes each with `value`; rows are line_size apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    while (h-- > 0) {
        memset(row, value, 8);
        row += line_size;
    }
}
00517
/* Round-to-nearest averages of two and four unsigned pixel values. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00520
/* One-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide block at a fixed 1/16-pel position (x16, y16), with the
 * caller-supplied rounder added before the >>8 normalisation. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear weights; A+B+C+D == 256 */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++) {
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[x + stride] + D * src[x + stride + 1] +
                      rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
00543
/**
 * Affine global motion compensation for one 8-pixel-wide block of height h.
 * The source position is tracked in 16.16 fixed point starting at (ox, oy),
 * advancing by (dxx, dyx) per output column and (dxy, dyy) per output row;
 * `shift` gives the sub-pel precision (s = 1<<shift positions per pixel) and
 * `r` is the rounding constant added before the final >>(2*shift).
 * Positions outside [0,width)x[0,height) are clamped to the picture edge,
 * dropping the interpolation along the clamped axis.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert to the maximum valid coordinate for the unsigned range tests */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* integer position in 1/s-pel units, then split into
             * full-pel part and fractional part */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare catches negative coordinates too */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically clamped: horizontal interpolation only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally clamped: vertical interpolation only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* clamped on both axes: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index                    ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00601
/* Integer-position thirdpel "interpolation": plain copy, dispatched to the
 * width-specialized copy routine. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00610
/* Horizontal 1/3-pel interpolation: dst ~= (2*cur + next) / 3,
 * computed as (683 * (2a + b + 1)) >> 11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00621
/* Horizontal 2/3-pel interpolation: dst ~= (cur + 2*next) / 3,
 * computed as (683 * (a + 2b + 1)) >> 11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00632
/* Vertical 1/3-pel interpolation: dst ~= (2*cur + below) / 3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00643
/* 2-D thirdpel interpolation at (1/3, 1/3): weights (4,3,3,2)/12 over the
 * 2x2 neighbourhood, computed as (2731 * (... + 6)) >> 15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00654
/* 2-D thirdpel interpolation at (1/3, 2/3): weights (3,2,4,3)/12 over the
 * 2x2 neighbourhood. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00665
/* Vertical 2/3-pel interpolation: dst ~= (cur + 2*below) / 3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00676
/* 2-D thirdpel interpolation at (2/3, 1/3): weights (3,4,2,3)/12 over the
 * 2x2 neighbourhood. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00687
/* 2-D thirdpel interpolation at (2/3, 2/3): weights (2,3,3,4)/12 over the
 * 2x2 neighbourhood. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00698
/* Integer-position averaging "interpolation": dispatch to the
 * width-specialized averaging routine. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00707
/* Horizontal 1/3-pel interpolation, rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00718
/* Horizontal 2/3-pel interpolation, rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00729
/* Vertical 1/3-pel interpolation, rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00740
/* 2-D thirdpel interpolation at (1/3, 1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00751
/* 2-D thirdpel interpolation at (1/3, 2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                   4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00762
/* Vertical 2/3-pel interpolation, rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00773
/* 2-D thirdpel interpolation at (2/3, 1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                   2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00784
/* 2-D thirdpel interpolation at (2/3, 2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while (row-- > 0) {
        int col;
        for (col = 0; col < width; col++) {
            const int t = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00795
00796 #define QPEL_MC(r, OPNAME, RND, OP) \
00797 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00798 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00799 int i;\
00800 for(i=0; i<h; i++)\
00801 {\
00802 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00803 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00804 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00805 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00806 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00807 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00808 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00809 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00810 dst+=dstStride;\
00811 src+=srcStride;\
00812 }\
00813 }\
00814 \
00815 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00816 const int w=8;\
00817 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00818 int i;\
00819 for(i=0; i<w; i++)\
00820 {\
00821 const int src0= src[0*srcStride];\
00822 const int src1= src[1*srcStride];\
00823 const int src2= src[2*srcStride];\
00824 const int src3= src[3*srcStride];\
00825 const int src4= src[4*srcStride];\
00826 const int src5= src[5*srcStride];\
00827 const int src6= src[6*srcStride];\
00828 const int src7= src[7*srcStride];\
00829 const int src8= src[8*srcStride];\
00830 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00831 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00832 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00833 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00834 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00835 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00836 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00837 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00838 dst++;\
00839 src++;\
00840 }\
00841 }\
00842 \
00843 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00844 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00845 int i;\
00846 \
00847 for(i=0; i<h; i++)\
00848 {\
00849 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00850 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00851 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00852 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00853 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00854 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00855 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00856 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00857 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00858 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00859 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00860 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00861 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00862 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00863 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00864 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00865 dst+=dstStride;\
00866 src+=srcStride;\
00867 }\
00868 }\
00869 \
00870 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00871 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00872 int i;\
00873 const int w=16;\
00874 for(i=0; i<w; i++)\
00875 {\
00876 const int src0= src[0*srcStride];\
00877 const int src1= src[1*srcStride];\
00878 const int src2= src[2*srcStride];\
00879 const int src3= src[3*srcStride];\
00880 const int src4= src[4*srcStride];\
00881 const int src5= src[5*srcStride];\
00882 const int src6= src[6*srcStride];\
00883 const int src7= src[7*srcStride];\
00884 const int src8= src[8*srcStride];\
00885 const int src9= src[9*srcStride];\
00886 const int src10= src[10*srcStride];\
00887 const int src11= src[11*srcStride];\
00888 const int src12= src[12*srcStride];\
00889 const int src13= src[13*srcStride];\
00890 const int src14= src[14*srcStride];\
00891 const int src15= src[15*srcStride];\
00892 const int src16= src[16*srcStride];\
00893 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00894 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00895 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00896 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00897 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00898 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00899 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00900 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00901 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00902 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00903 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00904 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00905 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00906 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00907 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00908 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00909 dst++;\
00910 src++;\
00911 }\
00912 }\
00913 \
00914 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00915 uint8_t half[64];\
00916 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00917 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00921 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00922 }\
00923 \
00924 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00925 uint8_t half[64];\
00926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00927 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t full[16*9];\
00932 uint8_t half[64];\
00933 copy_block9(full, src, 16, stride, 9);\
00934 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00935 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00936 }\
00937 \
00938 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00939 uint8_t full[16*9];\
00940 copy_block9(full, src, 16, stride, 9);\
00941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00945 uint8_t full[16*9];\
00946 uint8_t half[64];\
00947 copy_block9(full, src, 16, stride, 9);\
00948 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00949 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00950 }\
00951 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00952 uint8_t full[16*9];\
00953 uint8_t halfH[72];\
00954 uint8_t halfV[64];\
00955 uint8_t halfHV[64];\
00956 copy_block9(full, src, 16, stride, 9);\
00957 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00958 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00959 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00960 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00961 }\
00962 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00963 uint8_t full[16*9];\
00964 uint8_t halfH[72];\
00965 uint8_t halfHV[64];\
00966 copy_block9(full, src, 16, stride, 9);\
00967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00968 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00970 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00971 }\
00972 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00973 uint8_t full[16*9];\
00974 uint8_t halfH[72];\
00975 uint8_t halfV[64];\
00976 uint8_t halfHV[64];\
00977 copy_block9(full, src, 16, stride, 9);\
00978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00979 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00980 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00981 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00982 }\
00983 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00984 uint8_t full[16*9];\
00985 uint8_t halfH[72];\
00986 uint8_t halfHV[64];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00989 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00991 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00992 }\
00993 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00994 uint8_t full[16*9];\
00995 uint8_t halfH[72];\
00996 uint8_t halfV[64];\
00997 uint8_t halfHV[64];\
00998 copy_block9(full, src, 16, stride, 9);\
00999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01000 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01001 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01002 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01003 }\
01004 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01005 uint8_t full[16*9];\
01006 uint8_t halfH[72];\
01007 uint8_t halfHV[64];\
01008 copy_block9(full, src, 16, stride, 9);\
01009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01010 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01012 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01013 }\
01014 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01015 uint8_t full[16*9];\
01016 uint8_t halfH[72];\
01017 uint8_t halfV[64];\
01018 uint8_t halfHV[64];\
01019 copy_block9(full, src, 16, stride, 9);\
01020 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01021 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01022 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01023 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01024 }\
01025 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01026 uint8_t full[16*9];\
01027 uint8_t halfH[72];\
01028 uint8_t halfHV[64];\
01029 copy_block9(full, src, 16, stride, 9);\
01030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01031 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01033 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01034 }\
01035 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01036 uint8_t halfH[72];\
01037 uint8_t halfHV[64];\
01038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01039 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01040 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01041 }\
01042 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01043 uint8_t halfH[72];\
01044 uint8_t halfHV[64];\
01045 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01046 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01047 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01048 }\
01049 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01050 uint8_t full[16*9];\
01051 uint8_t halfH[72];\
01052 uint8_t halfV[64];\
01053 uint8_t halfHV[64];\
01054 copy_block9(full, src, 16, stride, 9);\
01055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01056 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01057 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01058 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01059 }\
01060 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01061 uint8_t full[16*9];\
01062 uint8_t halfH[72];\
01063 copy_block9(full, src, 16, stride, 9);\
01064 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01065 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01066 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01067 }\
01068 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01069 uint8_t full[16*9];\
01070 uint8_t halfH[72];\
01071 uint8_t halfV[64];\
01072 uint8_t halfHV[64];\
01073 copy_block9(full, src, 16, stride, 9);\
01074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01075 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01076 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01077 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01078 }\
01079 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01080 uint8_t full[16*9];\
01081 uint8_t halfH[72];\
01082 copy_block9(full, src, 16, stride, 9);\
01083 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01084 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01085 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01086 }\
01087 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01088 uint8_t halfH[72];\
01089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01091 }\
01092 \
01093 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t half[256];\
01095 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01096 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01100 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01101 }\
01102 \
01103 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01104 uint8_t half[256];\
01105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01106 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01110 uint8_t full[24*17];\
01111 uint8_t half[256];\
01112 copy_block17(full, src, 24, stride, 17);\
01113 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01114 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01115 }\
01116 \
01117 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01118 uint8_t full[24*17];\
01119 copy_block17(full, src, 24, stride, 17);\
01120 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01124 uint8_t full[24*17];\
01125 uint8_t half[256];\
01126 copy_block17(full, src, 24, stride, 17);\
01127 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01128 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01129 }\
01130 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01131 uint8_t full[24*17];\
01132 uint8_t halfH[272];\
01133 uint8_t halfV[256];\
01134 uint8_t halfHV[256];\
01135 copy_block17(full, src, 24, stride, 17);\
01136 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01137 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01138 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01139 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01140 }\
01141 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01142 uint8_t full[24*17];\
01143 uint8_t halfH[272];\
01144 uint8_t halfHV[256];\
01145 copy_block17(full, src, 24, stride, 17);\
01146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01147 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01149 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01150 }\
01151 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01152 uint8_t full[24*17];\
01153 uint8_t halfH[272];\
01154 uint8_t halfV[256];\
01155 uint8_t halfHV[256];\
01156 copy_block17(full, src, 24, stride, 17);\
01157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01158 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01159 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01160 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01161 }\
01162 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01163 uint8_t full[24*17];\
01164 uint8_t halfH[272];\
01165 uint8_t halfHV[256];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01168 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01170 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01171 }\
01172 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01173 uint8_t full[24*17];\
01174 uint8_t halfH[272];\
01175 uint8_t halfV[256];\
01176 uint8_t halfHV[256];\
01177 copy_block17(full, src, 24, stride, 17);\
01178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01179 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01180 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01181 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01182 }\
01183 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01184 uint8_t full[24*17];\
01185 uint8_t halfH[272];\
01186 uint8_t halfHV[256];\
01187 copy_block17(full, src, 24, stride, 17);\
01188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01189 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01191 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01192 }\
01193 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01194 uint8_t full[24*17];\
01195 uint8_t halfH[272];\
01196 uint8_t halfV[256];\
01197 uint8_t halfHV[256];\
01198 copy_block17(full, src, 24, stride, 17);\
01199 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01200 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01201 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01202 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01203 }\
01204 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01205 uint8_t full[24*17];\
01206 uint8_t halfH[272];\
01207 uint8_t halfHV[256];\
01208 copy_block17(full, src, 24, stride, 17);\
01209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01210 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01212 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01213 }\
01214 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01215 uint8_t halfH[272];\
01216 uint8_t halfHV[256];\
01217 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01218 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01219 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01220 }\
01221 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01222 uint8_t halfH[272];\
01223 uint8_t halfHV[256];\
01224 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01227 }\
01228 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01229 uint8_t full[24*17];\
01230 uint8_t halfH[272];\
01231 uint8_t halfV[256];\
01232 uint8_t halfHV[256];\
01233 copy_block17(full, src, 24, stride, 17);\
01234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01235 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01236 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01237 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01238 }\
01239 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01240 uint8_t full[24*17];\
01241 uint8_t halfH[272];\
01242 copy_block17(full, src, 24, stride, 17);\
01243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01244 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01245 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01246 }\
01247 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01248 uint8_t full[24*17];\
01249 uint8_t halfH[272];\
01250 uint8_t halfV[256];\
01251 uint8_t halfHV[256];\
01252 copy_block17(full, src, 24, stride, 17);\
01253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01254 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01255 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01256 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01257 }\
01258 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01259 uint8_t full[24*17];\
01260 uint8_t halfH[272];\
01261 copy_block17(full, src, 24, stride, 17);\
01262 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01263 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01264 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01265 }\
01266 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01267 uint8_t halfH[272];\
01268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01270 }
01271
/*
 * Per-pixel output primitives used by the QPEL_MC macro above.
 * 'b' is the raw filter accumulator; '(b + 16) >> 5' (rounded) or
 * '(b + 15) >> 5' (no-round) scales it down, and 'cm' (the crop table)
 * clips the result to 0..255.  op_avg additionally averages with the
 * existing destination pixel.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the three qpel function families: put, put_no_rnd and avg. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01285
/*
 * The (0,0) quarter-pel position needs no interpolation, so the mc00
 * entries alias the plain block copy/average helpers.
 * Fixed: the no-rnd 16x16 alias previously referenced a nonexistent
 * 'ff_put_pixels16x16_8_c'; it must match the rounded 16x16 alias, since
 * rounding is irrelevant when no filtering takes place.
 */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01292
01293 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01294 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01295 int i;
01296
01297 for(i=0; i<h; i++){
01298 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01299 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01300 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01301 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01302 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01303 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01304 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01305 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01306 dst+=dstStride;
01307 src+=srcStride;
01308 }
01309 }
01310
#if CONFIG_RV40_DECODER
/*
 * RV40 (3,3) quarter-pel positions: implemented by delegating to the
 * diagonal (xy2) half-pel averagers for 16x16 and 8x8 blocks.
 */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01325
#if CONFIG_DIRAC_DECODER
/*
 * Dirac pixel-copy/average wrappers.  'src' is an array of up to five
 * source pointers; the plain variants use src[0] only, the _l2 variants
 * average src[0] and src[1], and the _l4 variants average src[0]..src[3].
 * 32-wide blocks are handled as two adjacent 16-wide calls.
 */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
/* Instantiate the 'put' and 'avg' Dirac MC families. */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01370
01371 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01372 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01373 int i;
01374
01375 for(i=0; i<w; i++){
01376 const int src_1= src[ -srcStride];
01377 const int src0 = src[0 ];
01378 const int src1 = src[ srcStride];
01379 const int src2 = src[2*srcStride];
01380 const int src3 = src[3*srcStride];
01381 const int src4 = src[4*srcStride];
01382 const int src5 = src[5*srcStride];
01383 const int src6 = src[6*srcStride];
01384 const int src7 = src[7*srcStride];
01385 const int src8 = src[8*srcStride];
01386 const int src9 = src[9*srcStride];
01387 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01388 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01389 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01390 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01391 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01392 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01393 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01394 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01395 src++;
01396 dst++;
01397 }
01398 }
01399
/*
 * WMV2 8x8 MC, sub-pel position (1,0): averages the integer-pel source
 * with the horizontally filtered half-pel plane.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01405
/* WMV2 8x8 MC, sub-pel position (2,0): pure horizontal half-pel filter. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01409
/*
 * WMV2 8x8 MC, sub-pel position (3,0): averages the pixel to the right
 * (src+1) with the horizontally filtered half-pel plane.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01415
/* WMV2 8x8 MC, sub-pel position (0,2): pure vertical half-pel filter. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01419
/*
 * WMV2 8x8 MC, sub-pel position (1,2): horizontally filters an 11-row
 * band starting one row above the block, vertically filters both the
 * source and that half-pel plane (halfH+8 skips the extra top row),
 * then averages the two vertical results.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/*
 * WMV2 8x8 MC, sub-pel position (3,2): like mc12 but the vertical filter
 * of the source starts one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/*
 * WMV2 8x8 MC, sub-pel position (2,2): horizontal then vertical half-pel
 * filtering (halfH+8 skips the extra top row of the intermediate band).
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01443
/*
 * H.263 in-place deblocking filter across a horizontal block edge
 * (the edge lies between rows -1 and 0 relative to 'src').  For each of
 * the 8 columns it filters the two pixels on either side of the edge
 * using the qscale-dependent strength table.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        /* p0,p1 above the edge; p2,p3 below it */
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        /* piecewise-linear correction: full for small |d|, ramping back
         * to zero for large |d| so real edges are preserved */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branch-cheap clip to 0..255: negative -> 0, >255 -> 255
         * (assumes p1/p2 stay within [-256,511] after correction) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        /* secondary, weaker correction of the outer pixel pair,
         * limited to half the primary correction magnitude */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
    }
    }
}
01480
/*
 * H.263 in-place deblocking filter across a vertical block edge
 * (the edge lies between columns -1 and 0 relative to 'src').
 * Row-wise transpose of h263_v_loop_filter_c; see that function for the
 * explanation of the correction terms and the clipping trick.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        /* p0,p1 left of the edge; p2,p3 right of it */
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        /* piecewise-linear correction, tapering off for strong edges */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branch-cheap clip to 0..255 (see h263_v_loop_filter_c) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        /* weaker correction of the outer pixels, bounded by |d1|/2 */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
01517
01518 static void h261_loop_filter_c(uint8_t *src, int stride){
01519 int x,y,xy,yz;
01520 int temp[64];
01521
01522 for(x=0; x<8; x++){
01523 temp[x ] = 4*src[x ];
01524 temp[x + 7*8] = 4*src[x + 7*stride];
01525 }
01526 for(y=1; y<7; y++){
01527 for(x=0; x<8; x++){
01528 xy = y * stride + x;
01529 yz = y * 8 + x;
01530 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
01531 }
01532 }
01533
01534 for(y=0; y<8; y++){
01535 src[ y*stride] = (temp[ y*8] + 2)>>2;
01536 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
01537 for(x=1; x<7; x++){
01538 xy = y * stride + x;
01539 yz = y * 8 + x;
01540 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
01541 }
01542 }
01543 }
01544
01545 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01546 {
01547 int s, i;
01548
01549 s = 0;
01550 for(i=0;i<h;i++) {
01551 s += abs(pix1[0] - pix2[0]);
01552 s += abs(pix1[1] - pix2[1]);
01553 s += abs(pix1[2] - pix2[2]);
01554 s += abs(pix1[3] - pix2[3]);
01555 s += abs(pix1[4] - pix2[4]);
01556 s += abs(pix1[5] - pix2[5]);
01557 s += abs(pix1[6] - pix2[6]);
01558 s += abs(pix1[7] - pix2[7]);
01559 s += abs(pix1[8] - pix2[8]);
01560 s += abs(pix1[9] - pix2[9]);
01561 s += abs(pix1[10] - pix2[10]);
01562 s += abs(pix1[11] - pix2[11]);
01563 s += abs(pix1[12] - pix2[12]);
01564 s += abs(pix1[13] - pix2[13]);
01565 s += abs(pix1[14] - pix2[14]);
01566 s += abs(pix1[15] - pix2[15]);
01567 pix1 += line_size;
01568 pix2 += line_size;
01569 }
01570 return s;
01571 }
01572
01573 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01574 {
01575 int s, i;
01576
01577 s = 0;
01578 for(i=0;i<h;i++) {
01579 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01580 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01581 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01582 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01583 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01584 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01585 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01586 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01587 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
01588 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
01589 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
01590 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
01591 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
01592 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
01593 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
01594 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
01595 pix1 += line_size;
01596 pix2 += line_size;
01597 }
01598 return s;
01599 }
01600
01601 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01602 {
01603 int s, i;
01604 uint8_t *pix3 = pix2 + line_size;
01605
01606 s = 0;
01607 for(i=0;i<h;i++) {
01608 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01609 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01610 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01611 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01612 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01613 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01614 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01615 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01616 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
01617 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
01618 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
01619 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
01620 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
01621 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
01622 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
01623 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
01624 pix1 += line_size;
01625 pix2 += line_size;
01626 pix3 += line_size;
01627 }
01628 return s;
01629 }
01630
01631 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01632 {
01633 int s, i;
01634 uint8_t *pix3 = pix2 + line_size;
01635
01636 s = 0;
01637 for(i=0;i<h;i++) {
01638 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01639 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01640 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01641 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01642 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01643 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01644 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01645 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01646 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
01647 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
01648 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
01649 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
01650 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
01651 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
01652 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
01653 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
01654 pix1 += line_size;
01655 pix2 += line_size;
01656 pix3 += line_size;
01657 }
01658 return s;
01659 }
01660
01661 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01662 {
01663 int s, i;
01664
01665 s = 0;
01666 for(i=0;i<h;i++) {
01667 s += abs(pix1[0] - pix2[0]);
01668 s += abs(pix1[1] - pix2[1]);
01669 s += abs(pix1[2] - pix2[2]);
01670 s += abs(pix1[3] - pix2[3]);
01671 s += abs(pix1[4] - pix2[4]);
01672 s += abs(pix1[5] - pix2[5]);
01673 s += abs(pix1[6] - pix2[6]);
01674 s += abs(pix1[7] - pix2[7]);
01675 pix1 += line_size;
01676 pix2 += line_size;
01677 }
01678 return s;
01679 }
01680
01681 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01682 {
01683 int s, i;
01684
01685 s = 0;
01686 for(i=0;i<h;i++) {
01687 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01688 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01689 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01690 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01691 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01692 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01693 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01694 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01695 pix1 += line_size;
01696 pix2 += line_size;
01697 }
01698 return s;
01699 }
01700
01701 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01702 {
01703 int s, i;
01704 uint8_t *pix3 = pix2 + line_size;
01705
01706 s = 0;
01707 for(i=0;i<h;i++) {
01708 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01709 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01710 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01711 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01712 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01713 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01714 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01715 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01716 pix1 += line_size;
01717 pix2 += line_size;
01718 pix3 += line_size;
01719 }
01720 return s;
01721 }
01722
01723 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01724 {
01725 int s, i;
01726 uint8_t *pix3 = pix2 + line_size;
01727
01728 s = 0;
01729 for(i=0;i<h;i++) {
01730 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01731 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01732 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01733 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01734 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01735 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01736 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01737 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01738 pix1 += line_size;
01739 pix2 += line_size;
01740 pix3 += line_size;
01741 }
01742 return s;
01743 }
01744
01745 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01746 MpegEncContext *c = v;
01747 int score1=0;
01748 int score2=0;
01749 int x,y;
01750
01751 for(y=0; y<h; y++){
01752 for(x=0; x<16; x++){
01753 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01754 }
01755 if(y+1<h){
01756 for(x=0; x<15; x++){
01757 score2+= FFABS( s1[x ] - s1[x +stride]
01758 - s1[x+1] + s1[x+1+stride])
01759 -FFABS( s2[x ] - s2[x +stride]
01760 - s2[x+1] + s2[x+1+stride]);
01761 }
01762 }
01763 s1+= stride;
01764 s2+= stride;
01765 }
01766
01767 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01768 else return score1 + FFABS(score2)*8;
01769 }
01770
01771 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01772 MpegEncContext *c = v;
01773 int score1=0;
01774 int score2=0;
01775 int x,y;
01776
01777 for(y=0; y<h; y++){
01778 for(x=0; x<8; x++){
01779 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01780 }
01781 if(y+1<h){
01782 for(x=0; x<7; x++){
01783 score2+= FFABS( s1[x ] - s1[x +stride]
01784 - s1[x+1] + s1[x+1+stride])
01785 -FFABS( s2[x ] - s2[x +stride]
01786 - s2[x+1] + s2[x+1+stride]);
01787 }
01788 }
01789 s1+= stride;
01790 s2+= stride;
01791 }
01792
01793 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01794 else return score1 + FFABS(score2)*8;
01795 }
01796
/**
 * Estimate the weighted squared error that would result from adding
 * 'basis' scaled by 'scale' to the residual 'rem'.
 * Values are in BASIS_SHIFT/RECON_SHIFT fixed-point; the assert documents
 * the expected post-shift coefficient range.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* scaled basis contribution, rounded down from BASIS_SHIFT precision */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        /* weighted squared error, pre-scaled to keep the sum in range */
        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01811
01812 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01813 int i;
01814
01815 for(i=0; i<8*8; i++){
01816 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01817 }
01818 }
01819
01828 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01829 {
01830 int i;
01831 DCTELEM temp[64];
01832
01833 if(last<=0) return;
01834
01835
01836 for(i=0; i<=last; i++){
01837 const int j= scantable[i];
01838 temp[j]= block[j];
01839 block[j]=0;
01840 }
01841
01842 for(i=0; i<=last; i++){
01843 const int j= scantable[i];
01844 const int perm_j= permutation[j];
01845 block[perm_j]= temp[j];
01846 }
01847 }
01848
/** Dummy comparison that always reports zero cost (used for FF_CMP_ZERO). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
01852
/**
 * Fill cmp[0..5] with the comparison functions selected by 'type'
 * (one of the FF_CMP_* constants; only the low byte is inspected).
 * Index i selects the block-size variant of each metric.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
/* wavelet metrics are only built when the DWT code is configured in */
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01912
/**
 * dst[i] += src[i] for w bytes, with per-byte wraparound.
 * Uses a SWAR trick to add sizeof(long) bytes at a time: sums the low
 * 7 bits of every byte and fixes the top bits up via XOR so carries
 * never cross byte boundaries.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    const unsigned long m7f = (~0UL / 255) * 0x7f;   /* 0x7f replicated in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80;   /* 0x80 replicated in every byte */

    /* Cast sizeof() to long: without it, w smaller than a machine word made
     * "w - sizeof(long)" wrap to a huge unsigned value and the loop read and
     * wrote past the end of both buffers. */
    for(i=0; i <= w - (long)sizeof(long); i += sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&m7f) + (b&m7f)) ^ ((a^b)&m80);
    }
    /* scalar tail for the remaining 0..sizeof(long)-1 bytes */
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
01923
/**
 * dst[i] = src1[i] - src2[i] for w bytes, with per-byte wraparound.
 * Word-at-a-time SWAR subtraction; on targets without fast unaligned
 * loads an 8x unrolled byte loop handles a misaligned src2.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    const unsigned long m7f = (~0UL / 255) * 0x7f;   /* 0x7f in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80;   /* 0x80 in every byte */
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    /* Cast sizeof() to long: otherwise w < sizeof(long) wrapped the bound to
     * a huge unsigned value and the loop accessed memory out of bounds. */
    for(i=0; i <= w - (long)sizeof(long); i += sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|m80) - (b&m7f)) ^ ((a^b^m80)&m80);
    }
    /* scalar tail */
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
01948
/**
 * HuffYUV median-prediction decode: reconstruct a row by adding 'diff' to
 * the median of (left, top, left+top-topleft), carrying the left and
 * top-left neighbours across calls via *left / *left_top.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t prev = *left;       /* reconstructed left neighbour */
    uint8_t diag = *left_top;   /* top-left neighbour */
    int k;

    for (k = 0; k < w; k++) {
        prev = mid_pred(prev, src1[k], (prev + src1[k] - diag) & 0xFF) + diff[k];
        diag = src1[k];
        dst[k] = prev;
    }

    *left = prev;
    *left_top = diag;
}
01965
/**
 * HuffYUV median-prediction encode: emit src2 minus the median predictor
 * formed from src1 (the row above) and the running left/top-left state.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t prev = *left;       /* previous pixel of the current row */
    uint8_t diag = *left_top;   /* top-left neighbour */
    int k;

    for (k = 0; k < w; k++) {
        const int pred = mid_pred(prev, src1[k], (prev + src1[k] - diag) & 0xFF);
        diag = src1[k];
        prev = src2[k];
        dst[k] = prev - pred;
    }

    *left = prev;
    *left_top = diag;
}
01983
/**
 * Left-prediction decode: running sum of src into dst (stores wrap to
 * 8 bits), two elements per iteration. Returns the final accumulator.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* main loop: pairs */
    while (i < w - 1) {
        acc += src[i];
        dst[i++] = acc;
        acc += src[i];
        dst[i++] = acc;
    }

    /* odd trailing element, if any */
    while (i < w) {
        acc += src[i];
        dst[i++] = acc;
    }

    return acc;
}
02002
/* Byte offsets of the B, G, R, A components inside a packed 32-bit BGRA
 * pixel, chosen per host endianness so src[4*i+B] etc. hit the right byte. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Left-prediction decode for packed 32-bit BGRA: each channel keeps its
 * own running sum (stores wrap to 8 bits); state is carried through the
 * red/green/blue/alpha pointers.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* component byte offsets within one 32-bit pixel, per host endianness */
#if HAVE_BIGENDIAN
    enum { OB = 3, OG = 2, OR = 1, OA = 0 };
#else
    enum { OB = 0, OG = 1, OR = 2, OA = 3 };
#endif
    int r = *red, g = *green, b = *blue, a = *alpha;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t *sp = src + 4 * x;
        uint8_t *dp = dst + 4 * x;

        b += sp[OB];
        g += sp[OG];
        r += sp[OR];
        a += sp[OA];

        dp[OB] = b;
        dp[OG] = g;
        dp[OR] = r;
        dp[OA] = a;
    }

    *red = r;
    *green = g;
    *blue = b;
    *alpha = a;
}
02039 #undef B
02040 #undef G
02041 #undef R
02042 #undef A
02043
/* One add/sub butterfly stage writing to two fresh outputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place add/sub butterfly on x and y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final stage: |x+y| + |x-y|, i.e. the butterfly plus absolute accumulation. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02058
/**
 * SATD of an 8x8 block: 2-D Hadamard transform of the src-dst difference,
 * then sum of absolute transformed coefficients. Rows are transformed
 * first (3 butterfly stages), then columns, accumulating |.| in the last
 * column stage via BUTTERFLYA.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal transform of the difference row, stage 1 */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical transform of column i, stages 1 and 2 */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* last stage folded into the absolute-value accumulation */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02103
/**
 * Intra SATD of an 8x8 block: like hadamard8_diff8x8_c but transforms the
 * source pixels themselves (no reference). The DC term |temp[0]+temp[32]|
 * is subtracted at the end so the mean brightness does not dominate.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal transform of the source row, 3 butterfly stages */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical transform of column i */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC coefficient's contribution */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02151
/**
 * DCT-domain SAD of an 8x8 block: forward-transform the pixel difference
 * and sum the absolute DCT coefficients, using the context's fdct.
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02162
#if CONFIG_GPL
/* One-dimensional 8-point H.264-style integer DCT. SRC/DST are macro
 * hooks so the same body can be run over rows and then over columns. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * SAD in the H.264 8x8 integer-DCT domain: transform the pixel difference
 * row-wise in place, then column-wise while accumulating |coefficient|.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* pass 1: transform each row in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* pass 2: transform each column, folding the result into the SAD */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02215
/**
 * Maximum absolute DCT coefficient of the 8x8 pixel difference; a cheap
 * proxy for the worst-case quantization error of the block.
 */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02231
/**
 * Quantization-error metric: run the 8x8 difference through
 * quantize -> dequantize -> IDCT and return the squared error against the
 * untouched coefficients, i.e. the distortion the codec would introduce.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;   /* pristine copy for the comparison */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;   /* evaluate with inter quantization rules */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02254
/**
 * Rate-distortion metric for an 8x8 block: quantize the difference, count
 * the VLC bits needed to code the coefficients, reconstruct, and combine
 * SSE distortion with a lambda-weighted bit cost.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so lsrc2 can be reconstructed into */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;   /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run-level VLC bits for all AC coefficients but the last */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;   /* bias into the table's unsigned index range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);   /* the last coded coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* reconstruct into the reference copy and measure the error */
    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* distortion + lambda*rate with a fixed-point lambda of qscale^2*109/128 */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02330
/**
 * Bit-cost metric for an 8x8 block: quantize the pixel difference and
 * return the number of VLC bits needed to code the coefficients
 * (rate only — no distortion term, unlike rd8x8_c).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;   /* DC coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* run-level VLC bit counting, same scheme as rd8x8_c */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02389
/* Generate vsad_intra8_c / vsad_intra16_c: sum of absolute vertical pixel
 * differences within one block — a cheap measure of vertical activity. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02407
/**
 * Vertical SAD between two 16-wide blocks: absolute difference of the
 * row-to-row changes of s1 and s2 (how differently they vary vertically).
 */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02422
/* Squared-value helper for the vertical SSE metrics below. */
#define SQ(a) ((a)*(a))

/* Generate vsse_intra8_c / vsse_intra16_c: sum of squared vertical pixel
 * differences within one block. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02441
/**
 * Vertical SSE between two 16-wide blocks: squared difference of the
 * row-to-row changes of s1 and s2.
 */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02456
/** Sum of squared differences between an int8 and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int acc = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        acc += d * d;
    }
    return acc;
}
02465
/* Build the 16x16 versions of the 8x8 metrics: WRAPPER8_16_SQ applies the
 * given 8x8 comparison to the four 8x8 quadrants and sums the results. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02476
/** Element-wise product: dst[i] = src0[i] * src1[i]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    while (len-- > 0)
        *dst++ = *src0++ * *src1++;
}
02482
/** dst[i] = src0[i] * src1[len-1-i] (src1 read back to front). */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
02489
/** Fused multiply-add over vectors: dst[i] = src0[i]*src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    while (len-- > 0)
        *dst++ = *src0++ * *src1++ + *src2++;
}
02495
/**
 * Overlap-add windowing used by MDCT codecs: combines the mirrored halves
 * src0 (read forward from its end) and src1 (read backward) under the
 * symmetric window 'win', writing 2*len output samples around dst+len.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    float *d        = dst + len;
    const float *w  = win + len;
    const float *s0 = src0 + len;
    int j;

    for (j = len - 1; j >= 0; j--) {
        const int i = -j - 1;        /* mirrored index: runs -len .. -1 */
        const float a  = s0[i];
        const float b  = src1[j];
        const float wi = w[i];
        const float wj = w[j];
        d[i] = a * wj - b * wi;
        d[j] = a * wi + b * wj;
    }
}
02512
/** Scale a float vector: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    while (len-- > 0)
        *dst++ = *src++ * mul;
}
02520
/** Multiply-accumulate with a scalar: dst[i] += src[i] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    while (len-- > 0)
        *dst++ += *src++ * mul;
}
02528
/** In-place butterfly: v1[i],v2[i] become their sum and difference. */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float sum  = v1[k] + v2[k];
        const float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
02539
/** Butterfly with interleaved output: dst[2i] = a+b, dst[2i+1] = a-b. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float a = src0[k];
        const float b = src1[k];
        dst[2 * k]     = a + b;
        dst[2 * k + 1] = a - b;
    }
}
02551
/** Dot product of two float vectors, accumulated left to right. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0f;

    while (len-- > 0)
        acc += *v1++ * *v2++;

    return acc;
}
02562
/**
 * Clip one float (carried in its uint32 bit pattern) for the
 * opposite-sign fast path: 'mini' is the bit pattern of the negative
 * bound, 'maxi' of the positive bound, 'maxisign' is maxi with the sign
 * bit flipped so a single unsigned compare covers the positive side.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
02571
/**
 * Clip a float vector where min < 0 < max, working on raw bit patterns
 * via clipf_c_one. len is processed in groups of eight, matching the
 * caller's length contract.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    const uint32_t lo     = *(uint32_t*)min;
    const uint32_t hi     = *(uint32_t*)max;
    const uint32_t hisign = hi ^ (1U << 31);
    uint32_t *d           = (uint32_t*)dst;
    const uint32_t *s     = (const uint32_t*)src;
    int k, u;

    for (k = 0; k < len; k += 8)
        for (u = 0; u < 8; u++)
            d[k + u] = clipf_c_one(s[k + u], lo, hi, hisign);
}
/**
 * Clip each src[i] into [min, max] and store in dst. len is expected to be
 * a multiple of 8 (the loops are unrolled by 8). When the bounds straddle
 * zero, a faster bit-pattern path is taken.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        /* opposite-sign bounds: integer-compare fast path */
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02607
/** Dot product of two int16 vectors, each product right-shifted by 'shift'. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int32_t total = 0;
    int k;

    for (k = 0; k < order; k++)
        total += (v1[k] * v2[k]) >> shift;

    return total;
}
02617
/**
 * Combined pass: return the dot product of v1 and v2 (using v1's values
 * before modification) while updating v1[i] += mul * v3[i] in place.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int32_t acc = 0;
    int k;

    for (k = 0; k < order; k++) {
        acc   += v1[k] * v2[k];     /* old v1 value */
        v1[k] += mul * v3[k];       /* then update in place */
    }
    return acc;
}
02627
/**
 * Apply a symmetric int16 window (Q15) to 'input': window[i] multiplies
 * both input[i] and its mirror input[len-1-i], with round-to-nearest.
 * Only the first len/2 window coefficients are read.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const unsigned int half = len >> 1;
    unsigned int i;

    for (i = 0; i < half; i++) {
        const int w = window[i];
        const unsigned int j = len - i - 1;   /* mirrored index */
        output[i] = (input[i] * w + (1 << 14)) >> 15;
        output[j] = (input[j] * w + (1 << 14)) >> 15;
    }
}
02640
/**
 * Clamp each src[i] into [min, max] and store into dst.
 * len must be a positive multiple of 8 (processed eight at a time).
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        unsigned int u;
        for (u = 0; u < 8; u++) {
            int32_t v = *src++;
            if (v < min)
                v = min;
            else if (v > max)
                v = max;
            *dst++ = v;
        }
        len -= 8;
    } while (len > 0);
}
02656
/* Fixed-point IDCT weights (cos(k*pi/16) scaled by 2048) for the WMV2 IDCT. */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/**
 * One row of the WMV2 8-point fixed-point IDCT; results are scaled down
 * by 8 bits with rounding so the column pass can work in higher precision.
 */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part (a1,a3,a5,a7) and even part (a0,a2,a4,a6) butterflies */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* 181/256 ~= 1/sqrt(2), with rounding */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/**
 * One column (stride 8) of the WMV2 8-point fixed-point IDCT; inputs are
 * pre-scaled by 3 bits and the final results shifted down by 14 with
 * rounding to return to pixel-difference range.
 */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* same butterfly structure as the row pass, with +4>>3 pre-scaling */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* 181/256 ~= 1/sqrt(2), with rounding */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/** Full 8x8 WMV2 IDCT: row pass over all eight rows, then column pass. */
void ff_wmv2_idct_c(short * block){
    int n;

    for (n = 0; n < 64; n += 8)
        wmv2_idct_row(block + n);
    for (n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
02729
02730
/* IDCT + store wrappers: each pairs an inverse transform with either a
 * clamped store (put) or a clamped add to the destination (add). */

/* WMV2 IDCT, clamped store. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT, clamped add. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference integer IDCT (jrevdct), clamped store. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* Reference integer IDCT, clamped add. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 lowres (1/2 resolution) variants. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 lowres (1/4 resolution) variants. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 lowres: only the DC term survives, (dc+4)>>3 with clamping. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02782
/* Intentional no-op with the prefetch function signature; installed where
 * prefetching should be disabled. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02784
02785
02786 av_cold void dsputil_static_init(void)
02787 {
02788 int i;
02789
02790 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02791 for(i=0;i<MAX_NEG_CROP;i++) {
02792 ff_cropTbl[i] = 0;
02793 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02794 }
02795
02796 for(i=0;i<512;i++) {
02797 ff_squareTbl[i] = (i - 256) * (i - 256);
02798 }
02799
02800 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02801 }
02802
/**
 * Verify that the compiler 16-byte aligns stack variables (required by the
 * SIMD code). Returns 0 on success, -1 if misaligned; the warning is
 * printed only once.
 */
int ff_check_alignment(void){
    static int did_fail=0;   /* ensures the error is logged only once */
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02822
02823 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02824 {
02825 int i;
02826
02827 ff_check_alignment();
02828
02829 #if CONFIG_ENCODERS
02830 if (avctx->bits_per_raw_sample == 10) {
02831 c->fdct = ff_jpeg_fdct_islow_10;
02832 c->fdct248 = ff_fdct248_islow_10;
02833 } else {
02834 if(avctx->dct_algo==FF_DCT_FASTINT) {
02835 c->fdct = fdct_ifast;
02836 c->fdct248 = fdct_ifast248;
02837 }
02838 else if(avctx->dct_algo==FF_DCT_FAAN) {
02839 c->fdct = ff_faandct;
02840 c->fdct248 = ff_faandct248;
02841 }
02842 else {
02843 c->fdct = ff_jpeg_fdct_islow_8;
02844 c->fdct248 = ff_fdct248_islow_8;
02845 }
02846 }
02847 #endif //CONFIG_ENCODERS
02848
    /* Inverse DCT selection.  Low-resolution decoding (lowres 1/2/3) forces
     * reduced-size 4x4/2x2/1x1 IDCTs; otherwise the choice depends on
     * bits_per_raw_sample and avctx->idct_algo.  Each branch also records
     * the coefficient permutation the chosen IDCT expects. */
    if(avctx->lowres==1){
        c->idct_put= ff_jref_idct4_put;
        c->idct_add= ff_jref_idct4_add;
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if (avctx->bits_per_raw_sample == 10) {
            /* 10-bit content always uses the simple IDCT; idct_algo is
             * ignored here. */
            c->idct_put              = ff_simple_idct_put_10;
            c->idct_add              = ff_simple_idct_add_10;
            c->idct                  = ff_simple_idct_10;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else {
            if(avctx->idct_algo==FF_IDCT_INT){
                c->idct_put= ff_jref_idct_put;
                c->idct_add= ff_jref_idct_add;
                c->idct    = j_rev_dct;
                /* j_rev_dct wants coefficients in libmpeg2 order. */
                c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
            }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
                    avctx->idct_algo==FF_IDCT_VP3){
                c->idct_put= ff_vp3_idct_put_c;
                c->idct_add= ff_vp3_idct_add_c;
                c->idct    = ff_vp3_idct_c;
                c->idct_permutation_type= FF_NO_IDCT_PERM;
            }else if(avctx->idct_algo==FF_IDCT_WMV2){
                c->idct_put= ff_wmv2_idct_put_c;
                c->idct_add= ff_wmv2_idct_add_c;
                c->idct    = ff_wmv2_idct_c;
                c->idct_permutation_type= FF_NO_IDCT_PERM;
            }else if(avctx->idct_algo==FF_IDCT_FAAN){
                /* Floating-point AAN inverse DCT. */
                c->idct_put= ff_faanidct_put;
                c->idct_add= ff_faanidct_add;
                c->idct    = ff_faanidct;
                c->idct_permutation_type= FF_NO_IDCT_PERM;
            }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
                /* NOTE: only idct_put is provided for the EA IDCT; idct/idct_add
                 * keep whatever value they had before (none set here). */
                c->idct_put= ff_ea_idct_put_c;
                c->idct_permutation_type= FF_NO_IDCT_PERM;
            }else{ //accurate/default
                c->idct_put = ff_simple_idct_put_8;
                c->idct_add = ff_simple_idct_add_8;
                c->idct     = ff_simple_idct_8;
                c->idct_permutation_type= FF_NO_IDCT_PERM;
            }
        }
    }
02903
    /* Plain-C defaults for the scalar pixel helpers; per-arch init functions
     * below may override any of these with optimized versions. */
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = ff_put_pixels_clamped_c;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
    c->add_pixels_clamped = ff_add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* SAD at the four half-pel positions; [0] = 16x16, [1] = 8x8. */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* Third-pel MC tables: index is y*4 + x with x,y in 0..2, so only the
     * nine entries set here are valid (3, 7 and 11+ stay untouched). */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* Fill all 16 quarter-pel positions (index = y*4 + x) of one
     * PFX_pixels_tab row with the matching PFX<NUM>_mcXY_c functions. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    /* MPEG-4 style qpel MC: [0] = 16x16 blocks, [1] = 8x8 blocks. */
    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* Note: no avg_no_rnd_qpel tables are filled here. */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);

#undef dspfunc
02978
    /* Codec-specific sub-DSP initializers, compiled in only when the
     * corresponding decoders are enabled. */
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
    ff_mlp_init(c, avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif

    /* WMV2 "mspel" 8x8 MC table; position 0 is plain pixel copy. */
    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* Set entries [0] (16x16) and [1] (8x8) of a comparison-function table. */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    /* Block comparison metrics used by motion estimation / RD decisions.
     * Indices 4/5 are the intra (no-reference) variants where present. */
    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_DWT
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    /* HuffYUV-style byte-wise prediction helpers and byte swapping. */
    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;

    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    /* Encoder rate-distortion basis-function helpers. */
    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
    /* Float/int vector primitives shared by the audio codecs. */
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add = vector_fmul_add_c;
    c->vector_fmul_window = vector_fmul_window_c;
    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;
    c->scalarproduct_float = scalarproduct_float_c;
    c->butterflies_float = butterflies_float_c;
    c->butterflies_float_interleave = butterflies_float_interleave_c;
    c->vector_fmul_scalar = vector_fmul_scalar_c;
    c->vector_fmac_scalar = vector_fmac_scalar_c;

    /* Plane shrinkers: [n] halves each dimension n times; [0] is a copy. */
    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    /* Default prefetch is a no-op; arch code may install a real one. */
    c->prefetch= just_return;

    /* Cleared here so the loop at the end can detect which entries the
     * arch-specific init filled in and default the rest to h264 qpel. */
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03084
/* Helpers that append the bit depth (and optional _c suffix) to a function
 * name, matching the naming produced by dsputil_template.c at each depth. */
#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

/* Fill the four half-pel entries (00, x2, y2, xy2) of a pixels table for
 * the given block size NUM at the given bit depth. */
#define dspfunc1(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)

/* Fill all 16 quarter-pel entries (index = y*4 + x) at the given depth. */
#define dspfunc2(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)

/* Install every depth-dependent function pointer for one bit depth.
 * 'dct' selects the DCT coefficient width suffix (_16 or _32) for the
 * pointers that touch DCT blocks (get_pixels, clear_block*, add_pixels*). */
#define BIT_DEPTH_FUNCS(depth, dct)\
    c->get_pixels                    = FUNCC(get_pixels ## dct      , depth);\
    c->draw_edges                    = FUNCC(draw_edges             , depth);\
    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc    , depth);\
    c->clear_block                   = FUNCC(clear_block ## dct     , depth);\
    c->clear_blocks                  = FUNCC(clear_blocks ## dct    , depth);\
    c->add_pixels8                   = FUNCC(add_pixels8 ## dct     , depth);\
    c->add_pixels4                   = FUNCC(add_pixels4 ## dct     , depth);\
    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
\
    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
\
    dspfunc1(put       , 0, 16, depth);\
    dspfunc1(put       , 1,  8, depth);\
    dspfunc1(put       , 2,  4, depth);\
    dspfunc1(put       , 3,  2, depth);\
    dspfunc1(put_no_rnd, 0, 16, depth);\
    dspfunc1(put_no_rnd, 1,  8, depth);\
    dspfunc1(avg       , 0, 16, depth);\
    dspfunc1(avg       , 1,  8, depth);\
    dspfunc1(avg       , 2,  4, depth);\
    dspfunc1(avg       , 3,  2, depth);\
    dspfunc1(avg_no_rnd, 0, 16, depth);\
    dspfunc1(avg_no_rnd, 1,  8, depth);\
\
    dspfunc2(put_h264_qpel, 0, 16, depth);\
    dspfunc2(put_h264_qpel, 1,  8, depth);\
    dspfunc2(put_h264_qpel, 2,  4, depth);\
    dspfunc2(put_h264_qpel, 3,  2, depth);\
    dspfunc2(avg_h264_qpel, 0, 16, depth);\
    dspfunc2(avg_h264_qpel, 1,  8, depth);\
    dspfunc2(avg_h264_qpel, 2,  4, depth); /* no 2x2 avg variant exists */
03153
    /* Bind all depth-dependent pointers for the stream's bit depth; the
     * dct_bits check picks 16- vs 32-bit DCT coefficient variants. */
    switch (avctx->bits_per_raw_sample) {
    case 9:
        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(9, _32);
        } else {
            BIT_DEPTH_FUNCS(9, _16);
        }
        break;
    case 10:
        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(10, _32);
        } else {
            BIT_DEPTH_FUNCS(10, _16);
        }
        break;
    default:
        /* Unknown depths just log and deliberately fall through to the
         * 8-bit functions as a safe default. */
        av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
        /* fallthrough */
    case 8:
        BIT_DEPTH_FUNCS(8, _16);
        break;
    }

    /* Let each enabled architecture override the C defaults installed above.
     * Order matters only in that these run after all generic assignments. */
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);

    /* Any 2-tap qpel slots the arch init left NULL (tables were memset to 0
     * earlier) default to the corresponding h264 qpel functions. */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* Expand the permutation type chosen above into the actual index table. */
    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}