00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "simple_idct.h"
00033 #include "faandct.h"
00034 #include "faanidct.h"
00035 #include "mathops.h"
00036 #include "h263.h"
00037 #include "snow.h"
00038
00039
00040 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
00041
00042
00043 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
00044
00045
00046 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
00047
00048
00049 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
00050
00051
00052 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
00053
00054
00055 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
00056
/* Clamp table: ff_cropTbl[MAX_NEG_CROP + x] == av_clip_uint8(x).
 * Declared zeroed here; presumably filled at runtime by the dsputil init
 * code -- TODO confirm the initializer location (not visible in this chunk). */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares of -256..255, indexed as sq = ff_squareTbl + 256 so negative
 * differences index correctly; also presumably filled at runtime. */
uint32_t ff_squareTbl[512] = {0, };


/* 0x7f (resp. 0x80) replicated into every byte of a native unsigned long,
 * for byte-parallel SIMD-within-a-register tricks. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00063
/* Standard zigzag scan order for an 8x8 coefficient block (as used by
 * JPEG/MPEG progressive-frame coding): entry i is the raster index of the
 * i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00074
00075
00076
/* Alternate zigzag ordering; the name suggests the 2-4-8 scan used for
 * field/interlaced (2x4x8) DCT blocks -- TODO confirm against the callers. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00087
00088
/* 16-bit inverse zigzag mapping, 8-byte aligned (name/alignment suggest MMX
 * use); zeroed here and presumably filled at runtime -- TODO confirm. */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
00090
/* Alternate horizontally-biased scan order for an 8x8 block (companion to
 * ff_alternate_vertical_scan below). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00101
/* Alternate vertically-biased scan order for an 8x8 block (companion to
 * ff_alternate_horizontal_scan above). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00112
00113
/* ff_inverse[i] == ceil(2^32 / i) for i >= 2 (entry 1 saturates to 2^32-1,
 * entry 0 is unused), enabling division by a small constant to be done as a
 * multiply + shift.  The values below match ceil(2^32/i) exactly, e.g.
 * ff_inverse[3] == 1431655766. */
const uint32_t ff_inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
00148
00149
/* Coefficient permutation for the "simple" MMX IDCT -- NOTE(review): purpose
 * inferred from the name; each entry maps a raster position to the IDCT's
 * internal coefficient ordering.  TODO confirm against the MMX IDCT code. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00160
/* Row permutation for the SSE2 IDCT -- inferred from the name; TODO confirm. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00162
00163 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00164 int i;
00165 int end;
00166
00167 st->scantable= src_scantable;
00168
00169 for(i=0; i<64; i++){
00170 int j;
00171 j = src_scantable[i];
00172 st->permutated[i] = permutation[j];
00173 #if ARCH_PPC
00174 st->inverse[j] = i;
00175 #endif
00176 }
00177
00178 end=-1;
00179 for(i=0; i<64; i++){
00180 int j;
00181 j = st->permutated[i];
00182 if(j>end) end=j;
00183 st->raster_end[i]= end;
00184 }
00185 }
00186
/* Sum of all 256 pixel values of a 16x16 block.
 * pix points at the top-left pixel; consecutive rows are line_size bytes
 * apart.  Returns the plain arithmetic sum. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00208
/* Sum of squares of all 256 pixels in a 16x16 block (the L2 "norm" before
 * the square root).  Squares come from the ff_squareTbl lookup, biased by
 * 256 so that negative indices would also be valid (only 0..255 occurs here).
 * The active variants read eight pixels per iteration through a single
 * uint64_t (LP64) or two uint32_t (ILP32) loads and square each byte.
 * NOTE(review): those word loads assume pix is suitably aligned and are a
 * strict-aliasing cast; kept as-is to match the file's conventions. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* disabled byte-at-a-time reference version */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            /* 64-bit long: one 8-byte load, square each extracted byte */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* 32-bit long: two 4-byte loads */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        /* advance to the next row (16 bytes already consumed) */
        pix += line_size - 16;
    }
    return s;
}
00256
/* Byte-swap w 32-bit words from src into dst (element-wise, so dst == src
 * works too).  The main loop is unrolled eight words at a time; a scalar
 * tail loop handles the remainder. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    while (n + 8 <= w) {
        dst[n    ] = bswap_32(src[n    ]);
        dst[n + 1] = bswap_32(src[n + 1]);
        dst[n + 2] = bswap_32(src[n + 2]);
        dst[n + 3] = bswap_32(src[n + 3]);
        dst[n + 4] = bswap_32(src[n + 4]);
        dst[n + 5] = bswap_32(src[n + 5]);
        dst[n + 6] = bswap_32(src[n + 6]);
        dst[n + 7] = bswap_32(src[n + 7]);
        n += 8;
    }
    while (n < w) {
        dst[n] = bswap_32(src[n]);
        n++;
    }
}
00274
00275 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00276 {
00277 int s, i;
00278 uint32_t *sq = ff_squareTbl + 256;
00279
00280 s = 0;
00281 for (i = 0; i < h; i++) {
00282 s += sq[pix1[0] - pix2[0]];
00283 s += sq[pix1[1] - pix2[1]];
00284 s += sq[pix1[2] - pix2[2]];
00285 s += sq[pix1[3] - pix2[3]];
00286 pix1 += line_size;
00287 pix2 += line_size;
00288 }
00289 return s;
00290 }
00291
00292 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00293 {
00294 int s, i;
00295 uint32_t *sq = ff_squareTbl + 256;
00296
00297 s = 0;
00298 for (i = 0; i < h; i++) {
00299 s += sq[pix1[0] - pix2[0]];
00300 s += sq[pix1[1] - pix2[1]];
00301 s += sq[pix1[2] - pix2[2]];
00302 s += sq[pix1[3] - pix2[3]];
00303 s += sq[pix1[4] - pix2[4]];
00304 s += sq[pix1[5] - pix2[5]];
00305 s += sq[pix1[6] - pix2[6]];
00306 s += sq[pix1[7] - pix2[7]];
00307 pix1 += line_size;
00308 pix2 += line_size;
00309 }
00310 return s;
00311 }
00312
00313 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00314 {
00315 int s, i;
00316 uint32_t *sq = ff_squareTbl + 256;
00317
00318 s = 0;
00319 for (i = 0; i < h; i++) {
00320 s += sq[pix1[ 0] - pix2[ 0]];
00321 s += sq[pix1[ 1] - pix2[ 1]];
00322 s += sq[pix1[ 2] - pix2[ 2]];
00323 s += sq[pix1[ 3] - pix2[ 3]];
00324 s += sq[pix1[ 4] - pix2[ 4]];
00325 s += sq[pix1[ 5] - pix2[ 5]];
00326 s += sq[pix1[ 6] - pix2[ 6]];
00327 s += sq[pix1[ 7] - pix2[ 7]];
00328 s += sq[pix1[ 8] - pix2[ 8]];
00329 s += sq[pix1[ 9] - pix2[ 9]];
00330 s += sq[pix1[10] - pix2[10]];
00331 s += sq[pix1[11] - pix2[11]];
00332 s += sq[pix1[12] - pix2[12]];
00333 s += sq[pix1[13] - pix2[13]];
00334 s += sq[pix1[14] - pix2[14]];
00335 s += sq[pix1[15] - pix2[15]];
00336
00337 pix1 += line_size;
00338 pix2 += line_size;
00339 }
00340 return s;
00341 }
00342
00343
00344 #if CONFIG_SNOW_ENCODER //dwt is in snow.c
/* Wavelet-domain distortion metric for the snow encoder: take the pix1-pix2
 * difference (scaled by 16), run a spatial DWT over it, and return the sum
 * of absolute subband coefficients, each weighted by an empirical
 * per-subband scale factor.  w selects the block width (8 -> 3 decomposition
 * levels, otherwise 4) and must equal h (asserted below).  type is passed to
 * ff_spatial_dwt; the w53_*/w97_* wrapper names suggest type 1 = 5/3 and
 * type 0 = 9/7 wavelets -- TODO confirm against snow.c. */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;  /* number of DWT decomposition levels */
    int tmp[32*32];                     /* difference buffer, fixed stride 32 */
    int level, ori;
    /* scale[type][dec_count-3][level][orientation]; the 3-level entries list
     * only 3 rows, the 4th row is implicitly zero-initialized */
    static const int scale[2][2][4][4]={
      {
        {
            /* 3 decomposition levels (8x8 blocks) */
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            /* 4 decomposition levels */
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            /* 3 decomposition levels */
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            /* 4 decomposition levels */
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* load the scaled difference into the stride-32 temporary.
     * NOTE(review): <<4 on a possibly negative int is formally UB in C;
     * kept as-is since it matches the file's conventions. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    /* walk every subband: ori 0 is the LL band (level 0 only), ori&1 selects
     * the right half (sx), ori&2 the bottom half (sy) at each level */
    s=0;
    assert(w==h);
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);     /* subband edge length */
            int sx= (ori&1) ? size : 0;         /* horizontal subband offset */
            int stride= 32<<(dec_count-level);  /* row stride at this level */
            int sy= (ori&2) ? stride>>1 : 0;    /* vertical subband offset */

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;   /* undo the <<4 input scaling and the weight magnitudes */
}
00413
/* Thin wrappers binding w_c() to the comparison-function signature:
 * wNN_S_c scores an S-pixel-wide block; w53_* pass type 1 and w97_* type 0
 * (names suggest 5/3 resp. 9/7 wavelets -- TODO confirm against snow.c). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

/* the 32-pixel variants have external linkage -- presumably referenced
 * from outside this file */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
00437 #endif
00438
00439
00440
/* Replicate the border pixels of a width x height image outwards by w pixels
 * on every side.  buf points at the top-left image pixel and rows are wrap
 * bytes apart; the caller must provide w extra rows/columns around buf. */
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;  /* last image row */
    uint8_t *row;
    int k;

    /* top and bottom margins: copy the first/last row outwards */
    for (k = 1; k <= w; k++) {
        memcpy(buf    - k * wrap, buf,    width);
        memcpy(bottom + k * wrap, bottom, width);
    }

    /* left and right margins: replicate each row's first/last pixel */
    row = buf;
    for (k = 0; k < height; k++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* the four corners: fill with the nearest corner pixel */
    for (k = 1; k <= w; k++) {
        memset(buf    - k * wrap - w,     buf[0],            w);
        memset(buf    - k * wrap + width, buf[width - 1],    w);
        memset(bottom + k * wrap - w,     bottom[0],         w);
        memset(bottom + k * wrap + width, bottom[width - 1], w);
    }
}
00467
/**
 * Copy a block_w x block_h block into buf, replicating edge pixels for the
 * parts that fall outside the w x h picture.  (src_x, src_y) is the block's
 * position and src already points at that position, so in-range pixels are
 * read at src[x + y*linesize].  buf and src share the same linesize.
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int x, y;
    int copy_x0, copy_y0, copy_x1, copy_y1;

    /* pull completely-outside positions back so that at least one source
     * row/column overlaps the picture */
    if (src_y >= h) {
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    } else if (src_y <= -block_h) {
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* in-picture sub-rectangle of the block, in block coordinates */
    copy_y0 = -src_y > 0 ? -src_y : 0;
    copy_x0 = -src_x > 0 ? -src_x : 0;
    copy_y1 = h - src_y < block_h ? h - src_y : block_h;
    copy_x1 = w - src_x < block_w ? w - src_x : block_w;

    /* copy the part of the block that lies inside the picture */
    for (y = copy_y0; y < copy_y1; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = src[x + y*linesize];

    /* replicate the first copied row upwards */
    for (y = 0; y < copy_y0; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = buf[x + copy_y0*linesize];

    /* replicate the last copied row downwards */
    for (y = copy_y1; y < block_h; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = buf[x + (copy_y1 - 1)*linesize];

    /* replicate the leftmost/rightmost filled columns sideways */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < copy_x0; x++)
            buf[x + y*linesize] = buf[copy_x0 + y*linesize];
        for (x = copy_x1; x < block_w; x++)
            buf[x + y*linesize] = buf[copy_x1 - 1 + y*linesize];
    }
}
00538
00539 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00540 {
00541 int i;
00542
00543
00544 for(i=0;i<8;i++) {
00545 block[0] = pixels[0];
00546 block[1] = pixels[1];
00547 block[2] = pixels[2];
00548 block[3] = pixels[3];
00549 block[4] = pixels[4];
00550 block[5] = pixels[5];
00551 block[6] = pixels[6];
00552 block[7] = pixels[7];
00553 pixels += line_size;
00554 block += 8;
00555 }
00556 }
00557
00558 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00559 const uint8_t *s2, int stride){
00560 int i;
00561
00562
00563 for(i=0;i<8;i++) {
00564 block[0] = s1[0] - s2[0];
00565 block[1] = s1[1] - s2[1];
00566 block[2] = s1[2] - s2[2];
00567 block[3] = s1[3] - s2[3];
00568 block[4] = s1[4] - s2[4];
00569 block[5] = s1[5] - s2[5];
00570 block[6] = s1[6] - s2[6];
00571 block[7] = s1[7] - s2[7];
00572 s1 += stride;
00573 s2 += stride;
00574 block += 8;
00575 }
00576 }
00577
00578
00579 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00580 int line_size)
00581 {
00582 int i;
00583 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00584
00585
00586 for(i=0;i<8;i++) {
00587 pixels[0] = cm[block[0]];
00588 pixels[1] = cm[block[1]];
00589 pixels[2] = cm[block[2]];
00590 pixels[3] = cm[block[3]];
00591 pixels[4] = cm[block[4]];
00592 pixels[5] = cm[block[5]];
00593 pixels[6] = cm[block[6]];
00594 pixels[7] = cm[block[7]];
00595
00596 pixels += line_size;
00597 block += 8;
00598 }
00599 }
00600
00601 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00602 int line_size)
00603 {
00604 int i;
00605 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00606
00607
00608 for(i=0;i<4;i++) {
00609 pixels[0] = cm[block[0]];
00610 pixels[1] = cm[block[1]];
00611 pixels[2] = cm[block[2]];
00612 pixels[3] = cm[block[3]];
00613
00614 pixels += line_size;
00615 block += 8;
00616 }
00617 }
00618
00619 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00620 int line_size)
00621 {
00622 int i;
00623 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00624
00625
00626 for(i=0;i<2;i++) {
00627 pixels[0] = cm[block[0]];
00628 pixels[1] = cm[block[1]];
00629
00630 pixels += line_size;
00631 block += 8;
00632 }
00633 }
00634
00635 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00636 uint8_t *restrict pixels,
00637 int line_size)
00638 {
00639 int i, j;
00640
00641 for (i = 0; i < 8; i++) {
00642 for (j = 0; j < 8; j++) {
00643 if (*block < -128)
00644 *pixels = 0;
00645 else if (*block > 127)
00646 *pixels = 255;
00647 else
00648 *pixels = (uint8_t)(*block + 128);
00649 block++;
00650 pixels++;
00651 }
00652 pixels += (line_size - 8);
00653 }
00654 }
00655
00656 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00657 int line_size)
00658 {
00659 int i;
00660 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00661
00662
00663 for(i=0;i<8;i++) {
00664 pixels[0] = cm[pixels[0] + block[0]];
00665 pixels[1] = cm[pixels[1] + block[1]];
00666 pixels[2] = cm[pixels[2] + block[2]];
00667 pixels[3] = cm[pixels[3] + block[3]];
00668 pixels[4] = cm[pixels[4] + block[4]];
00669 pixels[5] = cm[pixels[5] + block[5]];
00670 pixels[6] = cm[pixels[6] + block[6]];
00671 pixels[7] = cm[pixels[7] + block[7]];
00672 pixels += line_size;
00673 block += 8;
00674 }
00675 }
00676
00677 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00678 int line_size)
00679 {
00680 int i;
00681 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00682
00683
00684 for(i=0;i<4;i++) {
00685 pixels[0] = cm[pixels[0] + block[0]];
00686 pixels[1] = cm[pixels[1] + block[1]];
00687 pixels[2] = cm[pixels[2] + block[2]];
00688 pixels[3] = cm[pixels[3] + block[3]];
00689 pixels += line_size;
00690 block += 8;
00691 }
00692 }
00693
00694 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00695 int line_size)
00696 {
00697 int i;
00698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00699
00700
00701 for(i=0;i<2;i++) {
00702 pixels[0] = cm[pixels[0] + block[0]];
00703 pixels[1] = cm[pixels[1] + block[1]];
00704 pixels += line_size;
00705 block += 8;
00706 }
00707 }
00708
00709 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00710 {
00711 int i;
00712 for(i=0;i<8;i++) {
00713 pixels[0] += block[0];
00714 pixels[1] += block[1];
00715 pixels[2] += block[2];
00716 pixels[3] += block[3];
00717 pixels[4] += block[4];
00718 pixels[5] += block[5];
00719 pixels[6] += block[6];
00720 pixels[7] += block[7];
00721 pixels += line_size;
00722 block += 8;
00723 }
00724 }
00725
00726 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00727 {
00728 int i;
00729 for(i=0;i<4;i++) {
00730 pixels[0] += block[0];
00731 pixels[1] += block[1];
00732 pixels[2] += block[2];
00733 pixels[3] += block[3];
00734 pixels += line_size;
00735 block += 4;
00736 }
00737 }
00738
00739 static int sum_abs_dctelem_c(DCTELEM *block)
00740 {
00741 int sum=0, i;
00742 for(i=0; i<64; i++)
00743 sum+= FFABS(block[i]);
00744 return sum;
00745 }
00746
00747 #if 0
00748
00749 #define PIXOP2(OPNAME, OP) \
00750 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00751 {\
00752 int i;\
00753 for(i=0; i<h; i++){\
00754 OP(*((uint64_t*)block), AV_RN64(pixels));\
00755 pixels+=line_size;\
00756 block +=line_size;\
00757 }\
00758 }\
00759 \
00760 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00761 {\
00762 int i;\
00763 for(i=0; i<h; i++){\
00764 const uint64_t a= AV_RN64(pixels );\
00765 const uint64_t b= AV_RN64(pixels+1);\
00766 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00767 pixels+=line_size;\
00768 block +=line_size;\
00769 }\
00770 }\
00771 \
00772 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00773 {\
00774 int i;\
00775 for(i=0; i<h; i++){\
00776 const uint64_t a= AV_RN64(pixels );\
00777 const uint64_t b= AV_RN64(pixels+1);\
00778 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00779 pixels+=line_size;\
00780 block +=line_size;\
00781 }\
00782 }\
00783 \
00784 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00785 {\
00786 int i;\
00787 for(i=0; i<h; i++){\
00788 const uint64_t a= AV_RN64(pixels );\
00789 const uint64_t b= AV_RN64(pixels+line_size);\
00790 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00791 pixels+=line_size;\
00792 block +=line_size;\
00793 }\
00794 }\
00795 \
00796 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00797 {\
00798 int i;\
00799 for(i=0; i<h; i++){\
00800 const uint64_t a= AV_RN64(pixels );\
00801 const uint64_t b= AV_RN64(pixels+line_size);\
00802 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00803 pixels+=line_size;\
00804 block +=line_size;\
00805 }\
00806 }\
00807 \
00808 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00809 {\
00810 int i;\
00811 const uint64_t a= AV_RN64(pixels );\
00812 const uint64_t b= AV_RN64(pixels+1);\
00813 uint64_t l0= (a&0x0303030303030303ULL)\
00814 + (b&0x0303030303030303ULL)\
00815 + 0x0202020202020202ULL;\
00816 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00817 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00818 uint64_t l1,h1;\
00819 \
00820 pixels+=line_size;\
00821 for(i=0; i<h; i+=2){\
00822 uint64_t a= AV_RN64(pixels );\
00823 uint64_t b= AV_RN64(pixels+1);\
00824 l1= (a&0x0303030303030303ULL)\
00825 + (b&0x0303030303030303ULL);\
00826 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00827 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00828 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00829 pixels+=line_size;\
00830 block +=line_size;\
00831 a= AV_RN64(pixels );\
00832 b= AV_RN64(pixels+1);\
00833 l0= (a&0x0303030303030303ULL)\
00834 + (b&0x0303030303030303ULL)\
00835 + 0x0202020202020202ULL;\
00836 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00837 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00838 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00839 pixels+=line_size;\
00840 block +=line_size;\
00841 }\
00842 }\
00843 \
00844 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00845 {\
00846 int i;\
00847 const uint64_t a= AV_RN64(pixels );\
00848 const uint64_t b= AV_RN64(pixels+1);\
00849 uint64_t l0= (a&0x0303030303030303ULL)\
00850 + (b&0x0303030303030303ULL)\
00851 + 0x0101010101010101ULL;\
00852 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00853 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00854 uint64_t l1,h1;\
00855 \
00856 pixels+=line_size;\
00857 for(i=0; i<h; i+=2){\
00858 uint64_t a= AV_RN64(pixels );\
00859 uint64_t b= AV_RN64(pixels+1);\
00860 l1= (a&0x0303030303030303ULL)\
00861 + (b&0x0303030303030303ULL);\
00862 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00863 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00864 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00865 pixels+=line_size;\
00866 block +=line_size;\
00867 a= AV_RN64(pixels );\
00868 b= AV_RN64(pixels+1);\
00869 l0= (a&0x0303030303030303ULL)\
00870 + (b&0x0303030303030303ULL)\
00871 + 0x0101010101010101ULL;\
00872 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00873 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00874 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00875 pixels+=line_size;\
00876 block +=line_size;\
00877 }\
00878 }\
00879 \
00880 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00881 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00882 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00883 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00884 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00885 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00886 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00887
00888 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00889 #else // 64 bit variant
00890
00891 #define PIXOP2(OPNAME, OP) \
00892 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00893 int i;\
00894 for(i=0; i<h; i++){\
00895 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00896 pixels+=line_size;\
00897 block +=line_size;\
00898 }\
00899 }\
00900 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00901 int i;\
00902 for(i=0; i<h; i++){\
00903 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00904 pixels+=line_size;\
00905 block +=line_size;\
00906 }\
00907 }\
00908 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00909 int i;\
00910 for(i=0; i<h; i++){\
00911 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00912 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00913 pixels+=line_size;\
00914 block +=line_size;\
00915 }\
00916 }\
00917 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00918 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00919 }\
00920 \
00921 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00922 int src_stride1, int src_stride2, int h){\
00923 int i;\
00924 for(i=0; i<h; i++){\
00925 uint32_t a,b;\
00926 a= AV_RN32(&src1[i*src_stride1 ]);\
00927 b= AV_RN32(&src2[i*src_stride2 ]);\
00928 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00929 a= AV_RN32(&src1[i*src_stride1+4]);\
00930 b= AV_RN32(&src2[i*src_stride2+4]);\
00931 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00932 }\
00933 }\
00934 \
00935 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00936 int src_stride1, int src_stride2, int h){\
00937 int i;\
00938 for(i=0; i<h; i++){\
00939 uint32_t a,b;\
00940 a= AV_RN32(&src1[i*src_stride1 ]);\
00941 b= AV_RN32(&src2[i*src_stride2 ]);\
00942 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00943 a= AV_RN32(&src1[i*src_stride1+4]);\
00944 b= AV_RN32(&src2[i*src_stride2+4]);\
00945 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00946 }\
00947 }\
00948 \
00949 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00950 int src_stride1, int src_stride2, int h){\
00951 int i;\
00952 for(i=0; i<h; i++){\
00953 uint32_t a,b;\
00954 a= AV_RN32(&src1[i*src_stride1 ]);\
00955 b= AV_RN32(&src2[i*src_stride2 ]);\
00956 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00957 }\
00958 }\
00959 \
00960 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00961 int src_stride1, int src_stride2, int h){\
00962 int i;\
00963 for(i=0; i<h; i++){\
00964 uint32_t a,b;\
00965 a= AV_RN16(&src1[i*src_stride1 ]);\
00966 b= AV_RN16(&src2[i*src_stride2 ]);\
00967 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00968 }\
00969 }\
00970 \
00971 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00972 int src_stride1, int src_stride2, int h){\
00973 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00974 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00975 }\
00976 \
00977 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00978 int src_stride1, int src_stride2, int h){\
00979 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00980 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00981 }\
00982 \
00983 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00984 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00985 }\
00986 \
00987 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00988 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00989 }\
00990 \
00991 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00992 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00993 }\
00994 \
00995 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00996 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00997 }\
00998 \
00999 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01000 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01001 int i;\
01002 for(i=0; i<h; i++){\
01003 uint32_t a, b, c, d, l0, l1, h0, h1;\
01004 a= AV_RN32(&src1[i*src_stride1]);\
01005 b= AV_RN32(&src2[i*src_stride2]);\
01006 c= AV_RN32(&src3[i*src_stride3]);\
01007 d= AV_RN32(&src4[i*src_stride4]);\
01008 l0= (a&0x03030303UL)\
01009 + (b&0x03030303UL)\
01010 + 0x02020202UL;\
01011 h0= ((a&0xFCFCFCFCUL)>>2)\
01012 + ((b&0xFCFCFCFCUL)>>2);\
01013 l1= (c&0x03030303UL)\
01014 + (d&0x03030303UL);\
01015 h1= ((c&0xFCFCFCFCUL)>>2)\
01016 + ((d&0xFCFCFCFCUL)>>2);\
01017 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01018 a= AV_RN32(&src1[i*src_stride1+4]);\
01019 b= AV_RN32(&src2[i*src_stride2+4]);\
01020 c= AV_RN32(&src3[i*src_stride3+4]);\
01021 d= AV_RN32(&src4[i*src_stride4+4]);\
01022 l0= (a&0x03030303UL)\
01023 + (b&0x03030303UL)\
01024 + 0x02020202UL;\
01025 h0= ((a&0xFCFCFCFCUL)>>2)\
01026 + ((b&0xFCFCFCFCUL)>>2);\
01027 l1= (c&0x03030303UL)\
01028 + (d&0x03030303UL);\
01029 h1= ((c&0xFCFCFCFCUL)>>2)\
01030 + ((d&0xFCFCFCFCUL)>>2);\
01031 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01032 }\
01033 }\
01034 \
01035 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
01036 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
01037 }\
01038 \
01039 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
01040 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
01041 }\
01042 \
01043 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
01044 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
01045 }\
01046 \
01047 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
01048 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
01049 }\
01050 \
01051 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01052 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01053 int i;\
01054 for(i=0; i<h; i++){\
01055 uint32_t a, b, c, d, l0, l1, h0, h1;\
01056 a= AV_RN32(&src1[i*src_stride1]);\
01057 b= AV_RN32(&src2[i*src_stride2]);\
01058 c= AV_RN32(&src3[i*src_stride3]);\
01059 d= AV_RN32(&src4[i*src_stride4]);\
01060 l0= (a&0x03030303UL)\
01061 + (b&0x03030303UL)\
01062 + 0x01010101UL;\
01063 h0= ((a&0xFCFCFCFCUL)>>2)\
01064 + ((b&0xFCFCFCFCUL)>>2);\
01065 l1= (c&0x03030303UL)\
01066 + (d&0x03030303UL);\
01067 h1= ((c&0xFCFCFCFCUL)>>2)\
01068 + ((d&0xFCFCFCFCUL)>>2);\
01069 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01070 a= AV_RN32(&src1[i*src_stride1+4]);\
01071 b= AV_RN32(&src2[i*src_stride2+4]);\
01072 c= AV_RN32(&src3[i*src_stride3+4]);\
01073 d= AV_RN32(&src4[i*src_stride4+4]);\
01074 l0= (a&0x03030303UL)\
01075 + (b&0x03030303UL)\
01076 + 0x01010101UL;\
01077 h0= ((a&0xFCFCFCFCUL)>>2)\
01078 + ((b&0xFCFCFCFCUL)>>2);\
01079 l1= (c&0x03030303UL)\
01080 + (d&0x03030303UL);\
01081 h1= ((c&0xFCFCFCFCUL)>>2)\
01082 + ((d&0xFCFCFCFCUL)>>2);\
01083 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01084 }\
01085 }\
01086 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01087 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01088 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01089 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01090 }\
01091 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01092 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01093 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01094 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01095 }\
01096 \
01097 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01098 {\
01099 int i, a0, b0, a1, b1;\
01100 a0= pixels[0];\
01101 b0= pixels[1] + 2;\
01102 a0 += b0;\
01103 b0 += pixels[2];\
01104 \
01105 pixels+=line_size;\
01106 for(i=0; i<h; i+=2){\
01107 a1= pixels[0];\
01108 b1= pixels[1];\
01109 a1 += b1;\
01110 b1 += pixels[2];\
01111 \
01112 block[0]= (a1+a0)>>2; \
01113 block[1]= (b1+b0)>>2;\
01114 \
01115 pixels+=line_size;\
01116 block +=line_size;\
01117 \
01118 a0= pixels[0];\
01119 b0= pixels[1] + 2;\
01120 a0 += b0;\
01121 b0 += pixels[2];\
01122 \
01123 block[0]= (a1+a0)>>2;\
01124 block[1]= (b1+b0)>>2;\
01125 pixels+=line_size;\
01126 block +=line_size;\
01127 }\
01128 }\
01129 \
01130 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01131 {\
01132 int i;\
01133 const uint32_t a= AV_RN32(pixels );\
01134 const uint32_t b= AV_RN32(pixels+1);\
01135 uint32_t l0= (a&0x03030303UL)\
01136 + (b&0x03030303UL)\
01137 + 0x02020202UL;\
01138 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01139 + ((b&0xFCFCFCFCUL)>>2);\
01140 uint32_t l1,h1;\
01141 \
01142 pixels+=line_size;\
01143 for(i=0; i<h; i+=2){\
01144 uint32_t a= AV_RN32(pixels );\
01145 uint32_t b= AV_RN32(pixels+1);\
01146 l1= (a&0x03030303UL)\
01147 + (b&0x03030303UL);\
01148 h1= ((a&0xFCFCFCFCUL)>>2)\
01149 + ((b&0xFCFCFCFCUL)>>2);\
01150 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01151 pixels+=line_size;\
01152 block +=line_size;\
01153 a= AV_RN32(pixels );\
01154 b= AV_RN32(pixels+1);\
01155 l0= (a&0x03030303UL)\
01156 + (b&0x03030303UL)\
01157 + 0x02020202UL;\
01158 h0= ((a&0xFCFCFCFCUL)>>2)\
01159 + ((b&0xFCFCFCFCUL)>>2);\
01160 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01161 pixels+=line_size;\
01162 block +=line_size;\
01163 }\
01164 }\
01165 \
01166 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01167 {\
01168 int j;\
01169 for(j=0; j<2; j++){\
01170 int i;\
01171 const uint32_t a= AV_RN32(pixels );\
01172 const uint32_t b= AV_RN32(pixels+1);\
01173 uint32_t l0= (a&0x03030303UL)\
01174 + (b&0x03030303UL)\
01175 + 0x02020202UL;\
01176 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01177 + ((b&0xFCFCFCFCUL)>>2);\
01178 uint32_t l1,h1;\
01179 \
01180 pixels+=line_size;\
01181 for(i=0; i<h; i+=2){\
01182 uint32_t a= AV_RN32(pixels );\
01183 uint32_t b= AV_RN32(pixels+1);\
01184 l1= (a&0x03030303UL)\
01185 + (b&0x03030303UL);\
01186 h1= ((a&0xFCFCFCFCUL)>>2)\
01187 + ((b&0xFCFCFCFCUL)>>2);\
01188 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01189 pixels+=line_size;\
01190 block +=line_size;\
01191 a= AV_RN32(pixels );\
01192 b= AV_RN32(pixels+1);\
01193 l0= (a&0x03030303UL)\
01194 + (b&0x03030303UL)\
01195 + 0x02020202UL;\
01196 h0= ((a&0xFCFCFCFCUL)>>2)\
01197 + ((b&0xFCFCFCFCUL)>>2);\
01198 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01199 pixels+=line_size;\
01200 block +=line_size;\
01201 }\
01202 pixels+=4-line_size*(h+1);\
01203 block +=4-line_size*h;\
01204 }\
01205 }\
01206 \
01207 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01208 {\
01209 int j;\
01210 for(j=0; j<2; j++){\
01211 int i;\
01212 const uint32_t a= AV_RN32(pixels );\
01213 const uint32_t b= AV_RN32(pixels+1);\
01214 uint32_t l0= (a&0x03030303UL)\
01215 + (b&0x03030303UL)\
01216 + 0x01010101UL;\
01217 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01218 + ((b&0xFCFCFCFCUL)>>2);\
01219 uint32_t l1,h1;\
01220 \
01221 pixels+=line_size;\
01222 for(i=0; i<h; i+=2){\
01223 uint32_t a= AV_RN32(pixels );\
01224 uint32_t b= AV_RN32(pixels+1);\
01225 l1= (a&0x03030303UL)\
01226 + (b&0x03030303UL);\
01227 h1= ((a&0xFCFCFCFCUL)>>2)\
01228 + ((b&0xFCFCFCFCUL)>>2);\
01229 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01230 pixels+=line_size;\
01231 block +=line_size;\
01232 a= AV_RN32(pixels );\
01233 b= AV_RN32(pixels+1);\
01234 l0= (a&0x03030303UL)\
01235 + (b&0x03030303UL)\
01236 + 0x01010101UL;\
01237 h0= ((a&0xFCFCFCFCUL)>>2)\
01238 + ((b&0xFCFCFCFCUL)>>2);\
01239 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01240 pixels+=line_size;\
01241 block +=line_size;\
01242 }\
01243 pixels+=4-line_size*(h+1);\
01244 block +=4-line_size*h;\
01245 }\
01246 }\
01247 \
01248 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01249 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01250 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01251 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01252 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01253 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01254 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01255 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01256
/* Pixel-op kernels plugged into PIXOP2: "avg" rounds-averages the new value
 * into the destination, "put" overwrites it. */
#define op_avg(a, b) a = rnd_avg32(a, b)
/* NOTE(review): this #endif closes a word-size #if/#else opened above this
 * excerpt (32-bit vs 64-bit PIXOP2 variants) — confirm against full file. */
#endif
#define op_put(a, b) a = b

/* Instantiate the whole family of put_/avg_ pixel copy/average functions. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* Scalar rounded averages of 2 and 4 byte values (used by pel helpers below). */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
01268
/* Adapter: expose the macro-generated three-stride put_no_rnd_pixels16_l2()
 * with the common single-stride signature used by function-pointer tables.
 * Presumably combines sources `a` and `b` without the rounding bias — see the
 * PIXOP2 _l2 helpers (defined above this excerpt) to confirm. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
01272
/* Adapter: same as put_no_rnd_pixels16_l2_c() but for 8-pixel-wide blocks;
 * forwards a single stride as all three stride arguments. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
01276
/*
 * One-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide block at a fixed 1/16-pel offset (x16, y16).
 * The four corner weights A..D always sum to 256, so the result is
 * renormalised with `rounder` and a >>8.
 *
 * dst/src  : destination / source pixels (top-left of the block)
 * stride   : line size of both buffers, in bytes
 * h        : number of rows to produce
 * x16, y16 : sub-pel position in 1/16ths of a pixel (0..15 useful range)
 * rounder  : rounding constant added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        /* blend the 2x2 neighbourhood of every output pixel */
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
01299
/*
 * Global motion compensation with an affine per-pixel motion field.
 * For each destination pixel the source position advances by (dxx, dyx)
 * per column and (dxy, dyy) per row; the position is split into an integer
 * coordinate and a sub-pel fraction with denominator s = 1<<shift, and the
 * sample is bilinearly interpolated.  Coordinates outside [0, width/height)
 * are clamped edge-wise (av_clip), degrading to 1-D or nearest sampling.
 *
 * NOTE(review): ox/oy and the deltas appear to be fixed point with the
 * sub-pel fraction in bits [16, 16+shift) (see the vx>>16 then &(s-1)
 * sequence) — confirm the exact format against callers.
 *
 * r is the rounding constant applied before the final >>(shift*2).
 * Only 8 columns per row are produced (x loops 0..7).
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;   /* sub-pel denominator */

    /* turn sizes into last valid coordinates for the range checks below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);   /* low `shift` bits = sub-pel fraction */
            frac_y= src_y&(s-1);
            src_x>>=shift;         /* integer source coordinate */
            src_y>>=shift;

            /* unsigned compare also rejects negative coordinates */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear blend of the 2x2 neighbourhood */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*(s-frac_y)
                                       + ( src[index+stride ]*(s-frac_x)
                                         + src[index+stride+1]* frac_x )* frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                         + src[index+stride ]* frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside both ways: nearest clamped sample, no filtering */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index ];
                }
            }

            vx+= dxx;   /* advance the motion vector along the row */
            vy+= dyx;
        }
        ox += dxy;      /* advance the row start position */
        oy += dyy;
    }
}
01357
/* Full-pel position (no interpolation): copy a width x height block by
 * dispatching to the macro-generated put_pixelsN_c routines.
 * Widths other than 2/4/8/16 fall through the switch and write nothing. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
01366
/* Horizontal 1/3-phase interpolation: out ~= (2*a + b + 1)/3 of each pixel
 * and its right neighbour; 683 ~= 2^11/3 turns the division into a shift. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (683 * (2*s[x] + s[x+1] + 1)) >> 11;
    }
}
01377
/* Horizontal 2/3-phase interpolation: out ~= (a + 2*b + 1)/3 of each pixel
 * and its right neighbour (mirror of mc10). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (683 * (s[x] + 2*s[x+1] + 1)) >> 11;
    }
}
01388
/* Vertical 1/3-phase interpolation: out ~= (2*top + bottom + 1)/3 of each
 * pixel and the one directly below it. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (683 * (2*s[x] + s[x+stride] + 1)) >> 11;
    }
}
01399
/* 2-D 1/3,1/3-phase interpolation over the 2x2 neighbourhood with weights
 * 4/3/3/2 (sum 12); 2731 ~= 2^15/12 implements the /12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (2731 * (4*s[x] + 3*s[x+1] +
                            3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
    }
}
01410
/* 2-D 1/3,2/3-phase interpolation over the 2x2 neighbourhood with weights
 * 3/2/4/3 (sum 12), biased toward the bottom row. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (2731 * (3*s[x] + 2*s[x+1] +
                            4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
01421
/* Vertical 2/3-phase interpolation: out ~= (top + 2*bottom + 1)/3 of each
 * pixel and the one directly below it (mirror of mc01). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (683 * (s[x] + 2*s[x+stride] + 1)) >> 11;
    }
}
01432
/* 2-D 2/3,1/3-phase interpolation over the 2x2 neighbourhood with weights
 * 3/4/2/3 (sum 12), biased toward the right column. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (2731 * (3*s[x] + 4*s[x+1] +
                            2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
01443
/* 2-D 2/3,2/3-phase interpolation over the 2x2 neighbourhood with weights
 * 2/3/3/4 (sum 12), biased toward the bottom-right sample. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++)
            d[x] = (2731 * (2*s[x] + 3*s[x+1] +
                            3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
    }
}
01454
/* Full-pel position, averaging variant: rounds-averages the source block
 * into dst via the macro-generated avg_pixelsN_c routines.
 * Widths other than 2/4/8/16 fall through the switch and write nothing. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
01463
/* Averaging variant of put_tpel_pixels_mc10_c: compute the same horizontal
 * 1/3-phase prediction, then round-average it with the existing dst pixel. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (683 * (2*s[x] + s[x+1] + 1)) >> 11;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01474
/* Averaging variant of put_tpel_pixels_mc20_c: horizontal 2/3-phase
 * prediction, round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (683 * (s[x] + 2*s[x+1] + 1)) >> 11;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01485
/* Averaging variant of put_tpel_pixels_mc01_c: vertical 1/3-phase
 * prediction, round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (683 * (2*s[x] + s[x+stride] + 1)) >> 11;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01496
/* Averaging variant of put_tpel_pixels_mc11_c: 2-D 1/3,1/3-phase prediction
 * (weights 4/3/3/2), round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (2731 * (4*s[x] + 3*s[x+1] +
                                3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01507
/* Averaging variant of put_tpel_pixels_mc12_c: 2-D 1/3,2/3-phase prediction
 * (weights 3/2/4/3), round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (2731 * (3*s[x] + 2*s[x+1] +
                                4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01518
/* Averaging variant of put_tpel_pixels_mc02_c: vertical 2/3-phase
 * prediction, round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (683 * (s[x] + 2*s[x+stride] + 1)) >> 11;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01529
/* Averaging variant of put_tpel_pixels_mc21_c: 2-D 2/3,1/3-phase prediction
 * (weights 3/4/2/3), round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (2731 * (3*s[x] + 4*s[x+1] +
                                2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
01540
/* Averaging variant of put_tpel_pixels_mc22_c: 2-D 2/3,2/3-phase prediction
 * (weights 2/3/3/4), round-averaged with the existing dst pixel. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y * stride;
        uint8_t       *d = dst + y * stride;
        for (x = 0; x < width; x++) {
            int pred = (2731 * (2*s[x] + 3*s[x+1] +
                                3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + pred + 1) >> 1;
        }
    }
}
/* Dead code: generator for fixed-width tpel wrappers.  Note that as written
 * the bodies are not valid C — `void put_tpel_pixels_mc00_c(dst, ...)` is a
 * declaration-style line, not a call — so this block would not compile if
 * the #if 0 were removed without fixing it. */
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01572
/*
 * H.264 chroma motion compensation for 2/4/8-pixel-wide blocks.
 * (x, y) is the 1/8-pel offset (asserted to be in 0..7); the 2x2 bilinear
 * weights A..D always sum to 64, and OP folds in the +32 rounding and >>6.
 * When D == 0 (x or y is zero) the 2-D blend degenerates to a 1-D blend of
 * the pixel with a single neighbour: `step` selects the vertical (stride)
 * or horizontal (1) neighbour depending on which weight survives.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* put: write the rounded prediction; avg: round-average it into dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01681
/*
 * H.264 chroma MC, 8-wide block, "no rounding" variant: same 2x2 bilinear
 * blend as the H264_CHROMA_MC functions above (weights A..D sum to 64) but
 * with the rounding constant reduced from 32 to 32 - 4 = 28 before the >>6.
 */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
01705
01706 #define QPEL_MC(r, OPNAME, RND, OP) \
01707 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01708 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01709 int i;\
01710 for(i=0; i<h; i++)\
01711 {\
01712 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01713 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01714 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01715 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01716 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01717 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01718 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01719 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01720 dst+=dstStride;\
01721 src+=srcStride;\
01722 }\
01723 }\
01724 \
01725 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01726 const int w=8;\
01727 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01728 int i;\
01729 for(i=0; i<w; i++)\
01730 {\
01731 const int src0= src[0*srcStride];\
01732 const int src1= src[1*srcStride];\
01733 const int src2= src[2*srcStride];\
01734 const int src3= src[3*srcStride];\
01735 const int src4= src[4*srcStride];\
01736 const int src5= src[5*srcStride];\
01737 const int src6= src[6*srcStride];\
01738 const int src7= src[7*srcStride];\
01739 const int src8= src[8*srcStride];\
01740 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01741 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01742 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01743 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01744 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01745 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01746 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01747 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01748 dst++;\
01749 src++;\
01750 }\
01751 }\
01752 \
01753 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01754 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01755 int i;\
01756 \
01757 for(i=0; i<h; i++)\
01758 {\
01759 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01760 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01761 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01762 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01763 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01764 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01765 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01766 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01767 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01768 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01769 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01770 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01771 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01772 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01773 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01774 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01775 dst+=dstStride;\
01776 src+=srcStride;\
01777 }\
01778 }\
01779 \
01780 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01781 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01782 int i;\
01783 const int w=16;\
01784 for(i=0; i<w; i++)\
01785 {\
01786 const int src0= src[0*srcStride];\
01787 const int src1= src[1*srcStride];\
01788 const int src2= src[2*srcStride];\
01789 const int src3= src[3*srcStride];\
01790 const int src4= src[4*srcStride];\
01791 const int src5= src[5*srcStride];\
01792 const int src6= src[6*srcStride];\
01793 const int src7= src[7*srcStride];\
01794 const int src8= src[8*srcStride];\
01795 const int src9= src[9*srcStride];\
01796 const int src10= src[10*srcStride];\
01797 const int src11= src[11*srcStride];\
01798 const int src12= src[12*srcStride];\
01799 const int src13= src[13*srcStride];\
01800 const int src14= src[14*srcStride];\
01801 const int src15= src[15*srcStride];\
01802 const int src16= src[16*srcStride];\
01803 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01804 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01805 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01806 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01807 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01808 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01809 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01810 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01811 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01812 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01813 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01814 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01815 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01816 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01817 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01818 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01819 dst++;\
01820 src++;\
01821 }\
01822 }\
01823 \
01824 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01825 OPNAME ## pixels8_c(dst, src, stride, 8);\
01826 }\
01827 \
01828 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01829 uint8_t half[64];\
01830 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01831 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01832 }\
01833 \
01834 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01835 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01836 }\
01837 \
01838 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01839 uint8_t half[64];\
01840 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01841 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01842 }\
01843 \
01844 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01845 uint8_t full[16*9];\
01846 uint8_t half[64];\
01847 copy_block9(full, src, 16, stride, 9);\
01848 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01849 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01850 }\
01851 \
01852 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01853 uint8_t full[16*9];\
01854 copy_block9(full, src, 16, stride, 9);\
01855 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01856 }\
01857 \
01858 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01859 uint8_t full[16*9];\
01860 uint8_t half[64];\
01861 copy_block9(full, src, 16, stride, 9);\
01862 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01863 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01864 }\
01865 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01866 uint8_t full[16*9];\
01867 uint8_t halfH[72];\
01868 uint8_t halfV[64];\
01869 uint8_t halfHV[64];\
01870 copy_block9(full, src, 16, stride, 9);\
01871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01872 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01873 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01874 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01875 }\
01876 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01877 uint8_t full[16*9];\
01878 uint8_t halfH[72];\
01879 uint8_t halfHV[64];\
01880 copy_block9(full, src, 16, stride, 9);\
01881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01882 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01884 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01885 }\
01886 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01887 uint8_t full[16*9];\
01888 uint8_t halfH[72];\
01889 uint8_t halfV[64];\
01890 uint8_t halfHV[64];\
01891 copy_block9(full, src, 16, stride, 9);\
01892 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01895 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01896 }\
01897 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01898 uint8_t full[16*9];\
01899 uint8_t halfH[72];\
01900 uint8_t halfHV[64];\
01901 copy_block9(full, src, 16, stride, 9);\
01902 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01903 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01904 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01905 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01906 }\
01907 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01908 uint8_t full[16*9];\
01909 uint8_t halfH[72];\
01910 uint8_t halfV[64];\
01911 uint8_t halfHV[64];\
01912 copy_block9(full, src, 16, stride, 9);\
01913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01914 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01915 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01916 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01917 }\
01918 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01919 uint8_t full[16*9];\
01920 uint8_t halfH[72];\
01921 uint8_t halfHV[64];\
01922 copy_block9(full, src, 16, stride, 9);\
01923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01924 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01926 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01927 }\
01928 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01929 uint8_t full[16*9];\
01930 uint8_t halfH[72];\
01931 uint8_t halfV[64];\
01932 uint8_t halfHV[64];\
01933 copy_block9(full, src, 16, stride, 9);\
01934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01935 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01936 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01937 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01938 }\
01939 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01940 uint8_t full[16*9];\
01941 uint8_t halfH[72];\
01942 uint8_t halfHV[64];\
01943 copy_block9(full, src, 16, stride, 9);\
01944 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01945 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01946 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01947 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01948 }\
01949 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01950 uint8_t halfH[72];\
01951 uint8_t halfHV[64];\
01952 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01953 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01954 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01955 }\
01956 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01957 uint8_t halfH[72];\
01958 uint8_t halfHV[64];\
01959 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01960 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01961 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01962 }\
01963 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01964 uint8_t full[16*9];\
01965 uint8_t halfH[72];\
01966 uint8_t halfV[64];\
01967 uint8_t halfHV[64];\
01968 copy_block9(full, src, 16, stride, 9);\
01969 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01970 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01971 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01972 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01973 }\
01974 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01975 uint8_t full[16*9];\
01976 uint8_t halfH[72];\
01977 copy_block9(full, src, 16, stride, 9);\
01978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01979 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01980 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01981 }\
01982 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01983 uint8_t full[16*9];\
01984 uint8_t halfH[72];\
01985 uint8_t halfV[64];\
01986 uint8_t halfHV[64];\
01987 copy_block9(full, src, 16, stride, 9);\
01988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01991 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01992 }\
01993 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01994 uint8_t full[16*9];\
01995 uint8_t halfH[72];\
01996 copy_block9(full, src, 16, stride, 9);\
01997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01998 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
02000 }\
02001 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02002 uint8_t halfH[72];\
02003 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
02004 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
02005 }\
02006 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
02007 OPNAME ## pixels16_c(dst, src, stride, 16);\
02008 }\
02009 \
02010 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
02011 uint8_t half[256];\
02012 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
02013 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
02014 }\
02015 \
02016 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
02017 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
02018 }\
02019 \
02020 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
02021 uint8_t half[256];\
02022 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
02023 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
02024 }\
02025 \
02026 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02027 uint8_t full[24*17];\
02028 uint8_t half[256];\
02029 copy_block17(full, src, 24, stride, 17);\
02030 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
02031 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
02032 }\
02033 \
02034 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02035 uint8_t full[24*17];\
02036 copy_block17(full, src, 24, stride, 17);\
02037 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
02038 }\
02039 \
02040 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02041 uint8_t full[24*17];\
02042 uint8_t half[256];\
02043 copy_block17(full, src, 24, stride, 17);\
02044 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
02045 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
02046 }\
02047 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
02048 uint8_t full[24*17];\
02049 uint8_t halfH[272];\
02050 uint8_t halfV[256];\
02051 uint8_t halfHV[256];\
02052 copy_block17(full, src, 24, stride, 17);\
02053 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02054 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02055 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02056 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02057 }\
02058 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02059 uint8_t full[24*17];\
02060 uint8_t halfH[272];\
02061 uint8_t halfHV[256];\
02062 copy_block17(full, src, 24, stride, 17);\
02063 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02064 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02065 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02066 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02067 }\
02068 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
02069 uint8_t full[24*17];\
02070 uint8_t halfH[272];\
02071 uint8_t halfV[256];\
02072 uint8_t halfHV[256];\
02073 copy_block17(full, src, 24, stride, 17);\
02074 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02075 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02076 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02077 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02078 }\
02079 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02080 uint8_t full[24*17];\
02081 uint8_t halfH[272];\
02082 uint8_t halfHV[256];\
02083 copy_block17(full, src, 24, stride, 17);\
02084 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02085 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02086 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02087 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02088 }\
02089 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
02090 uint8_t full[24*17];\
02091 uint8_t halfH[272];\
02092 uint8_t halfV[256];\
02093 uint8_t halfHV[256];\
02094 copy_block17(full, src, 24, stride, 17);\
02095 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02096 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02097 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02098 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02099 }\
02100 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02101 uint8_t full[24*17];\
02102 uint8_t halfH[272];\
02103 uint8_t halfHV[256];\
02104 copy_block17(full, src, 24, stride, 17);\
02105 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02106 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02107 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02108 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02109 }\
02110 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
02111 uint8_t full[24*17];\
02112 uint8_t halfH[272];\
02113 uint8_t halfV[256];\
02114 uint8_t halfHV[256];\
02115 copy_block17(full, src, 24, stride, 17);\
02116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
02117 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02118 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02119 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02120 }\
02121 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02122 uint8_t full[24*17];\
02123 uint8_t halfH[272];\
02124 uint8_t halfHV[256];\
02125 copy_block17(full, src, 24, stride, 17);\
02126 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02127 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02128 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02129 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02130 }\
02131 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02132 uint8_t halfH[272];\
02133 uint8_t halfHV[256];\
02134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02136 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02137 }\
02138 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02139 uint8_t halfH[272];\
02140 uint8_t halfHV[256];\
02141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02142 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02143 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02144 }\
02145 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02146 uint8_t full[24*17];\
02147 uint8_t halfH[272];\
02148 uint8_t halfV[256];\
02149 uint8_t halfHV[256];\
02150 copy_block17(full, src, 24, stride, 17);\
02151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02152 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02154 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02155 }\
02156 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02157 uint8_t full[24*17];\
02158 uint8_t halfH[272];\
02159 copy_block17(full, src, 24, stride, 17);\
02160 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02161 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02162 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02163 }\
02164 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02165 uint8_t full[24*17];\
02166 uint8_t halfH[272];\
02167 uint8_t halfV[256];\
02168 uint8_t halfHV[256];\
02169 copy_block17(full, src, 24, stride, 17);\
02170 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02171 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02172 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02173 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02174 }\
02175 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02176 uint8_t full[24*17];\
02177 uint8_t halfH[272];\
02178 copy_block17(full, src, 24, stride, 17);\
02179 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02180 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02181 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02182 }\
02183 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02184 uint8_t halfH[272];\
02185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02186 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02187 }
02188
/* Final store operators plugged into QPEL_MC as its OP argument.
 * b is the raw 6-tap filter sum (32x the pixel value); each operator
 * rounds it (+16 with rounding, +15 for the no-rounding variants),
 * shifts down by 5 and clips to 0..255 through the cm crop table.
 * The avg variants additionally average with the destination pixel a
 * (put = overwrite, avg = bidirectional-style average). */
02189 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02190 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02191 #define op_put(a, b) a = cm[((b) + 16)>>5]
02192 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02193
/* Instantiate the MPEG-4 quarter-pel MC function families:
 * put_ (rounding), put_no_rnd_ (truncating) and avg_ (rounding average).
 * NOTE(review): no avg_no_rnd_ family is generated here — presumably no
 * caller needs it; confirm before adding one. */
02194 QPEL_MC(0, put_ , _ , op_put)
02195 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02196 QPEL_MC(0, avg_ , _ , op_avg)
02197
/* The operators are only meaningful inside the generated bodies. */
02198 #undef op_avg
02199 #undef op_avg_no_rnd
02200 #undef op_put
02201 #undef op_put_no_rnd
02202
02203 #if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Template macro generating the C reference implementations of the H.264
 * six-tap (1,-5,20,20,-5,1) half-sample interpolation filters for block
 * widths 2, 4, 8 and 16:
 *   - OPNAME##h264_qpelN_h_lowpass:  horizontal filter, row by row;
 *   - OPNAME##h264_qpelN_v_lowpass:  vertical filter, column by column
 *     (reads 2 rows above and 3 rows below the block);
 *   - OPNAME##h264_qpelN_hv_lowpass: horizontal pass over h+5 rows into a
 *     16-bit tmp buffer (intermediate values are NOT clipped, hence int16_t
 *     headroom), then a vertical pass over tmp stored through OP2.
 * OP/OP2 perform the final rounding/clipping store (see the op_* operator
 * macros elsewhere in this file); cm points into ff_cropTbl for saturation
 * to 0..255. The qpel2 variants are av_unused because only some configs
 * instantiate callers for them. The qpel16 functions are composed from four
 * 8x8 calls (top-left, top-right, bottom-left, bottom-right).
 * NOTE(review): generated code reads up to 2 pixels left/above and 3
 * right/below src — callers must guarantee that margin is addressable.
 */
02204 #define H264_LOWPASS(OPNAME, OP, OP2) \
02205 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02206 const int h=2;\
02207 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02208 int i;\
02209 for(i=0; i<h; i++)\
02210 {\
02211 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02212 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02213 dst+=dstStride;\
02214 src+=srcStride;\
02215 }\
02216 }\
02217 \
02218 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02219 const int w=2;\
02220 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02221 int i;\
02222 for(i=0; i<w; i++)\
02223 {\
02224 const int srcB= src[-2*srcStride];\
02225 const int srcA= src[-1*srcStride];\
02226 const int src0= src[0 *srcStride];\
02227 const int src1= src[1 *srcStride];\
02228 const int src2= src[2 *srcStride];\
02229 const int src3= src[3 *srcStride];\
02230 const int src4= src[4 *srcStride];\
02231 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02232 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02233 dst++;\
02234 src++;\
02235 }\
02236 }\
02237 \
02238 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02239 const int h=2;\
02240 const int w=2;\
02241 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02242 int i;\
02243 src -= 2*srcStride;\
02244 for(i=0; i<h+5; i++)\
02245 {\
02246 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02247 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02248 tmp+=tmpStride;\
02249 src+=srcStride;\
02250 }\
02251 tmp -= tmpStride*(h+5-2);\
02252 for(i=0; i<w; i++)\
02253 {\
02254 const int tmpB= tmp[-2*tmpStride];\
02255 const int tmpA= tmp[-1*tmpStride];\
02256 const int tmp0= tmp[0 *tmpStride];\
02257 const int tmp1= tmp[1 *tmpStride];\
02258 const int tmp2= tmp[2 *tmpStride];\
02259 const int tmp3= tmp[3 *tmpStride];\
02260 const int tmp4= tmp[4 *tmpStride];\
02261 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02262 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02263 dst++;\
02264 tmp++;\
02265 }\
02266 }\
02267 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02268 const int h=4;\
02269 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02270 int i;\
02271 for(i=0; i<h; i++)\
02272 {\
02273 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02274 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02275 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
02276 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
02277 dst+=dstStride;\
02278 src+=srcStride;\
02279 }\
02280 }\
02281 \
02282 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02283 const int w=4;\
02284 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02285 int i;\
02286 for(i=0; i<w; i++)\
02287 {\
02288 const int srcB= src[-2*srcStride];\
02289 const int srcA= src[-1*srcStride];\
02290 const int src0= src[0 *srcStride];\
02291 const int src1= src[1 *srcStride];\
02292 const int src2= src[2 *srcStride];\
02293 const int src3= src[3 *srcStride];\
02294 const int src4= src[4 *srcStride];\
02295 const int src5= src[5 *srcStride];\
02296 const int src6= src[6 *srcStride];\
02297 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02298 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02299 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02300 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02301 dst++;\
02302 src++;\
02303 }\
02304 }\
02305 \
02306 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02307 const int h=4;\
02308 const int w=4;\
02309 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02310 int i;\
02311 src -= 2*srcStride;\
02312 for(i=0; i<h+5; i++)\
02313 {\
02314 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02315 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02316 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
02317 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
02318 tmp+=tmpStride;\
02319 src+=srcStride;\
02320 }\
02321 tmp -= tmpStride*(h+5-2);\
02322 for(i=0; i<w; i++)\
02323 {\
02324 const int tmpB= tmp[-2*tmpStride];\
02325 const int tmpA= tmp[-1*tmpStride];\
02326 const int tmp0= tmp[0 *tmpStride];\
02327 const int tmp1= tmp[1 *tmpStride];\
02328 const int tmp2= tmp[2 *tmpStride];\
02329 const int tmp3= tmp[3 *tmpStride];\
02330 const int tmp4= tmp[4 *tmpStride];\
02331 const int tmp5= tmp[5 *tmpStride];\
02332 const int tmp6= tmp[6 *tmpStride];\
02333 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02334 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02335 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02336 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02337 dst++;\
02338 tmp++;\
02339 }\
02340 }\
02341 \
02342 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02343 const int h=8;\
02344 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02345 int i;\
02346 for(i=0; i<h; i++)\
02347 {\
02348 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
02349 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
02350 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
02351 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
02352 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
02353 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
02354 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
02355 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
02356 dst+=dstStride;\
02357 src+=srcStride;\
02358 }\
02359 }\
02360 \
02361 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02362 const int w=8;\
02363 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02364 int i;\
02365 for(i=0; i<w; i++)\
02366 {\
02367 const int srcB= src[-2*srcStride];\
02368 const int srcA= src[-1*srcStride];\
02369 const int src0= src[0 *srcStride];\
02370 const int src1= src[1 *srcStride];\
02371 const int src2= src[2 *srcStride];\
02372 const int src3= src[3 *srcStride];\
02373 const int src4= src[4 *srcStride];\
02374 const int src5= src[5 *srcStride];\
02375 const int src6= src[6 *srcStride];\
02376 const int src7= src[7 *srcStride];\
02377 const int src8= src[8 *srcStride];\
02378 const int src9= src[9 *srcStride];\
02379 const int src10=src[10*srcStride];\
02380 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02381 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02382 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02383 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02384 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
02385 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
02386 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
02387 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
02388 dst++;\
02389 src++;\
02390 }\
02391 }\
02392 \
02393 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02394 const int h=8;\
02395 const int w=8;\
02396 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02397 int i;\
02398 src -= 2*srcStride;\
02399 for(i=0; i<h+5; i++)\
02400 {\
02401 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
02402 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
02403 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
02404 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
02405 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
02406 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
02407 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
02408 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
02409 tmp+=tmpStride;\
02410 src+=srcStride;\
02411 }\
02412 tmp -= tmpStride*(h+5-2);\
02413 for(i=0; i<w; i++)\
02414 {\
02415 const int tmpB= tmp[-2*tmpStride];\
02416 const int tmpA= tmp[-1*tmpStride];\
02417 const int tmp0= tmp[0 *tmpStride];\
02418 const int tmp1= tmp[1 *tmpStride];\
02419 const int tmp2= tmp[2 *tmpStride];\
02420 const int tmp3= tmp[3 *tmpStride];\
02421 const int tmp4= tmp[4 *tmpStride];\
02422 const int tmp5= tmp[5 *tmpStride];\
02423 const int tmp6= tmp[6 *tmpStride];\
02424 const int tmp7= tmp[7 *tmpStride];\
02425 const int tmp8= tmp[8 *tmpStride];\
02426 const int tmp9= tmp[9 *tmpStride];\
02427 const int tmp10=tmp[10*tmpStride];\
02428 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02429 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02430 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02431 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02432 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
02433 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
02434 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
02435 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
02436 dst++;\
02437 tmp++;\
02438 }\
02439 }\
02440 \
02441 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02442 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02443 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02444 src += 8*srcStride;\
02445 dst += 8*dstStride;\
02446 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02447 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02448 }\
02449 \
02450 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02451 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02452 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02453 src += 8*srcStride;\
02454 dst += 8*dstStride;\
02455 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02456 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02457 }\
02458 \
02459 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02460 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02461 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02462 src += 8*srcStride;\
02463 dst += 8*dstStride;\
02464 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02465 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02466 }\
02467
02468 #define H264_MC(OPNAME, SIZE) \
02469 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
02470 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
02471 }\
02472 \
02473 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
02474 uint8_t half[SIZE*SIZE];\
02475 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02476 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
02477 }\
02478 \
02479 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
02480 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
02481 }\
02482 \
02483 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
02484 uint8_t half[SIZE*SIZE];\
02485 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02486 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
02487 }\
02488 \
02489 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02490 uint8_t full[SIZE*(SIZE+5)];\
02491 uint8_t * const full_mid= full + SIZE*2;\
02492 uint8_t half[SIZE*SIZE];\
02493 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02494 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02495 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
02496 }\
02497 \
02498 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02499 uint8_t full[SIZE*(SIZE+5)];\
02500 uint8_t * const full_mid= full + SIZE*2;\
02501 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02502 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
02503 }\
02504 \
02505 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02506 uint8_t full[SIZE*(SIZE+5)];\
02507 uint8_t * const full_mid= full + SIZE*2;\
02508 uint8_t half[SIZE*SIZE];\
02509 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02510 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02511 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
02512 }\
02513 \
02514 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02515 uint8_t full[SIZE*(SIZE+5)];\
02516 uint8_t * const full_mid= full + SIZE*2;\
02517 uint8_t halfH[SIZE*SIZE];\
02518 uint8_t halfV[SIZE*SIZE];\
02519 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02520 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02521 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02522 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02523 }\
02524 \
02525 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02526 uint8_t full[SIZE*(SIZE+5)];\
02527 uint8_t * const full_mid= full + SIZE*2;\
02528 uint8_t halfH[SIZE*SIZE];\
02529 uint8_t halfV[SIZE*SIZE];\
02530 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02531 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02532 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02533 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02534 }\
02535 \
02536 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02537 uint8_t full[SIZE*(SIZE+5)];\
02538 uint8_t * const full_mid= full + SIZE*2;\
02539 uint8_t halfH[SIZE*SIZE];\
02540 uint8_t halfV[SIZE*SIZE];\
02541 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02542 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02543 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02544 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02545 }\
02546 \
02547 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02548 uint8_t full[SIZE*(SIZE+5)];\
02549 uint8_t * const full_mid= full + SIZE*2;\
02550 uint8_t halfH[SIZE*SIZE];\
02551 uint8_t halfV[SIZE*SIZE];\
02552 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02553 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02554 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02555 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02556 }\
02557 \
02558 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02559 int16_t tmp[SIZE*(SIZE+5)];\
02560 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
02561 }\
02562 \
02563 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02564 int16_t tmp[SIZE*(SIZE+5)];\
02565 uint8_t halfH[SIZE*SIZE];\
02566 uint8_t halfHV[SIZE*SIZE];\
02567 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02568 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02569 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02570 }\
02571 \
02572 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02573 int16_t tmp[SIZE*(SIZE+5)];\
02574 uint8_t halfH[SIZE*SIZE];\
02575 uint8_t halfHV[SIZE*SIZE];\
02576 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02577 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02578 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02579 }\
02580 \
02581 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02582 uint8_t full[SIZE*(SIZE+5)];\
02583 uint8_t * const full_mid= full + SIZE*2;\
02584 int16_t tmp[SIZE*(SIZE+5)];\
02585 uint8_t halfV[SIZE*SIZE];\
02586 uint8_t halfHV[SIZE*SIZE];\
02587 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02588 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02589 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02590 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02591 }\
02592 \
02593 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02594 uint8_t full[SIZE*(SIZE+5)];\
02595 uint8_t * const full_mid= full + SIZE*2;\
02596 int16_t tmp[SIZE*(SIZE+5)];\
02597 uint8_t halfV[SIZE*SIZE];\
02598 uint8_t halfHV[SIZE*SIZE];\
02599 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02600 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02601 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02602 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02603 }\
02604
/* Rounding/store primitives plugged into the H264_LOWPASS/H264_MC macro
 * templates above:
 *   op_put/op_avg   round a 1D 6-tap sum  -> (+16)  >> 5
 *   op2_put/op2_avg round a 2D 6-tap sum  -> (+512) >> 10
 * The *_avg variants additionally average with the existing destination
 * pixel (rounded up). 'cm' is the clipping lookup table in scope at the
 * expansion site. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the put/avg lowpass filters and the quarter-pel MC functions
 * for each block size (size 2 is instantiated only for "put"). */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
02626
/* H.264 explicit weighted prediction primitives:
 *   op_scale1: single-reference  block[x] = clip8((block[x]*weight + offset) >> log2_denom)
 *   op_scale2: bidirectional mix of src into dst with two weights and one
 *              extra bit of shift for the average. */
#define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
/* Generates weight_/biweight_ functions for a WxH partition; the unrolled
 * body handles all widths with compile-time 'if(W==n) continue' cuts. */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

/* Instantiate every partition size that needs weighting. */
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
02696
02697 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02699 int i;
02700
02701 for(i=0; i<h; i++){
02702 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02703 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02704 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02705 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02706 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02707 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02708 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02709 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02710 dst+=dstStride;
02711 src+=srcStride;
02712 }
02713 }
02714
#if CONFIG_CAVS_DECODER
/* AVS (Chinese AVS video) dsp init; implemented elsewhere. */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/* Fullpel (mc00) CAVS qpel cases: plain copies/averages, no filtering. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif
02732
#if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
/* VC-1/WMV3 dsp init; implemented elsewhere. */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

/* Fullpel (mc00) VC-1 mspel case: a plain 8x8 copy; 'rnd' is unused here. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif
02741
/* Intra X8 (WMV2/VC-1 intra prediction) dsp init; implemented elsewhere. */
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);

/* H.264 encoder dsp init; implemented elsewhere. */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);

#if CONFIG_RV30_DECODER
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
#endif
02750
#if CONFIG_RV40_DECODER
/* RV40 position (3,3) motion compensation resolves to the plain centred
 * 2x2 average (xy2) of the fullpel data. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}

void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
#endif
02767
02768 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02769 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02770 int i;
02771
02772 for(i=0; i<w; i++){
02773 const int src_1= src[ -srcStride];
02774 const int src0 = src[0 ];
02775 const int src1 = src[ srcStride];
02776 const int src2 = src[2*srcStride];
02777 const int src3 = src[3*srcStride];
02778 const int src4 = src[4*srcStride];
02779 const int src5 = src[5*srcStride];
02780 const int src6 = src[6*srcStride];
02781 const int src7 = src[7*srcStride];
02782 const int src8 = src[8*srcStride];
02783 const int src9 = src[9*srcStride];
02784 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02785 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02786 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02787 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02788 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02789 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02790 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02791 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02792 src++;
02793 dst++;
02794 }
02795 }
02796
/* WMV2 mspel motion-compensation dispatchers.  Naming is mcXY where X is
 * the horizontal and Y the vertical half-pel phase (0 = fullpel, 2 =
 * half-pel filtered, 1/3 = average of fullpel and half-pel). */

/* (0,0): plain 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

/* (1,0): average of fullpel and horizontally filtered half-pel. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

/* (2,0): horizontal half-pel only. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* (3,0): like mc10 but averaged with the pixel to the right. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

/* (0,2): vertical half-pel only. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1,2): average of V-filtered and HV-filtered planes.  halfH holds 11
 * H-filtered rows starting one row above src so the V pass (on halfH+8,
 * i.e. skipping its first row) has its -1..+2 row context. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (3,2): as mc12 but the V-only plane is taken one pixel to the right. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (2,2): full HV half-pel: H pass into a temp, then V pass to dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
02844
/**
 * H.263 in-loop deblocking across a horizontal block edge ('src' points at
 * the first row below the edge; the two rows above and below it are
 * touched).  The whole body is compiled out when no H.263 variant is
 * enabled (CONFIG_ANY_H263 is a compile-time constant).
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* Ramp d1 up to 'strength' for small d, then back down to 0 for
             * large d, so genuine edges are left unfiltered. */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* Branchless clip to 0..255: bit 8 set means out of range; the
             * sign bit then selects 0 (underflow) or 0xFF (overflow). */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* Outer pixels get a smaller, clipped correction. */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
02881
/**
 * H.263 in-loop deblocking across a vertical block edge ('src' points at
 * the first column right of the edge).  Same filter as
 * h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* Ramp d1 up to 'strength', then back to 0 for large d. */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* Branchless clip to 0..255 (see h263_v_loop_filter_c). */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
02918
/* H.261 in-loop filter: separable [1 2 1]/4 smoothing over one 8x8 block,
 * done in place.  Border rows/columns are passed through unchanged (they
 * are carried at 4x scale so the final rounding treats them identically). */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int x, y;

    /* Vertical pass into temp[], kept at 4x precision; the top and bottom
     * rows are copied scaled by 4 instead of being filtered. */
    for(x=0; x<8; x++){
        temp[x]      = 4*src[x];
        temp[x + 56] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            temp[y*8 + x] = src[(y-1)*stride + x] + 2*src[y*stride + x]
                          + src[(y+1)*stride + x];
        }
    }

    /* Horizontal pass back into src with rounding; the first and last
     * column only undo the 4x scale. */
    for(y=0; y<8; y++){
        src[y*stride    ] = (temp[y*8    ] + 2)>>2;
        src[y*stride + 7] = (temp[y*8 + 7] + 2)>>2;
        for(x=1; x<7; x++){
            src[y*stride + x] = (temp[y*8 + x - 1] + 2*temp[y*8 + x]
                               + temp[y*8 + x + 1] + 8)>>4;
        }
    }
}
02945
/**
 * H.264 normal-strength (bS < 4) luma deblocking along one 16-sample edge.
 * xstride steps across the edge, ystride along it.  tc0 holds one clipping
 * threshold per group of 4 samples; a negative entry skips that group.
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            /* Filter only where the edge looks like a blocking artifact. */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                /* p1/q1 are additionally corrected when the inner gradient
                 * is small; each correction widens the p0/q0 clip range. */
                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );
                pix[0] = av_clip_uint8( q0 - i_delta );
            }
            pix += ystride;
        }
    }
}
/* Horizontal edge: samples across the edge are one 'stride' apart. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Vertical edge: samples across the edge are adjacent in memory. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
02994
/**
 * H.264 strong (bS = 4, intra) luma deblocking along one 16-sample edge.
 * Chooses between the strong 4/5-tap filter and the weak 3-tap fallback
 * per sample, depending on how flat the edge neighbourhood is.
 */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            /* Strong filtering only when the step across the edge is small
             * relative to alpha; otherwise just nudge p0/q0. */
            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* Strong 3-pixel smoothing on the p side. */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p side not flat: weak filter on p0 only. */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* Strong 3-pixel smoothing on the q side. */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q side not flat: weak filter on q0 only. */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* Large step: weak filter on both edge pixels. */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
/* Horizontal edge variant of the intra luma filter. */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
/* Vertical edge variant of the intra luma filter. */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}
03051
/**
 * H.264 normal-strength chroma deblocking along one 8-sample edge.
 * Only p0/q0 are modified; tc0 holds one threshold per group of 2 samples
 * and a non-positive entry skips that group.
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );
                pix[0] = av_clip_uint8( q0 - delta );
            }
            pix += ystride;
        }
    }
}
/* Horizontal edge variant of the chroma filter. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Vertical edge variant of the chroma filter. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
03088
/**
 * H.264 strong (intra) chroma deblocking along one 8-sample edge: apply a
 * fixed 3-tap smoothing to p0/q0 wherever the edge looks like an artifact.
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++, pix += ystride ) {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        /* Skip samples that do not look like blocking artifacts. */
        if( FFABS( p0 - q0 ) >= alpha ||
            FFABS( p1 - p0 ) >= beta  ||
            FFABS( q1 - q0 ) >= beta )
            continue;

        pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
        pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;
    }
}
/* Horizontal edge variant of the intra chroma filter. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
/* Vertical edge variant of the intra chroma filter. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
03116
/* Sum of absolute differences over a 16-wide block of h rows (fullpel SAD).
 * 'v' is the unused context parameter of the me_cmp interface. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
03144
/* SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (rounded average of each pixel and its right neighbour; reads pix2[16]). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
03172
/* SAD of pix1 against the vertical half-pel interpolation of pix2
 * (rounded average of each pixel and the one directly below it). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
03202
/* SAD of pix1 against the centred half-pel interpolation of pix2
 * (rounded average of the 2x2 neighbourhood; reads one extra column/row). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
03232
/* Sum of absolute differences over an 8-wide block of h rows (fullpel SAD). */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
03252
/* 8-wide SAD against the horizontal half-pel interpolation (reads pix2[8]). */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
03272
/* 8-wide SAD against the vertical half-pel interpolation. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
03294
/* 8-wide SAD against the centred 2x2 half-pel interpolation. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++){
        for(x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
03316
/**
 * "Noise shaping" comparison, 16 wide: plain SSE (score1) plus a penalty
 * for how much the local 2x2 gradient structure of the two blocks differs
 * (score2), weighted by avctx->nsse_weight (8 when no context is passed).
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                /* Difference of the 2x2 second-order gradients of s1 and s2. */
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
03342
/** 8-wide variant of nsse16_c (see above). */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                /* Difference of the 2x2 second-order gradients of s1 and s2. */
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
03368
/**
 * Estimate the weighted squared error that would remain after adding
 * basis*scale to the residual 'rem'.  BASIS_SHIFT/RECON_SHIFT are
 * fixed-point scaling constants defined elsewhere in this file.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* Rounded fixed-point add of the scaled basis function. */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
03383
/** Add basis*scale (rounded fixed point) into the residual 'rem' in place. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
03391
03400 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03401 {
03402 int i;
03403 DCTELEM temp[64];
03404
03405 if(last<=0) return;
03406
03407
03408 for(i=0; i<=last; i++){
03409 const int j= scantable[i];
03410 temp[j]= block[j];
03411 block[j]=0;
03412 }
03413
03414 for(i=0; i<=last; i++){
03415 const int j= scantable[i];
03416 const int perm_j= permutation[j];
03417 block[perm_j]= temp[j];
03418 }
03419 }
03420
/* Comparison function that always reports a perfect match (FF_CMP_ZERO). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
03424
/**
 * Fill the 6-entry cmp[] table with the comparison functions selected by
 * the low byte of 'type' (an FF_CMP_* value), taken from the DSPContext.
 * Unknown types leave the zeroed entries in place and log an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
03484
/** Zero one 64-coefficient DCT block. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}
03489
/** Zero six consecutive 64-coefficient DCT blocks (one macroblock's worth). */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
03497
03498 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
03499 long i;
03500 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03501 long a = *(long*)(src+i);
03502 long b = *(long*)(dst+i);
03503 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03504 }
03505 for(; i<w; i++)
03506 dst[i+0] += src[i+0];
03507 }
03508
03509 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03510 long i;
03511 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03512 long a = *(long*)(src1+i);
03513 long b = *(long*)(src2+i);
03514 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03515 }
03516 for(; i<w; i++)
03517 dst[i] = src1[i]+src2[i];
03518 }
03519
03520 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03521 long i;
03522 #if !HAVE_FAST_UNALIGNED
03523 if((long)src2 & (sizeof(long)-1)){
03524 for(i=0; i+7<w; i+=8){
03525 dst[i+0] = src1[i+0]-src2[i+0];
03526 dst[i+1] = src1[i+1]-src2[i+1];
03527 dst[i+2] = src1[i+2]-src2[i+2];
03528 dst[i+3] = src1[i+3]-src2[i+3];
03529 dst[i+4] = src1[i+4]-src2[i+4];
03530 dst[i+5] = src1[i+5]-src2[i+5];
03531 dst[i+6] = src1[i+6]-src2[i+6];
03532 dst[i+7] = src1[i+7]-src2[i+7];
03533 }
03534 }else
03535 #endif
03536 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03537 long a = *(long*)(src1+i);
03538 long b = *(long*)(src2+i);
03539 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
03540 }
03541 for(; i<w; i++)
03542 dst[i+0] = src1[i+0]-src2[i+0];
03543 }
03544
/**
 * HuffYUV median-prediction decode: reconstruct w pixels by adding the
 * residual 'diff' to the median of (left, above, left+above-aboveleft).
 * The running left and above-left values are carried in/out via *left and
 * *left_top; src1 is the row above.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t prev    = *left;
    uint8_t topleft = *left_top;

    for(i=0; i<w; i++){
        const int pred = mid_pred(prev, src1[i], (prev + src1[i] - topleft)&0xFF);
        prev    = pred + diff[i];   /* wraps mod 256 via uint8_t */
        topleft = src1[i];
        dst[i]  = prev;
    }

    *left     = prev;
    *left_top = topleft;
}
03561
/**
 * HuffYUV median-prediction encode: the inverse of add_hfyu_median_prediction.
 * Writes the residual src2[i] - median(left, above, left+above-aboveleft)
 * into dst; src1 is the row above, src2 the current row.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t prev    = *left;
    uint8_t topleft = *left_top;

    for(i=0; i<w; i++){
        const int pred = mid_pred(prev, src1[i], (prev + src1[i] - topleft)&0xFF);
        topleft = src1[i];
        prev    = src2[i];
        dst[i]  = prev - pred;      /* residual, wraps mod 256 */
    }

    *left     = prev;
    *left_top = topleft;
}
03579
/* One butterfly stage writing sum/difference of two inputs to two outputs
 * (used for the first pass of the Hadamard transforms below). */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: x,y <- x+y, x-y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly folded with the absolute-value accumulation:
 * |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03594
/**
 * SATD of an 8x8 block: 2-D Hadamard transform of (src - dst) followed by
 * the sum of absolute transform coefficients.
 * Fix: removed a dead "#if 0" debug block (it also declared a static
 * variable after statements, which is invalid C89 if ever enabled).
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard of each difference row */
    for(i=0; i<8; i++){
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass; the last stage is fused with |.| accumulation */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
03646
/**
 * Intra SATD of an 8x8 block: 2-D Hadamard transform of the pixels
 * themselves (no reference), sum of absolute coefficients, minus the DC
 * term so the score is independent of the block's mean level.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard of each pixel row */
    for(i=0; i<8; i++){
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass; final stage fused with |.| accumulation */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[0]+temp[32] at i==0 is the DC coefficient; subtract its
       contribution so intra scores compare fairly with inter diffs */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03694
/**
 * DCT-domain SAD of an 8x8 block: forward-DCT the pixel difference and
 * return the sum of absolute coefficients (via the DSPContext hooks, so
 * optimized fdct/sum implementations are used).
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* 16-byte aligned scratch block, required by SIMD fdct implementations */
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
03706
#if CONFIG_GPL
/* One 8-point H.264-style integer DCT pass. Reads via SRC(i), writes via
 * DST(i,v); the caller defines those macros to select rows or columns.
 * The >>1 / >>2 shifts implement the scaled integer basis of the H.264
 * 8x8 transform. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * SAD in the H.264 8x8 transform domain: apply DCT8_1D to the rows of the
 * pixel difference, then to the columns, accumulating absolute values in
 * the second (column) pass instead of storing the coefficients.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
03759
/**
 * Maximum absolute DCT coefficient of the 8x8 pixel difference — a
 * peak-error metric rather than a sum.
 */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* 8-byte aligned scratch block for the fdct */
    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
03776
/**
 * Quantization-noise metric: run the pixel difference through the encoder's
 * quantize/dequantize/IDCT chain and return the summed squared error against
 * the original difference block.
 * NOTE(review): relies on fast_dct_quantize performing the forward transform
 * internally (bak stays in the same domain as the reconstruction) — verify
 * against the quantizer implementation.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* two 64-coefficient blocks in one aligned buffer: temp and its backup */
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0; /* force inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
03800
/**
 * Rate-distortion score of coding an 8x8 block: quantize the difference,
 * count the VLC bits the coefficients would cost, reconstruct the block and
 * measure SSE against the source, then combine distortion + lambda*bits.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    /* VLA: stride*8 bytes — holds an 8-row copy of the prediction (src2) */
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* copy the prediction 8 bytes per row, so the idct_add below can
       reconstruct into it without touching the caller's buffer */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* intra blocks code the DC separately and start AC scanning at 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so the table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coefficient must be non-zero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    /* distortion + lambda*bits; 109/128 approximates the rate weighting */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
03879
/**
 * Rate-only metric: quantize the 8x8 difference block and return the number
 * of VLC bits its coefficients would cost (same bit counting as rd8x8_c,
 * without reconstruction or distortion).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* intra blocks code the DC separately and start AC scanning at 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so the table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coefficient must be non-zero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
03939
/* Generates vsad_intra8_c / vsad_intra16_c: sum of absolute differences
 * between vertically adjacent pixels within one block — a measure of
 * vertical activity needing no reference block. Inner loop is unrolled
 * by 4 columns. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                         \
    int x,y;                                                                             \
                                                                                         \
    for(y=1; y<h; y++){                                                                  \
        for(x=0; x<size; x+=4){                                                          \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])        \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);       \
        }                                                                                \
        s+= stride;                                                                      \
    }                                                                                    \
                                                                                         \
    return score;                                                                        \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
03957
/**
 * Vertical SAD between two 16-wide blocks: sums |d(y,x) - d(y-1,x)| where
 * d = s1 - s2, i.e. how much the prediction error changes from row to row.
 */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int row, col;

    for(row = 1; row < h; row++){
        for(col = 0; col < 16; col++)
            sum += FFABS(s1[col] - s2[col] - s1[col+stride] + s2[col+stride]);
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
03972
/* Square helper for the SSE-style metrics below. */
#define SQ(a) ((a)*(a))

/* Generates vsse_intra8_c / vsse_intra16_c: squared-error variant of
 * VSAD_INTRA — sums squared differences between vertically adjacent
 * pixels within one block. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                         \
    int x,y;                                                                             \
                                                                                         \
    for(y=1; y<h; y++){                                                                  \
        for(x=0; x<size; x+=4){                                                          \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])              \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);             \
        }                                                                                \
        s+= stride;                                                                      \
    }                                                                                    \
                                                                                         \
    return score;                                                                        \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
03991
/**
 * Vertical SSE between two 16-wide blocks: sums the squared row-to-row
 * change of the prediction error d = s1 - s2.
 */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int row, col;

    for(row = 1; row < h; row++){
        for(col = 0; col < 16; col++){
            const int d = s1[col] - s2[col] - s1[col+stride] + s2[col+stride];
            sum += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
04006
/**
 * Sum of squared differences between an int8 and an int16 array of the
 * given length.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, sum = 0;
    for(i=0; i<size; i++){
        const int d = pix1[i] - pix2[i];
        sum += d * d;
    }
    return sum;
}
04015
/* Build 16x16 comparison functions from the 8x8 kernels above by summing
 * the metric over the four 8x8 quadrants (see the WRAPPER8_16_SQ macro
 * definition earlier in this file). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
04026
/* In-place element-wise product: dst[i] *= src[i] for i in [0, len). */
static void vector_fmul_c(float *dst, const float *src, int len){
    int n;
    for(n = len; n > 0; n--, dst++, src++)
        *dst *= *src;
}
04032
/* dst[i] = src0[i] * src1[len-1-i]: multiply src0 by src1 read backwards. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    const float *rev = src1 + len - 1;
    for(i = 0; i < len; i++)
        dst[i] = src0[i] * rev[-i];
}
04039
/**
 * dst[i*step] = src0[i]*src1[i] + src2[i] + src3 for i in [0, len).
 * dst is written with stride 'step'; src3 is a constant bias.
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    float *out = dst;
    for(i = 0; i < len; i++, out += step)
        *out = src0[i] * src1[i] + src2[i] + src3;
}
04045
/**
 * Overlap-add windowing: combines src0 (read forward) and src1 (read
 * backward) under the window 'win', writing 2*len outputs plus a constant
 * bias. Pointers are advanced by len so negative indices address the first
 * half and non-negative the second.
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int i, j;

    dst  += len;
    win  += len;
    src0 += len;

    for(i = -len, j = len-1; i < 0; i++, j--){
        const float a  = src0[i];
        const float b  = src1[j];
        const float wl = win[i];
        const float wr = win[j];
        dst[i] = a*wr - b*wl + add_bias;
        dst[j] = a*wl + b*wr + add_bias;
    }
}
04060
/* Convert int samples to float with a constant gain: dst[i] = src[i] * mul. */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int n;
    for(n = 0; n < len; n++)
        dst[n] = src[n] * mul;
}
04066
/**
 * Convert one float sample to int16 by inspecting its IEEE-754 bit pattern.
 * NOTE(review): assumes the input was pre-biased by the caller so in-range
 * samples map to bit patterns around 0x8000 after masking — verify against
 * the float_to_int16 callers. Out-of-range patterns (any of bits 16..19 set)
 * are clamped via the sign of (0x43c0ffff - tmp).
 * NOTE(review): the pointer cast is a strict-aliasing violation; a memcpy
 * would be the conforming way to type-pun.
 */
static av_always_inline int float_to_int16_one(const float *src){
    int_fast32_t tmp = *(const int32_t*)src;
    if(tmp & 0xf0000){
        tmp = (0x43c0ffff - tmp)>>31;
        /* tmp becomes 0 (clamp low, yielding -0x8000) or -1 / 0xFFFF
           (clamp high, yielding 0x7FFF after the subtraction below) */
    }
    return tmp - 0x8000;
}
04077
/**
 * Convert len float samples to int16 (see float_to_int16_one for the
 * expected input scaling).
 * Fix: index widened from int to long to match 'len' — an int index could
 * overflow for very long buffers on LP64 targets.
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i;
    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
}
04083
/**
 * Convert planar float samples to interleaved int16 across 'channels'
 * channels; stereo has a dedicated fast path.
 * Fix: indices widened from int to long to match 'len' — int indices could
 * overflow for very long buffers on LP64 targets.
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    long i, j;
    int c;
    if(channels==2){
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
        }
    }else{
        for(c=0; c<channels; c++)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[c]+i);
    }
}
04097
/* Element-wise in-place addition: v1[i] += v2[i] for 'order' elements. */
static void add_int16_c(int16_t * v1, int16_t * v2, int order)
{
    int i;
    for (i = 0; i < order; i++)
        v1[i] += v2[i];
}
04103
/* Element-wise in-place subtraction: v1[i] -= v2[i] for 'order' elements. */
static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
{
    int i;
    for (i = 0; i < order; i++)
        v1[i] -= v2[i];
}
04109
/**
 * Dot product of two int16 vectors; each product is shifted right by
 * 'shift' BEFORE accumulation (not applied to the final sum).
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int i;
    int acc = 0;

    for (i = 0; i < order; i++)
        acc += (v1[i] * v2[i]) >> shift;

    return acc;
}
04119
/* 11-bit fixed-point IDCT weights for the WMV2 IDCT below:
 * W0 = W4 = 2048 (= 2^11); Wk for k = 1..7 is approximately
 * 2048*sqrt(2)*cos(k*pi/16) — the standard scaled DCT-II basis. */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565
04128
/**
 * 1-D fixed-point inverse DCT over one 8-element row, in place.
 * Even part in a0/a2/a4/a6, odd part in a1/a3/a5/a7; s1/s2 are the
 * rotated odd terms. Results are rounded (+1<<7) and scaled down by 8 bits.
 */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* 181/256 ~= 1/sqrt(2): rotate the remaining odd pair */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/**
 * 1-D fixed-point inverse DCT over one 8-element column (stride 8),
 * in place. Same structure as wmv2_idct_row, but intermediate products are
 * pre-scaled by >>3 and the final results by >>14 to complete the 2-D
 * normalization.
 */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* 181/256 ~= 1/sqrt(2): rotate the remaining odd pair */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/**
 * Full 2-D WMV2 inverse DCT of an 8x8 coefficient block, in place:
 * a 1-D pass over each row followed by a 1-D pass over each column.
 */
void ff_wmv2_idct_c(short * block){
    int n;

    for(n = 0; n < 8; n++)
        wmv2_idct_row(block + 8*n);
    for(n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
04192
04193
/* WMV2 IDCT + store: inverse-transform the block and write the clamped
 * result over the destination. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT + add: inverse-transform the block and add the clamped result
 * to the existing destination pixels (inter reconstruction). */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference IDCT (j_rev_dct) + store / add wrappers, plus the reduced
 * 4x4 and 2x2 variants used for lowres decoding (see dsputil_init). */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 lowres (1/2 resolution) variants */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 lowres (1/4 resolution) variants */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
04236
/* 1x1 lowres IDCT: only the DC coefficient survives, so the "transform"
 * reduces to a rounded shift and a clamp through ff_cropTbl. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
04249
/* Do-nothing stub matching the (mem, stride, h) hook signature — presumably
 * installed as a safe default for optional hooks (e.g. prefetch); verify
 * against the assignments in dsputil_init. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
04251
04252
04253 void dsputil_static_init(void)
04254 {
04255 int i;
04256
04257 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
04258 for(i=0;i<MAX_NEG_CROP;i++) {
04259 ff_cropTbl[i] = 0;
04260 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
04261 }
04262
04263 for(i=0;i<512;i++) {
04264 ff_squareTbl[i] = (i - 256) * (i - 256);
04265 }
04266
04267 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
04268 }
04269
/**
 * Detect compilers that fail to 16-byte-align stack variables (which breaks
 * SIMD code paths). Returns 0 when alignment is fine, -1 otherwise; the
 * warning is logged only once per process.
 */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
04289
04290 void dsputil_init(DSPContext* c, AVCodecContext *avctx)
04291 {
04292 int i;
04293
04294 ff_check_alignment();
04295
04296 #if CONFIG_ENCODERS
04297 if(avctx->dct_algo==FF_DCT_FASTINT) {
04298 c->fdct = fdct_ifast;
04299 c->fdct248 = fdct_ifast248;
04300 }
04301 else if(avctx->dct_algo==FF_DCT_FAAN) {
04302 c->fdct = ff_faandct;
04303 c->fdct248 = ff_faandct248;
04304 }
04305 else {
04306 c->fdct = ff_jpeg_fdct_islow;
04307 c->fdct248 = ff_fdct248_islow;
04308 }
04309 #endif //CONFIG_ENCODERS
04310
04311 if(avctx->lowres==1){
04312 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
04313 c->idct_put= ff_jref_idct4_put;
04314 c->idct_add= ff_jref_idct4_add;
04315 }else{
04316 c->idct_put= ff_h264_lowres_idct_put_c;
04317 c->idct_add= ff_h264_lowres_idct_add_c;
04318 }
04319 c->idct = j_rev_dct4;
04320 c->idct_permutation_type= FF_NO_IDCT_PERM;
04321 }else if(avctx->lowres==2){
04322 c->idct_put= ff_jref_idct2_put;
04323 c->idct_add= ff_jref_idct2_add;
04324 c->idct = j_rev_dct2;
04325 c->idct_permutation_type= FF_NO_IDCT_PERM;
04326 }else if(avctx->lowres==3){
04327 c->idct_put= ff_jref_idct1_put;
04328 c->idct_add= ff_jref_idct1_add;
04329 c->idct = j_rev_dct1;
04330 c->idct_permutation_type= FF_NO_IDCT_PERM;
04331 }else{
04332 if(avctx->idct_algo==FF_IDCT_INT){
04333 c->idct_put= ff_jref_idct_put;
04334 c->idct_add= ff_jref_idct_add;
04335 c->idct = j_rev_dct;
04336 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
04337 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER || CONFIG_THEORA_DECODER ) &&
04338 avctx->idct_algo==FF_IDCT_VP3){
04339 c->idct_put= ff_vp3_idct_put_c;
04340 c->idct_add= ff_vp3_idct_add_c;
04341 c->idct = ff_vp3_idct_c;
04342 c->idct_permutation_type= FF_NO_IDCT_PERM;
04343 }else if(avctx->idct_algo==FF_IDCT_WMV2){
04344 c->idct_put= ff_wmv2_idct_put_c;
04345 c->idct_add= ff_wmv2_idct_add_c;
04346 c->idct = ff_wmv2_idct_c;
04347 c->idct_permutation_type= FF_NO_IDCT_PERM;
04348 }else if(avctx->idct_algo==FF_IDCT_FAAN){
04349 c->idct_put= ff_faanidct_put;
04350 c->idct_add= ff_faanidct_add;
04351 c->idct = ff_faanidct;
04352 c->idct_permutation_type= FF_NO_IDCT_PERM;
04353 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
04354 c->idct_put= ff_ea_idct_put_c;
04355 c->idct_permutation_type= FF_NO_IDCT_PERM;
04356 }else{
04357 c->idct_put= ff_simple_idct_put;
04358 c->idct_add= ff_simple_idct_add;
04359 c->idct = ff_simple_idct;
04360 c->idct_permutation_type= FF_NO_IDCT_PERM;
04361 }
04362 }
04363
04364 if (CONFIG_H264_DECODER) {
04365 c->h264_idct_add= ff_h264_idct_add_c;
04366 c->h264_idct8_add= ff_h264_idct8_add_c;
04367 c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
04368 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
04369 c->h264_idct_add16 = ff_h264_idct_add16_c;
04370 c->h264_idct8_add4 = ff_h264_idct8_add4_c;
04371 c->h264_idct_add8 = ff_h264_idct_add8_c;
04372 c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
04373 }
04374
04375 c->get_pixels = get_pixels_c;
04376 c->diff_pixels = diff_pixels_c;
04377 c->put_pixels_clamped = put_pixels_clamped_c;
04378 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
04379 c->add_pixels_clamped = add_pixels_clamped_c;
04380 c->add_pixels8 = add_pixels8_c;
04381 c->add_pixels4 = add_pixels4_c;
04382 c->sum_abs_dctelem = sum_abs_dctelem_c;
04383 c->gmc1 = gmc1_c;
04384 c->gmc = ff_gmc_c;
04385 c->clear_block = clear_block_c;
04386 c->clear_blocks = clear_blocks_c;
04387 c->pix_sum = pix_sum_c;
04388 c->pix_norm1 = pix_norm1_c;
04389
04390
04391 c->pix_abs[0][0] = pix_abs16_c;
04392 c->pix_abs[0][1] = pix_abs16_x2_c;
04393 c->pix_abs[0][2] = pix_abs16_y2_c;
04394 c->pix_abs[0][3] = pix_abs16_xy2_c;
04395 c->pix_abs[1][0] = pix_abs8_c;
04396 c->pix_abs[1][1] = pix_abs8_x2_c;
04397 c->pix_abs[1][2] = pix_abs8_y2_c;
04398 c->pix_abs[1][3] = pix_abs8_xy2_c;
04399
04400 #define dspfunc(PFX, IDX, NUM) \
04401 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
04402 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
04403 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
04404 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
04405
04406 dspfunc(put, 0, 16);
04407 dspfunc(put_no_rnd, 0, 16);
04408 dspfunc(put, 1, 8);
04409 dspfunc(put_no_rnd, 1, 8);
04410 dspfunc(put, 2, 4);
04411 dspfunc(put, 3, 2);
04412
04413 dspfunc(avg, 0, 16);
04414 dspfunc(avg_no_rnd, 0, 16);
04415 dspfunc(avg, 1, 8);
04416 dspfunc(avg_no_rnd, 1, 8);
04417 dspfunc(avg, 2, 4);
04418 dspfunc(avg, 3, 2);
04419 #undef dspfunc
04420
04421 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
04422 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
04423
04424 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
04425 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
04426 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
04427 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
04428 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
04429 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
04430 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
04431 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
04432 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
04433
04434 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
04435 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
04436 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
04437 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
04438 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
04439 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
04440 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
04441 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
04442 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
04443
04444 #define dspfunc(PFX, IDX, NUM) \
04445 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
04446 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
04447 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
04448 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
04449 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
04450 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
04451 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
04452 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
04453 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
04454 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
04455 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
04456 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
04457 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
04458 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
04459 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
04460 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
04461
04462 dspfunc(put_qpel, 0, 16);
04463 dspfunc(put_no_rnd_qpel, 0, 16);
04464
04465 dspfunc(avg_qpel, 0, 16);
04466
04467
04468 dspfunc(put_qpel, 1, 8);
04469 dspfunc(put_no_rnd_qpel, 1, 8);
04470
04471 dspfunc(avg_qpel, 1, 8);
04472
04473
/* NOTE(review): this span is the tail of the DSPContext initialization
 * function (dsputil_init); its signature and earlier body are above this
 * view — confirm against the full file.  Every statement below installs a
 * C reference implementation into a function-pointer table of the
 * DSPContext *c; arch-specific init functions near the end may override
 * these pointers with optimized versions. */

/* H.264 quarter-pel MC: dspfunc (defined earlier, out of view) expands to
 * the 16 sub-pel position assignments per size.  put gets sizes
 * 16/8/4/2; avg gets only 16/8/4 — no 2x2 avg variant is registered. */
04474 dspfunc(put_h264_qpel, 0, 16);
04475 dspfunc(put_h264_qpel, 1, 8);
04476 dspfunc(put_h264_qpel, 2, 4);
04477 dspfunc(put_h264_qpel, 3, 2);
04478 dspfunc(avg_h264_qpel, 0, 16);
04479 dspfunc(avg_h264_qpel, 1, 8);
04480 dspfunc(avg_h264_qpel, 2, 4);
04481
04482 #undef dspfunc
/* H.264 chroma MC, indexed by block size: [0]=8x8, [1]=4x4, [2]=2x2.
 * Only the 8x8 no-rounding variant is provided. */
04483 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
04484 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
04485 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
04486 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
04487 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
04488 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
04489 c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;
04490
/* H.264 weighted (uni-) and bi-weighted prediction, one entry per block
 * geometry from 16x16 down to 2x2 (same index order in both tables). */
04491 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
04492 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
04493 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
04494 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
04495 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
04496 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
04497 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
04498 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
04499 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
04500 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
04501 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
04502 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
04503 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
04504 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
04505 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
04506 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
04507 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
04508 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
04509 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
04510 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
04511
04512 c->draw_edges = draw_edges_c;
04513
/* Per-codec DSP sub-initializers, compiled in only when the matching
 * decoder is enabled at configure time. */
04514 #if CONFIG_CAVS_DECODER
04515 ff_cavsdsp_init(c,avctx);
04516 #endif
04517 #if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
04518 ff_vc1dsp_init(c,avctx);
04519 #endif
04520 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
04521 ff_intrax8dsp_init(c,avctx);
04522 #endif
04523 #if CONFIG_RV30_DECODER
04524 ff_rv30dsp_init(c,avctx);
04525 #endif
04526 #if CONFIG_RV40_DECODER
04527 ff_rv40dsp_init(c,avctx);
/* RV40 needs special (3,3) sub-pel positions on top of the generic
 * qpel tables set up by ff_rv40dsp_init. */
04528 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
04529 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
04530 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
04531 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
04532 #endif
04533
/* WMV2 "mspel" 8x8 MC: 8 half-pel positions (mc00..mc32). */
04534 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
04535 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
04536 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
04537 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
04538 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
04539 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
04540 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
04541 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
04542
/* Helper: register a comparison function pair — [0] = 16-wide variant,
 * [1] = 8x8 variant — under the common naming scheme <name>16_c /
 * <name>8x8_c.  (Multi-statement macro; callers invoke it at statement
 * position without a trailing semicolon.) */
04543 #define SET_CMP_FUNC(name) \
04544 c->name[0]= name ## 16_c;\
04545 c->name[1]= name ## 8x8_c;
04546
/* Block comparison / motion-estimation metrics. */
04547 SET_CMP_FUNC(hadamard8_diff)
04548 c->hadamard8_diff[4]= hadamard8_intra16_c;
04549 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
04550 SET_CMP_FUNC(dct_sad)
04551 SET_CMP_FUNC(dct_max)
04552 #if CONFIG_GPL
04553 SET_CMP_FUNC(dct264_sad)
04554 #endif
04555 c->sad[0]= pix_abs16_c;
04556 c->sad[1]= pix_abs8_c;
04557 c->sse[0]= sse16_c;
04558 c->sse[1]= sse8_c;
04559 c->sse[2]= sse4_c;
04560 SET_CMP_FUNC(quant_psnr)
04561 SET_CMP_FUNC(rd)
04562 SET_CMP_FUNC(bit)
04563 c->vsad[0]= vsad16_c;
04564 c->vsad[4]= vsad_intra16_c;
04565 c->vsad[5]= vsad_intra8_c;
04566 c->vsse[0]= vsse16_c;
04567 c->vsse[4]= vsse_intra16_c;
04568 c->vsse[5]= vsse_intra8_c;
04569 c->nsse[0]= nsse16_c;
04570 c->nsse[1]= nsse8_c;
/* Snow wavelet rate-distortion metrics (5/3 and 9/7 filters) — encoder only. */
04571 #if CONFIG_SNOW_ENCODER
04572 c->w53[0]= w53_16_c;
04573 c->w53[1]= w53_8_c;
04574 c->w97[0]= w97_16_c;
04575 c->w97[1]= w97_8_c;
04576 #endif
04577
04578 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
04579
/* Byte-wise helpers used by lossless codecs (e.g. HuffYUV, PNG). */
04580 c->add_bytes= add_bytes_c;
04581 c->add_bytes_l2= add_bytes_l2_c;
04582 c->diff_bytes= diff_bytes_c;
04583 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
04584 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
04585 c->bswap_buf= bswap_buf;
04586 #if CONFIG_PNG_DECODER
04587 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
04588 #endif
04589
/* In-loop deblocking filters.  h264_loop_filter_strength has no C
 * reference here; it stays NULL unless an arch init below provides one,
 * so callers must check it before use. */
04590 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
04591 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
04592 c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
04593 c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
04594 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
04595 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
04596 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
04597 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
04598 c->h264_loop_filter_strength= NULL;
04599
/* Compile-time constants in if() rather than #if: dead branches are
 * eliminated by the compiler while all code stays syntax-checked. */
04600 if (CONFIG_ANY_H263) {
04601 c->h263_h_loop_filter= h263_h_loop_filter_c;
04602 c->h263_v_loop_filter= h263_v_loop_filter_c;
04603 }
04604
04605 if (CONFIG_VP3_DECODER || CONFIG_THEORA_DECODER) {
04606 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
04607 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
04608 }
04609 if (CONFIG_VP6_DECODER) {
04610 c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
04611 }
04612
04613 c->h261_loop_filter= h261_loop_filter_c;
04614
/* Trellis/basis search helpers used by the MPEG encoder rate distortion code. */
04615 c->try_8x8basis= try_8x8basis_c;
04616 c->add_8x8basis= add_8x8basis_c;
04617
04618 #if CONFIG_SNOW_DECODER
04619 c->vertical_compose97i = ff_snow_vertical_compose97i;
04620 c->horizontal_compose97i = ff_snow_horizontal_compose97i;
04621 c->inner_add_yblock = ff_snow_inner_add_yblock;
04622 #endif
04623
/* Audio-codec specific float/int kernels. */
04624 #if CONFIG_VORBIS_DECODER
04625 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
04626 #endif
04627 #if CONFIG_AC3_DECODER
04628 c->ac3_downmix = ff_ac3_downmix_c;
04629 #endif
04630 #if CONFIG_FLAC_ENCODER
04631 c->flac_compute_autocorr = ff_flac_compute_autocorr;
04632 #endif
/* Generic float/int vector primitives (windowing, scaling, conversion). */
04633 c->vector_fmul = vector_fmul_c;
04634 c->vector_fmul_reverse = vector_fmul_reverse_c;
04635 c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
04636 c->vector_fmul_window = ff_vector_fmul_window_c;
04637 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
04638 c->float_to_int16 = ff_float_to_int16_c;
04639 c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
04640 c->add_int16 = add_int16_c;
04641 c->sub_int16 = sub_int16_c;
04642 c->scalarproduct_int16 = scalarproduct_int16_c;
04643
/* Image plane downscalers: shrink[n] halves each dimension n times
 * (shrink[0] is a plain copy). */
04644 c->shrink[0]= ff_img_copy_plane;
04645 c->shrink[1]= ff_shrink22;
04646 c->shrink[2]= ff_shrink44;
04647 c->shrink[3]= ff_shrink88;
04648
/* Default prefetch is a no-op stub; arch inits may install a real one. */
04649 c->prefetch= just_return;
04650
/* Clear the 2-tap tables first so the fallback loop below can detect
 * which entries the arch-specific inits actually filled in. */
04651 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
04652 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
04653
/* Architecture-specific overrides; each replaces pointers it has
 * optimized versions for and leaves the rest as the C defaults. */
04654 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
04655 if (ARCH_ARM) dsputil_init_arm (c, avctx);
04656 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
04657 if (HAVE_VIS) dsputil_init_vis (c, avctx);
04658 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
04659 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
04660 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
04661 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
04662 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
04663
/* Any 2-tap qpel entry not provided by an arch init falls back to the
 * corresponding H.264 qpel function. */
04664 for(i=0; i<64; i++){
04665 if(!c->put_2tap_qpel_pixels_tab[0][i])
04666 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
04667 if(!c->avg_2tap_qpel_pixels_tab[0][i])
04668 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
04669 }
04670
/* Build the 8x8 coefficient permutation matching the IDCT chosen above
 * (idct_permutation_type was set earlier in this function, out of view),
 * so the decoder can store coefficients in the order the IDCT expects. */
04671 switch(c->idct_permutation_type){
04672 case FF_NO_IDCT_PERM:
04673 for(i=0; i<64; i++)
04674 c->idct_permutation[i]= i;
04675 break;
04676 case FF_LIBMPEG2_IDCT_PERM:
04677 for(i=0; i<64; i++)
04678 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
04679 break;
04680 case FF_SIMPLE_IDCT_PERM:
04681 for(i=0; i<64; i++)
04682 c->idct_permutation[i]= simple_mmx_permutation[i];
04683 break;
04684 case FF_TRANSPOSE_IDCT_PERM:
04685 for(i=0; i<64; i++)
04686 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
04687 break;
04688 case FF_PARTTRANS_IDCT_PERM:
04689 for(i=0; i<64; i++)
04690 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
04691 break;
04692 case FF_SSE2_IDCT_PERM:
04693 for(i=0; i<64; i++)
04694 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
04695 break;
04696 default:
04697 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
04698 }
04699 }
04700