00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "libavutil/x86_cpu.h"
00027
// Immediate operand for an "and" instruction: clears the low three bits of a register,
// i.e. rounds an address down to a multiple of 8.
00028 #define ALIGN_MASK "$-8"
00029
// Drop any earlier definitions before the macros are defined again below
// (presumably because this template is included more than once -- confirm).
00030 #undef PAVGB
00031 #undef PMINUB
00032 #undef PMAXUB
00033
// PAVGB(a,b): asm text for the packed unsigned byte average of a into b
// ("pavgb" with MMX2, "pavgusb" with 3DNow!). The REAL_PAVGB indirection lets the
// preprocessor expand macro arguments before they are stringified with '#'.
// Neither macro body is defined when both HAVE_MMX2 and HAVE_AMD3DNOW are 0.
00034 #if HAVE_MMX2
00035 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00036 #elif HAVE_AMD3DNOW
00037 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00038 #endif
00039 #define PAVGB(a,b) REAL_PAVGB(a,b)
00040
// PMINUB(x,y,t): leaves the per-byte unsigned minimum of x and y in the SECOND argument.
// MMX2 uses "pminub" directly (t is unused). The plain MMX fallback names its parameters
// (b,a,t) and computes a -= saturating(a - b), using t as a scratch register.
00041 #if HAVE_MMX2
00042 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
00043 #elif HAVE_MMX
00044 #define PMINUB(b,a,t) \
00045 "movq " #a ", " #t " \n\t"\
00046 "psubusb " #b ", " #t " \n\t"\
00047 "psubb " #t ", " #a " \n\t"
00048 #endif
00049
// PMAXUB(a,b): leaves the per-byte unsigned maximum of a and b in b.
// The plain MMX fallback computes b = saturating(b - a) + a.
00050 #if HAVE_MMX2
00051 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
00052 #elif HAVE_MMX
00053 #define PMAXUB(a,b) \
00054 "psubusb " #a ", " #b " \n\t"\
00055 "paddb " #a ", " #b " \n\t"
00056 #endif
00057
00058
00059 #if HAVE_MMX
00060
/**
 * Classifies the 8 rows of 8 bytes starting at src + 4*stride.
 *
 * Two quantities are computed with MMX registers:
 *  - numEq: for each of the 7 pairs of vertically adjacent rows, the byte-wise difference
 *    (upper row minus lower row) is biased by c->mmxDcOffset[c->nonBQP] and compared
 *    (signed, per byte) against c->mmxDcThreshold[c->nonBQP]; numEq is the number of
 *    comparisons that succeed, summed over the 8 columns.
 *  - dcOk: despite its name, non-zero when in at least one column (max - min) over the
 *    8 rows exceeds twice the corresponding byte of c->pQPb (saturating arithmetic).
 *
 * @param src    pointer to the pixel data; rows are stride bytes apart
 * @param stride distance in bytes between consecutive rows
 * @param c      context supplying mmxDcOffset, mmxDcThreshold, nonBQP, pQPb and
 *               ppMode.flatnessThreshold
 * @return 2 if numEq <= c->ppMode.flatnessThreshold; otherwise 0 if dcOk is non-zero,
 *         else 1. (Presumably the caller maps these to the filter to apply -- confirm.)
 */
00063 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
00064 int numEq= 0, dcOk;
00065 src+= stride*4;
// Load the comparison bias into mm7 and the threshold into mm6. The following asm
// statement relies on both registers still holding these values.
00066 __asm__ volatile(
00067 "movq %0, %%mm7 \n\t"
00068 "movq %1, %%mm6 \n\t"
00069 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
00070 );
00071
00072 __asm__ volatile(
// REG_a = src + stride (row 1)
00073 "lea (%2, %3), %%"REG_a" \n\t"
00074
00075
00076
// Rows 0 and 1: mm3 / mm4 start the running per-column minimum / maximum,
// mm0 becomes 0xFF in every byte where (row0 - row1 + bias) > threshold.
00077 "movq (%2), %%mm0 \n\t"
00078 "movq (%%"REG_a"), %%mm1 \n\t"
00079 "movq %%mm0, %%mm3 \n\t"
00080 "movq %%mm0, %%mm4 \n\t"
00081 PMAXUB(%%mm1, %%mm4)
00082 PMINUB(%%mm1, %%mm3, %%mm5)
00083 "psubb %%mm1, %%mm0 \n\t"
00084 "paddb %%mm7, %%mm0 \n\t"
00085 "pcmpgtb %%mm6, %%mm0 \n\t"
00086
// Row 2: update min/max, compare (row1 - row2) and accumulate the mask into mm0.
// Each 0xFF mask byte acts as -1, so mm0 holds negated per-column counts.
00087 "movq (%%"REG_a",%3), %%mm2 \n\t"
00088 PMAXUB(%%mm2, %%mm4)
00089 PMINUB(%%mm2, %%mm3, %%mm5)
00090 "psubb %%mm2, %%mm1 \n\t"
00091 "paddb %%mm7, %%mm1 \n\t"
00092 "pcmpgtb %%mm6, %%mm1 \n\t"
00093 "paddb %%mm1, %%mm0 \n\t"
00094
// Row 3
00095 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
00096 PMAXUB(%%mm1, %%mm4)
00097 PMINUB(%%mm1, %%mm3, %%mm5)
00098 "psubb %%mm1, %%mm2 \n\t"
00099 "paddb %%mm7, %%mm2 \n\t"
00100 "pcmpgtb %%mm6, %%mm2 \n\t"
00101 "paddb %%mm2, %%mm0 \n\t"
00102
// REG_a advances by 4 rows: it now points at row 5.
00103 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
00104
// Row 4
00105 "movq (%2, %3, 4), %%mm2 \n\t"
00106 PMAXUB(%%mm2, %%mm4)
00107 PMINUB(%%mm2, %%mm3, %%mm5)
00108 "psubb %%mm2, %%mm1 \n\t"
00109 "paddb %%mm7, %%mm1 \n\t"
00110 "pcmpgtb %%mm6, %%mm1 \n\t"
00111 "paddb %%mm1, %%mm0 \n\t"
00112
// Row 5
00113 "movq (%%"REG_a"), %%mm1 \n\t"
00114 PMAXUB(%%mm1, %%mm4)
00115 PMINUB(%%mm1, %%mm3, %%mm5)
00116 "psubb %%mm1, %%mm2 \n\t"
00117 "paddb %%mm7, %%mm2 \n\t"
00118 "pcmpgtb %%mm6, %%mm2 \n\t"
00119 "paddb %%mm2, %%mm0 \n\t"
00120
// Row 6
00121 "movq (%%"REG_a", %3), %%mm2 \n\t"
00122 PMAXUB(%%mm2, %%mm4)
00123 PMINUB(%%mm2, %%mm3, %%mm5)
00124 "psubb %%mm2, %%mm1 \n\t"
00125 "paddb %%mm7, %%mm1 \n\t"
00126 "pcmpgtb %%mm6, %%mm1 \n\t"
00127 "paddb %%mm1, %%mm0 \n\t"
00128
// Row 7, then mm4 = per-column (max - min).
00129 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
00130 PMAXUB(%%mm1, %%mm4)
00131 PMINUB(%%mm1, %%mm3, %%mm5)
00132 "psubb %%mm1, %%mm2 \n\t"
00133 "paddb %%mm7, %%mm2 \n\t"
00134 "pcmpgtb %%mm6, %%mm2 \n\t"
00135 "paddb %%mm2, %%mm0 \n\t"
00136 "psubusb %%mm3, %%mm4 \n\t"
00137
00138 " \n\t"
// Horizontal sum of the 8 bytes of mm0; only the low byte of the result is used later.
00139 #if HAVE_MMX2
00140 "pxor %%mm7, %%mm7 \n\t"
00141 "psadbw %%mm7, %%mm0 \n\t"
00142 #else
00143 "movq %%mm0, %%mm1 \n\t"
00144 "psrlw $8, %%mm0 \n\t"
00145 "paddb %%mm1, %%mm0 \n\t"
00146 "movq %%mm0, %%mm1 \n\t"
00147 "psrlq $16, %%mm0 \n\t"
00148 "paddb %%mm1, %%mm0 \n\t"
00149 "movq %%mm0, %%mm1 \n\t"
00150 "psrlq $32, %%mm0 \n\t"
00151 "paddb %%mm1, %%mm0 \n\t"
00152 #endif
// mm4 = saturating((max - min) - 2 * pQPb); any non-zero byte survives the pack below,
// so dcOk != 0 exactly when some column's range exceeds 2 * pQPb.
00153 "movq %4, %%mm7 \n\t"
00154 "paddusb %%mm7, %%mm7 \n\t"
00155 "psubusb %%mm7, %%mm4 \n\t"
00156 "packssdw %%mm4, %%mm4 \n\t"
00157 "movd %%mm0, %0 \n\t"
00158 "movd %%mm4, %1 \n\t"
00159
00160 : "=r" (numEq), "=r" (dcOk)
00161 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00162 : "%"REG_a
00163 );
00164
// The accumulator held negated counts; negate and keep the low byte to get the real count.
00165 numEq= (-numEq) &0xFF;
00166 if(numEq > c->ppMode.flatnessThreshold){
00167 if(dcOk) return 0;
00168 else return 1;
00169 }else{
00170 return 2;
00171 }
00172 }
00173 #endif //HAVE_MMX
00174
00179 #if !HAVE_ALTIVEC
/**
 * Vertical low-pass filter, applied in place to the 8 rows src[l1]..src[l8] counted from
 * src + 3*stride (rows 1..8 below; row k is at src + 3*stride + k*stride).
 *
 * Rows 0 and 9 are only read. They are replaced by their inner neighbour (row 1 / row 8)
 * when they differ from it by too much relative to the quantiser value.
 *
 * Two implementations are selected at compile time:
 *  - HAVE_MMX2 || HAVE_AMD3DNOW: inline asm that processes 8 columns per instruction and
 *    builds the result from chained PAVGB (byte average) operations;
 *  - otherwise: a C loop over BLOCK_SIZE columns using running sums.
 *
 * @param src    pointer to the pixel data
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm path reads c->pQPb, the C path reads c->QP
 */
00180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00181 {
00182 #if HAVE_MMX2 || HAVE_AMD3DNOW
00183 src+= stride*3;
00184 __asm__ volatile(
00185 "movq %2, %%mm0 \n\t"
00186 "pxor %%mm4, %%mm4 \n\t"
00187
// mm6 = per byte: |row0 - row1| <= pQPb ? row0 : row1   (mm2 is the selection mask)
00188 "movq (%0), %%mm6 \n\t"
00189 "movq (%0, %1), %%mm5 \n\t"
00190 "movq %%mm5, %%mm1 \n\t"
00191 "movq %%mm6, %%mm2 \n\t"
00192 "psubusb %%mm6, %%mm5 \n\t"
00193 "psubusb %%mm1, %%mm2 \n\t"
00194 "por %%mm5, %%mm2 \n\t"
00195 "psubusb %%mm0, %%mm2 \n\t"
00196 "pcmpeqb %%mm4, %%mm2 \n\t"
00197
00198 "pand %%mm2, %%mm6 \n\t"
00199 "pandn %%mm1, %%mm2 \n\t"
00200 "por %%mm2, %%mm6 \n\t"
00201
// mm5 = row 8; REG_a = row 4; REG_c = row 7. %0 is then advanced to row 1 (this is undone
// by the final "sub %1, %0"), so (%0, %1, 8) addresses row 9.
// mm7 = per byte: |row8 - row9| <= pQPb ? row9 : row8
00202 "movq (%0, %1, 8), %%mm5 \n\t"
00203 "lea (%0, %1, 4), %%"REG_a" \n\t"
00204 "lea (%0, %1, 8), %%"REG_c" \n\t"
00205 "sub %1, %%"REG_c" \n\t"
00206 "add %1, %0 \n\t"
00207 "movq (%0, %1, 8), %%mm7 \n\t"
00208 "movq %%mm5, %%mm1 \n\t"
00209 "movq %%mm7, %%mm2 \n\t"
00210 "psubusb %%mm7, %%mm5 \n\t"
00211 "psubusb %%mm1, %%mm2 \n\t"
00212 "por %%mm5, %%mm2 \n\t"
00213 "psubusb %%mm0, %%mm2 \n\t"
00214 "pcmpeqb %%mm4, %%mm2 \n\t"
00215
00216 "pand %%mm2, %%mm7 \n\t"
00217 "pandn %%mm1, %%mm2 \n\t"
00218 "por %%mm2, %%mm7 \n\t"
00219
00220
00221
00222
00223
00224
00225
00226
// From here on %0 points at row 1. Each group below ends with a store of one filtered row;
// rows are read before they are overwritten, so the statement order matters.
00227 "movq (%0, %1), %%mm0 \n\t"
00228 "movq %%mm0, %%mm1 \n\t"
00229 PAVGB(%%mm6, %%mm0)
00230 PAVGB(%%mm6, %%mm0)
00231
00232 "movq (%0, %1, 4), %%mm2 \n\t"
00233 "movq %%mm2, %%mm5 \n\t"
00234 PAVGB((%%REGa), %%mm2)
00235 PAVGB((%0, %1, 2), %%mm2)
00236 "movq %%mm2, %%mm3 \n\t"
00237 "movq (%0), %%mm4 \n\t"
00238 PAVGB(%%mm4, %%mm3)
00239 PAVGB(%%mm0, %%mm3)
// store row 1
00240 "movq %%mm3, (%0) \n\t"
00241
00242 "movq %%mm1, %%mm0 \n\t"
00243 PAVGB(%%mm6, %%mm0)
00244 "movq %%mm4, %%mm3 \n\t"
00245 PAVGB((%0,%1,2), %%mm3)
00246 PAVGB((%%REGa,%1,2), %%mm5)
00247 PAVGB((%%REGa), %%mm5)
00248 PAVGB(%%mm5, %%mm3)
00249 PAVGB(%%mm0, %%mm3)
// store row 2
00250 "movq %%mm3, (%0,%1) \n\t"
00251
00252 PAVGB(%%mm4, %%mm6)
00253 "movq (%%"REG_c"), %%mm0 \n\t"
00254 PAVGB((%%REGa, %1, 2), %%mm0)
00255 "movq %%mm0, %%mm3 \n\t"
00256 PAVGB(%%mm1, %%mm0)
00257 PAVGB(%%mm6, %%mm0)
00258 PAVGB(%%mm2, %%mm0)
// keep the unfiltered row 3 in mm2, then store row 3
00259 "movq (%0, %1, 2), %%mm2 \n\t"
00260 "movq %%mm0, (%0, %1, 2) \n\t"
00261
00262 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00263 PAVGB((%%REGc), %%mm0)
00264 PAVGB(%%mm0, %%mm6)
00265 PAVGB(%%mm1, %%mm4)
00266 PAVGB(%%mm2, %%mm1)
00267 PAVGB(%%mm1, %%mm6)
00268 PAVGB(%%mm5, %%mm6)
// keep the unfiltered row 4 in mm5, then store row 4
00269 "movq (%%"REG_a"), %%mm5 \n\t"
00270 "movq %%mm6, (%%"REG_a") \n\t"
00271
00272 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00273 PAVGB(%%mm7, %%mm6)
00274 PAVGB(%%mm4, %%mm6)
00275 PAVGB(%%mm3, %%mm6)
00276 PAVGB(%%mm5, %%mm2)
00277 "movq (%0, %1, 4), %%mm4 \n\t"
00278 PAVGB(%%mm4, %%mm2)
00279 PAVGB(%%mm2, %%mm6)
// store row 5
00280 "movq %%mm6, (%0, %1, 4) \n\t"
00281
00282 PAVGB(%%mm7, %%mm1)
00283 PAVGB(%%mm4, %%mm5)
00284 PAVGB(%%mm5, %%mm0)
00285 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00286 PAVGB(%%mm6, %%mm1)
00287 PAVGB(%%mm0, %%mm1)
// store row 6
00288 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
00289
00290 PAVGB((%%REGc), %%mm2)
00291 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00292 PAVGB(%%mm0, %%mm6)
00293 PAVGB(%%mm7, %%mm6)
00294 PAVGB(%%mm2, %%mm6)
// store row 7
00295 "movq %%mm6, (%%"REG_c") \n\t"
00296
00297 PAVGB(%%mm7, %%mm5)
00298 PAVGB(%%mm7, %%mm5)
00299
00300 PAVGB(%%mm3, %%mm0)
00301 PAVGB(%%mm0, %%mm5)
// store row 8, then restore %0 to its value on entry to the asm statement
00302 "movq %%mm5, (%%"REG_a", %1, 4) \n\t"
00303 "sub %1, %0 \n\t"
00304
00305 :
00306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00307 : "%"REG_a, "%"REG_c
00308 );
00309 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Row offsets relative to src (after the src += stride*3 below): lN == N * stride.
00310 const int l1= stride;
00311 const int l2= stride + l1;
00312 const int l3= stride + l2;
00313 const int l4= stride + l3;
00314 const int l5= stride + l4;
00315 const int l6= stride + l5;
00316 const int l7= stride + l6;
00317 const int l8= stride + l7;
00318 const int l9= stride + l8;
00319 int x;
00320 src+= stride*3;
00321 for(x=0; x<BLOCK_SIZE; x++){
// Outer rows are used only if they are close to their inner neighbour.
// NOTE(review): this path tests "< c->QP" while the asm path keeps the outer row when the
// difference is <= the pQPb byte; presumably pQPb replicates QP -- confirm whether the
// off-by-one between the two paths is intended.
00322 const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
00323 const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
00324
// sums[k] is a sliding sum over the column (padded with first/last); the constant 4 added
// to sums[0] propagates to every sums[k], so each output below carries a bias of 8
// before the >>4.
00325 int sums[10];
00326 sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00327 sums[1] = sums[0] - first + src[l4];
00328 sums[2] = sums[1] - first + src[l5];
00329 sums[3] = sums[2] - first + src[l6];
00330 sums[4] = sums[3] - first + src[l7];
00331 sums[5] = sums[4] - src[l1] + src[l8];
00332 sums[6] = sums[5] - src[l2] + last;
00333 sums[7] = sums[6] - src[l3] + last;
00334 sums[8] = sums[7] - src[l4] + last;
00335 sums[9] = sums[8] - src[l5] + last;
00336
// Each output combines two sliding sums and twice the original sample, divided by 16.
// All sums were taken from the unmodified column above, so writing in place is safe.
00337 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00338 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00339 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00340 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00341 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00342 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00343 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00344 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00345
// next column
00346 src++;
00347 }
00348 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00349 }
00350 #endif //HAVE_ALTIVEC
00351
00352 #if 0
00353
/**
 * Vertical deblocking filter that adjusts the two rows on each side of the boundary
 * between rows l4 and l5 (counted from src + 3*stride).
 *
 * NOTE: this function is currently compiled out; it sits inside an "#if 0" region.
 *
 * In the C path, for each of BLOCK_SIZE columns, v = src[l5] - src[l4]; when
 * |v| < QP + QP/4 the rows l3..l6 are adjusted by v>>3, v>>1, -(v>>1), -(v>>3).
 * The asm path reads the symbols pQPb, b80, b02, b3F and b20 through MANGLE() rather
 * than taking them from its QP parameter.
 *
 * @param src    pointer to the pixel data
 * @param stride distance in bytes between consecutive rows
 * @param QP     quantiser value used as threshold (C path only)
 */
00365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
00366 {
00367 #if HAVE_MMX2 || HAVE_AMD3DNOW
00368 src+= stride*3;
00369
00370 __asm__ volatile(
// mm7 = 0, mm6 = b80; REG_a = row 1, REG_c = row 5 (relative to the advanced src).
// NOTE(review): "leal" is the 32-bit form while the operands are pointer-sized registers;
// other functions in this file use plain "lea" -- check before re-enabling this code.
00371 "pxor %%mm7, %%mm7 \n\t"
00372 "movq "MANGLE(b80)", %%mm6 \n\t"
00373 "leal (%0, %1), %%"REG_a" \n\t"
00374 "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00375
00376
00377 "movq "MANGLE(pQPb)", %%mm0 \n\t"
00378 "movq %%mm0, %%mm1 \n\t"
00379 "paddusb "MANGLE(b02)", %%mm0 \n\t"
00380 "psrlw $2, %%mm0 \n\t"
00381 "pand "MANGLE(b3F)", %%mm0 \n\t"
00382 "paddusb %%mm1, %%mm0 \n\t"
// mm2 = row 4, mm3 = row 5
00383 "movq (%0, %1, 4), %%mm2 \n\t"
00384 "movq (%%"REG_c"), %%mm3 \n\t"
00385 "movq %%mm2, %%mm4 \n\t"
00386 "pcmpeqb %%mm5, %%mm5 \n\t"
00387 "pxor %%mm2, %%mm5 \n\t"
00388 PAVGB(%%mm3, %%mm5)
00389 "paddb %%mm6, %%mm5 \n\t"
00390 "psubusb %%mm3, %%mm4 \n\t"
00391 "psubusb %%mm2, %%mm3 \n\t"
00392 "por %%mm3, %%mm4 \n\t"
00393 "psubusb %%mm0, %%mm4 \n\t"
00394 "pcmpeqb %%mm7, %%mm4 \n\t"
00395 "pand %%mm4, %%mm5 \n\t"
00396
00397
00398 "paddb %%mm5, %%mm2 \n\t"
00399
// store row 4
00400 "movq %%mm2, (%0,%1, 4) \n\t"
00401
00402 "movq (%%"REG_c"), %%mm2 \n\t"
00403
00404 "psubb %%mm5, %%mm2 \n\t"
00405
// store row 5
00406 "movq %%mm2, (%%"REG_c") \n\t"
00407
00408 "paddb %%mm6, %%mm5 \n\t"
00409 "psrlw $2, %%mm5 \n\t"
00410 "pand "MANGLE(b3F)", %%mm5 \n\t"
00411 "psubb "MANGLE(b20)", %%mm5 \n\t"
00412
// update row 3
00413 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
00414 "paddb %%mm6, %%mm2 \n\t"
00415 "paddsb %%mm5, %%mm2 \n\t"
00416 "psubb %%mm6, %%mm2 \n\t"
00417 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
00418
// update row 6
00419 "movq (%%"REG_c", %1), %%mm2 \n\t"
00420 "paddb %%mm6, %%mm2 \n\t"
00421 "psubsb %%mm5, %%mm2 \n\t"
00422 "psubb %%mm6, %%mm2 \n\t"
00423 "movq %%mm2, (%%"REG_c", %1) \n\t"
00424
00425 :
00426 : "r" (src), "r" ((x86_reg)stride)
00427 : "%"REG_a, "%"REG_c
00428 );
00429 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Row offsets relative to src (after the src += stride*3 below): lN == N * stride.
00430 const int l1= stride;
00431 const int l2= stride + l1;
00432 const int l3= stride + l2;
00433 const int l4= stride + l3;
00434 const int l5= stride + l4;
00435 const int l6= stride + l5;
00436
00437
00438
00439 int x;
// threshold: QP plus a quarter of QP
00440 const int QP15= QP + (QP>>2);
00441 src+= stride*3;
00442 for(x=0; x<BLOCK_SIZE; x++){
00443 const int v = (src[x+l5] - src[x+l4]);
00444 if(FFABS(v) < QP15){
00445 src[x+l3] +=v>>3;
00446 src[x+l4] +=v>>1;
00447 src[x+l5] -=v>>1;
00448 src[x+l6] -=v>>3;
00449 }
00450 }
00451
00452 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00453 }
00454 #endif //0
00455
/**
 * Vertical deblocking filter across the boundary between rows l4 and l5 (counted from
 * src + 3*stride). It modifies rows l2..l7 in place and reads rows l3..l6.
 *
 * C path, per column (BLOCK_SIZE columns):
 *   a = l3 - l4, b = l4 - l5, c = l5 - l6
 *   d = max(|b| - (|a| + |c|)/2, 0)
 *   if d < 2*co->QP: v = d * sign(-b) and rows l2..l7 are changed by
 *   +v/8, +v/4, +3v/8, -3v/8, -v/4, -v/8 (using arithmetic right shifts).
 *
 * The HAVE_MMX2 || HAVE_AMD3DNOW path handles 8 columns per instruction using unsigned
 * saturating byte arithmetic and PAVGB; it reads co->pQPb and the b01 constant.
 * (Presumably it approximates the C path; it has not been checked to be bit-exact.)
 *
 * @param src    pointer to the pixel data
 * @param stride distance in bytes between consecutive rows
 * @param co     context; the asm path reads co->pQPb, the C path reads co->QP
 */
00463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00464 {
00465 #if HAVE_MMX2 || HAVE_AMD3DNOW
00466 src+= stride*3;
00467
00468 __asm__ volatile(
// mm7 = 0; REG_a = row 1, REG_c = row 5 (row k is at src + k*stride after the advance).
00469 "pxor %%mm7, %%mm7 \n\t"
00470 "lea (%0, %1), %%"REG_a" \n\t"
00471 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00472
00473
// mm0 = |row3 - row4|, mm2 keeps row 4
00474 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00475 "movq (%0, %1, 4), %%mm1 \n\t"
00476 "movq %%mm1, %%mm2 \n\t"
00477 "psubusb %%mm0, %%mm1 \n\t"
00478 "psubusb %%mm2, %%mm0 \n\t"
00479 "por %%mm1, %%mm0 \n\t"
// mm3 = |row5 - row6|, mm5 keeps row 5
00480 "movq (%%"REG_c"), %%mm3 \n\t"
00481 "movq (%%"REG_c", %1), %%mm4 \n\t"
00482 "movq %%mm3, %%mm5 \n\t"
00483 "psubusb %%mm4, %%mm3 \n\t"
00484 "psubusb %%mm5, %%mm4 \n\t"
00485 "por %%mm4, %%mm3 \n\t"
// mm0 = average of the two outer differences
00486 PAVGB(%%mm3, %%mm0)
// mm4 = |row4 - row5|; mm2 becomes a mask that is 0xFF where row4 <= row5 (it is used
// below with pxor to apply the correction in the right direction).
00487 "movq %%mm2, %%mm1 \n\t"
00488 "psubusb %%mm5, %%mm2 \n\t"
00489 "movq %%mm2, %%mm4 \n\t"
00490 "pcmpeqb %%mm7, %%mm2 \n\t"
00491 "psubusb %%mm1, %%mm5 \n\t"
00492 "por %%mm5, %%mm4 \n\t"
// mm4 = mm3 = saturating(|row4 - row5| - averaged outer difference)
00493 "psubusb %%mm0, %%mm4 \n\t"
00494 "movq %%mm4, %%mm3 \n\t"
// mm4 = mask of bytes where that value is <= 2 * pQPb (saturating doubling)
00495 "movq %2, %%mm0 \n\t"
00496 "paddusb %%mm0, %%mm0 \n\t"
00497 "psubusb %%mm0, %%mm4 \n\t"
00498 "pcmpeqb %%mm7, %%mm4 \n\t"
00499 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00500 "pand %%mm4, %%mm3 \n\t"
00501
// Scale the correction with byte averages against zero; mm1 keeps the half-size value
// that is halved again further below for the outer rows.
00502 PAVGB(%%mm7, %%mm3)
00503 "movq %%mm3, %%mm1 \n\t"
00504 PAVGB(%%mm7, %%mm3)
00505 PAVGB(%%mm1, %%mm3)
00506
// row 4: subtract the correction (direction flipped through the mm2 mask)
00507 "movq (%0, %1, 4), %%mm0 \n\t"
00508 "pxor %%mm2, %%mm0 \n\t"
00509 "psubusb %%mm3, %%mm0 \n\t"
00510 "pxor %%mm2, %%mm0 \n\t"
00511 "movq %%mm0, (%0, %1, 4) \n\t"
00512
// row 5: add the correction
00513 "movq (%%"REG_c"), %%mm0 \n\t"
00514 "pxor %%mm2, %%mm0 \n\t"
00515 "paddusb %%mm3, %%mm0 \n\t"
00516 "pxor %%mm2, %%mm0 \n\t"
00517 "movq %%mm0, (%%"REG_c") \n\t"
00518
00519 PAVGB(%%mm7, %%mm1)
00520
// row 3
00521 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00522 "pxor %%mm2, %%mm0 \n\t"
00523 "psubusb %%mm1, %%mm0 \n\t"
00524 "pxor %%mm2, %%mm0 \n\t"
00525 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00526
// row 6
00527 "movq (%%"REG_c", %1), %%mm0 \n\t"
00528 "pxor %%mm2, %%mm0 \n\t"
00529 "paddusb %%mm1, %%mm0 \n\t"
00530 "pxor %%mm2, %%mm0 \n\t"
00531 "movq %%mm0, (%%"REG_c", %1) \n\t"
00532
00533 PAVGB(%%mm7, %%mm1)
00534
// row 2
00535 "movq (%%"REG_a", %1), %%mm0 \n\t"
00536 "pxor %%mm2, %%mm0 \n\t"
00537 "psubusb %%mm1, %%mm0 \n\t"
00538 "pxor %%mm2, %%mm0 \n\t"
00539 "movq %%mm0, (%%"REG_a", %1) \n\t"
00540
// row 7
00541 "movq (%%"REG_c", %1, 2), %%mm0 \n\t"
00542 "pxor %%mm2, %%mm0 \n\t"
00543 "paddusb %%mm1, %%mm0 \n\t"
00544 "pxor %%mm2, %%mm0 \n\t"
00545 "movq %%mm0, (%%"REG_c", %1, 2) \n\t"
00546
00547 :
00548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
00549 : "%"REG_a, "%"REG_c
00550 );
00551 #else //HAVE_MMX2 || HAVE_AMD3DNOW
00552
// Row offsets relative to src (after the src += stride*3 below): lN == N * stride.
// l1 is declared but only l2..l7 are accessed in the loop.
00553 const int l1= stride;
00554 const int l2= stride + l1;
00555 const int l3= stride + l2;
00556 const int l4= stride + l3;
00557 const int l5= stride + l4;
00558 const int l6= stride + l5;
00559 const int l7= stride + l6;
00560
00561
00562 int x;
00563
00564 src+= stride*3;
00565 for(x=0; x<BLOCK_SIZE; x++){
// vertical differences around the boundary between rows l4 and l5
00566 int a= src[l3] - src[l4];
00567 int b= src[l4] - src[l5];
00568 int c= src[l5] - src[l6];
00569
// d: how much the boundary step exceeds the mean of the neighbouring steps, clamped at 0
00570 int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
00571 d= FFMAX(d, 0);
00572
00573 if(d < co->QP*2){
// v carries the sign opposite to b, so the updates below shrink the step
00574 int v = d * FFSIGN(-b);
00575
00576 src[l2] +=v>>3;
00577 src[l3] +=v>>2;
00578 src[l4] +=(3*v)>>3;
00579 src[l5] -=(3*v)>>3;
00580 src[l6] -=v>>2;
00581 src[l7] -=v>>3;
00582 }
// next column
00583 src++;
00584 }
00585 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00586 }
00587
00588 #if !HAVE_ALTIVEC
/**
 * Default vertical deblocking filter. It modifies, in place, only the two rows adjacent
 * to the boundary: src[l4] and src[l5] counted from src + 3*stride.
 *
 * C path, per column (BLOCK_SIZE columns):
 *   middleEnergy = 5*(l5 - l4) + 2*(l3 - l6)
 *   if |middleEnergy| < 8*c->QP:
 *     leftEnergy  = 5*(l3 - l2) + 2*(l1 - l4)
 *     rightEnergy = 5*(l7 - l6) + 2*(l5 - l8)
 *     d = max(|middleEnergy| - min(|leftEnergy|, |rightEnergy|), 0)
 *     d = ((5*d + 32) >> 6) * sign(-middleEnergy), clamped to the range between 0 and
 *         q = (l4 - l5)/2
 *     l4 -= d; l5 += d
 *
 * Compile-time variants:
 *  - HAVE_MMX2 || HAVE_AMD3DNOW: byte-precision asm built on PAVGB (an alternative variant
 *    is kept inside "#if 0" within the asm statement);
 *  - HAVE_MMX: 16-bit precision asm that spills intermediates to a scratch area below the
 *    stack pointer;
 *  - otherwise: the C loop.
 * Both asm variants advance src by 4*stride, so their "row k" equals l(k+1) of the C path.
 *
 * @param src    pointer to the pixel data
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm paths read c->pQPb, the C path reads c->QP
 */
00589 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00590 {
00591 #if HAVE_MMX2 || HAVE_AMD3DNOW
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606 src+= stride*4;
00607 __asm__ volatile(
00608
// Disabled alternative implementation (never assembled).
00609 #if 0 //slightly more accurate and slightly slower
00610 "pxor %%mm7, %%mm7 \n\t"
00611 "lea (%0, %1), %%"REG_a" \n\t"
00612 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00613
00614
00615
00616
00617
00618 "movq (%0, %1, 2), %%mm0 \n\t"
00619 "movq (%0), %%mm1 \n\t"
00620 "movq %%mm0, %%mm2 \n\t"
00621 PAVGB(%%mm7, %%mm0)
00622 PAVGB(%%mm1, %%mm0)
00623 PAVGB(%%mm2, %%mm0)
00624
00625 "movq (%%"REG_a"), %%mm1 \n\t"
00626 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
00627 "movq %%mm1, %%mm4 \n\t"
00628 PAVGB(%%mm7, %%mm1)
00629 PAVGB(%%mm3, %%mm1)
00630 PAVGB(%%mm4, %%mm1)
00631
00632 "movq %%mm0, %%mm4 \n\t"
00633 "psubusb %%mm1, %%mm0 \n\t"
00634 "psubusb %%mm4, %%mm1 \n\t"
00635 "por %%mm0, %%mm1 \n\t"
00636
00637
00638 "movq (%0, %1, 4), %%mm0 \n\t"
00639 "movq %%mm0, %%mm4 \n\t"
00640 PAVGB(%%mm7, %%mm0)
00641 PAVGB(%%mm2, %%mm0)
00642 PAVGB(%%mm4, %%mm0)
00643
00644 "movq (%%"REG_c"), %%mm2 \n\t"
00645 "movq %%mm3, %%mm5 \n\t"
00646 PAVGB(%%mm7, %%mm3)
00647 PAVGB(%%mm2, %%mm3)
00648 PAVGB(%%mm5, %%mm3)
00649
00650 "movq %%mm0, %%mm6 \n\t"
00651 "psubusb %%mm3, %%mm0 \n\t"
00652 "psubusb %%mm6, %%mm3 \n\t"
00653 "por %%mm0, %%mm3 \n\t"
00654 "pcmpeqb %%mm7, %%mm0 \n\t"
00655
00656
00657 "movq (%%"REG_c", %1), %%mm6 \n\t"
00658 "movq %%mm6, %%mm5 \n\t"
00659 PAVGB(%%mm7, %%mm6)
00660 PAVGB(%%mm4, %%mm6)
00661 PAVGB(%%mm5, %%mm6)
00662
00663 "movq (%%"REG_c", %1, 2), %%mm5 \n\t"
00664 "movq %%mm2, %%mm4 \n\t"
00665 PAVGB(%%mm7, %%mm2)
00666 PAVGB(%%mm5, %%mm2)
00667 PAVGB(%%mm4, %%mm2)
00668
00669 "movq %%mm6, %%mm4 \n\t"
00670 "psubusb %%mm2, %%mm6 \n\t"
00671 "psubusb %%mm4, %%mm2 \n\t"
00672 "por %%mm6, %%mm2 \n\t"
00673
00674
00675
00676 PMINUB(%%mm2, %%mm1, %%mm4)
00677 "movq %2, %%mm4 \n\t"
00678 "paddusb "MANGLE(b01)", %%mm4 \n\t"
00679 "pcmpgtb %%mm3, %%mm4 \n\t"
00680 "psubusb %%mm1, %%mm3 \n\t"
00681 "pand %%mm4, %%mm3 \n\t"
00682
00683 "movq %%mm3, %%mm1 \n\t"
00684
00685 PAVGB(%%mm7, %%mm3)
00686 PAVGB(%%mm7, %%mm3)
00687 "paddusb %%mm1, %%mm3 \n\t"
00688
00689
00690 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00691 "movq (%0, %1, 4), %%mm5 \n\t"
00692 "movq (%0, %1, 4), %%mm4 \n\t"
00693 "psubusb %%mm6, %%mm5 \n\t"
00694 "psubusb %%mm4, %%mm6 \n\t"
00695 "por %%mm6, %%mm5 \n\t"
00696 "pcmpeqb %%mm7, %%mm6 \n\t"
00697 "pxor %%mm6, %%mm0 \n\t"
00698 "pand %%mm0, %%mm3 \n\t"
00699 PMINUB(%%mm5, %%mm3, %%mm0)
00700
00701 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00702 PAVGB(%%mm7, %%mm3)
00703
00704 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00705 "movq (%0, %1, 4), %%mm2 \n\t"
00706 "pxor %%mm6, %%mm0 \n\t"
00707 "pxor %%mm6, %%mm2 \n\t"
00708 "psubb %%mm3, %%mm0 \n\t"
00709 "paddb %%mm3, %%mm2 \n\t"
00710 "pxor %%mm6, %%mm0 \n\t"
00711 "pxor %%mm6, %%mm2 \n\t"
00712 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00713 "movq %%mm2, (%0, %1, 4) \n\t"
00714 #endif //0
00715
// Active MMX2 / 3DNow! implementation. REG_a = row 1; mm6 = all ones, used with pxor to
// form the bitwise complement of a row.
00716 "lea (%0, %1), %%"REG_a" \n\t"
00717 "pcmpeqb %%mm6, %%mm6 \n\t"
00718
00719
00720
00721
00722
// mm1 = ~row3, mm0 = avg(row4, ~row3)
00723 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
00724 "movq (%0, %1, 4), %%mm0 \n\t"
00725 "pxor %%mm6, %%mm1 \n\t"
00726 PAVGB(%%mm1, %%mm0)
00727
00728
// rows 5 and 2 contribute to mm4; REG_c = row 5
00729 "movq (%%"REG_a", %1, 4), %%mm2 \n\t"
00730 "movq (%%"REG_a", %1), %%mm3 \n\t"
00731 "pxor %%mm6, %%mm2 \n\t"
00732 "movq %%mm2, %%mm5 \n\t"
00733 "movq "MANGLE(b80)", %%mm4 \n\t"
00734 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00735 PAVGB(%%mm3, %%mm2)
00736 PAVGB(%%mm0, %%mm4)
00737 PAVGB(%%mm2, %%mm4)
00738 PAVGB(%%mm0, %%mm4)
00739
00740
// rows 0..3 contribute to mm3
00741 "movq (%%"REG_a"), %%mm2 \n\t"
00742 "pxor %%mm6, %%mm2 \n\t"
00743 PAVGB(%%mm3, %%mm2)
00744 PAVGB((%0), %%mm1)
00745 "movq "MANGLE(b80)", %%mm3 \n\t"
00746 PAVGB(%%mm2, %%mm3)
00747 PAVGB(%%mm1, %%mm3)
00748 PAVGB(%%mm2, %%mm3)
00749
00750
// rows 4..7 contribute to mm2
00751 PAVGB((%%REGc, %1), %%mm5)
00752 "movq (%%"REG_c", %1, 2), %%mm1 \n\t"
00753 "pxor %%mm6, %%mm1 \n\t"
00754 PAVGB((%0, %1, 4), %%mm1)
00755 "movq "MANGLE(b80)", %%mm2 \n\t"
00756 PAVGB(%%mm5, %%mm2)
00757 PAVGB(%%mm1, %%mm2)
00758 PAVGB(%%mm5, %%mm2)
00759
00760
// mm3 = per-byte minimum of max(mm2, -mm2) and max(mm3, -mm3), using unsigned compares
00761 "movq "MANGLE(b00)", %%mm1 \n\t"
00762 "movq "MANGLE(b00)", %%mm5 \n\t"
00763 "psubb %%mm2, %%mm1 \n\t"
00764 "psubb %%mm3, %%mm5 \n\t"
00765 PMAXUB(%%mm1, %%mm2)
00766 PMAXUB(%%mm5, %%mm3)
00767 PMINUB(%%mm2, %%mm3, %%mm1)
00768
00769
00770
00771 "movq "MANGLE(b00)", %%mm7 \n\t"
00772 "movq %2, %%mm2 \n\t"
00773 PAVGB(%%mm6, %%mm2)
00774 "psubb %%mm6, %%mm2 \n\t"
00775
// mm1 = sign mask of mm4, mm4 = |mm4|; mm2 becomes the mask "threshold > |mm4|"
00776 "movq %%mm4, %%mm1 \n\t"
00777 "pcmpgtb %%mm7, %%mm1 \n\t"
00778 "pxor %%mm1, %%mm4 \n\t"
00779 "psubb %%mm1, %%mm4 \n\t"
00780 "pcmpgtb %%mm4, %%mm2 \n\t"
00781 "psubusb %%mm3, %%mm4 \n\t"
00782
00783
00784 "movq %%mm4, %%mm3 \n\t"
00785 "psubusb "MANGLE(b01)", %%mm4 \n\t"
00786 PAVGB(%%mm7, %%mm4)
00787 PAVGB(%%mm7, %%mm4)
00788 "paddb %%mm3, %%mm4 \n\t"
00789 "pand %%mm2, %%mm4 \n\t"
00790
00791 "movq "MANGLE(b80)", %%mm5 \n\t"
00792 "psubb %%mm0, %%mm5 \n\t"
00793 "paddsb %%mm6, %%mm5 \n\t"
00794 "pcmpgtb %%mm5, %%mm7 \n\t"
00795 "pxor %%mm7, %%mm5 \n\t"
00796
// limit the correction in mm4 by mm5, then zero it where the sign masks mm1 and mm7 agree
00797 PMINUB(%%mm5, %%mm4, %%mm3)
00798 "pxor %%mm1, %%mm7 \n\t"
00799
00800 "pand %%mm7, %%mm4 \n\t"
// apply the correction to rows 3 and 4 (l4 and l5 of the C path) and store them
00801 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00802 "movq (%0, %1, 4), %%mm2 \n\t"
00803 "pxor %%mm1, %%mm0 \n\t"
00804 "pxor %%mm1, %%mm2 \n\t"
00805 "paddb %%mm4, %%mm0 \n\t"
00806 "psubb %%mm4, %%mm2 \n\t"
00807 "pxor %%mm1, %%mm0 \n\t"
00808 "pxor %%mm1, %%mm2 \n\t"
00809 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00810 "movq %%mm2, (%0, %1, 4) \n\t"
00811
00812 :
00813 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00814 : "%"REG_a, "%"REG_c
00815 );
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872 #elif HAVE_MMX
00873 src+= stride*4;
00874 __asm__ volatile(
// mm7 = 0 (used to widen bytes to 16-bit words). REG_c = 8-byte aligned scratch area
// below the stack pointer; 32 bytes at offsets 0..24 are used for temporaries.
// NOTE(review): this memory is below the stack pointer and is not declared to the
// compiler -- confirm it is safe on every targeted ABI.
00875 "pxor %%mm7, %%mm7 \n\t"
00876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
00877 "and "ALIGN_MASK", %%"REG_c" \n\t"
00878
00879
00880
00881
// row 0 -> mm0 (low 4 columns) / mm1 (high 4 columns), as words
00882 "movq (%0), %%mm0 \n\t"
00883 "movq %%mm0, %%mm1 \n\t"
00884 "punpcklbw %%mm7, %%mm0 \n\t"
00885 "punpckhbw %%mm7, %%mm1 \n\t"
00886
// row 1 -> mm2/mm3; REG_a = row 2
00887 "movq (%0, %1), %%mm2 \n\t"
00888 "lea (%0, %1, 2), %%"REG_a" \n\t"
00889 "movq %%mm2, %%mm3 \n\t"
00890 "punpcklbw %%mm7, %%mm2 \n\t"
00891 "punpckhbw %%mm7, %%mm3 \n\t"
00892
// row 2 -> mm4/mm5
00893 "movq (%%"REG_a"), %%mm4 \n\t"
00894 "movq %%mm4, %%mm5 \n\t"
00895 "punpcklbw %%mm7, %%mm4 \n\t"
00896 "punpckhbw %%mm7, %%mm5 \n\t"
00897
// mm0/mm1 = 2*row0 - 5*(row1 - row2)
00898 "paddw %%mm0, %%mm0 \n\t"
00899 "paddw %%mm1, %%mm1 \n\t"
00900 "psubw %%mm4, %%mm2 \n\t"
00901 "psubw %%mm5, %%mm3 \n\t"
00902 "psubw %%mm2, %%mm0 \n\t"
00903 "psubw %%mm3, %%mm1 \n\t"
00904
00905 "psllw $2, %%mm2 \n\t"
00906 "psllw $2, %%mm3 \n\t"
00907 "psubw %%mm2, %%mm0 \n\t"
00908 "psubw %%mm3, %%mm1 \n\t"
00909
// row 3 -> mm2/mm3
00910 "movq (%%"REG_a", %1), %%mm2 \n\t"
00911 "movq %%mm2, %%mm3 \n\t"
00912 "punpcklbw %%mm7, %%mm2 \n\t"
00913 "punpckhbw %%mm7, %%mm3 \n\t"
00914
// subtract 2*row3: this is the C path's leftEnergy; spill it to scratch[0..15]
00915 "psubw %%mm2, %%mm0 \n\t"
00916 "psubw %%mm3, %%mm1 \n\t"
00917 "psubw %%mm2, %%mm0 \n\t"
00918 "psubw %%mm3, %%mm1 \n\t"
00919 "movq %%mm0, (%%"REG_c") \n\t"
00920 "movq %%mm1, 8(%%"REG_c") \n\t"
00921
// row 4 -> mm0/mm1
00922 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00923 "movq %%mm0, %%mm1 \n\t"
00924 "punpcklbw %%mm7, %%mm0 \n\t"
00925 "punpckhbw %%mm7, %%mm1 \n\t"
00926
// mm2/mm3 = row3 - row4, spilled to scratch[16..31] (used later for the clamp limit)
00927 "psubw %%mm0, %%mm2 \n\t"
00928 "psubw %%mm1, %%mm3 \n\t"
00929 "movq %%mm2, 16(%%"REG_c") \n\t"
00930 "movq %%mm3, 24(%%"REG_c") \n\t"
00931 "paddw %%mm4, %%mm4 \n\t"
00932 "paddw %%mm5, %%mm5 \n\t"
00933 "psubw %%mm2, %%mm4 \n\t"
00934 "psubw %%mm3, %%mm5 \n\t"
00935
// %0 now points at row 3 (src is a read-write operand, see "+r" below)
00936 "lea (%%"REG_a", %1), %0 \n\t"
00937 "psllw $2, %%mm2 \n\t"
00938 "psllw $2, %%mm3 \n\t"
00939 "psubw %%mm2, %%mm4 \n\t"
00940 "psubw %%mm3, %%mm5 \n\t"
00941
// row 5 -> mm2/mm3; mm4/mm5 become the middle energy term
00942 "movq (%0, %1, 2), %%mm2 \n\t"
00943 "movq %%mm2, %%mm3 \n\t"
00944 "punpcklbw %%mm7, %%mm2 \n\t"
00945 "punpckhbw %%mm7, %%mm3 \n\t"
00946 "psubw %%mm2, %%mm4 \n\t"
00947 "psubw %%mm3, %%mm5 \n\t"
00948 "psubw %%mm2, %%mm4 \n\t"
00949 "psubw %%mm3, %%mm5 \n\t"
00950
// row 6 is loaded twice because no free register is left to keep a copy
00951 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00952 "punpcklbw %%mm7, %%mm6 \n\t"
00953 "psubw %%mm6, %%mm2 \n\t"
00954 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00955 "punpckhbw %%mm7, %%mm6 \n\t"
00956 "psubw %%mm6, %%mm3 \n\t"
00957
00958 "paddw %%mm0, %%mm0 \n\t"
00959 "paddw %%mm1, %%mm1 \n\t"
00960 "psubw %%mm2, %%mm0 \n\t"
00961 "psubw %%mm3, %%mm1 \n\t"
00962
00963 "psllw $2, %%mm2 \n\t"
00964 "psllw $2, %%mm3 \n\t"
00965 "psubw %%mm2, %%mm0 \n\t"
00966 "psubw %%mm3, %%mm1 \n\t"
00967
// row 7 -> mm2/mm3; mm0/mm1 become the right energy term
00968 "movq (%0, %1, 4), %%mm2 \n\t"
00969 "movq %%mm2, %%mm3 \n\t"
00970 "punpcklbw %%mm7, %%mm2 \n\t"
00971 "punpckhbw %%mm7, %%mm3 \n\t"
00972
00973 "paddw %%mm2, %%mm2 \n\t"
00974 "paddw %%mm3, %%mm3 \n\t"
00975 "psubw %%mm2, %%mm0 \n\t"
00976 "psubw %%mm3, %%mm1 \n\t"
00977
// reload the left energy term from scratch
00978 "movq (%%"REG_c"), %%mm2 \n\t"
00979 "movq 8(%%"REG_c"), %%mm3 \n\t"
00980
// absolute values of the right (mm0/mm1) and left (mm2/mm3) terms
00981 #if HAVE_MMX2
00982 "movq %%mm7, %%mm6 \n\t"
00983 "psubw %%mm0, %%mm6 \n\t"
00984 "pmaxsw %%mm6, %%mm0 \n\t"
00985 "movq %%mm7, %%mm6 \n\t"
00986 "psubw %%mm1, %%mm6 \n\t"
00987 "pmaxsw %%mm6, %%mm1 \n\t"
00988 "movq %%mm7, %%mm6 \n\t"
00989 "psubw %%mm2, %%mm6 \n\t"
00990 "pmaxsw %%mm6, %%mm2 \n\t"
00991 "movq %%mm7, %%mm6 \n\t"
00992 "psubw %%mm3, %%mm6 \n\t"
00993 "pmaxsw %%mm6, %%mm3 \n\t"
00994 #else
00995 "movq %%mm7, %%mm6 \n\t"
00996 "pcmpgtw %%mm0, %%mm6 \n\t"
00997 "pxor %%mm6, %%mm0 \n\t"
00998 "psubw %%mm6, %%mm0 \n\t"
00999 "movq %%mm7, %%mm6 \n\t"
01000 "pcmpgtw %%mm1, %%mm6 \n\t"
01001 "pxor %%mm6, %%mm1 \n\t"
01002 "psubw %%mm6, %%mm1 \n\t"
01003 "movq %%mm7, %%mm6 \n\t"
01004 "pcmpgtw %%mm2, %%mm6 \n\t"
01005 "pxor %%mm6, %%mm2 \n\t"
01006 "psubw %%mm6, %%mm2 \n\t"
01007 "movq %%mm7, %%mm6 \n\t"
01008 "pcmpgtw %%mm3, %%mm6 \n\t"
01009 "pxor %%mm6, %%mm3 \n\t"
01010 "psubw %%mm6, %%mm3 \n\t"
01011 #endif
01012
// mm0/mm1 = min(|left|, |right|)
01013 #if HAVE_MMX2
01014 "pminsw %%mm2, %%mm0 \n\t"
01015 "pminsw %%mm3, %%mm1 \n\t"
01016 #else
01017 "movq %%mm0, %%mm6 \n\t"
01018 "psubusw %%mm2, %%mm6 \n\t"
01019 "psubw %%mm6, %%mm0 \n\t"
01020 "movq %%mm1, %%mm6 \n\t"
01021 "psubusw %%mm3, %%mm6 \n\t"
01022 "psubw %%mm6, %%mm1 \n\t"
01023 #endif
01024
// mm2 = first four bytes of pQPb widened to words
01025 "movd %2, %%mm2 \n\t"
01026 "punpcklbw %%mm7, %%mm2 \n\t"
01027
// mm6/mm7 = sign masks of the middle term, mm4/mm5 = its absolute value
01028 "movq %%mm7, %%mm6 \n\t"
01029 "pcmpgtw %%mm4, %%mm6 \n\t"
01030 "pxor %%mm6, %%mm4 \n\t"
01031 "psubw %%mm6, %%mm4 \n\t"
01032 "pcmpgtw %%mm5, %%mm7 \n\t"
01033 "pxor %%mm7, %%mm5 \n\t"
01034 "psubw %%mm7, %%mm5 \n\t"
01035
// keep |middle| only where 8 * pQPb > |middle|
01036 "psllw $3, %%mm2 \n\t"
01037 "movq %%mm2, %%mm3 \n\t"
01038 "pcmpgtw %%mm4, %%mm2 \n\t"
01039 "pcmpgtw %%mm5, %%mm3 \n\t"
01040 "pand %%mm2, %%mm4 \n\t"
01041 "pand %%mm3, %%mm5 \n\t"
01042
01043
// d = max(|middle| - min(|left|, |right|), 0)
01044 "psubusw %%mm0, %%mm4 \n\t"
01045 "psubusw %%mm1, %%mm5 \n\t"
01046
01047
// d = (d * w05 + w20) >> 6   (presumably w05 = 5 and w20 = 32 per word, matching the
// C path's (5*d + 32) >> 6 -- confirm against the constant definitions)
01048 "movq "MANGLE(w05)", %%mm2 \n\t"
01049 "pmullw %%mm2, %%mm4 \n\t"
01050 "pmullw %%mm2, %%mm5 \n\t"
01051 "movq "MANGLE(w20)", %%mm2 \n\t"
01052 "paddw %%mm2, %%mm4 \n\t"
01053 "paddw %%mm2, %%mm5 \n\t"
01054 "psrlw $6, %%mm4 \n\t"
01055 "psrlw $6, %%mm5 \n\t"
01056
// reload row3 - row4 from scratch
01057 "movq 16(%%"REG_c"), %%mm0 \n\t"
01058 "movq 24(%%"REG_c"), %%mm1 \n\t"
01059
01060 "pxor %%mm2, %%mm2 \n\t"
01061 "pxor %%mm3, %%mm3 \n\t"
01062
// mm2/mm3 = sign masks of (row3 - row4); mm0/mm1 = |row3 - row4| / 2
01063 "pcmpgtw %%mm0, %%mm2 \n\t"
01064 "pcmpgtw %%mm1, %%mm3 \n\t"
01065 "pxor %%mm2, %%mm0 \n\t"
01066 "pxor %%mm3, %%mm1 \n\t"
01067 "psubw %%mm2, %%mm0 \n\t"
01068 "psubw %%mm3, %%mm1 \n\t"
01069 "psrlw $1, %%mm0 \n\t"
01070 "psrlw $1, %%mm1 \n\t"
01071
// zero d where the sign of (row3 - row4) equals the sign of the middle term
01072 "pxor %%mm6, %%mm2 \n\t"
01073 "pxor %%mm7, %%mm3 \n\t"
01074 "pand %%mm2, %%mm4 \n\t"
01075 "pand %%mm3, %%mm5 \n\t"
01076
// d = min(d, |row3 - row4| / 2)
01077 #if HAVE_MMX2
01078 "pminsw %%mm0, %%mm4 \n\t"
01079 "pminsw %%mm1, %%mm5 \n\t"
01080 #else
01081 "movq %%mm4, %%mm2 \n\t"
01082 "psubusw %%mm0, %%mm2 \n\t"
01083 "psubw %%mm2, %%mm4 \n\t"
01084 "movq %%mm5, %%mm2 \n\t"
01085 "psubusw %%mm1, %%mm2 \n\t"
01086 "psubw %%mm2, %%mm5 \n\t"
01087 #endif
// give d the sign held in mm6/mm7, pack it back to bytes, then row3 += d and row4 -= d
01088 "pxor %%mm6, %%mm4 \n\t"
01089 "pxor %%mm7, %%mm5 \n\t"
01090 "psubw %%mm6, %%mm4 \n\t"
01091 "psubw %%mm7, %%mm5 \n\t"
01092 "packsswb %%mm5, %%mm4 \n\t"
01093 "movq (%0), %%mm0 \n\t"
01094 "paddb %%mm4, %%mm0 \n\t"
01095 "movq %%mm0, (%0) \n\t"
01096 "movq (%0, %1), %%mm0 \n\t"
01097 "psubb %%mm4, %%mm0 \n\t"
01098 "movq %%mm0, (%0, %1) \n\t"
01099
01100 : "+r" (src)
01101 : "r" ((x86_reg)stride), "m" (c->pQPb)
01102 : "%"REG_a, "%"REG_c
01103 );
01104 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Row offsets relative to src (after the src += stride*3 below): lN == N * stride.
01105 const int l1= stride;
01106 const int l2= stride + l1;
01107 const int l3= stride + l2;
01108 const int l4= stride + l3;
01109 const int l5= stride + l4;
01110 const int l6= stride + l5;
01111 const int l7= stride + l6;
01112 const int l8= stride + l7;
01113
01114 int x;
01115 src+= stride*3;
01116 for(x=0; x<BLOCK_SIZE; x++){
01117 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
// only filter when the step across the boundary is small relative to QP
01118 if(FFABS(middleEnergy) < 8*c->QP){
// q bounds the correction: at most half of the l4/l5 difference, in its direction
01119 const int q=(src[l4] - src[l5])/2;
01120 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01121 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01122
01123 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
01124 d= FFMAX(d, 0);
01125
// scale by 5/64 with rounding, then give d the sign opposite to middleEnergy
01126 d= (5*d + 32) >> 6;
01127 d*= FFSIGN(-middleEnergy);
01128
// clamp d to the interval between 0 and q
01129 if(q>0){
01130 d= d<0 ? 0 : d;
01131 d= d>q ? q : d;
01132 }else{
01133 d= d>0 ? 0 : d;
01134 d= d<q ? q : d;
01135 }
01136
01137 src[l4]-= d;
01138 src[l5]+= d;
01139 }
// next column
01140 src++;
01141 }
01142 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01143 }
01144 #endif //HAVE_ALTIVEC
01145
01146 #if !HAVE_ALTIVEC
01147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01148 {
01149 #if HAVE_MMX2 || HAVE_AMD3DNOW
01150 __asm__ volatile(
01151 "pxor %%mm6, %%mm6 \n\t"
01152 "pcmpeqb %%mm7, %%mm7 \n\t"
01153 "movq %2, %%mm0 \n\t"
01154 "punpcklbw %%mm6, %%mm0 \n\t"
01155 "psrlw $1, %%mm0 \n\t"
01156 "psubw %%mm7, %%mm0 \n\t"
01157 "packuswb %%mm0, %%mm0 \n\t"
01158 "movq %%mm0, %3 \n\t"
01159
01160 "lea (%0, %1), %%"REG_a" \n\t"
01161 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01162
01163
01164
01165
01166 #undef FIND_MIN_MAX
01167 #if HAVE_MMX2
01168 #define REAL_FIND_MIN_MAX(addr)\
01169 "movq " #addr ", %%mm0 \n\t"\
01170 "pminub %%mm0, %%mm7 \n\t"\
01171 "pmaxub %%mm0, %%mm6 \n\t"
01172 #else
01173 #define REAL_FIND_MIN_MAX(addr)\
01174 "movq " #addr ", %%mm0 \n\t"\
01175 "movq %%mm7, %%mm1 \n\t"\
01176 "psubusb %%mm0, %%mm6 \n\t"\
01177 "paddb %%mm0, %%mm6 \n\t"\
01178 "psubusb %%mm0, %%mm1 \n\t"\
01179 "psubb %%mm1, %%mm7 \n\t"
01180 #endif
01181 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
01182
01183 FIND_MIN_MAX((%%REGa))
01184 FIND_MIN_MAX((%%REGa, %1))
01185 FIND_MIN_MAX((%%REGa, %1, 2))
01186 FIND_MIN_MAX((%0, %1, 4))
01187 FIND_MIN_MAX((%%REGd))
01188 FIND_MIN_MAX((%%REGd, %1))
01189 FIND_MIN_MAX((%%REGd, %1, 2))
01190 FIND_MIN_MAX((%0, %1, 8))
01191
01192 "movq %%mm7, %%mm4 \n\t"
01193 "psrlq $8, %%mm7 \n\t"
01194 #if HAVE_MMX2
01195 "pminub %%mm4, %%mm7 \n\t"
01196 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
01197 "pminub %%mm4, %%mm7 \n\t"
01198 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
01199 "pminub %%mm4, %%mm7 \n\t"
01200 #else
01201 "movq %%mm7, %%mm1 \n\t"
01202 "psubusb %%mm4, %%mm1 \n\t"
01203 "psubb %%mm1, %%mm7 \n\t"
01204 "movq %%mm7, %%mm4 \n\t"
01205 "psrlq $16, %%mm7 \n\t"
01206 "movq %%mm7, %%mm1 \n\t"
01207 "psubusb %%mm4, %%mm1 \n\t"
01208 "psubb %%mm1, %%mm7 \n\t"
01209 "movq %%mm7, %%mm4 \n\t"
01210 "psrlq $32, %%mm7 \n\t"
01211 "movq %%mm7, %%mm1 \n\t"
01212 "psubusb %%mm4, %%mm1 \n\t"
01213 "psubb %%mm1, %%mm7 \n\t"
01214 #endif
01215
01216
01217 "movq %%mm6, %%mm4 \n\t"
01218 "psrlq $8, %%mm6 \n\t"
01219 #if HAVE_MMX2
01220 "pmaxub %%mm4, %%mm6 \n\t"
01221 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
01222 "pmaxub %%mm4, %%mm6 \n\t"
01223 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
01224 "pmaxub %%mm4, %%mm6 \n\t"
01225 #else
01226 "psubusb %%mm4, %%mm6 \n\t"
01227 "paddb %%mm4, %%mm6 \n\t"
01228 "movq %%mm6, %%mm4 \n\t"
01229 "psrlq $16, %%mm6 \n\t"
01230 "psubusb %%mm4, %%mm6 \n\t"
01231 "paddb %%mm4, %%mm6 \n\t"
01232 "movq %%mm6, %%mm4 \n\t"
01233 "psrlq $32, %%mm6 \n\t"
01234 "psubusb %%mm4, %%mm6 \n\t"
01235 "paddb %%mm4, %%mm6 \n\t"
01236 #endif
01237 "movq %%mm6, %%mm0 \n\t"
01238 "psubb %%mm7, %%mm6 \n\t"
01239 "movd %%mm6, %%ecx \n\t"
01240 "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
01241 " jb 1f \n\t"
01242 "lea -24(%%"REG_SP"), %%"REG_c" \n\t"
01243 "and "ALIGN_MASK", %%"REG_c" \n\t"
01244 PAVGB(%%mm0, %%mm7)
01245 "punpcklbw %%mm7, %%mm7 \n\t"
01246 "punpcklbw %%mm7, %%mm7 \n\t"
01247 "punpcklbw %%mm7, %%mm7 \n\t"
01248 "movq %%mm7, (%%"REG_c") \n\t"
01249
01250 "movq (%0), %%mm0 \n\t"
01251 "movq %%mm0, %%mm1 \n\t"
01252 "movq %%mm0, %%mm2 \n\t"
01253 "psllq $8, %%mm1 \n\t"
01254 "psrlq $8, %%mm2 \n\t"
01255 "movd -4(%0), %%mm3 \n\t"
01256 "movd 8(%0), %%mm4 \n\t"
01257 "psrlq $24, %%mm3 \n\t"
01258 "psllq $56, %%mm4 \n\t"
01259 "por %%mm3, %%mm1 \n\t"
01260 "por %%mm4, %%mm2 \n\t"
01261 "movq %%mm1, %%mm3 \n\t"
01262 PAVGB(%%mm2, %%mm1)
01263 PAVGB(%%mm0, %%mm1)
01264 "psubusb %%mm7, %%mm0 \n\t"
01265 "psubusb %%mm7, %%mm2 \n\t"
01266 "psubusb %%mm7, %%mm3 \n\t"
01267 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t"
01268 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01269 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t"
01270 "paddb %%mm2, %%mm0 \n\t"
01271 "paddb %%mm3, %%mm0 \n\t"
01272
01273 "movq (%%"REG_a"), %%mm2 \n\t"
01274 "movq %%mm2, %%mm3 \n\t"
01275 "movq %%mm2, %%mm4 \n\t"
01276 "psllq $8, %%mm3 \n\t"
01277 "psrlq $8, %%mm4 \n\t"
01278 "movd -4(%%"REG_a"), %%mm5 \n\t"
01279 "movd 8(%%"REG_a"), %%mm6 \n\t"
01280 "psrlq $24, %%mm5 \n\t"
01281 "psllq $56, %%mm6 \n\t"
01282 "por %%mm5, %%mm3 \n\t"
01283 "por %%mm6, %%mm4 \n\t"
01284 "movq %%mm3, %%mm5 \n\t"
01285 PAVGB(%%mm4, %%mm3)
01286 PAVGB(%%mm2, %%mm3)
01287 "psubusb %%mm7, %%mm2 \n\t"
01288 "psubusb %%mm7, %%mm4 \n\t"
01289 "psubusb %%mm7, %%mm5 \n\t"
01290 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01291 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t"
01292 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t"
01293 "paddb %%mm4, %%mm2 \n\t"
01294 "paddb %%mm5, %%mm2 \n\t"
01295
01296 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01297 "movq " #src ", " #sx " \n\t" \
01298 "movq " #sx ", " #lx " \n\t" \
01299 "movq " #sx ", " #t0 " \n\t" \
01300 "psllq $8, " #lx " \n\t"\
01301 "psrlq $8, " #t0 " \n\t"\
01302 "movd -4" #src ", " #t1 " \n\t"\
01303 "psrlq $24, " #t1 " \n\t"\
01304 "por " #t1 ", " #lx " \n\t" \
01305 "movd 8" #src ", " #t1 " \n\t"\
01306 "psllq $56, " #t1 " \n\t"\
01307 "por " #t1 ", " #t0 " \n\t" \
01308 "movq " #lx ", " #t1 " \n\t" \
01309 PAVGB(t0, lx) \
01310 PAVGB(sx, lx) \
01311 PAVGB(lx, pplx) \
01312 "movq " #lx ", 8(%%"REG_c") \n\t"\
01313 "movq (%%"REG_c"), " #lx " \n\t"\
01314 "psubusb " #lx ", " #t1 " \n\t"\
01315 "psubusb " #lx ", " #t0 " \n\t"\
01316 "psubusb " #lx ", " #sx " \n\t"\
01317 "movq "MANGLE(b00)", " #lx " \n\t"\
01318 "pcmpeqb " #lx ", " #t1 " \n\t" \
01319 "pcmpeqb " #lx ", " #t0 " \n\t" \
01320 "pcmpeqb " #lx ", " #sx " \n\t" \
01321 "paddb " #t1 ", " #t0 " \n\t"\
01322 "paddb " #t0 ", " #sx " \n\t"\
01323 \
01324 PAVGB(plx, pplx) \
01325 "movq " #dst ", " #t0 " \n\t" \
01326 "movq " #t0 ", " #t1 " \n\t" \
01327 "psubusb %3, " #t0 " \n\t"\
01328 "paddusb %3, " #t1 " \n\t"\
01329 PMAXUB(t0, pplx)\
01330 PMINUB(t1, pplx, t0)\
01331 "paddb " #sx ", " #ppsx " \n\t"\
01332 "paddb " #psx ", " #ppsx " \n\t"\
01333 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
01334 "pand "MANGLE(b08)", " #ppsx " \n\t"\
01335 "pcmpeqb " #lx ", " #ppsx " \n\t"\
01336 "pand " #ppsx ", " #pplx " \n\t"\
01337 "pandn " #dst ", " #ppsx " \n\t"\
01338 "por " #pplx ", " #ppsx " \n\t"\
01339 "movq " #ppsx ", " #dst " \n\t"\
01340 "movq 8(%%"REG_c"), " #lx " \n\t"
01341
01342 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01343 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01361 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01362 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01363 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01364 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01365 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01366 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01367 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01368
01369 "1: \n\t"
01370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
01371 : "%"REG_a, "%"REG_d, "%"REG_c
01372 );
01373 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01374 int y;
01375 int min=255;
01376 int max=0;
01377 int avg;
01378 uint8_t *p;
01379 int s[10];
01380 const int QP2= c->QP/2 + 1;
01381
01382 for(y=1; y<9; y++){
01383 int x;
01384 p= src + stride*y;
01385 for(x=1; x<9; x++){
01386 p++;
01387 if(*p > max) max= *p;
01388 if(*p < min) min= *p;
01389 }
01390 }
01391 avg= (min + max + 1)>>1;
01392
01393 if(max - min <deringThreshold) return;
01394
01395 for(y=0; y<10; y++){
01396 int t = 0;
01397
01398 if(src[stride*y + 0] > avg) t+= 1;
01399 if(src[stride*y + 1] > avg) t+= 2;
01400 if(src[stride*y + 2] > avg) t+= 4;
01401 if(src[stride*y + 3] > avg) t+= 8;
01402 if(src[stride*y + 4] > avg) t+= 16;
01403 if(src[stride*y + 5] > avg) t+= 32;
01404 if(src[stride*y + 6] > avg) t+= 64;
01405 if(src[stride*y + 7] > avg) t+= 128;
01406 if(src[stride*y + 8] > avg) t+= 256;
01407 if(src[stride*y + 9] > avg) t+= 512;
01408
01409 t |= (~t)<<16;
01410 t &= (t<<1) & (t>>1);
01411 s[y] = t;
01412 }
01413
01414 for(y=1; y<9; y++){
01415 int t = s[y-1] & s[y] & s[y+1];
01416 t|= t>>16;
01417 s[y-1]= t;
01418 }
01419
01420 for(y=1; y<9; y++){
01421 int x;
01422 int t = s[y-1];
01423
01424 p= src + stride*y;
01425 for(x=1; x<9; x++){
01426 p++;
01427 if(t & (1<<x)){
01428 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01429 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
01430 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01431 f= (f + 8)>>4;
01432
01433 #ifdef DEBUG_DERING_THRESHOLD
01434 __asm__ volatile("emms\n\t":);
01435 {
01436 static long long numPixels=0;
01437 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01438
01439
01440
01441 if(max-min < 20){
01442 static int numSkipped=0;
01443 static int errorSum=0;
01444 static int worstQP=0;
01445 static int worstRange=0;
01446 static int worstDiff=0;
01447 int diff= (f - *p);
01448 int absDiff= FFABS(diff);
01449 int error= diff*diff;
01450
01451 if(x==1 || x==8 || y==1 || y==8) continue;
01452
01453 numSkipped++;
01454 if(absDiff > worstDiff){
01455 worstDiff= absDiff;
01456 worstQP= QP;
01457 worstRange= max-min;
01458 }
01459 errorSum+= error;
01460
01461 if(1024LL*1024LL*1024LL % numSkipped == 0){
01462 av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
01463 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01464 (float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
01465 worstDiff, (float)numSkipped/numPixels);
01466 }
01467 }
01468 }
01469 #endif
01470 if (*p + QP2 < f) *p= *p + QP2;
01471 else if(*p - QP2 > f) *p= *p - QP2;
01472 else *p=f;
01473 }
01474 }
01475 }
01476 #ifdef DEBUG_DERING_THRESHOLD
01477 if(max-min < 20){
01478 for(y=1; y<9; y++){
01479 int x;
01480 int t = 0;
01481 p= src + stride*y;
01482 for(x=1; x<9; x++){
01483 p++;
01484 *p = FFMIN(*p + 20, 255);
01485 }
01486 }
01487
01488 }
01489 #endif
01490 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01491 }
01492 #endif //HAVE_ALTIVEC
01493
/**
 * Deinterlace one block by linear interpolation.
 * After advancing src by 4 lines, lines 1, 3, 5 and 7 are overwritten with
 * the byte-wise average of the line directly above and the line directly
 * below; lines 0, 2, 4, 6 and 8 are only read. 8 bytes per line are processed.
 *
 * @param src    pointer to the block (advanced by 4*stride internally)
 * @param stride distance in bytes between two consecutive lines
 */
01500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01501 {
01502 #if HAVE_MMX2 || HAVE_AMD3DNOW
01503 src+= 4*stride;
01504 __asm__ volatile(
// REG_a = src + stride (line 1), REG_c = REG_a + 4*stride (line 5)
01505 "lea (%0, %1), %%"REG_a" \n\t"
01506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
01507
01508
01509
// line 1 = avg(line 0, line 2)
01510 "movq (%0), %%mm0 \n\t"
01511 "movq (%%"REG_a", %1), %%mm1 \n\t"
01512 PAVGB(%%mm1, %%mm0)
01513 "movq %%mm0, (%%"REG_a") \n\t"
// line 3 = avg(line 2, line 4)
01514 "movq (%0, %1, 4), %%mm0 \n\t"
01515 PAVGB(%%mm0, %%mm1)
01516 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
// line 5 = avg(line 4, line 6)
01517 "movq (%%"REG_c", %1), %%mm1 \n\t"
01518 PAVGB(%%mm1, %%mm0)
01519 "movq %%mm0, (%%"REG_c") \n\t"
// line 7 = avg(line 6, line 8)
01520 "movq (%0, %1, 8), %%mm0 \n\t"
01521 PAVGB(%%mm0, %%mm1)
01522 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
01523
01524 : : "r" (src), "r" ((x86_reg)stride)
01525 : "%"REG_a, "%"REG_c
01526 );
01527 #else
// Scalar fallback: handles the 8 bytes of a line as two packed 32-bit words.
01528 int a, b, x;
01529 src+= 4*stride;
01530
01531 for(x=0; x<2; x++){
// (a|b) - (((a^b)&0xFEFEFEFE)>>1) is the per-byte average of a and b, rounded
// up; the mask keeps the shifted bits from crossing into the neighbouring byte.
01532 a= *(uint32_t*)&src[stride*0];
01533 b= *(uint32_t*)&src[stride*2];
01534 *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01535 a= *(uint32_t*)&src[stride*4];
01536 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01537 b= *(uint32_t*)&src[stride*6];
01538 *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01539 a= *(uint32_t*)&src[stride*8];
01540 *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
// advance to the second group of 4 pixels
01541 src += 4;
01542 }
01543 #endif
01544 }
01545
/**
 * Deinterlace one block by cubic interpolation.
 * After advancing src by 3 lines, lines 3, 5, 7 and 9 are overwritten with a
 * 4-tap (-1 9 9 -1)/16 combination of the lines at distance -3, -1, +1 and +3;
 * lines 0, 2, 4, ..., 12 are only read. 8 bytes per line are processed.
 *
 * @param src    pointer to the block (advanced by 3*stride internally)
 * @param stride distance in bytes between two consecutive lines
 */
01553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01554 {
01555 #if HAVE_MMX2 || HAVE_AMD3DNOW
01556 src+= stride*3;
01557 __asm__ volatile(
// REG_a = line 1, REG_d = line 5, REG_c = line 10; mm7 = 0 for byte->word unpacking
01558 "lea (%0, %1), %%"REG_a" \n\t"
01559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
01561 "add %1, %%"REG_c" \n\t"
01562 "pxor %%mm7, %%mm7 \n\t"
01563
01564
01565
// DEINT_CUBIC(a,b,c,d,e): with inner = avg(b,d) and outer = avg(a,e), computed
// in 16-bit words, stores inner - ((outer - inner) >> 3) to c (saturated to bytes).
01566 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01567 "movq " #a ", %%mm0 \n\t"\
01568 "movq " #b ", %%mm1 \n\t"\
01569 "movq " #d ", %%mm2 \n\t"\
01570 "movq " #e ", %%mm3 \n\t"\
01571 PAVGB(%%mm2, %%mm1) \
01572 PAVGB(%%mm3, %%mm0) \
01573 "movq %%mm0, %%mm2 \n\t"\
01574 "punpcklbw %%mm7, %%mm0 \n\t"\
01575 "punpckhbw %%mm7, %%mm2 \n\t"\
01576 "movq %%mm1, %%mm3 \n\t"\
01577 "punpcklbw %%mm7, %%mm1 \n\t"\
01578 "punpckhbw %%mm7, %%mm3 \n\t"\
01579 "psubw %%mm1, %%mm0 \n\t" \
01580 "psubw %%mm3, %%mm2 \n\t" \
01581 "psraw $3, %%mm0 \n\t" \
01582 "psraw $3, %%mm2 \n\t" \
01583 "psubw %%mm0, %%mm1 \n\t" \
01584 "psubw %%mm2, %%mm3 \n\t" \
01585 "packuswb %%mm3, %%mm1 \n\t"\
01586 "movq %%mm1, " #c " \n\t"
// Extra macro level so that the arguments are macro-expanded before being stringified.
01587 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
01588
// Outputs, in order: line 3 (from 0,2,4,6), line 5 (from 2,4,6,8),
// line 7 (from 4,6,8,10), line 9 (from 6,8,10,12).
01589 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
01590 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
01591 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
01592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
01593
01594 : : "r" (src), "r" ((x86_reg)stride)
01595 : "%"REG_a, "%"REG_d, "%"REG_c
01596 );
01597 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Scalar fallback: one column per iteration, 8 columns.
01598 int x;
01599 src+= stride*3;
01600 for(x=0; x<8; x++){
01601 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01602 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01603 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01604 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01605 src++;
01606 }
01607 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01608 }
01609
/**
 * Deinterlace one block by filtering every second line vertically with a
 * (-1 4 2 4 -1)/8 filter over five consecutive lines.
 * After advancing src by 4 lines, lines 1, 3, 5 and 7 are rewritten; lines
 * 0..9 are read. The tap two lines above the filtered line always uses the
 * ORIGINAL (unfiltered) pixel values: for line 1 they come from tmp, for the
 * other lines they are saved before the line is overwritten.
 *
 * @param src    pointer to the block (advanced by 4*stride internally)
 * @param stride distance in bytes between two consecutive lines
 * @param tmp    8 bytes of state: on entry the original line -1 (the original
 *               line 7 of the block above), on exit the original line 7 of
 *               this block
 */
01617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01618 {
01619 #if HAVE_MMX2 || HAVE_AMD3DNOW
01620 src+= stride*4;
01621 __asm__ volatile(
// REG_a = line 1, REG_d = line 5, mm7 = 0 for unpacking, mm0 = saved line from tmp
01622 "lea (%0, %1), %%"REG_a" \n\t"
01623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01624 "pxor %%mm7, %%mm7 \n\t"
01625 "movq (%2), %%mm0 \n\t"
01626
01627
01628
// DEINT_FF(a,b,c,d): b = (4*avg(a,c) - avg(mm0,d) + b) >> 2, computed in 16-bit
// words and saturated to bytes; afterwards mm0 holds the original (unfiltered) b.
01629 #define REAL_DEINT_FF(a,b,c,d)\
01630 "movq " #a ", %%mm1 \n\t"\
01631 "movq " #b ", %%mm2 \n\t"\
01632 "movq " #c ", %%mm3 \n\t"\
01633 "movq " #d ", %%mm4 \n\t"\
01634 PAVGB(%%mm3, %%mm1) \
01635 PAVGB(%%mm4, %%mm0) \
01636 "movq %%mm0, %%mm3 \n\t"\
01637 "punpcklbw %%mm7, %%mm0 \n\t"\
01638 "punpckhbw %%mm7, %%mm3 \n\t"\
01639 "movq %%mm1, %%mm4 \n\t"\
01640 "punpcklbw %%mm7, %%mm1 \n\t"\
01641 "punpckhbw %%mm7, %%mm4 \n\t"\
01642 "psllw $2, %%mm1 \n\t"\
01643 "psllw $2, %%mm4 \n\t"\
01644 "psubw %%mm0, %%mm1 \n\t"\
01645 "psubw %%mm3, %%mm4 \n\t"\
01646 "movq %%mm2, %%mm5 \n\t"\
01647 "movq %%mm2, %%mm0 \n\t"\
01648 "punpcklbw %%mm7, %%mm2 \n\t"\
01649 "punpckhbw %%mm7, %%mm5 \n\t"\
01650 "paddw %%mm2, %%mm1 \n\t"\
01651 "paddw %%mm5, %%mm4 \n\t"\
01652 "psraw $2, %%mm1 \n\t"\
01653 "psraw $2, %%mm4 \n\t"\
01654 "packuswb %%mm4, %%mm1 \n\t"\
01655 "movq %%mm1, " #b " \n\t"\
01656
// Extra macro level so that the arguments are macro-expanded before being stringified.
01657 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
01658
// Filtered lines, in order: 1, 3, 5, 7.
01659 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
01660 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01661 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
01662 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01663
// mm0 now holds the original line 7; keep it for the block below.
01664 "movq %%mm0, (%2) \n\t"
01665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
01666 : "%"REG_a, "%"REG_d
01667 );
01668 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Scalar fallback: one column per iteration, 8 columns.
// t1/t2 alternate between "original pixel two lines above the line being
// filtered" and "original pixel of the line being filtered".
01669 int x;
01670 src+= stride*4;
01671 for(x=0; x<8; x++){
01672 int t1= tmp[x];
01673 int t2= src[stride*1];
01674
01675 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
// Fix: the centre tap must be the line being filtered (3, 5, 7), read before it
// is overwritten, exactly as in the MMX path above. The previous code read
// lines 4, 6 and 8 here, which turned the filter into (-1 4 0 6 -1), fed the
// wrong line into the following row's outer tap and left line 8 in tmp.
01676 t1= src[stride*3];
01677 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01678 t2= src[stride*5];
01679 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01680 t1= src[stride*7];
01681 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
// original line 7 becomes line -1 of the block below
01682 tmp[x]= t1;
01683
01684 src++;
01685 }
01686 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01687 }
01688
/**
 * Deinterlace one block by filtering every line vertically with a
 * (-1 2 6 2 -1)/8 lowpass over five consecutive lines.
 * After advancing src by 4 lines, lines 0..7 are rewritten and lines 0..9 are
 * read. The two taps above the current line always use ORIGINAL (unfiltered)
 * pixel values, which is why the originals are carried in registers/locals
 * and, across blocks, in tmp and tmp2.
 *
 * @param src    pointer to the block (advanced by 4*stride internally)
 * @param stride distance in bytes between two consecutive lines
 * @param tmp    8 bytes of state: original line -2 on entry, original line 6 on exit
 * @param tmp2   8 bytes of state: original line -1 on entry, original line 7 on exit
 */
01696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01697 {
01698 #if HAVE_MMX2 || HAVE_AMD3DNOW
01699 src+= stride*4;
01700 __asm__ volatile(
// REG_a = line 1, REG_d = line 5, mm7 = 0 for unpacking,
// mm0 = saved line from tmp, mm1 = saved line from tmp2
01701 "lea (%0, %1), %%"REG_a" \n\t"
01702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01703 "pxor %%mm7, %%mm7 \n\t"
01704 "movq (%2), %%mm0 \n\t"
01705 "movq (%3), %%mm1 \n\t"
01706
01707
01708
// DEINT_L5(t1,t2,a,b,c): a = (3*a + 2*avg(t2,b) - avg(t1,c)) >> 2, computed in
// 16-bit words and saturated to bytes; t1 is overwritten with the original a,
// so the roles of the two state registers swap on every invocation.
01709 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01710 "movq " #a ", %%mm2 \n\t"\
01711 "movq " #b ", %%mm3 \n\t"\
01712 "movq " #c ", %%mm4 \n\t"\
01713 PAVGB(t2, %%mm3) \
01714 PAVGB(t1, %%mm4) \
01715 "movq %%mm2, %%mm5 \n\t"\
01716 "movq %%mm2, " #t1 " \n\t"\
01717 "punpcklbw %%mm7, %%mm2 \n\t"\
01718 "punpckhbw %%mm7, %%mm5 \n\t"\
01719 "movq %%mm2, %%mm6 \n\t"\
01720 "paddw %%mm2, %%mm2 \n\t"\
01721 "paddw %%mm6, %%mm2 \n\t"\
01722 "movq %%mm5, %%mm6 \n\t"\
01723 "paddw %%mm5, %%mm5 \n\t"\
01724 "paddw %%mm6, %%mm5 \n\t"\
01725 "movq %%mm3, %%mm6 \n\t"\
01726 "punpcklbw %%mm7, %%mm3 \n\t"\
01727 "punpckhbw %%mm7, %%mm6 \n\t"\
01728 "paddw %%mm3, %%mm3 \n\t"\
01729 "paddw %%mm6, %%mm6 \n\t"\
01730 "paddw %%mm3, %%mm2 \n\t"\
01731 "paddw %%mm6, %%mm5 \n\t"\
01732 "movq %%mm4, %%mm6 \n\t"\
01733 "punpcklbw %%mm7, %%mm4 \n\t"\
01734 "punpckhbw %%mm7, %%mm6 \n\t"\
01735 "psubw %%mm4, %%mm2 \n\t"\
01736 "psubw %%mm6, %%mm5 \n\t"\
01737 "psraw $2, %%mm2 \n\t"\
01738 "psraw $2, %%mm5 \n\t"\
01739 "packuswb %%mm5, %%mm2 \n\t"\
01740 "movq %%mm2, " #a " \n\t"\
01741
// Extra macro level so that the arguments are macro-expanded before being stringified.
01742 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
01743
// Filtered lines, in order: 0, 1, 2, 3, 4, 5, 6, 7.
01744 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
01745 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
01746 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
01747 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01748 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
01749 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
01750 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
01751 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01752
// mm0 = original line 6, mm1 = original line 7: state for the block below
01753 "movq %%mm0, (%2) \n\t"
01754 "movq %%mm1, (%3) \n\t"
01755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
01756 : "%"REG_a, "%"REG_d
01757 );
01758 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Scalar fallback: one column per iteration, 8 columns.
// t1, t2, t3 rotate through the original values of the three most recent
// lines; each is reloaded from a line just before that line is overwritten.
01759 int x;
01760 src+= stride*4;
01761 for(x=0; x<8; x++){
01762 int t1= tmp[x];
01763 int t2= tmp2[x];
01764 int t3= src[0];
01765
01766 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01767 t1= src[stride*1];
01768 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01769 t2= src[stride*2];
01770 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01771 t3= src[stride*3];
01772 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01773 t1= src[stride*4];
01774 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01775 t2= src[stride*5];
01776 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01777 t3= src[stride*6];
01778 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01779 t1= src[stride*7];
01780 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01781
// t3 = original line 6, t1 = original line 7
01782 tmp[x]= t3;
01783 tmp2[x]= t1;
01784
01785 src++;
01786 }
01787 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01788 }
01789
/**
 * Deinterlace one block by blending every line with its vertical neighbours,
 * approximating a (1 2 1)/4 filter with two chained byte averages:
 * new line n = avg(avg(line n-1, line n+1), line n).
 * After advancing src by 4 lines, lines 0..7 are rewritten and lines 0..8 are
 * read. The neighbour above is always taken from the ORIGINAL (unfiltered)
 * data, which is kept in registers/locals and, across blocks, in tmp.
 *
 * @param src    pointer to the block (advanced by 4*stride internally)
 * @param stride distance in bytes between two consecutive lines
 * @param tmp    8 bytes of state: original line -1 on entry, original line 7 on exit
 */
01797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01798 {
01799 #if HAVE_MMX2 || HAVE_AMD3DNOW
01800 src+= 4*stride;
01801 __asm__ volatile(
// REG_a = src + stride (line 1), REG_d = REG_a + 4*stride (line 5)
01802 "lea (%0, %1), %%"REG_a" \n\t"
01803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01804
01805
01806
// mm0, mm1, mm2 rotate through "line above", "current line", "line below";
// each group of four instructions below produces one output line.
// line 0
01807 "movq (%2), %%mm0 \n\t"
01808 "movq (%%"REG_a"), %%mm1 \n\t"
01809 PAVGB(%%mm1, %%mm0)
01810 "movq (%0), %%mm2 \n\t"
01811 PAVGB(%%mm2, %%mm0)
01812 "movq %%mm0, (%0) \n\t"
// line 1
01813 "movq (%%"REG_a", %1), %%mm0 \n\t"
01814 PAVGB(%%mm0, %%mm2)
01815 PAVGB(%%mm1, %%mm2)
01816 "movq %%mm2, (%%"REG_a") \n\t"
// line 2
01817 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
01818 PAVGB(%%mm2, %%mm1)
01819 PAVGB(%%mm0, %%mm1)
01820 "movq %%mm1, (%%"REG_a", %1) \n\t"
// line 3
01821 "movq (%0, %1, 4), %%mm1 \n\t"
01822 PAVGB(%%mm1, %%mm0)
01823 PAVGB(%%mm2, %%mm0)
01824 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
// line 4
01825 "movq (%%"REG_d"), %%mm0 \n\t"
01826 PAVGB(%%mm0, %%mm2)
01827 PAVGB(%%mm1, %%mm2)
01828 "movq %%mm2, (%0, %1, 4) \n\t"
// line 5
01829 "movq (%%"REG_d", %1), %%mm2 \n\t"
01830 PAVGB(%%mm2, %%mm1)
01831 PAVGB(%%mm0, %%mm1)
01832 "movq %%mm1, (%%"REG_d") \n\t"
// line 6
01833 "movq (%%"REG_d", %1, 2), %%mm1 \n\t"
01834 PAVGB(%%mm1, %%mm0)
01835 PAVGB(%%mm2, %%mm0)
01836 "movq %%mm0, (%%"REG_d", %1) \n\t"
// line 7; mm1 still holds the original line 7, which is saved to tmp
01837 "movq (%0, %1, 8), %%mm0 \n\t"
01838 PAVGB(%%mm0, %%mm2)
01839 PAVGB(%%mm1, %%mm2)
01840 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01841 "movq %%mm1, (%2) \n\t"
01842
01843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
01844 : "%"REG_a, "%"REG_d
01845 );
01846 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// Scalar fallback: handles the 8 bytes of a line as two packed 32-bit words.
// (p&q) + (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded down,
// (p|q) - (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded up.
// a, b, c rotate through the original values of three neighbouring lines; the
// variable holding the line two above is reused for the inner average.
01847 int a, b, c, x;
01848 src+= 4*stride;
01849
01850 for(x=0; x<2; x++){
01851 a= *(uint32_t*)&tmp[stride*0];
01852 b= *(uint32_t*)&src[stride*0];
01853 c= *(uint32_t*)&src[stride*1];
01854 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01855 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01856
01857 a= *(uint32_t*)&src[stride*2];
01858 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01859 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01860
01861 b= *(uint32_t*)&src[stride*3];
01862 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01863 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01864
01865 c= *(uint32_t*)&src[stride*4];
01866 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01867 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01868
01869 a= *(uint32_t*)&src[stride*5];
01870 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01871 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01872
01873 b= *(uint32_t*)&src[stride*6];
01874 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01875 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01876
01877 c= *(uint32_t*)&src[stride*7];
01878 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01879 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01880
01881 a= *(uint32_t*)&src[stride*8];
01882 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01883 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01884
// c still holds the original line 7
01885 *(uint32_t*)&tmp[stride*0]= c;
// advance to the second group of 4 pixels
01886 src += 4;
01887 tmp += 4;
01888 }
01889 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01890 }
01891
/**
 * Deinterlace one block with a vertical 3-tap median filter.
 * After advancing src by 4 lines, lines 1, 3, 5 and 7 are each replaced by the
 * per-pixel median of the line above, the line itself and the line below;
 * lines 0..8 are read. 8 bytes per line are processed.
 *
 * @param src    pointer to the block (advanced by 4*stride internally)
 * @param stride distance in bytes between two consecutive lines
 */
01898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01899 {
01900 #if HAVE_MMX
01901 src+= 4*stride;
01902 #if HAVE_MMX2
// MMX2 path: median(x,y,z) = min(max(x,y), max(min(x,y), z)) using pmaxub/pminub.
01903 __asm__ volatile(
// REG_a = src + stride (line 1), REG_d = REG_a + 4*stride (line 5)
01904 "lea (%0, %1), %%"REG_a" \n\t"
01905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01906
01907
01908
// line 1 = median(line 0, line 1, line 2); mm2 keeps line 2 for the next step
01909 "movq (%0), %%mm0 \n\t"
01910 "movq (%%"REG_a", %1), %%mm2 \n\t"
01911 "movq (%%"REG_a"), %%mm1 \n\t"
01912 "movq %%mm0, %%mm3 \n\t"
01913 "pmaxub %%mm1, %%mm0 \n\t"
01914 "pminub %%mm3, %%mm1 \n\t"
01915 "pmaxub %%mm2, %%mm1 \n\t"
01916 "pminub %%mm1, %%mm0 \n\t"
01917 "movq %%mm0, (%%"REG_a") \n\t"
01918
// line 3 = median(line 2, line 3, line 4); mm0 keeps line 4 for the next step
01919 "movq (%0, %1, 4), %%mm0 \n\t"
01920 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
01921 "movq %%mm2, %%mm3 \n\t"
01922 "pmaxub %%mm1, %%mm2 \n\t"
01923 "pminub %%mm3, %%mm1 \n\t"
01924 "pmaxub %%mm0, %%mm1 \n\t"
01925 "pminub %%mm1, %%mm2 \n\t"
01926 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
01927
// line 5 = median(line 4, line 5, line 6); mm1 keeps line 6 for the next step
01928 "movq (%%"REG_d"), %%mm2 \n\t"
01929 "movq (%%"REG_d", %1), %%mm1 \n\t"
01930 "movq %%mm2, %%mm3 \n\t"
01931 "pmaxub %%mm0, %%mm2 \n\t"
01932 "pminub %%mm3, %%mm0 \n\t"
01933 "pmaxub %%mm1, %%mm0 \n\t"
01934 "pminub %%mm0, %%mm2 \n\t"
01935 "movq %%mm2, (%%"REG_d") \n\t"
01936
// line 7 = median(line 6, line 7, line 8)
01937 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
01938 "movq (%0, %1, 8), %%mm0 \n\t"
01939 "movq %%mm2, %%mm3 \n\t"
01940 "pmaxub %%mm0, %%mm2 \n\t"
01941 "pminub %%mm3, %%mm0 \n\t"
01942 "pmaxub %%mm1, %%mm0 \n\t"
01943 "pminub %%mm0, %%mm2 \n\t"
01944 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01945
01946
01947 : : "r" (src), "r" ((x86_reg)stride)
01948 : "%"REG_a, "%"REG_d
01949 );
01950
01951 #else // MMX without MMX2
// Plain MMX path (no pminub/pmaxub): the median is selected with comparison masks.
01952 __asm__ volatile(
// REG_a = src + stride (line 1), REG_d = REG_a + 4*stride (line 5)
01953 "lea (%0, %1), %%"REG_a" \n\t"
01954 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01955
01956
// mm7 = 0, compared against the saturated differences to build the masks
01957 "pxor %%mm7, %%mm7 \n\t"
01958
// MEDIAN(a,b,c): psubusb + pcmpeqb yield the per-byte masks a<=c, c<=b, b<=a;
// the pairwise XORs of those masks are ORed onto c, b and a so that every
// non-median value becomes 0xFF, and ANDing the three leaves the median,
// which is stored to b.
01959 #define REAL_MEDIAN(a,b,c)\
01960 "movq " #a ", %%mm0 \n\t"\
01961 "movq " #b ", %%mm2 \n\t"\
01962 "movq " #c ", %%mm1 \n\t"\
01963 "movq %%mm0, %%mm3 \n\t"\
01964 "movq %%mm1, %%mm4 \n\t"\
01965 "movq %%mm2, %%mm5 \n\t"\
01966 "psubusb %%mm1, %%mm3 \n\t"\
01967 "psubusb %%mm2, %%mm4 \n\t"\
01968 "psubusb %%mm0, %%mm5 \n\t"\
01969 "pcmpeqb %%mm7, %%mm3 \n\t"\
01970 "pcmpeqb %%mm7, %%mm4 \n\t"\
01971 "pcmpeqb %%mm7, %%mm5 \n\t"\
01972 "movq %%mm3, %%mm6 \n\t"\
01973 "pxor %%mm4, %%mm3 \n\t"\
01974 "pxor %%mm5, %%mm4 \n\t"\
01975 "pxor %%mm6, %%mm5 \n\t"\
01976 "por %%mm3, %%mm1 \n\t"\
01977 "por %%mm4, %%mm2 \n\t"\
01978 "por %%mm5, %%mm0 \n\t"\
01979 "pand %%mm2, %%mm0 \n\t"\
01980 "pand %%mm1, %%mm0 \n\t"\
01981 "movq %%mm0, " #b " \n\t"
// Extra macro level so that the arguments are macro-expanded before being stringified.
01982 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
01983
// Filtered lines, in order: 1, 3, 5, 7.
01984 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
01985 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
01986 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
01987 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
01988
01989 : : "r" (src), "r" ((x86_reg)stride)
01990 : "%"REG_a, "%"REG_d
01991 );
01992 #endif //HAVE_MMX2
01993 #else //HAVE_MMX
// Scalar fallback: 8 columns, 4 filtered lines per column.
01994 int x, y;
01995 src+= 4*stride;
01996
01997 for(x=0; x<8; x++){
01998 uint8_t *colsrc = src;
01999 for (y=0; y<4; y++){
02000 int a, b, c, d, e, f;
02001 a = colsrc[0 ];
02002 b = colsrc[stride ];
02003 c = colsrc[stride*2];
// d, e, f are all-ones masks when a<b, b<c, c<a respectively, otherwise 0
// (relies on >> of a negative int being an arithmetic shift)
02004 d = (a-b)>>31;
02005 e = (b-c)>>31;
02006 f = (c-a)>>31;
// a value whose two masks differ is ORed to all-ones and drops out of the AND;
// what remains is the median
02007 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
02008 colsrc += stride*2;
02009 }
02010 src++;
02011 }
02012 #endif //HAVE_MMX
02013 }
02014
02015 #if HAVE_MMX
02016
/**
 * Transpose an 8x8 block of bytes from src into two destination buffers.
 * Each source column becomes one 8-byte destination row; destination rows are
 * 16 bytes apart. Columns 0..4 are written to dst1 at byte offsets 128, 144,
 * 160, 176, 192 and columns 3..7 are written to dst2 at byte offsets 48, 64,
 * 80, 96, 112 (columns 3 and 4 therefore go to both buffers).
 *
 * @param dst1      first destination buffer (rows written at offsets 128..199)
 * @param dst2      second destination buffer (rows written at offsets 48..119)
 * @param src       top-left byte of the 8x8 source block
 * @param srcStride distance in bytes between two consecutive source lines
 */
02019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
02020 {
02021 __asm__(
// REG_a = src + srcStride (source line 1)
02022 "lea (%0, %1), %%"REG_a" \n\t"
02023
02024
// First half: source lines 0..3.
// Interleave bytes of lines 0/1 (mm0 = columns 0-3, mm2 = columns 4-7) ...
02025 "movq (%0), %%mm0 \n\t"
02026 "movq (%%"REG_a"), %%mm1 \n\t"
02027 "movq %%mm0, %%mm2 \n\t"
02028 "punpcklbw %%mm1, %%mm0 \n\t"
02029 "punpckhbw %%mm1, %%mm2 \n\t"
02030
// ... and of lines 2/3 (mm1 = columns 0-3, mm4 = columns 4-7)
02031 "movq (%%"REG_a", %1), %%mm1 \n\t"
02032 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02033 "movq %%mm1, %%mm4 \n\t"
02034 "punpcklbw %%mm3, %%mm1 \n\t"
02035 "punpckhbw %%mm3, %%mm4 \n\t"
02036
// Interleave 16-bit pairs: each 32-bit half now holds 4 pixels of one column.
// mm0 = columns 0,1  mm3 = columns 2,3  mm2 = columns 4,5  mm1 = columns 6,7
02037 "movq %%mm0, %%mm3 \n\t"
02038 "punpcklwd %%mm1, %%mm0 \n\t"
02039 "punpckhwd %%mm1, %%mm3 \n\t"
02040 "movq %%mm2, %%mm1 \n\t"
02041 "punpcklwd %%mm4, %%mm2 \n\t"
02042 "punpckhwd %%mm4, %%mm1 \n\t"
02043
// Store bytes 0..3 of every destination row.
02044 "movd %%mm0, 128(%2) \n\t"
02045 "psrlq $32, %%mm0 \n\t"
02046 "movd %%mm0, 144(%2) \n\t"
02047 "movd %%mm3, 160(%2) \n\t"
02048 "psrlq $32, %%mm3 \n\t"
02049 "movd %%mm3, 176(%2) \n\t"
02050 "movd %%mm3, 48(%3) \n\t"
02051 "movd %%mm2, 192(%2) \n\t"
02052 "movd %%mm2, 64(%3) \n\t"
02053 "psrlq $32, %%mm2 \n\t"
02054 "movd %%mm2, 80(%3) \n\t"
02055 "movd %%mm1, 96(%3) \n\t"
02056 "psrlq $32, %%mm1 \n\t"
02057 "movd %%mm1, 112(%3) \n\t"
02058
// Second half: source lines 4..7 (REG_a now points at line 5), same scheme.
02059 "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t"
02060
02061 "movq (%0, %1, 4), %%mm0 \n\t"
02062 "movq (%%"REG_a"), %%mm1 \n\t"
02063 "movq %%mm0, %%mm2 \n\t"
02064 "punpcklbw %%mm1, %%mm0 \n\t"
02065 "punpckhbw %%mm1, %%mm2 \n\t"
02066
02067 "movq (%%"REG_a", %1), %%mm1 \n\t"
02068 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02069 "movq %%mm1, %%mm4 \n\t"
02070 "punpcklbw %%mm3, %%mm1 \n\t"
02071 "punpckhbw %%mm3, %%mm4 \n\t"
02072
02073 "movq %%mm0, %%mm3 \n\t"
02074 "punpcklwd %%mm1, %%mm0 \n\t"
02075 "punpckhwd %%mm1, %%mm3 \n\t"
02076 "movq %%mm2, %%mm1 \n\t"
02077 "punpcklwd %%mm4, %%mm2 \n\t"
02078 "punpckhwd %%mm4, %%mm1 \n\t"
02079
// Store bytes 4..7 of every destination row (same offsets as above, plus 4).
02080 "movd %%mm0, 132(%2) \n\t"
02081 "psrlq $32, %%mm0 \n\t"
02082 "movd %%mm0, 148(%2) \n\t"
02083 "movd %%mm3, 164(%2) \n\t"
02084 "psrlq $32, %%mm3 \n\t"
02085 "movd %%mm3, 180(%2) \n\t"
02086 "movd %%mm3, 52(%3) \n\t"
02087 "movd %%mm2, 196(%2) \n\t"
02088 "movd %%mm2, 68(%3) \n\t"
02089 "psrlq $32, %%mm2 \n\t"
02090 "movd %%mm2, 84(%3) \n\t"
02091 "movd %%mm1, 100(%3) \n\t"
02092 "psrlq $32, %%mm1 \n\t"
02093 "movd %%mm1, 116(%3) \n\t"
02094
02095
// operands: %0 = src, %1 = srcStride, %2 = dst1, %3 = dst2
02096 :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
02097 : "%"REG_a
02098 );
02099 }
02100
/**
 * Transpose an 8x8 block of bytes from a buffer with a fixed row pitch of 16
 * bytes into dst.
 * The eight 8-byte source rows are read at src + 0, 16, 32, ..., 112; source
 * row r becomes byte column r of the eight destination lines.
 *
 * @param dst       top-left byte of the 8x8 destination block
 * @param dstStride distance in bytes between two consecutive destination lines
 * @param src       source buffer with 16 bytes between consecutive rows
 */
02104 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02105 {
02106 __asm__(
// REG_a = dst + dstStride (line 1), REG_d = REG_a + 4*dstStride (line 5)
02107 "lea (%0, %1), %%"REG_a" \n\t"
02108 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
02109
02110
// First half: source rows 0..3.
// Interleave bytes of rows 0/1 (mm0 = columns 0-3, mm2 = columns 4-7) ...
02111 "movq (%2), %%mm0 \n\t"
02112 "movq 16(%2), %%mm1 \n\t"
02113 "movq %%mm0, %%mm2 \n\t"
02114 "punpcklbw %%mm1, %%mm0 \n\t"
02115 "punpckhbw %%mm1, %%mm2 \n\t"
02116
// ... and of rows 2/3 (mm1 = columns 0-3, mm4 = columns 4-7)
02117 "movq 32(%2), %%mm1 \n\t"
02118 "movq 48(%2), %%mm3 \n\t"
02119 "movq %%mm1, %%mm4 \n\t"
02120 "punpcklbw %%mm3, %%mm1 \n\t"
02121 "punpckhbw %%mm3, %%mm4 \n\t"
02122
// Interleave 16-bit pairs: each 32-bit half now holds 4 pixels of one source column.
// mm0 = columns 0,1  mm3 = columns 2,3  mm2 = columns 4,5  mm1 = columns 6,7
02123 "movq %%mm0, %%mm3 \n\t"
02124 "punpcklwd %%mm1, %%mm0 \n\t"
02125 "punpckhwd %%mm1, %%mm3 \n\t"
02126 "movq %%mm2, %%mm1 \n\t"
02127 "punpcklwd %%mm4, %%mm2 \n\t"
02128 "punpckhwd %%mm4, %%mm1 \n\t"
02129
// Store bytes 0..3 of destination lines 0..7.
02130 "movd %%mm0, (%0) \n\t"
02131 "psrlq $32, %%mm0 \n\t"
02132 "movd %%mm0, (%%"REG_a") \n\t"
02133 "movd %%mm3, (%%"REG_a", %1) \n\t"
02134 "psrlq $32, %%mm3 \n\t"
02135 "movd %%mm3, (%%"REG_a", %1, 2) \n\t"
02136 "movd %%mm2, (%0, %1, 4) \n\t"
02137 "psrlq $32, %%mm2 \n\t"
02138 "movd %%mm2, (%%"REG_d") \n\t"
02139 "movd %%mm1, (%%"REG_d", %1) \n\t"
02140 "psrlq $32, %%mm1 \n\t"
02141 "movd %%mm1, (%%"REG_d", %1, 2) \n\t"
02142
02143
// Second half: source rows 4..7, same scheme.
02144 "movq 64(%2), %%mm0 \n\t"
02145 "movq 80(%2), %%mm1 \n\t"
02146 "movq %%mm0, %%mm2 \n\t"
02147 "punpcklbw %%mm1, %%mm0 \n\t"
02148 "punpckhbw %%mm1, %%mm2 \n\t"
02149
02150 "movq 96(%2), %%mm1 \n\t"
02151 "movq 112(%2), %%mm3 \n\t"
02152 "movq %%mm1, %%mm4 \n\t"
02153 "punpcklbw %%mm3, %%mm1 \n\t"
02154 "punpckhbw %%mm3, %%mm4 \n\t"
02155
02156 "movq %%mm0, %%mm3 \n\t"
02157 "punpcklwd %%mm1, %%mm0 \n\t"
02158 "punpckhwd %%mm1, %%mm3 \n\t"
02159 "movq %%mm2, %%mm1 \n\t"
02160 "punpcklwd %%mm4, %%mm2 \n\t"
02161 "punpckhwd %%mm4, %%mm1 \n\t"
02162
// Store bytes 4..7 of destination lines 0..7.
02163 "movd %%mm0, 4(%0) \n\t"
02164 "psrlq $32, %%mm0 \n\t"
02165 "movd %%mm0, 4(%%"REG_a") \n\t"
02166 "movd %%mm3, 4(%%"REG_a", %1) \n\t"
02167 "psrlq $32, %%mm3 \n\t"
02168 "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t"
02169 "movd %%mm2, 4(%0, %1, 4) \n\t"
02170 "psrlq $32, %%mm2 \n\t"
02171 "movd %%mm2, 4(%%"REG_d") \n\t"
02172 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
02173 "psrlq $32, %%mm1 \n\t"
02174 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
02175
// operands: %0 = dst, %1 = dstStride, %2 = src
02176 :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
02177 : "%"REG_a, "%"REG_d
02178 );
02179 }
02180 #endif //HAVE_MMX
02181
02182
02183 #if !HAVE_ALTIVEC
02184 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02185 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
02186 {
02187
02188 tempBlurredPast[127]= maxNoise[0];
02189 tempBlurredPast[128]= maxNoise[1];
02190 tempBlurredPast[129]= maxNoise[2];
02191
02192 #define FAST_L2_DIFF
02193
02194 #if HAVE_MMX2 || HAVE_AMD3DNOW
02195 __asm__ volatile(
02196 "lea (%2, %2, 2), %%"REG_a" \n\t"
02197 "lea (%2, %2, 4), %%"REG_d" \n\t"
02198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02199
02200
02201
02202 #ifdef L1_DIFF //needs mmx2
02203 "movq (%0), %%mm0 \n\t"
02204 "psadbw (%1), %%mm0 \n\t"
02205 "movq (%0, %2), %%mm1 \n\t"
02206 "psadbw (%1, %2), %%mm1 \n\t"
02207 "movq (%0, %2, 2), %%mm2 \n\t"
02208 "psadbw (%1, %2, 2), %%mm2 \n\t"
02209 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02210 "psadbw (%1, %%"REG_a"), %%mm3 \n\t"
02211
02212 "movq (%0, %2, 4), %%mm4 \n\t"
02213 "paddw %%mm1, %%mm0 \n\t"
02214 "psadbw (%1, %2, 4), %%mm4 \n\t"
02215 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02216 "paddw %%mm2, %%mm0 \n\t"
02217 "psadbw (%1, %%"REG_d"), %%mm5 \n\t"
02218 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02219 "paddw %%mm3, %%mm0 \n\t"
02220 "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t"
02221 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02222 "paddw %%mm4, %%mm0 \n\t"
02223 "psadbw (%1, %%"REG_c"), %%mm7 \n\t"
02224 "paddw %%mm5, %%mm6 \n\t"
02225 "paddw %%mm7, %%mm6 \n\t"
02226 "paddw %%mm6, %%mm0 \n\t"
02227 #else //L1_DIFF
02228 #if defined (FAST_L2_DIFF)
02229 "pcmpeqb %%mm7, %%mm7 \n\t"
02230 "movq "MANGLE(b80)", %%mm6 \n\t"
02231 "pxor %%mm0, %%mm0 \n\t"
02232 #define REAL_L2_DIFF_CORE(a, b)\
02233 "movq " #a ", %%mm5 \n\t"\
02234 "movq " #b ", %%mm2 \n\t"\
02235 "pxor %%mm7, %%mm2 \n\t"\
02236 PAVGB(%%mm2, %%mm5)\
02237 "paddb %%mm6, %%mm5 \n\t"\
02238 "movq %%mm5, %%mm2 \n\t"\
02239 "psllw $8, %%mm5 \n\t"\
02240 "pmaddwd %%mm5, %%mm5 \n\t"\
02241 "pmaddwd %%mm2, %%mm2 \n\t"\
02242 "paddd %%mm2, %%mm5 \n\t"\
02243 "psrld $14, %%mm5 \n\t"\
02244 "paddd %%mm5, %%mm0 \n\t"
02245
02246 #else //defined (FAST_L2_DIFF)
02247 "pxor %%mm7, %%mm7 \n\t"
02248 "pxor %%mm0, %%mm0 \n\t"
02249 #define REAL_L2_DIFF_CORE(a, b)\
02250 "movq " #a ", %%mm5 \n\t"\
02251 "movq " #b ", %%mm2 \n\t"\
02252 "movq %%mm5, %%mm1 \n\t"\
02253 "movq %%mm2, %%mm3 \n\t"\
02254 "punpcklbw %%mm7, %%mm5 \n\t"\
02255 "punpckhbw %%mm7, %%mm1 \n\t"\
02256 "punpcklbw %%mm7, %%mm2 \n\t"\
02257 "punpckhbw %%mm7, %%mm3 \n\t"\
02258 "psubw %%mm2, %%mm5 \n\t"\
02259 "psubw %%mm3, %%mm1 \n\t"\
02260 "pmaddwd %%mm5, %%mm5 \n\t"\
02261 "pmaddwd %%mm1, %%mm1 \n\t"\
02262 "paddd %%mm1, %%mm5 \n\t"\
02263 "paddd %%mm5, %%mm0 \n\t"
02264
02265 #endif //defined (FAST_L2_DIFF)
02266
02267 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
02268
02269 L2_DIFF_CORE((%0) , (%1))
02270 L2_DIFF_CORE((%0, %2) , (%1, %2))
02271 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
02272 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
02273 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
02274 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
02275 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02276 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
02277
02278 #endif //L1_DIFF
02279
02280 "movq %%mm0, %%mm4 \n\t"
02281 "psrlq $32, %%mm0 \n\t"
02282 "paddd %%mm0, %%mm4 \n\t"
02283 "movd %%mm4, %%ecx \n\t"
02284 "shll $2, %%ecx \n\t"
02285 "mov %3, %%"REG_d" \n\t"
02286 "addl -4(%%"REG_d"), %%ecx \n\t"
02287 "addl 4(%%"REG_d"), %%ecx \n\t"
02288 "addl -1024(%%"REG_d"), %%ecx \n\t"
02289 "addl $4, %%ecx \n\t"
02290 "addl 1024(%%"REG_d"), %%ecx \n\t"
02291 "shrl $3, %%ecx \n\t"
02292 "movl %%ecx, (%%"REG_d") \n\t"
02293
02294
02295
02296
02297 "cmpl 512(%%"REG_d"), %%ecx \n\t"
02298 " jb 2f \n\t"
02299 "cmpl 516(%%"REG_d"), %%ecx \n\t"
02300 " jb 1f \n\t"
02301
02302 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02303 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02304 "movq (%0), %%mm0 \n\t"
02305 "movq (%0, %2), %%mm1 \n\t"
02306 "movq (%0, %2, 2), %%mm2 \n\t"
02307 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02308 "movq (%0, %2, 4), %%mm4 \n\t"
02309 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02310 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02311 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02312 "movq %%mm0, (%1) \n\t"
02313 "movq %%mm1, (%1, %2) \n\t"
02314 "movq %%mm2, (%1, %2, 2) \n\t"
02315 "movq %%mm3, (%1, %%"REG_a") \n\t"
02316 "movq %%mm4, (%1, %2, 4) \n\t"
02317 "movq %%mm5, (%1, %%"REG_d") \n\t"
02318 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02319 "movq %%mm7, (%1, %%"REG_c") \n\t"
02320 "jmp 4f \n\t"
02321
02322 "1: \n\t"
02323 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02324 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02325 "movq (%0), %%mm0 \n\t"
02326 PAVGB((%1), %%mm0)
02327 "movq (%0, %2), %%mm1 \n\t"
02328 PAVGB((%1, %2), %%mm1)
02329 "movq (%0, %2, 2), %%mm2 \n\t"
02330 PAVGB((%1, %2, 2), %%mm2)
02331 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02332 PAVGB((%1, %%REGa), %%mm3)
02333 "movq (%0, %2, 4), %%mm4 \n\t"
02334 PAVGB((%1, %2, 4), %%mm4)
02335 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02336 PAVGB((%1, %%REGd), %%mm5)
02337 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02338 PAVGB((%1, %%REGa, 2), %%mm6)
02339 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02340 PAVGB((%1, %%REGc), %%mm7)
02341 "movq %%mm0, (%1) \n\t"
02342 "movq %%mm1, (%1, %2) \n\t"
02343 "movq %%mm2, (%1, %2, 2) \n\t"
02344 "movq %%mm3, (%1, %%"REG_a") \n\t"
02345 "movq %%mm4, (%1, %2, 4) \n\t"
02346 "movq %%mm5, (%1, %%"REG_d") \n\t"
02347 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02348 "movq %%mm7, (%1, %%"REG_c") \n\t"
02349 "movq %%mm0, (%0) \n\t"
02350 "movq %%mm1, (%0, %2) \n\t"
02351 "movq %%mm2, (%0, %2, 2) \n\t"
02352 "movq %%mm3, (%0, %%"REG_a") \n\t"
02353 "movq %%mm4, (%0, %2, 4) \n\t"
02354 "movq %%mm5, (%0, %%"REG_d") \n\t"
02355 "movq %%mm6, (%0, %%"REG_a", 2) \n\t"
02356 "movq %%mm7, (%0, %%"REG_c") \n\t"
02357 "jmp 4f \n\t"
02358
02359 "2: \n\t"
02360 "cmpl 508(%%"REG_d"), %%ecx \n\t"
02361 " jb 3f \n\t"
02362
02363 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02364 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02365 "movq (%0), %%mm0 \n\t"
02366 "movq (%0, %2), %%mm1 \n\t"
02367 "movq (%0, %2, 2), %%mm2 \n\t"
02368 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02369 "movq (%1), %%mm4 \n\t"
02370 "movq (%1, %2), %%mm5 \n\t"
02371 "movq (%1, %2, 2), %%mm6 \n\t"
02372 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02373 PAVGB(%%mm4, %%mm0)
02374 PAVGB(%%mm5, %%mm1)
02375 PAVGB(%%mm6, %%mm2)
02376 PAVGB(%%mm7, %%mm3)
02377 PAVGB(%%mm4, %%mm0)
02378 PAVGB(%%mm5, %%mm1)
02379 PAVGB(%%mm6, %%mm2)
02380 PAVGB(%%mm7, %%mm3)
02381 "movq %%mm0, (%1) \n\t"
02382 "movq %%mm1, (%1, %2) \n\t"
02383 "movq %%mm2, (%1, %2, 2) \n\t"
02384 "movq %%mm3, (%1, %%"REG_a") \n\t"
02385 "movq %%mm0, (%0) \n\t"
02386 "movq %%mm1, (%0, %2) \n\t"
02387 "movq %%mm2, (%0, %2, 2) \n\t"
02388 "movq %%mm3, (%0, %%"REG_a") \n\t"
02389
02390 "movq (%0, %2, 4), %%mm0 \n\t"
02391 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02392 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02393 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02394 "movq (%1, %2, 4), %%mm4 \n\t"
02395 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02396 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02397 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02398 PAVGB(%%mm4, %%mm0)
02399 PAVGB(%%mm5, %%mm1)
02400 PAVGB(%%mm6, %%mm2)
02401 PAVGB(%%mm7, %%mm3)
02402 PAVGB(%%mm4, %%mm0)
02403 PAVGB(%%mm5, %%mm1)
02404 PAVGB(%%mm6, %%mm2)
02405 PAVGB(%%mm7, %%mm3)
02406 "movq %%mm0, (%1, %2, 4) \n\t"
02407 "movq %%mm1, (%1, %%"REG_d") \n\t"
02408 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02409 "movq %%mm3, (%1, %%"REG_c") \n\t"
02410 "movq %%mm0, (%0, %2, 4) \n\t"
02411 "movq %%mm1, (%0, %%"REG_d") \n\t"
02412 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02413 "movq %%mm3, (%0, %%"REG_c") \n\t"
02414 "jmp 4f \n\t"
02415
02416 "3: \n\t"
02417 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02418 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02419 "movq (%0), %%mm0 \n\t"
02420 "movq (%0, %2), %%mm1 \n\t"
02421 "movq (%0, %2, 2), %%mm2 \n\t"
02422 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02423 "movq (%1), %%mm4 \n\t"
02424 "movq (%1, %2), %%mm5 \n\t"
02425 "movq (%1, %2, 2), %%mm6 \n\t"
02426 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02427 PAVGB(%%mm4, %%mm0)
02428 PAVGB(%%mm5, %%mm1)
02429 PAVGB(%%mm6, %%mm2)
02430 PAVGB(%%mm7, %%mm3)
02431 PAVGB(%%mm4, %%mm0)
02432 PAVGB(%%mm5, %%mm1)
02433 PAVGB(%%mm6, %%mm2)
02434 PAVGB(%%mm7, %%mm3)
02435 PAVGB(%%mm4, %%mm0)
02436 PAVGB(%%mm5, %%mm1)
02437 PAVGB(%%mm6, %%mm2)
02438 PAVGB(%%mm7, %%mm3)
02439 "movq %%mm0, (%1) \n\t"
02440 "movq %%mm1, (%1, %2) \n\t"
02441 "movq %%mm2, (%1, %2, 2) \n\t"
02442 "movq %%mm3, (%1, %%"REG_a") \n\t"
02443 "movq %%mm0, (%0) \n\t"
02444 "movq %%mm1, (%0, %2) \n\t"
02445 "movq %%mm2, (%0, %2, 2) \n\t"
02446 "movq %%mm3, (%0, %%"REG_a") \n\t"
02447
02448 "movq (%0, %2, 4), %%mm0 \n\t"
02449 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02450 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02451 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02452 "movq (%1, %2, 4), %%mm4 \n\t"
02453 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02454 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02455 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02456 PAVGB(%%mm4, %%mm0)
02457 PAVGB(%%mm5, %%mm1)
02458 PAVGB(%%mm6, %%mm2)
02459 PAVGB(%%mm7, %%mm3)
02460 PAVGB(%%mm4, %%mm0)
02461 PAVGB(%%mm5, %%mm1)
02462 PAVGB(%%mm6, %%mm2)
02463 PAVGB(%%mm7, %%mm3)
02464 PAVGB(%%mm4, %%mm0)
02465 PAVGB(%%mm5, %%mm1)
02466 PAVGB(%%mm6, %%mm2)
02467 PAVGB(%%mm7, %%mm3)
02468 "movq %%mm0, (%1, %2, 4) \n\t"
02469 "movq %%mm1, (%1, %%"REG_d") \n\t"
02470 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02471 "movq %%mm3, (%1, %%"REG_c") \n\t"
02472 "movq %%mm0, (%0, %2, 4) \n\t"
02473 "movq %%mm1, (%0, %%"REG_d") \n\t"
02474 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02475 "movq %%mm3, (%0, %%"REG_c") \n\t"
02476
02477 "4: \n\t"
02478
02479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
02480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02481 );
02482 #else //HAVE_MMX2 || HAVE_AMD3DNOW
02483 {
02484 int y;
02485 int d=0;
02486
02487 int i;
02488
02489 for(y=0; y<8; y++){
02490 int x;
02491 for(x=0; x<8; x++){
02492 int ref= tempBlurred[ x + y*stride ];
02493 int cur= src[ x + y*stride ];
02494 int d1=ref - cur;
02495
02496
02497
02498 d+= d1*d1;
02499
02500 }
02501 }
02502 i=d;
02503 d= (
02504 4*d
02505 +(*(tempBlurredPast-256))
02506 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
02507 +(*(tempBlurredPast+256))
02508 +4)>>3;
02509 *tempBlurredPast=i;
02510
02511
02512
02513
02514
02515
02516
02517
02518
02519 if(d > maxNoise[1]){
02520 if(d < maxNoise[2]){
02521 for(y=0; y<8; y++){
02522 int x;
02523 for(x=0; x<8; x++){
02524 int ref= tempBlurred[ x + y*stride ];
02525 int cur= src[ x + y*stride ];
02526 tempBlurred[ x + y*stride ]=
02527 src[ x + y*stride ]=
02528 (ref + cur + 1)>>1;
02529 }
02530 }
02531 }else{
02532 for(y=0; y<8; y++){
02533 int x;
02534 for(x=0; x<8; x++){
02535 tempBlurred[ x + y*stride ]= src[ x + y*stride ];
02536 }
02537 }
02538 }
02539 }else{
02540 if(d < maxNoise[0]){
02541 for(y=0; y<8; y++){
02542 int x;
02543 for(x=0; x<8; x++){
02544 int ref= tempBlurred[ x + y*stride ];
02545 int cur= src[ x + y*stride ];
02546 tempBlurred[ x + y*stride ]=
02547 src[ x + y*stride ]=
02548 (ref*7 + cur + 4)>>3;
02549 }
02550 }
02551 }else{
02552 for(y=0; y<8; y++){
02553 int x;
02554 for(x=0; x<8; x++){
02555 int ref= tempBlurred[ x + y*stride ];
02556 int cur= src[ x + y*stride ];
02557 tempBlurred[ x + y*stride ]=
02558 src[ x + y*stride ]=
02559 (ref*3 + cur + 2)>>2;
02560 }
02561 }
02562 }
02563 }
02564 }
02565 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
02566 }
02567 #endif //HAVE_ALTIVEC
02568
02569 #if HAVE_MMX
02570
/**
 * Deblock one 8-byte-wide span across a block edge, using MMX.
 *
 * The routine walks rows that are `step` bytes apart, starting at
 * src + 3*step, and works on 8 adjacent bytes of every row at once.
 * It runs in three stages:
 *  1. Classification: builds two per-byte masks, eq_mask (enough
 *     neighbouring rows are "nearly equal") and dc_mask (max-min over the
 *     inspected rows is below 2*QP).
 *  2. Where both masks are set, 8 rows are replaced by a low-pass filtered
 *     value computed from running sums.
 *  3. Where eq_mask is NOT set, a correction is added to one row and
 *     subtracted from the next one.
 *
 * @param src    pointer to the pixels to filter
 * @param step   distance in bytes between two consecutive rows that are read
 * @param stride not referenced anywhere in this function body
 * @param c      context; reads mmxDcOffset[], mmxDcThreshold[], nonBQP,
 *               pQPb and ppMode.flatnessThreshold
 *
 * NOTE(review): none of the asm statements that store to memory through
 * pointer registers (sums, src rows, stack scratch) declares a "memory"
 * clobber -- confirm that this is safe with the compilers in use.
 */
02573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
02574 int64_t dc_mask, eq_mask, both_masks;
02575 int64_t sums[10*8*2];
02576 src+= step*3;
02577
// Preload the per-QP constants: mm7 = DC offset, mm6 = DC threshold.
02578 __asm__ volatile(
02579 "movq %0, %%mm7 \n\t"
02580 "movq %1, %%mm6 \n\t"
02581 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
02582 );
02583
// Stage 1: classification.
// Reads the 10 rows src + k*step (k = 0..9). For every pair of consecutive
// rows, (row_k - row_k+1 + mm7) is compared (signed bytes) with mm6 and the
// 0/-1 results are accumulated in mm0. mm4 / mm3 track the running
// maximum / minimum over rows 1..8.
02584 __asm__ volatile(
02585 "lea (%2, %3), %%"REG_a" \n\t"
02586
02587
02588
02589 "movq (%2), %%mm0 \n\t"
02590 "movq (%%"REG_a"), %%mm1 \n\t"
02591 "movq %%mm1, %%mm3 \n\t"
02592 "movq %%mm1, %%mm4 \n\t"
02593 "psubb %%mm1, %%mm0 \n\t"
02594 "paddb %%mm7, %%mm0 \n\t"
02595 "pcmpgtb %%mm6, %%mm0 \n\t"
02596
02597 "movq (%%"REG_a",%3), %%mm2 \n\t"
02598 PMAXUB(%%mm2, %%mm4)
02599 PMINUB(%%mm2, %%mm3, %%mm5)
02600 "psubb %%mm2, %%mm1 \n\t"
02601 "paddb %%mm7, %%mm1 \n\t"
02602 "pcmpgtb %%mm6, %%mm1 \n\t"
02603 "paddb %%mm1, %%mm0 \n\t"
02604
02605 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02606 PMAXUB(%%mm1, %%mm4)
02607 PMINUB(%%mm1, %%mm3, %%mm5)
02608 "psubb %%mm1, %%mm2 \n\t"
02609 "paddb %%mm7, %%mm2 \n\t"
02610 "pcmpgtb %%mm6, %%mm2 \n\t"
02611 "paddb %%mm2, %%mm0 \n\t"
02612
// REG_a now moves from src + step to src + 5*step.
02613 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
02614
02615 "movq (%2, %3, 4), %%mm2 \n\t"
02616 PMAXUB(%%mm2, %%mm4)
02617 PMINUB(%%mm2, %%mm3, %%mm5)
02618 "psubb %%mm2, %%mm1 \n\t"
02619 "paddb %%mm7, %%mm1 \n\t"
02620 "pcmpgtb %%mm6, %%mm1 \n\t"
02621 "paddb %%mm1, %%mm0 \n\t"
02622
02623 "movq (%%"REG_a"), %%mm1 \n\t"
02624 PMAXUB(%%mm1, %%mm4)
02625 PMINUB(%%mm1, %%mm3, %%mm5)
02626 "psubb %%mm1, %%mm2 \n\t"
02627 "paddb %%mm7, %%mm2 \n\t"
02628 "pcmpgtb %%mm6, %%mm2 \n\t"
02629 "paddb %%mm2, %%mm0 \n\t"
02630
02631 "movq (%%"REG_a", %3), %%mm2 \n\t"
02632 PMAXUB(%%mm2, %%mm4)
02633 PMINUB(%%mm2, %%mm3, %%mm5)
02634 "psubb %%mm2, %%mm1 \n\t"
02635 "paddb %%mm7, %%mm1 \n\t"
02636 "pcmpgtb %%mm6, %%mm1 \n\t"
02637 "paddb %%mm1, %%mm0 \n\t"
02638
02639 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02640 PMAXUB(%%mm1, %%mm4)
02641 PMINUB(%%mm1, %%mm3, %%mm5)
02642 "psubb %%mm1, %%mm2 \n\t"
02643 "paddb %%mm7, %%mm2 \n\t"
02644 "pcmpgtb %%mm6, %%mm2 \n\t"
02645 "paddb %%mm2, %%mm0 \n\t"
02646
02647 "movq (%2, %3, 8), %%mm2 \n\t"
02648 PMAXUB(%%mm2, %%mm4)
02649 PMINUB(%%mm2, %%mm3, %%mm5)
02650 "psubb %%mm2, %%mm1 \n\t"
02651 "paddb %%mm7, %%mm1 \n\t"
02652 "pcmpgtb %%mm6, %%mm1 \n\t"
02653 "paddb %%mm1, %%mm0 \n\t"
02654
// Last row (src + 9*step): only compared, not included in min/max.
02655 "movq (%%"REG_a", %3, 4), %%mm1 \n\t"
02656 "psubb %%mm1, %%mm2 \n\t"
02657 "paddb %%mm7, %%mm2 \n\t"
02658 "pcmpgtb %%mm6, %%mm2 \n\t"
02659 "paddb %%mm2, %%mm0 \n\t"
// mm4 = max - min (unsigned, saturating).
02660 "psubusb %%mm3, %%mm4 \n\t"
02661
// dc_mask (%1): 0xFF in every byte where (max - min) < 2*QP.
// The double pcmpeqb against zero inverts the first comparison result.
02662 "pxor %%mm6, %%mm6 \n\t"
02663 "movq %4, %%mm7 \n\t"
02664 "paddusb %%mm7, %%mm7 \n\t"
02665 "psubusb %%mm4, %%mm7 \n\t"
02666 "pcmpeqb %%mm6, %%mm7 \n\t"
02667 "pcmpeqb %%mm6, %%mm7 \n\t"
02668 "movq %%mm7, %1 \n\t"
02669
// eq_mask (%0): broadcast the low byte of flatnessThreshold to all 8 bytes,
// negate the accumulated -1 counts in mm0 and set 0xFF where
// count > threshold (signed byte compare).
02670 "movq %5, %%mm7 \n\t"
02671 "punpcklbw %%mm7, %%mm7 \n\t"
02672 "punpcklbw %%mm7, %%mm7 \n\t"
02673 "punpcklbw %%mm7, %%mm7 \n\t"
02674 "psubb %%mm0, %%mm6 \n\t"
02675 "pcmpgtb %%mm7, %%mm6 \n\t"
02676 "movq %%mm6, %0 \n\t"
02677
02678 : "=m" (eq_mask), "=m" (dc_mask)
02679 : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02680 : "%"REG_a
02681 );
02682
02683 both_masks = dc_mask & eq_mask;
02684
// Stage 2: low-pass filter, only if at least one byte column has both masks set.
02685 if(both_masks){
02686 x86_reg offset= -8*step;
02687 int64_t *temp_sums= sums;
02688
// Builds 10 pairs of 16-bit running sums (low / high 4 bytes) in sums[],
// i.e. 160 bytes at offsets 0..159 of %3.
02689 __asm__ volatile(
02690 "movq %2, %%mm0 \n\t"
02691 "pxor %%mm4, %%mm4 \n\t"
02692
// "first" row -> mm6: row 0 where |row0 - row1| < QP, otherwise row 1.
02693 "movq (%0), %%mm6 \n\t"
02694 "movq (%0, %1), %%mm5 \n\t"
02695 "movq %%mm5, %%mm1 \n\t"
02696 "movq %%mm6, %%mm2 \n\t"
02697 "psubusb %%mm6, %%mm5 \n\t"
02698 "psubusb %%mm1, %%mm2 \n\t"
02699 "por %%mm5, %%mm2 \n\t"
02700 "psubusb %%mm2, %%mm0 \n\t"
02701 "pcmpeqb %%mm4, %%mm0 \n\t"
02702
02703 "pxor %%mm6, %%mm1 \n\t"
02704 "pand %%mm0, %%mm1 \n\t"
02705 "pxor %%mm1, %%mm6 \n\t"
02706
02707
// "last" row -> mm7: row 9 where |row8 - row9| < QP, otherwise row 8.
// %0 is advanced by one step in between, so (%0, %1, 8) addresses row 9.
02708 "movq (%0, %1, 8), %%mm5 \n\t"
02709 "add %1, %0 \n\t"
02710 "movq (%0, %1, 8), %%mm7 \n\t"
02711 "movq %%mm5, %%mm1 \n\t"
02712 "movq %%mm7, %%mm2 \n\t"
02713 "psubusb %%mm7, %%mm5 \n\t"
02714 "psubusb %%mm1, %%mm2 \n\t"
02715 "por %%mm5, %%mm2 \n\t"
02716 "movq %2, %%mm0 \n\t"
02717 "psubusb %%mm2, %%mm0 \n\t"
02718 "pcmpeqb %%mm4, %%mm0 \n\t"
02719
02720 "pxor %%mm7, %%mm1 \n\t"
02721 "pand %%mm0, %%mm1 \n\t"
02722 "pxor %%mm1, %%mm7 \n\t"
02723
// Widen "first" to 16 bit: mm5 = low 4 bytes, mm6 = high 4 bytes.
02724 "movq %%mm6, %%mm5 \n\t"
02725 "punpckhbw %%mm4, %%mm6 \n\t"
02726 "punpcklbw %%mm4, %%mm5 \n\t"
02727
02728
// Accumulators mm0 / mm1 start at 4*first plus the constant w04
// (defined outside this block; presumably the rounding term -- TODO confirm).
02729 "movq %%mm5, %%mm0 \n\t"
02730 "movq %%mm6, %%mm1 \n\t"
02731 "psllw $2, %%mm0 \n\t"
02732 "psllw $2, %%mm1 \n\t"
02733 "paddw "MANGLE(w04)", %%mm0 \n\t"
02734 "paddw "MANGLE(w04)", %%mm1 \n\t"
02735
// NEXT: add the row at (%0) to the accumulators, then advance %0 by one step.
02736 #define NEXT\
02737 "movq (%0), %%mm2 \n\t"\
02738 "movq (%0), %%mm3 \n\t"\
02739 "add %1, %0 \n\t"\
02740 "punpcklbw %%mm4, %%mm2 \n\t"\
02741 "punpckhbw %%mm4, %%mm3 \n\t"\
02742 "paddw %%mm2, %%mm0 \n\t"\
02743 "paddw %%mm3, %%mm1 \n\t"
02744
// PREV: subtract the row at (%0) from the accumulators, then advance %0 by one step.
02745 #define PREV\
02746 "movq (%0), %%mm2 \n\t"\
02747 "movq (%0), %%mm3 \n\t"\
02748 "add %1, %0 \n\t"\
02749 "punpcklbw %%mm4, %%mm2 \n\t"\
02750 "punpckhbw %%mm4, %%mm3 \n\t"\
02751 "psubw %%mm2, %%mm0 \n\t"\
02752 "psubw %%mm3, %%mm1 \n\t"
02753
02754
// Sliding window: rows enter through NEXT, and one copy of "first"
// (mm5 / mm6) is removed per stored sum.
02755 NEXT
02756 NEXT
02757 NEXT
02758 "movq %%mm0, (%3) \n\t"
02759 "movq %%mm1, 8(%3) \n\t"
02760
02761 NEXT
02762 "psubw %%mm5, %%mm0 \n\t"
02763 "psubw %%mm6, %%mm1 \n\t"
02764 "movq %%mm0, 16(%3) \n\t"
02765 "movq %%mm1, 24(%3) \n\t"
02766
02767 NEXT
02768 "psubw %%mm5, %%mm0 \n\t"
02769 "psubw %%mm6, %%mm1 \n\t"
02770 "movq %%mm0, 32(%3) \n\t"
02771 "movq %%mm1, 40(%3) \n\t"
02772
02773 NEXT
02774 "psubw %%mm5, %%mm0 \n\t"
02775 "psubw %%mm6, %%mm1 \n\t"
02776 "movq %%mm0, 48(%3) \n\t"
02777 "movq %%mm1, 56(%3) \n\t"
02778
02779 NEXT
02780 "psubw %%mm5, %%mm0 \n\t"
02781 "psubw %%mm6, %%mm1 \n\t"
02782 "movq %%mm0, 64(%3) \n\t"
02783 "movq %%mm1, 72(%3) \n\t"
02784
// Widen "last" to 16 bit: mm6 = low 4 bytes, mm7 = high 4 bytes.
02785 "movq %%mm7, %%mm6 \n\t"
02786 "punpckhbw %%mm4, %%mm7 \n\t"
02787 "punpcklbw %%mm4, %%mm6 \n\t"
02788
// From here on the oldest rows leave the window through PREV (restarting at
// the original src + step) and one copy of "last" is added per stored sum.
02789 NEXT
02790 "mov %4, %0 \n\t"
02791 "add %1, %0 \n\t"
02792 PREV
02793 "movq %%mm0, 80(%3) \n\t"
02794 "movq %%mm1, 88(%3) \n\t"
02795
02796 PREV
02797 "paddw %%mm6, %%mm0 \n\t"
02798 "paddw %%mm7, %%mm1 \n\t"
02799 "movq %%mm0, 96(%3) \n\t"
02800 "movq %%mm1, 104(%3) \n\t"
02801
02802 PREV
02803 "paddw %%mm6, %%mm0 \n\t"
02804 "paddw %%mm7, %%mm1 \n\t"
02805 "movq %%mm0, 112(%3) \n\t"
02806 "movq %%mm1, 120(%3) \n\t"
02807
02808 PREV
02809 "paddw %%mm6, %%mm0 \n\t"
02810 "paddw %%mm7, %%mm1 \n\t"
02811 "movq %%mm0, 128(%3) \n\t"
02812 "movq %%mm1, 136(%3) \n\t"
02813
02814 PREV
02815 "paddw %%mm6, %%mm0 \n\t"
02816 "paddw %%mm7, %%mm1 \n\t"
02817 "movq %%mm0, 144(%3) \n\t"
02818 "movq %%mm1, 152(%3) \n\t"
02819
// Restore %0 to the value src had on entry to this asm statement.
02820 "mov %4, %0 \n\t"
02821
02822 : "+&r"(src)
02823 : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
02824 );
02825
02826 src+= step;
02827
// Write-back loop: for each of 8 rows,
//   out = (sums[k] + sums[k+2] + 2*row) >> 4
// is merged into the row under both_masks (mm6); bytes outside the mask keep
// their original value (mm5 = ~both_masks). offset runs from -8*step upward
// and the loop repeats while it is still negative after the add.
02828 __asm__ volatile(
02829 "movq %4, %%mm6 \n\t"
02830 "pcmpeqb %%mm5, %%mm5 \n\t"
02831 "pxor %%mm6, %%mm5 \n\t"
02832 "pxor %%mm7, %%mm7 \n\t"
02833
02834 "1: \n\t"
02835 "movq (%1), %%mm0 \n\t"
02836 "movq 8(%1), %%mm1 \n\t"
02837 "paddw 32(%1), %%mm0 \n\t"
02838 "paddw 40(%1), %%mm1 \n\t"
02839 "movq (%0, %3), %%mm2 \n\t"
02840 "movq %%mm2, %%mm3 \n\t"
02841 "movq %%mm2, %%mm4 \n\t"
02842 "punpcklbw %%mm7, %%mm2 \n\t"
02843 "punpckhbw %%mm7, %%mm3 \n\t"
02844 "paddw %%mm2, %%mm0 \n\t"
02845 "paddw %%mm3, %%mm1 \n\t"
02846 "paddw %%mm2, %%mm0 \n\t"
02847 "paddw %%mm3, %%mm1 \n\t"
02848 "psrlw $4, %%mm0 \n\t"
02849 "psrlw $4, %%mm1 \n\t"
02850 "packuswb %%mm1, %%mm0 \n\t"
02851 "pand %%mm6, %%mm0 \n\t"
02852 "pand %%mm5, %%mm4 \n\t"
02853 "por %%mm4, %%mm0 \n\t"
02854 "movq %%mm0, (%0, %3) \n\t"
02855 "add $16, %1 \n\t"
02856 "add %2, %0 \n\t"
02857 " js 1b \n\t"
02858
02859 : "+r"(offset), "+r"(temp_sums)
02860 : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks)
02861 );
02862 }else
02863 src+= step;
02864
// Stage 3: default filter, skipped when every byte of eq_mask is set.
// With r0..r7 = the rows temp_src + k*step (16-bit lanes), it computes
//   left   = 2*r0 - 5*r1 + 5*r2 - 2*r3
//   middle = 2*r2 - 5*r3 + 5*r4 - 2*r5
//   right  = 2*r4 - 5*r5 + 5*r6 - 2*r7
// and derives a correction d that is added to r3 and subtracted from r4.
02865 if(eq_mask != -1LL){
02866 uint8_t *temp_src= src;
02867 __asm__ volatile(
02868 "pxor %%mm7, %%mm7 \n\t"
// REG_c = 8-byte aligned scratch area of 32 bytes located BELOW the stack
// pointer. NOTE(review): nothing visible here reserves that memory; confirm
// it cannot be overwritten asynchronously (e.g. targets without a red zone).
02869 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
02870 "and "ALIGN_MASK", %%"REG_c" \n\t"
02871
02872
02873
02874 "movq (%0), %%mm0 \n\t"
02875 "movq %%mm0, %%mm1 \n\t"
02876 "punpcklbw %%mm7, %%mm0 \n\t"
02877 "punpckhbw %%mm7, %%mm1 \n\t"
02878
02879 "movq (%0, %1), %%mm2 \n\t"
02880 "lea (%0, %1, 2), %%"REG_a" \n\t"
02881 "movq %%mm2, %%mm3 \n\t"
02882 "punpcklbw %%mm7, %%mm2 \n\t"
02883 "punpckhbw %%mm7, %%mm3 \n\t"
02884
02885 "movq (%%"REG_a"), %%mm4 \n\t"
02886 "movq %%mm4, %%mm5 \n\t"
02887 "punpcklbw %%mm7, %%mm4 \n\t"
02888 "punpckhbw %%mm7, %%mm5 \n\t"
02889
02890 "paddw %%mm0, %%mm0 \n\t"
02891 "paddw %%mm1, %%mm1 \n\t"
02892 "psubw %%mm4, %%mm2 \n\t"
02893 "psubw %%mm5, %%mm3 \n\t"
02894 "psubw %%mm2, %%mm0 \n\t"
02895 "psubw %%mm3, %%mm1 \n\t"
02896
02897 "psllw $2, %%mm2 \n\t"
02898 "psllw $2, %%mm3 \n\t"
02899 "psubw %%mm2, %%mm0 \n\t"
02900 "psubw %%mm3, %%mm1 \n\t"
02901
02902 "movq (%%"REG_a", %1), %%mm2 \n\t"
02903 "movq %%mm2, %%mm3 \n\t"
02904 "punpcklbw %%mm7, %%mm2 \n\t"
02905 "punpckhbw %%mm7, %%mm3 \n\t"
02906
02907 "psubw %%mm2, %%mm0 \n\t"
02908 "psubw %%mm3, %%mm1 \n\t"
02909 "psubw %%mm2, %%mm0 \n\t"
02910 "psubw %%mm3, %%mm1 \n\t"
// scratch[0..15] = left energy (low / high words).
02911 "movq %%mm0, (%%"REG_c") \n\t"
02912 "movq %%mm1, 8(%%"REG_c") \n\t"
02913
02914 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
02915 "movq %%mm0, %%mm1 \n\t"
02916 "punpcklbw %%mm7, %%mm0 \n\t"
02917 "punpckhbw %%mm7, %%mm1 \n\t"
02918
02919 "psubw %%mm0, %%mm2 \n\t"
02920 "psubw %%mm1, %%mm3 \n\t"
// scratch[16..31] = r3 - r4 (low / high words).
02921 "movq %%mm2, 16(%%"REG_c") \n\t"
02922 "movq %%mm3, 24(%%"REG_c") \n\t"
02923 "paddw %%mm4, %%mm4 \n\t"
02924 "paddw %%mm5, %%mm5 \n\t"
02925 "psubw %%mm2, %%mm4 \n\t"
02926 "psubw %%mm3, %%mm5 \n\t"
02927
// %0 now points at r3 (temp_src + 3*step) and stays there for the final stores.
02928 "lea (%%"REG_a", %1), %0 \n\t"
02929 "psllw $2, %%mm2 \n\t"
02930 "psllw $2, %%mm3 \n\t"
02931 "psubw %%mm2, %%mm4 \n\t"
02932 "psubw %%mm3, %%mm5 \n\t"
02933
02934 "movq (%0, %1, 2), %%mm2 \n\t"
02935 "movq %%mm2, %%mm3 \n\t"
02936 "punpcklbw %%mm7, %%mm2 \n\t"
02937 "punpckhbw %%mm7, %%mm3 \n\t"
02938 "psubw %%mm2, %%mm4 \n\t"
02939 "psubw %%mm3, %%mm5 \n\t"
02940 "psubw %%mm2, %%mm4 \n\t"
02941 "psubw %%mm3, %%mm5 \n\t"
02942
// mm4 / mm5 now hold the middle energy; continue with the right energy.
02943 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02944 "punpcklbw %%mm7, %%mm6 \n\t"
02945 "psubw %%mm6, %%mm2 \n\t"
02946 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02947 "punpckhbw %%mm7, %%mm6 \n\t"
02948 "psubw %%mm6, %%mm3 \n\t"
02949
02950 "paddw %%mm0, %%mm0 \n\t"
02951 "paddw %%mm1, %%mm1 \n\t"
02952 "psubw %%mm2, %%mm0 \n\t"
02953 "psubw %%mm3, %%mm1 \n\t"
02954
02955 "psllw $2, %%mm2 \n\t"
02956 "psllw $2, %%mm3 \n\t"
02957 "psubw %%mm2, %%mm0 \n\t"
02958 "psubw %%mm3, %%mm1 \n\t"
02959
02960 "movq (%0, %1, 4), %%mm2 \n\t"
02961 "movq %%mm2, %%mm3 \n\t"
02962 "punpcklbw %%mm7, %%mm2 \n\t"
02963 "punpckhbw %%mm7, %%mm3 \n\t"
02964
02965 "paddw %%mm2, %%mm2 \n\t"
02966 "paddw %%mm3, %%mm3 \n\t"
02967 "psubw %%mm2, %%mm0 \n\t"
02968 "psubw %%mm3, %%mm1 \n\t"
02969
// mm0 / mm1 = right energy; reload the left energy into mm2 / mm3.
02970 "movq (%%"REG_c"), %%mm2 \n\t"
02971 "movq 8(%%"REG_c"), %%mm3 \n\t"
02972
// Absolute value of right (mm0, mm1) and left (mm2, mm3) energies.
02973 #if HAVE_MMX2
02974 "movq %%mm7, %%mm6 \n\t"
02975 "psubw %%mm0, %%mm6 \n\t"
02976 "pmaxsw %%mm6, %%mm0 \n\t"
02977 "movq %%mm7, %%mm6 \n\t"
02978 "psubw %%mm1, %%mm6 \n\t"
02979 "pmaxsw %%mm6, %%mm1 \n\t"
02980 "movq %%mm7, %%mm6 \n\t"
02981 "psubw %%mm2, %%mm6 \n\t"
02982 "pmaxsw %%mm6, %%mm2 \n\t"
02983 "movq %%mm7, %%mm6 \n\t"
02984 "psubw %%mm3, %%mm6 \n\t"
02985 "pmaxsw %%mm6, %%mm3 \n\t"
02986 #else
02987 "movq %%mm7, %%mm6 \n\t"
02988 "pcmpgtw %%mm0, %%mm6 \n\t"
02989 "pxor %%mm6, %%mm0 \n\t"
02990 "psubw %%mm6, %%mm0 \n\t"
02991 "movq %%mm7, %%mm6 \n\t"
02992 "pcmpgtw %%mm1, %%mm6 \n\t"
02993 "pxor %%mm6, %%mm1 \n\t"
02994 "psubw %%mm6, %%mm1 \n\t"
02995 "movq %%mm7, %%mm6 \n\t"
02996 "pcmpgtw %%mm2, %%mm6 \n\t"
02997 "pxor %%mm6, %%mm2 \n\t"
02998 "psubw %%mm6, %%mm2 \n\t"
02999 "movq %%mm7, %%mm6 \n\t"
03000 "pcmpgtw %%mm3, %%mm6 \n\t"
03001 "pxor %%mm6, %%mm3 \n\t"
03002 "psubw %%mm6, %%mm3 \n\t"
03003 #endif
03004
// mm0 / mm1 = min(|left|, |right|).
03005 #if HAVE_MMX2
03006 "pminsw %%mm2, %%mm0 \n\t"
03007 "pminsw %%mm3, %%mm1 \n\t"
03008 #else
03009 "movq %%mm0, %%mm6 \n\t"
03010 "psubusw %%mm2, %%mm6 \n\t"
03011 "psubw %%mm6, %%mm0 \n\t"
03012 "movq %%mm1, %%mm6 \n\t"
03013 "psubusw %%mm3, %%mm6 \n\t"
03014 "psubw %%mm6, %%mm1 \n\t"
03015 #endif
03016
// mm2 = the low four bytes of pQPb widened to words.
03017 "movd %2, %%mm2 \n\t"
03018 "punpcklbw %%mm7, %%mm2 \n\t"
03019
// mm6 / mm7 = sign masks of the middle energy, mm4 / mm5 = |middle|.
03020 "movq %%mm7, %%mm6 \n\t"
03021 "pcmpgtw %%mm4, %%mm6 \n\t"
03022 "pxor %%mm6, %%mm4 \n\t"
03023 "psubw %%mm6, %%mm4 \n\t"
03024 "pcmpgtw %%mm5, %%mm7 \n\t"
03025 "pxor %%mm7, %%mm5 \n\t"
03026 "psubw %%mm7, %%mm5 \n\t"
03027
// Keep |middle| only where it is below 8*QP, zero elsewhere.
03028 "psllw $3, %%mm2 \n\t"
03029 "movq %%mm2, %%mm3 \n\t"
03030 "pcmpgtw %%mm4, %%mm2 \n\t"
03031 "pcmpgtw %%mm5, %%mm3 \n\t"
03032 "pand %%mm2, %%mm4 \n\t"
03033 "pand %%mm3, %%mm5 \n\t"
03034
03035
// d = |middle| - min(|left|, |right|), saturating at 0.
03036 "psubusw %%mm0, %%mm4 \n\t"
03037 "psubusw %%mm1, %%mm5 \n\t"
03038
03039
// d = (d * w05 + w20) >> 6. The constants are defined outside this block
// (presumably 5 and 32 per word, i.e. a rounded 5/64 -- TODO confirm).
03040 "movq "MANGLE(w05)", %%mm2 \n\t"
03041 "pmullw %%mm2, %%mm4 \n\t"
03042 "pmullw %%mm2, %%mm5 \n\t"
03043 "movq "MANGLE(w20)", %%mm2 \n\t"
03044 "paddw %%mm2, %%mm4 \n\t"
03045 "paddw %%mm2, %%mm5 \n\t"
03046 "psrlw $6, %%mm4 \n\t"
03047 "psrlw $6, %%mm5 \n\t"
03048
// Reload r3 - r4 from the scratch area.
03049 "movq 16(%%"REG_c"), %%mm0 \n\t"
03050 "movq 24(%%"REG_c"), %%mm1 \n\t"
03051
03052 "pxor %%mm2, %%mm2 \n\t"
03053 "pxor %%mm3, %%mm3 \n\t"
03054
// mm2 / mm3 = sign masks of (r3 - r4); mm0 / mm1 = |r3 - r4| >> 1.
03055 "pcmpgtw %%mm0, %%mm2 \n\t"
03056 "pcmpgtw %%mm1, %%mm3 \n\t"
03057 "pxor %%mm2, %%mm0 \n\t"
03058 "pxor %%mm3, %%mm1 \n\t"
03059 "psubw %%mm2, %%mm0 \n\t"
03060 "psubw %%mm3, %%mm1 \n\t"
03061 "psrlw $1, %%mm0 \n\t"
03062 "psrlw $1, %%mm1 \n\t"
03063
// Keep d only where the sign of (r3 - r4) differs from the sign of middle.
03064 "pxor %%mm6, %%mm2 \n\t"
03065 "pxor %%mm7, %%mm3 \n\t"
03066 "pand %%mm2, %%mm4 \n\t"
03067 "pand %%mm3, %%mm5 \n\t"
03068
// d = min(d, |r3 - r4| >> 1).
03069 #if HAVE_MMX2
03070 "pminsw %%mm0, %%mm4 \n\t"
03071 "pminsw %%mm1, %%mm5 \n\t"
03072 #else
03073 "movq %%mm4, %%mm2 \n\t"
03074 "psubusw %%mm0, %%mm2 \n\t"
03075 "psubw %%mm2, %%mm4 \n\t"
03076 "movq %%mm5, %%mm2 \n\t"
03077 "psubusw %%mm1, %%mm2 \n\t"
03078 "psubw %%mm2, %%mm5 \n\t"
03079 #endif
// Give d the sign of the middle energy, pack to bytes, clear the bytes where
// eq_mask is set, then r3 += d and r4 -= d.
03080 "pxor %%mm6, %%mm4 \n\t"
03081 "pxor %%mm7, %%mm5 \n\t"
03082 "psubw %%mm6, %%mm4 \n\t"
03083 "psubw %%mm7, %%mm5 \n\t"
03084 "packsswb %%mm5, %%mm4 \n\t"
03085 "movq %3, %%mm1 \n\t"
03086 "pandn %%mm4, %%mm1 \n\t"
03087 "movq (%0), %%mm0 \n\t"
03088 "paddb %%mm1, %%mm0 \n\t"
03089 "movq %%mm0, (%0) \n\t"
03090 "movq (%0, %1), %%mm0 \n\t"
03091 "psubb %%mm1, %%mm0 \n\t"
03092 "movq %%mm0, (%0, %1) \n\t"
03093
03094 : "+r" (temp_src)
03095 : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask)
03096 : "%"REG_a, "%"REG_c
03097 );
03098 }
03099
03100
03101
03102
03103
03104 }
03105 #endif //HAVE_MMX
03106
// Forward declaration of the main filter routine; its definition follows
// further down in this file, after the blockCopy / duplicate helpers.
03107 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03108 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03109
// SCALED_CPY is (re)defined inside blockCopy below, so any earlier definition
// is dropped first.
03114 #undef SCALED_CPY
03115
/**
 * Copy a block of 8 rows from src to dst.
 *
 * MMX builds copy 8 bytes per row; the C fallback copies BLOCK_SIZE bytes per
 * row with memcpy. When levelFix is non-zero, the MMX builds additionally
 * rescale every pixel using the two packed 64-bit values found at
 * packedOffsetAndScale[0] (loaded into mm2, subtracted) and
 * packedOffsetAndScale[1] (loaded into mm3, multiplied).
 *
 * NOTE: in the non-MMX build both branches are the same plain memcpy loop,
 * i.e. levelFix has no effect there.
 *
 * @param dst                  destination of the first row
 * @param dstStride            distance in bytes between destination rows
 * @param src                  source of the first row
 * @param srcStride            distance in bytes between source rows
 * @param levelFix             non-zero selects the offset/scale path
 * @param packedOffsetAndScale pointer to two consecutive 64-bit values
 *                             (only read by the MMX levelFix path)
 *
 * NOTE(review): the asm statements store to dst rows without a "memory"
 * clobber or output operand for that memory -- confirm this is intended.
 */
03116 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
03117 int levelFix, int64_t *packedOffsetAndScale)
03118 {
03119 #if !HAVE_MMX
03120 int i;
03121 #endif
03122 if(levelFix){
03123 #if HAVE_MMX
// Operand 0 ties packedOffsetAndScale to REG_a ("=&a" / "0"): REG_a first
// serves as the pointer to the two constants and is then reused as
// src + srcStride. REG_d = dst + dstStride. mm4 = 0 for byte->word unpacking.
03124 __asm__ volatile(
03125 "movq (%%"REG_a"), %%mm2 \n\t"
03126 "movq 8(%%"REG_a"), %%mm3 \n\t"
03127 "lea (%2,%4), %%"REG_a" \n\t"
03128 "lea (%3,%5), %%"REG_d" \n\t"
03129 "pxor %%mm4, %%mm4 \n\t"
// MMX2 variant: each byte b is duplicated into a word (b<<8 | b), multiplied
// by mm3 keeping the high 16 bits (unsigned), then mm2 is subtracted and the
// result is packed back to bytes with unsigned saturation. Two rows per use.
03130 #if HAVE_MMX2
03131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03132 "movq " #src1 ", %%mm0 \n\t"\
03133 "movq " #src1 ", %%mm5 \n\t"\
03134 "movq " #src2 ", %%mm1 \n\t"\
03135 "movq " #src2 ", %%mm6 \n\t"\
03136 "punpcklbw %%mm0, %%mm0 \n\t"\
03137 "punpckhbw %%mm5, %%mm5 \n\t"\
03138 "punpcklbw %%mm1, %%mm1 \n\t"\
03139 "punpckhbw %%mm6, %%mm6 \n\t"\
03140 "pmulhuw %%mm3, %%mm0 \n\t"\
03141 "pmulhuw %%mm3, %%mm5 \n\t"\
03142 "pmulhuw %%mm3, %%mm1 \n\t"\
03143 "pmulhuw %%mm3, %%mm6 \n\t"\
03144 "psubw %%mm2, %%mm0 \n\t"\
03145 "psubw %%mm2, %%mm5 \n\t"\
03146 "psubw %%mm2, %%mm1 \n\t"\
03147 "psubw %%mm2, %%mm6 \n\t"\
03148 "packuswb %%mm5, %%mm0 \n\t"\
03149 "packuswb %%mm6, %%mm1 \n\t"\
03150 "movq %%mm0, " #dst1 " \n\t"\
03151 "movq %%mm1, " #dst2 " \n\t"\
03152
// Plain MMX variant: bytes are zero-extended to words, mm2 is subtracted
// first, the difference is shifted left by 6 and multiplied by mm3 keeping
// the high 16 bits (signed), then packed back with unsigned saturation.
03153 #else //HAVE_MMX2
03154 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03155 "movq " #src1 ", %%mm0 \n\t"\
03156 "movq " #src1 ", %%mm5 \n\t"\
03157 "punpcklbw %%mm4, %%mm0 \n\t"\
03158 "punpckhbw %%mm4, %%mm5 \n\t"\
03159 "psubw %%mm2, %%mm0 \n\t"\
03160 "psubw %%mm2, %%mm5 \n\t"\
03161 "movq " #src2 ", %%mm1 \n\t"\
03162 "psllw $6, %%mm0 \n\t"\
03163 "psllw $6, %%mm5 \n\t"\
03164 "pmulhw %%mm3, %%mm0 \n\t"\
03165 "movq " #src2 ", %%mm6 \n\t"\
03166 "pmulhw %%mm3, %%mm5 \n\t"\
03167 "punpcklbw %%mm4, %%mm1 \n\t"\
03168 "punpckhbw %%mm4, %%mm6 \n\t"\
03169 "psubw %%mm2, %%mm1 \n\t"\
03170 "psubw %%mm2, %%mm6 \n\t"\
03171 "psllw $6, %%mm1 \n\t"\
03172 "psllw $6, %%mm6 \n\t"\
03173 "pmulhw %%mm3, %%mm1 \n\t"\
03174 "pmulhw %%mm3, %%mm6 \n\t"\
03175 "packuswb %%mm5, %%mm0 \n\t"\
03176 "packuswb %%mm6, %%mm1 \n\t"\
03177 "movq %%mm0, " #dst1 " \n\t"\
03178 "movq %%mm1, " #dst2 " \n\t"\
03179
03180 #endif //HAVE_MMX2
// Extra macro level so that REGa / REGd in the arguments are expanded before
// being stringified by REAL_SCALED_CPY.
03181 #define SCALED_CPY(src1, src2, dst1, dst2)\
03182 REAL_SCALED_CPY(src1, src2, dst1, dst2)
03183
// Rows 0+1, 2+3, 4+5; then REG_a / REG_d advance by 4 rows for rows 6+7.
03184 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
03185 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
03186 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
03187 "lea (%%"REG_a",%4,4), %%"REG_a" \n\t"
03188 "lea (%%"REG_d",%5,4), %%"REG_d" \n\t"
03189 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03190
03191
03192 : "=&a" (packedOffsetAndScale)
03193 : "0" (packedOffsetAndScale),
03194 "r"(src),
03195 "r"(dst),
03196 "r" ((x86_reg)srcStride),
03197 "r" ((x86_reg)dstStride)
03198 : "%"REG_d
03199 );
03200 #else //HAVE_MMX
// C fallback: plain copy, no offset/scale is applied here.
03201 for(i=0; i<8; i++)
03202 memcpy( &(dst[dstStride*i]),
03203 &(src[srcStride*i]), BLOCK_SIZE);
03204 #endif //HAVE_MMX
03205 }else{
03206 #if HAVE_MMX
// Unscaled copy: REG_a = src + srcStride, REG_d = dst + dstStride,
// two rows of 8 bytes are moved per SIMPLE_CPY.
03207 __asm__ volatile(
03208 "lea (%0,%2), %%"REG_a" \n\t"
03209 "lea (%1,%3), %%"REG_d" \n\t"
03210
03211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
03212 "movq " #src1 ", %%mm0 \n\t"\
03213 "movq " #src2 ", %%mm1 \n\t"\
03214 "movq %%mm0, " #dst1 " \n\t"\
03215 "movq %%mm1, " #dst2 " \n\t"\
03216
03217 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03218 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03219
// Same row order as the scaled path: rows 0+1, 2+3, 4+5, then 6+7.
03220 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
03221 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03222 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03223 "lea (%%"REG_a",%2,4), %%"REG_a" \n\t"
03224 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
03225 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03226
03227 : : "r" (src),
03228 "r" (dst),
03229 "r" ((x86_reg)srcStride),
03230 "r" ((x86_reg)dstStride)
03231 : "%"REG_a, "%"REG_d
03232 );
03233 #else //HAVE_MMX
03234 for(i=0; i<8; i++)
03235 memcpy( &(dst[dstStride*i]),
03236 &(src[srcStride*i]), BLOCK_SIZE);
03237 #endif //HAVE_MMX
03238 }
03239 }
03240
03244 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03245 {
03246 #if HAVE_MMX
03247 __asm__ volatile(
03248 "movq (%0), %%mm0 \n\t"
03249 "add %1, %0 \n\t"
03250 "movq %%mm0, (%0) \n\t"
03251 "movq %%mm0, (%0, %1) \n\t"
03252 "movq %%mm0, (%0, %1, 2) \n\t"
03253 : "+r" (src)
03254 : "r" ((x86_reg)-stride)
03255 );
03256 #else
03257 int i;
03258 uint8_t *p=src;
03259 for(i=0; i<3; i++){
03260 p-= stride;
03261 memcpy(p, src, 8);
03262 }
03263 #endif
03264 }
03265
03269 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03270 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03271 {
03272 DECLARE_ALIGNED(8, PPContext, c)= *c2;
03273 int x,y;
03274 #ifdef COMPILE_TIME_MODE
03275 const int mode= COMPILE_TIME_MODE;
03276 #else
03277 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03278 #endif
03279 int black=0, white=255;
03280 int QPCorrecture= 256*256;
03281
03282 int copyAhead;
03283 #if HAVE_MMX
03284 int i;
03285 #endif
03286
03287 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03288 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03289
03290
03291 uint64_t * const yHistogram= c.yHistogram;
03292 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03293 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03294
03295
03296 #if HAVE_MMX
03297 for(i=0; i<57; i++){
03298 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03299 int threshold= offset*2 + 1;
03300 c.mmxDcOffset[i]= 0x7F - offset;
03301 c.mmxDcThreshold[i]= 0x7F - threshold;
03302 c.mmxDcOffset[i]*= 0x0101010101010101LL;
03303 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03304 }
03305 #endif
03306
03307 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03308 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
03309 || (mode & FFMPEG_DEINT_FILTER)
03310 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03311 else if( (mode & V_DEBLOCK)
03312 || (mode & LINEAR_IPOL_DEINT_FILTER)
03313 || (mode & MEDIAN_DEINT_FILTER)
03314 || (mode & V_A_DEBLOCK)) copyAhead=13;
03315 else if(mode & V_X1_FILTER) copyAhead=11;
03316
03317 else if(mode & DERING) copyAhead=9;
03318 else copyAhead=8;
03319
03320 copyAhead-= 8;
03321
03322 if(!isColor){
03323 uint64_t sum= 0;
03324 int i;
03325 uint64_t maxClipped;
03326 uint64_t clipped;
03327 double scale;
03328
03329 c.frameNum++;
03330
03331 if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
03332
03333 for(i=0; i<256; i++){
03334 sum+= yHistogram[i];
03335 }
03336
03337
03338 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03339
03340 clipped= sum;
03341 for(black=255; black>0; black--){
03342 if(clipped < maxClipped) break;
03343 clipped-= yHistogram[black];
03344 }
03345
03346 clipped= sum;
03347 for(white=0; white<256; white++){
03348 if(clipped < maxClipped) break;
03349 clipped-= yHistogram[white];
03350 }
03351
03352 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03353
03354 #if HAVE_MMX2
03355 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03356 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03357 #else
03358 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03359 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03360 #endif
03361
03362 c.packedYOffset|= c.packedYOffset<<32;
03363 c.packedYOffset|= c.packedYOffset<<16;
03364
03365 c.packedYScale|= c.packedYScale<<32;
03366 c.packedYScale|= c.packedYScale<<16;
03367
03368 if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
03369 else QPCorrecture= 256*256;
03370 }else{
03371 c.packedYScale= 0x0100010001000100LL;
03372 c.packedYOffset= 0;
03373 QPCorrecture= 256*256;
03374 }
03375
03376
03377 y=-BLOCK_SIZE;
03378 {
03379 const uint8_t *srcBlock= &(src[y*srcStride]);
03380 uint8_t *dstBlock= tempDst + dstStride;
03381
03382
03383
03384
03385 for(x=0; x<width; x+=BLOCK_SIZE){
03386
03387 #if HAVE_MMX2
03388
03389
03390
03391
03392
03393
03394
03395 __asm__(
03396 "mov %4, %%"REG_a" \n\t"
03397 "shr $2, %%"REG_a" \n\t"
03398 "and $6, %%"REG_a" \n\t"
03399 "add %5, %%"REG_a" \n\t"
03400 "mov %%"REG_a", %%"REG_d" \n\t"
03401 "imul %1, %%"REG_a" \n\t"
03402 "imul %3, %%"REG_d" \n\t"
03403 "prefetchnta 32(%%"REG_a", %0) \n\t"
03404 "prefetcht0 32(%%"REG_d", %2) \n\t"
03405 "add %1, %%"REG_a" \n\t"
03406 "add %3, %%"REG_d" \n\t"
03407 "prefetchnta 32(%%"REG_a", %0) \n\t"
03408 "prefetcht0 32(%%"REG_d", %2) \n\t"
03409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03411 : "%"REG_a, "%"REG_d
03412 );
03413
03414 #elif HAVE_AMD3DNOW
03415
03416
03417
03418
03419
03420
03421 #endif
03422
03423 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03424 srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03425
03426 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03427
03428 if(mode & LINEAR_IPOL_DEINT_FILTER)
03429 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03430 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03431 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03432 else if(mode & MEDIAN_DEINT_FILTER)
03433 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03434 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03435 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03436 else if(mode & FFMPEG_DEINT_FILTER)
03437 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03438 else if(mode & LOWPASS5_DEINT_FILTER)
03439 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03440
03441
03442
03443 dstBlock+=8;
03444 srcBlock+=8;
03445 }
03446 if(width==FFABS(dstStride))
03447 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03448 else{
03449 int i;
03450 for(i=0; i<copyAhead; i++){
03451 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03452 }
03453 }
03454 }
03455
03456 for(y=0; y<height; y+=BLOCK_SIZE){
03457
03458 const uint8_t *srcBlock= &(src[y*srcStride]);
03459 uint8_t *dstBlock= &(dst[y*dstStride]);
03460 #if HAVE_MMX
03461 uint8_t *tempBlock1= c.tempBlocks;
03462 uint8_t *tempBlock2= c.tempBlocks + 8;
03463 #endif
03464 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03465 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
03466 int QP=0;
03467
03468
03469 if(y+15 >= height){
03470 int i;
03471
03472
03473 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03474 FFMAX(height-y-copyAhead, 0), srcStride);
03475
03476
03477 for(i=FFMAX(height-y, 8); i<copyAhead+8; i++)
03478 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), FFABS(srcStride));
03479
03480
03481 linecpy(tempDst, dstBlock - dstStride, FFMIN(height-y+1, copyAhead+1), dstStride);
03482
03483
03484 for(i=height-y+1; i<=copyAhead; i++)
03485 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), FFABS(dstStride));
03486
03487 dstBlock= tempDst + dstStride;
03488 srcBlock= tempSrc;
03489 }
03490
03491
03492
03493
03494 for(x=0; x<width; x+=BLOCK_SIZE){
03495 const int stride= dstStride;
03496 #if HAVE_MMX
03497 uint8_t *tmpXchg;
03498 #endif
03499 if(isColor){
03500 QP= QPptr[x>>qpHShift];
03501 c.nonBQP= nonBQPptr[x>>qpHShift];
03502 }else{
03503 QP= QPptr[x>>4];
03504 QP= (QP* QPCorrecture + 256*128)>>16;
03505 c.nonBQP= nonBQPptr[x>>4];
03506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03507 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03508 }
03509 c.QP= QP;
03510 #if HAVE_MMX
03511 __asm__ volatile(
03512 "movd %1, %%mm7 \n\t"
03513 "packuswb %%mm7, %%mm7 \n\t"
03514 "packuswb %%mm7, %%mm7 \n\t"
03515 "packuswb %%mm7, %%mm7 \n\t"
03516 "movq %%mm7, %0 \n\t"
03517 : "=m" (c.pQPb)
03518 : "r" (QP)
03519 );
03520 #endif
03521
03522
03523 #if HAVE_MMX2
03524
03525
03526
03527
03528
03529
03530
03531 __asm__(
03532 "mov %4, %%"REG_a" \n\t"
03533 "shr $2, %%"REG_a" \n\t"
03534 "and $6, %%"REG_a" \n\t"
03535 "add %5, %%"REG_a" \n\t"
03536 "mov %%"REG_a", %%"REG_d" \n\t"
03537 "imul %1, %%"REG_a" \n\t"
03538 "imul %3, %%"REG_d" \n\t"
03539 "prefetchnta 32(%%"REG_a", %0) \n\t"
03540 "prefetcht0 32(%%"REG_d", %2) \n\t"
03541 "add %1, %%"REG_a" \n\t"
03542 "add %3, %%"REG_d" \n\t"
03543 "prefetchnta 32(%%"REG_a", %0) \n\t"
03544 "prefetcht0 32(%%"REG_d", %2) \n\t"
03545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03547 : "%"REG_a, "%"REG_d
03548 );
03549
03550 #elif HAVE_AMD3DNOW
03551
03552
03553
03554
03555
03556
03557 #endif
03558
03559 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03560 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03561
03562 if(mode & LINEAR_IPOL_DEINT_FILTER)
03563 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03564 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03565 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03566 else if(mode & MEDIAN_DEINT_FILTER)
03567 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03568 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03569 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03570 else if(mode & FFMPEG_DEINT_FILTER)
03571 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03572 else if(mode & LOWPASS5_DEINT_FILTER)
03573 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03574
03575
03576
03577
03578
03579 if(y + 8 < height){
03580 if(mode & V_X1_FILTER)
03581 RENAME(vertX1Filter)(dstBlock, stride, &c);
03582 else if(mode & V_DEBLOCK){
03583 const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03584
03585 if(t==1)
03586 RENAME(doVertLowPass)(dstBlock, stride, &c);
03587 else if(t==2)
03588 RENAME(doVertDefFilter)(dstBlock, stride, &c);
03589 }else if(mode & V_A_DEBLOCK){
03590 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03591 }
03592 }
03593
03594 #if HAVE_MMX
03595 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03596 #endif
03597
03598 if(x - 8 >= 0){
03599 #if HAVE_MMX
03600 if(mode & H_X1_FILTER)
03601 RENAME(vertX1Filter)(tempBlock1, 16, &c);
03602 else if(mode & H_DEBLOCK){
03603
03604 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03605
03606 if(t==1)
03607 RENAME(doVertLowPass)(tempBlock1, 16, &c);
03608 else if(t==2)
03609 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03610 }else if(mode & H_A_DEBLOCK){
03611 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03612 }
03613
03614 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03615
03616 #else
03617 if(mode & H_X1_FILTER)
03618 horizX1Filter(dstBlock-4, stride, QP);
03619 else if(mode & H_DEBLOCK){
03620 #if HAVE_ALTIVEC
03621 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]);
03622 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03623
03624 const int t=vertClassify_altivec(tempBlock-48, 16, &c);
03625 if(t==1) {
03626 doVertLowPass_altivec(tempBlock-48, 16, &c);
03627 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03628 }
03629 else if(t==2) {
03630 doVertDefFilter_altivec(tempBlock-48, 16, &c);
03631 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03632 }
03633 #else
03634 const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03635
03636 if(t==1)
03637 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03638 else if(t==2)
03639 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03640 #endif
03641 }else if(mode & H_A_DEBLOCK){
03642 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03643 }
03644 #endif //HAVE_MMX
03645 if(mode & DERING){
03646
03647 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03648 }
03649
03650 if(mode & TEMP_NOISE_FILTER)
03651 {
03652 RENAME(tempNoiseReducer)(dstBlock-8, stride,
03653 c.tempBlurred[isColor] + y*dstStride + x,
03654 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03655 c.ppMode.maxTmpNoise);
03656 }
03657 }
03658
03659 dstBlock+=8;
03660 srcBlock+=8;
03661
03662 #if HAVE_MMX
03663 tmpXchg= tempBlock1;
03664 tempBlock1= tempBlock2;
03665 tempBlock2 = tmpXchg;
03666 #endif
03667 }
03668
03669 if(mode & DERING){
03670 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03671 }
03672
03673 if((mode & TEMP_NOISE_FILTER)){
03674 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03675 c.tempBlurred[isColor] + y*dstStride + x,
03676 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03677 c.ppMode.maxTmpNoise);
03678 }
03679
03680
03681 if(y+15 >= height){
03682 uint8_t *dstBlock= &(dst[y*dstStride]);
03683 if(width==FFABS(dstStride))
03684 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03685 else{
03686 int i;
03687 for(i=0; i<height-y; i++){
03688 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03689 }
03690 }
03691 }
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701 }
03702 #if HAVE_AMD3DNOW
03703 __asm__ volatile("femms");
03704 #elif HAVE_MMX
03705 __asm__ volatile("emms");
03706 #endif
03707
03708 #ifdef DEBUG_BRIGHTNESS
03709 if(!isColor){
03710 int max=1;
03711 int i;
03712 for(i=0; i<256; i++)
03713 if(yHistogram[i] > max) max=yHistogram[i];
03714
03715 for(i=1; i<256; i++){
03716 int x;
03717 int start=yHistogram[i-1]/(max/256+1);
03718 int end=yHistogram[i]/(max/256+1);
03719 int inc= end > start ? 1 : -1;
03720 for(x=start; x!=end+inc; x+=inc)
03721 dst[ i*dstStride + x]+=128;
03722 }
03723
03724 for(i=0; i<100; i+=2){
03725 dst[ (white)*dstStride + i]+=128;
03726 dst[ (black)*dstStride + i]+=128;
03727 }
03728 }
03729 #endif
03730
03731 *c2= c;
03732
03733 }