00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
/**
 * Horizontal half-pel "put" for an 8-byte-wide block.
 *
 * Each output row is PAVGB(pixels[x], pixels[x + 1]) for x = 0..7, so 9 source
 * bytes are read per row. PAVGB is a macro defined outside this chunk;
 * presumably a per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param block     destination; rows are line_size bytes apart
 * @param pixels    source; rows are line_size bytes apart
 * @param line_size row stride shared by source and destination
 * @param h         row count; four rows are produced per iteration and the
 *                  loop only exits when h reaches exactly 0 (subl $4 / jnz),
 *                  so h is expected to be a positive multiple of 4
 */
00030 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00031 {
00032 __asm__ volatile(
/* REG_a = 2 * line_size: the step used to advance both pointers by two rows */
00033 "lea (%3, %3), %%"REG_a" \n\t"
00034 "1: \n\t"
/* rows 0 and 1: load, average with the byte to the right, store */
00035 "movq (%1), %%mm0 \n\t"
00036 "movq (%1, %3), %%mm1 \n\t"
00037 PAVGB" 1(%1), %%mm0 \n\t"
00038 PAVGB" 1(%1, %3), %%mm1 \n\t"
00039 "movq %%mm0, (%2) \n\t"
00040 "movq %%mm1, (%2, %3) \n\t"
00041 "add %%"REG_a", %1 \n\t"
00042 "add %%"REG_a", %2 \n\t"
/* rows 2 and 3: same operation on the next pair of rows */
00043 "movq (%1), %%mm0 \n\t"
00044 "movq (%1, %3), %%mm1 \n\t"
00045 PAVGB" 1(%1), %%mm0 \n\t"
00046 PAVGB" 1(%1, %3), %%mm1 \n\t"
00047 "add %%"REG_a", %1 \n\t"
00048 "movq %%mm0, (%2) \n\t"
00049 "movq %%mm1, (%2, %3) \n\t"
00050 "add %%"REG_a", %2 \n\t"
00051 "subl $4, %0 \n\t"
00052 "jnz 1b \n\t"
00053 :"+g"(h), "+S"(pixels), "+D"(block)
00054 :"r" ((x86_reg)line_size)
00055 :"%"REG_a, "memory");
00056 }
00057
/**
 * Two-source "put" for a 4-byte-wide block: dst row = PAVGB(src1 row, src2 row).
 *
 * src1 advances by src1Stride per row and dst by dstStride per row. src2 is
 * read as a packed buffer with a fixed 4 bytes per row (offsets 0/4/8/12,
 * then +16). PAVGB is a macro defined outside this chunk; presumably a
 * per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles four rows per iteration and exits only when the
 *          counter reaches exactly 0, so the remainder is expected to be a
 *          positive multiple of 4.
 */
00058 static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00059 {
00060 __asm__ volatile(
/* odd h: peel off one row so the unrolled loop sees a multiple of 4 */
00061 "testl $1, %0 \n\t"
00062 " jz 1f \n\t"
00063 "movd (%1), %%mm0 \n\t"
00064 "movd (%2), %%mm1 \n\t"
00065 "add %4, %1 \n\t"
00066 "add $4, %2 \n\t"
00067 PAVGB" %%mm1, %%mm0 \n\t"
00068 "movd %%mm0, (%3) \n\t"
00069 "add %5, %3 \n\t"
00070 "decl %0 \n\t"
00071 "1: \n\t"
/* rows 0 and 1 of this iteration */
00072 "movd (%1), %%mm0 \n\t"
00073 "add %4, %1 \n\t"
00074 "movd (%1), %%mm1 \n\t"
00075 "movd (%2), %%mm2 \n\t"
00076 "movd 4(%2), %%mm3 \n\t"
00077 "add %4, %1 \n\t"
00078 PAVGB" %%mm2, %%mm0 \n\t"
00079 PAVGB" %%mm3, %%mm1 \n\t"
00080 "movd %%mm0, (%3) \n\t"
00081 "add %5, %3 \n\t"
00082 "movd %%mm1, (%3) \n\t"
00083 "add %5, %3 \n\t"
/* rows 2 and 3 of this iteration */
00084 "movd (%1), %%mm0 \n\t"
00085 "add %4, %1 \n\t"
00086 "movd (%1), %%mm1 \n\t"
00087 "movd 8(%2), %%mm2 \n\t"
00088 "movd 12(%2), %%mm3 \n\t"
00089 "add %4, %1 \n\t"
00090 PAVGB" %%mm2, %%mm0 \n\t"
00091 PAVGB" %%mm3, %%mm1 \n\t"
00092 "movd %%mm0, (%3) \n\t"
00093 "add %5, %3 \n\t"
00094 "movd %%mm1, (%3) \n\t"
00095 "add %5, %3 \n\t"
/* four packed 4-byte src2 rows consumed */
00096 "add $16, %2 \n\t"
00097 "subl $4, %0 \n\t"
00098 "jnz 1b \n\t"
00099 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00100 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00101 #else
00102 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00103 #endif
00104 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00105 :"memory");
00106 }
00107
00108
/**
 * Two-source "put" for an 8-byte-wide block: dst row = PAVGB(src1 row, src2 row).
 *
 * src1 advances by src1Stride per row and dst by dstStride per row. src2 is
 * read as a packed buffer with a fixed 8 bytes per row (offsets 0/8/16/24,
 * then +32). PAVGB is a macro defined outside this chunk; presumably a
 * per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles four rows per iteration and exits only when the
 *          counter reaches exactly 0, so the remainder is expected to be a
 *          positive multiple of 4.
 */
00109 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00110 {
00111 __asm__ volatile(
/* odd h: peel off one row so the unrolled loop sees a multiple of 4 */
00112 "testl $1, %0 \n\t"
00113 " jz 1f \n\t"
00114 "movq (%1), %%mm0 \n\t"
00115 "movq (%2), %%mm1 \n\t"
00116 "add %4, %1 \n\t"
00117 "add $8, %2 \n\t"
00118 PAVGB" %%mm1, %%mm0 \n\t"
00119 "movq %%mm0, (%3) \n\t"
00120 "add %5, %3 \n\t"
00121 "decl %0 \n\t"
00122 "1: \n\t"
/* rows 0 and 1: load src1, average directly with packed src2 in memory */
00123 "movq (%1), %%mm0 \n\t"
00124 "add %4, %1 \n\t"
00125 "movq (%1), %%mm1 \n\t"
00126 "add %4, %1 \n\t"
00127 PAVGB" (%2), %%mm0 \n\t"
00128 PAVGB" 8(%2), %%mm1 \n\t"
00129 "movq %%mm0, (%3) \n\t"
00130 "add %5, %3 \n\t"
00131 "movq %%mm1, (%3) \n\t"
00132 "add %5, %3 \n\t"
/* rows 2 and 3 */
00133 "movq (%1), %%mm0 \n\t"
00134 "add %4, %1 \n\t"
00135 "movq (%1), %%mm1 \n\t"
00136 "add %4, %1 \n\t"
00137 PAVGB" 16(%2), %%mm0 \n\t"
00138 PAVGB" 24(%2), %%mm1 \n\t"
00139 "movq %%mm0, (%3) \n\t"
00140 "add %5, %3 \n\t"
00141 "movq %%mm1, (%3) \n\t"
00142 "add %5, %3 \n\t"
/* four packed 8-byte src2 rows consumed */
00143 "add $32, %2 \n\t"
00144 "subl $4, %0 \n\t"
00145 "jnz 1b \n\t"
00146 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00147 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00148 #else
00149 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00150 #endif
00151 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00152 :"memory");
00153
00154
00155
00156
00157 }
00158
/**
 * Two-source "put" for an 8-byte-wide block using the complement trick:
 * dst row = ~PAVGB(~src1 row, ~src2 row).
 *
 * mm6 is set to all ones (pcmpeqb mm6, mm6) and XORed into both inputs and
 * into the result. PAVGB is a macro defined outside this chunk; if it is a
 * round-up byte average, the net effect is an average that rounds down
 * (matching the "no_rnd" name) -- confirm.
 *
 * src1 advances by src1Stride per row, dst by dstStride per row; src2 is a
 * packed buffer with a fixed 8 bytes per row.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles four rows per iteration and exits only when the
 *          counter reaches exactly 0, so the remainder is expected to be a
 *          positive multiple of 4.
 */
00159 static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00160 {
00161 __asm__ volatile(
/* mm6 = 0xFF in every byte; XOR with it is a bitwise complement */
00162 "pcmpeqb %%mm6, %%mm6 \n\t"
/* odd h: peel off one row */
00163 "testl $1, %0 \n\t"
00164 " jz 1f \n\t"
00165 "movq (%1), %%mm0 \n\t"
00166 "movq (%2), %%mm1 \n\t"
00167 "add %4, %1 \n\t"
00168 "add $8, %2 \n\t"
00169 "pxor %%mm6, %%mm0 \n\t"
00170 "pxor %%mm6, %%mm1 \n\t"
00171 PAVGB" %%mm1, %%mm0 \n\t"
00172 "pxor %%mm6, %%mm0 \n\t"
00173 "movq %%mm0, (%3) \n\t"
00174 "add %5, %3 \n\t"
00175 "decl %0 \n\t"
00176 "1: \n\t"
/* rows 0 and 1: complement inputs, average, complement result */
00177 "movq (%1), %%mm0 \n\t"
00178 "add %4, %1 \n\t"
00179 "movq (%1), %%mm1 \n\t"
00180 "add %4, %1 \n\t"
00181 "movq (%2), %%mm2 \n\t"
00182 "movq 8(%2), %%mm3 \n\t"
00183 "pxor %%mm6, %%mm0 \n\t"
00184 "pxor %%mm6, %%mm1 \n\t"
00185 "pxor %%mm6, %%mm2 \n\t"
00186 "pxor %%mm6, %%mm3 \n\t"
00187 PAVGB" %%mm2, %%mm0 \n\t"
00188 PAVGB" %%mm3, %%mm1 \n\t"
00189 "pxor %%mm6, %%mm0 \n\t"
00190 "pxor %%mm6, %%mm1 \n\t"
00191 "movq %%mm0, (%3) \n\t"
00192 "add %5, %3 \n\t"
00193 "movq %%mm1, (%3) \n\t"
00194 "add %5, %3 \n\t"
/* rows 2 and 3 */
00195 "movq (%1), %%mm0 \n\t"
00196 "add %4, %1 \n\t"
00197 "movq (%1), %%mm1 \n\t"
00198 "add %4, %1 \n\t"
00199 "movq 16(%2), %%mm2 \n\t"
00200 "movq 24(%2), %%mm3 \n\t"
00201 "pxor %%mm6, %%mm0 \n\t"
00202 "pxor %%mm6, %%mm1 \n\t"
00203 "pxor %%mm6, %%mm2 \n\t"
00204 "pxor %%mm6, %%mm3 \n\t"
00205 PAVGB" %%mm2, %%mm0 \n\t"
00206 PAVGB" %%mm3, %%mm1 \n\t"
00207 "pxor %%mm6, %%mm0 \n\t"
00208 "pxor %%mm6, %%mm1 \n\t"
00209 "movq %%mm0, (%3) \n\t"
00210 "add %5, %3 \n\t"
00211 "movq %%mm1, (%3) \n\t"
00212 "add %5, %3 \n\t"
/* four packed 8-byte src2 rows consumed */
00213 "add $32, %2 \n\t"
00214 "subl $4, %0 \n\t"
00215 "jnz 1b \n\t"
00216 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00217 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00218 #else
00219 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00220 #endif
00221 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00222 :"memory");
00223
00224
00225
00226
00227 }
00228
/**
 * Two-source "avg" for a 4-byte-wide block:
 * dst row = PAVGB(PAVGB(src1 row, src2 row), dst row).
 *
 * src1 advances by src1Stride per row and dst by dstStride per row. src2 is
 * read as a packed buffer with a fixed 4 bytes per row. PAVGB is a macro
 * defined outside this chunk; presumably a per-byte unsigned average that
 * rounds up -- confirm.
 *
 * NOTE(review): rows are loaded and stored with 32-bit movd, but PAVGB is also
 * used with memory operands on src2 and dst; if PAVGB is the 64-bit MMX form,
 * those operands read 8 bytes, i.e. 4 bytes beyond the row -- confirm the
 * buffers tolerate this.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles four rows per iteration and exits only when the
 *          counter reaches exactly 0, so the remainder is expected to be a
 *          positive multiple of 4.
 */
00229 static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00230 {
00231 __asm__ volatile(
/* odd h: peel off one row */
00232 "testl $1, %0 \n\t"
00233 " jz 1f \n\t"
00234 "movd (%1), %%mm0 \n\t"
00235 "movd (%2), %%mm1 \n\t"
00236 "add %4, %1 \n\t"
00237 "add $4, %2 \n\t"
00238 PAVGB" %%mm1, %%mm0 \n\t"
/* blend with what is already in dst */
00239 PAVGB" (%3), %%mm0 \n\t"
00240 "movd %%mm0, (%3) \n\t"
00241 "add %5, %3 \n\t"
00242 "decl %0 \n\t"
00243 "1: \n\t"
/* rows 0 and 1 */
00244 "movd (%1), %%mm0 \n\t"
00245 "add %4, %1 \n\t"
00246 "movd (%1), %%mm1 \n\t"
00247 "add %4, %1 \n\t"
00248 PAVGB" (%2), %%mm0 \n\t"
00249 PAVGB" 4(%2), %%mm1 \n\t"
00250 PAVGB" (%3), %%mm0 \n\t"
00251 "movd %%mm0, (%3) \n\t"
00252 "add %5, %3 \n\t"
00253 PAVGB" (%3), %%mm1 \n\t"
00254 "movd %%mm1, (%3) \n\t"
00255 "add %5, %3 \n\t"
/* rows 2 and 3 */
00256 "movd (%1), %%mm0 \n\t"
00257 "add %4, %1 \n\t"
00258 "movd (%1), %%mm1 \n\t"
00259 "add %4, %1 \n\t"
00260 PAVGB" 8(%2), %%mm0 \n\t"
00261 PAVGB" 12(%2), %%mm1 \n\t"
00262 PAVGB" (%3), %%mm0 \n\t"
00263 "movd %%mm0, (%3) \n\t"
00264 "add %5, %3 \n\t"
00265 PAVGB" (%3), %%mm1 \n\t"
00266 "movd %%mm1, (%3) \n\t"
00267 "add %5, %3 \n\t"
/* four packed 4-byte src2 rows consumed */
00268 "add $16, %2 \n\t"
00269 "subl $4, %0 \n\t"
00270 "jnz 1b \n\t"
00271 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00272 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00273 #else
00274 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00275 #endif
00276 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00277 :"memory");
00278 }
00279
00280
/**
 * Two-source "avg" for an 8-byte-wide block:
 * dst row = PAVGB(PAVGB(src1 row, src2 row), dst row).
 *
 * src1 advances by src1Stride per row and dst by dstStride per row. src2 is
 * read as a packed buffer with a fixed 8 bytes per row. PAVGB is a macro
 * defined outside this chunk; presumably a per-byte unsigned average that
 * rounds up -- confirm.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles four rows per iteration and exits only when the
 *          counter reaches exactly 0, so the remainder is expected to be a
 *          positive multiple of 4.
 */
00281 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00282 {
00283 __asm__ volatile(
/* odd h: peel off one row */
00284 "testl $1, %0 \n\t"
00285 " jz 1f \n\t"
00286 "movq (%1), %%mm0 \n\t"
00287 "movq (%2), %%mm1 \n\t"
00288 "add %4, %1 \n\t"
00289 "add $8, %2 \n\t"
00290 PAVGB" %%mm1, %%mm0 \n\t"
/* blend with what is already in dst */
00291 PAVGB" (%3), %%mm0 \n\t"
00292 "movq %%mm0, (%3) \n\t"
00293 "add %5, %3 \n\t"
00294 "decl %0 \n\t"
00295 "1: \n\t"
/* rows 0 and 1 */
00296 "movq (%1), %%mm0 \n\t"
00297 "add %4, %1 \n\t"
00298 "movq (%1), %%mm1 \n\t"
00299 "add %4, %1 \n\t"
00300 PAVGB" (%2), %%mm0 \n\t"
00301 PAVGB" 8(%2), %%mm1 \n\t"
00302 PAVGB" (%3), %%mm0 \n\t"
00303 "movq %%mm0, (%3) \n\t"
00304 "add %5, %3 \n\t"
00305 PAVGB" (%3), %%mm1 \n\t"
00306 "movq %%mm1, (%3) \n\t"
00307 "add %5, %3 \n\t"
/* rows 2 and 3 */
00308 "movq (%1), %%mm0 \n\t"
00309 "add %4, %1 \n\t"
00310 "movq (%1), %%mm1 \n\t"
00311 "add %4, %1 \n\t"
00312 PAVGB" 16(%2), %%mm0 \n\t"
00313 PAVGB" 24(%2), %%mm1 \n\t"
00314 PAVGB" (%3), %%mm0 \n\t"
00315 "movq %%mm0, (%3) \n\t"
00316 "add %5, %3 \n\t"
00317 PAVGB" (%3), %%mm1 \n\t"
00318 "movq %%mm1, (%3) \n\t"
00319 "add %5, %3 \n\t"
/* four packed 8-byte src2 rows consumed */
00320 "add $32, %2 \n\t"
00321 "subl $4, %0 \n\t"
00322 "jnz 1b \n\t"
00323 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00324 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00325 #else
00326 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00327 #endif
00328 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00329 :"memory");
00330
00331
00332
00333
00334 }
00335
/**
 * Horizontal half-pel "put" for a 16-byte-wide block.
 *
 * Each output row is PAVGB(pixels[x], pixels[x + 1]) for x = 0..15, handled
 * as two 8-byte halves (offsets 0 and 8), so 17 source bytes are read per
 * row. PAVGB is a macro defined outside this chunk; presumably a per-byte
 * unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param block     destination; rows are line_size bytes apart
 * @param pixels    source; rows are line_size bytes apart
 * @param line_size row stride shared by source and destination
 * @param h         row count; four rows are produced per iteration and the
 *                  loop only exits when h reaches exactly 0 (subl $4 / jnz),
 *                  so h is expected to be a positive multiple of 4
 */
00336 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00337 {
00338 __asm__ volatile(
/* REG_a = 2 * line_size */
00339 "lea (%3, %3), %%"REG_a" \n\t"
00340 "1: \n\t"
/* rows 0 and 1: mm0/mm1 = left halves, mm2/mm3 = right halves */
00341 "movq (%1), %%mm0 \n\t"
00342 "movq (%1, %3), %%mm1 \n\t"
00343 "movq 8(%1), %%mm2 \n\t"
00344 "movq 8(%1, %3), %%mm3 \n\t"
00345 PAVGB" 1(%1), %%mm0 \n\t"
00346 PAVGB" 1(%1, %3), %%mm1 \n\t"
00347 PAVGB" 9(%1), %%mm2 \n\t"
00348 PAVGB" 9(%1, %3), %%mm3 \n\t"
00349 "movq %%mm0, (%2) \n\t"
00350 "movq %%mm1, (%2, %3) \n\t"
00351 "movq %%mm2, 8(%2) \n\t"
00352 "movq %%mm3, 8(%2, %3) \n\t"
00353 "add %%"REG_a", %1 \n\t"
00354 "add %%"REG_a", %2 \n\t"
/* rows 2 and 3 */
00355 "movq (%1), %%mm0 \n\t"
00356 "movq (%1, %3), %%mm1 \n\t"
00357 "movq 8(%1), %%mm2 \n\t"
00358 "movq 8(%1, %3), %%mm3 \n\t"
00359 PAVGB" 1(%1), %%mm0 \n\t"
00360 PAVGB" 1(%1, %3), %%mm1 \n\t"
00361 PAVGB" 9(%1), %%mm2 \n\t"
00362 PAVGB" 9(%1, %3), %%mm3 \n\t"
00363 "add %%"REG_a", %1 \n\t"
00364 "movq %%mm0, (%2) \n\t"
00365 "movq %%mm1, (%2, %3) \n\t"
00366 "movq %%mm2, 8(%2) \n\t"
00367 "movq %%mm3, 8(%2, %3) \n\t"
00368 "add %%"REG_a", %2 \n\t"
00369 "subl $4, %0 \n\t"
00370 "jnz 1b \n\t"
00371 :"+g"(h), "+S"(pixels), "+D"(block)
00372 :"r" ((x86_reg)line_size)
00373 :"%"REG_a, "memory");
00374 }
00375
/**
 * Two-source "put" for a 16-byte-wide block: dst row = PAVGB(src1 row, src2 row).
 *
 * Each row is handled as two 8-byte halves (offsets 0 and 8). src1 advances by
 * src1Stride per row and dst by dstStride per row. src2 is read as a packed
 * buffer with a fixed 16 bytes per row. PAVGB is a macro defined outside this
 * chunk; presumably a per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles two rows per iteration and exits only when the
 *          counter reaches exactly 0 (subl $2 / jnz), so the remainder is
 *          expected to be a positive multiple of 2.
 */
00376 static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00377 {
00378 __asm__ volatile(
/* odd h: peel off one row */
00379 "testl $1, %0 \n\t"
00380 " jz 1f \n\t"
00381 "movq (%1), %%mm0 \n\t"
00382 "movq 8(%1), %%mm1 \n\t"
00383 PAVGB" (%2), %%mm0 \n\t"
00384 PAVGB" 8(%2), %%mm1 \n\t"
00385 "add %4, %1 \n\t"
00386 "add $16, %2 \n\t"
00387 "movq %%mm0, (%3) \n\t"
00388 "movq %%mm1, 8(%3) \n\t"
00389 "add %5, %3 \n\t"
00390 "decl %0 \n\t"
00391 "1: \n\t"
/* row 0 of this iteration */
00392 "movq (%1), %%mm0 \n\t"
00393 "movq 8(%1), %%mm1 \n\t"
00394 "add %4, %1 \n\t"
00395 PAVGB" (%2), %%mm0 \n\t"
00396 PAVGB" 8(%2), %%mm1 \n\t"
00397 "movq %%mm0, (%3) \n\t"
00398 "movq %%mm1, 8(%3) \n\t"
00399 "add %5, %3 \n\t"
/* row 1 of this iteration */
00400 "movq (%1), %%mm0 \n\t"
00401 "movq 8(%1), %%mm1 \n\t"
00402 "add %4, %1 \n\t"
00403 PAVGB" 16(%2), %%mm0 \n\t"
00404 PAVGB" 24(%2), %%mm1 \n\t"
00405 "movq %%mm0, (%3) \n\t"
00406 "movq %%mm1, 8(%3) \n\t"
00407 "add %5, %3 \n\t"
/* two packed 16-byte src2 rows consumed */
00408 "add $32, %2 \n\t"
00409 "subl $2, %0 \n\t"
00410 "jnz 1b \n\t"
00411 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00412 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00413 #else
00414 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00415 #endif
00416 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00417 :"memory");
00418
00419
00420
00421
00422 }
00423
/**
 * Two-source "avg" for a 16-byte-wide block:
 * dst row = PAVGB(PAVGB(src1 row, src2 row), dst row).
 *
 * Each row is handled as two 8-byte halves (offsets 0 and 8). src1 advances by
 * src1Stride per row and dst by dstStride per row. src2 is read as a packed
 * buffer with a fixed 16 bytes per row. PAVGB is a macro defined outside this
 * chunk; presumably a per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles two rows per iteration and exits only when the
 *          counter reaches exactly 0 (subl $2 / jnz), so the remainder is
 *          expected to be a positive multiple of 2.
 */
00424 static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00425 {
00426 __asm__ volatile(
/* odd h: peel off one row */
00427 "testl $1, %0 \n\t"
00428 " jz 1f \n\t"
00429 "movq (%1), %%mm0 \n\t"
00430 "movq 8(%1), %%mm1 \n\t"
00431 PAVGB" (%2), %%mm0 \n\t"
00432 PAVGB" 8(%2), %%mm1 \n\t"
00433 "add %4, %1 \n\t"
00434 "add $16, %2 \n\t"
/* blend with what is already in dst */
00435 PAVGB" (%3), %%mm0 \n\t"
00436 PAVGB" 8(%3), %%mm1 \n\t"
00437 "movq %%mm0, (%3) \n\t"
00438 "movq %%mm1, 8(%3) \n\t"
00439 "add %5, %3 \n\t"
00440 "decl %0 \n\t"
00441 "1: \n\t"
/* row 0 of this iteration */
00442 "movq (%1), %%mm0 \n\t"
00443 "movq 8(%1), %%mm1 \n\t"
00444 "add %4, %1 \n\t"
00445 PAVGB" (%2), %%mm0 \n\t"
00446 PAVGB" 8(%2), %%mm1 \n\t"
00447 PAVGB" (%3), %%mm0 \n\t"
00448 PAVGB" 8(%3), %%mm1 \n\t"
00449 "movq %%mm0, (%3) \n\t"
00450 "movq %%mm1, 8(%3) \n\t"
00451 "add %5, %3 \n\t"
/* row 1 of this iteration */
00452 "movq (%1), %%mm0 \n\t"
00453 "movq 8(%1), %%mm1 \n\t"
00454 "add %4, %1 \n\t"
00455 PAVGB" 16(%2), %%mm0 \n\t"
00456 PAVGB" 24(%2), %%mm1 \n\t"
00457 PAVGB" (%3), %%mm0 \n\t"
00458 PAVGB" 8(%3), %%mm1 \n\t"
00459 "movq %%mm0, (%3) \n\t"
00460 "movq %%mm1, 8(%3) \n\t"
00461 "add %5, %3 \n\t"
/* two packed 16-byte src2 rows consumed */
00462 "add $32, %2 \n\t"
00463 "subl $2, %0 \n\t"
00464 "jnz 1b \n\t"
00465 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00466 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00467 #else
00468 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00469 #endif
00470 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00471 :"memory");
00472
00473
00474
00475
00476 }
00477
/**
 * Two-source "put" for a 16-byte-wide block using the complement trick:
 * dst row = ~PAVGB(~src1 row, ~src2 row).
 *
 * mm6 is set to all ones (pcmpeqb mm6, mm6) and XORed into both inputs and
 * into the result. PAVGB is a macro defined outside this chunk; if it is a
 * round-up byte average, the net effect is an average that rounds down
 * (matching the "no_rnd" name) -- confirm.
 *
 * Each row is handled as two 8-byte halves. src1 advances by src1Stride per
 * row, dst by dstStride per row; src2 is a packed buffer with a fixed
 * 16 bytes per row.
 *
 * Operands: %0 = h (memory when EBX is unavailable, else ebx), %1 = src1 (eax),
 * %2 = src2 (ecx), %3 = dst (edx), %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * @param h row count. If h is odd, a single row is processed first; the main
 *          loop then handles two rows per iteration and exits only when the
 *          counter reaches exactly 0 (subl $2 / jnz), so the remainder is
 *          expected to be a positive multiple of 2.
 */
00478 static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00479 {
00480 __asm__ volatile(
/* mm6 = 0xFF in every byte; XOR with it is a bitwise complement */
00481 "pcmpeqb %%mm6, %%mm6 \n\t"
/* odd h: peel off one row */
00482 "testl $1, %0 \n\t"
00483 " jz 1f \n\t"
00484 "movq (%1), %%mm0 \n\t"
00485 "movq 8(%1), %%mm1 \n\t"
00486 "movq (%2), %%mm2 \n\t"
00487 "movq 8(%2), %%mm3 \n\t"
00488 "pxor %%mm6, %%mm0 \n\t"
00489 "pxor %%mm6, %%mm1 \n\t"
00490 "pxor %%mm6, %%mm2 \n\t"
00491 "pxor %%mm6, %%mm3 \n\t"
00492 PAVGB" %%mm2, %%mm0 \n\t"
00493 PAVGB" %%mm3, %%mm1 \n\t"
00494 "pxor %%mm6, %%mm0 \n\t"
00495 "pxor %%mm6, %%mm1 \n\t"
00496 "add %4, %1 \n\t"
00497 "add $16, %2 \n\t"
00498 "movq %%mm0, (%3) \n\t"
00499 "movq %%mm1, 8(%3) \n\t"
00500 "add %5, %3 \n\t"
00501 "decl %0 \n\t"
00502 "1: \n\t"
/* row 0 of this iteration: complement inputs, average, complement result */
00503 "movq (%1), %%mm0 \n\t"
00504 "movq 8(%1), %%mm1 \n\t"
00505 "add %4, %1 \n\t"
00506 "movq (%2), %%mm2 \n\t"
00507 "movq 8(%2), %%mm3 \n\t"
00508 "pxor %%mm6, %%mm0 \n\t"
00509 "pxor %%mm6, %%mm1 \n\t"
00510 "pxor %%mm6, %%mm2 \n\t"
00511 "pxor %%mm6, %%mm3 \n\t"
00512 PAVGB" %%mm2, %%mm0 \n\t"
00513 PAVGB" %%mm3, %%mm1 \n\t"
00514 "pxor %%mm6, %%mm0 \n\t"
00515 "pxor %%mm6, %%mm1 \n\t"
00516 "movq %%mm0, (%3) \n\t"
00517 "movq %%mm1, 8(%3) \n\t"
00518 "add %5, %3 \n\t"
/* row 1 of this iteration */
00519 "movq (%1), %%mm0 \n\t"
00520 "movq 8(%1), %%mm1 \n\t"
00521 "add %4, %1 \n\t"
00522 "movq 16(%2), %%mm2 \n\t"
00523 "movq 24(%2), %%mm3 \n\t"
00524 "pxor %%mm6, %%mm0 \n\t"
00525 "pxor %%mm6, %%mm1 \n\t"
00526 "pxor %%mm6, %%mm2 \n\t"
00527 "pxor %%mm6, %%mm3 \n\t"
00528 PAVGB" %%mm2, %%mm0 \n\t"
00529 PAVGB" %%mm3, %%mm1 \n\t"
00530 "pxor %%mm6, %%mm0 \n\t"
00531 "pxor %%mm6, %%mm1 \n\t"
00532 "movq %%mm0, (%3) \n\t"
00533 "movq %%mm1, 8(%3) \n\t"
00534 "add %5, %3 \n\t"
/* two packed 16-byte src2 rows consumed */
00535 "add $32, %2 \n\t"
00536 "subl $2, %0 \n\t"
00537 "jnz 1b \n\t"
00538 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00539 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00540 #else
00541 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00542 #endif
00543 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
00544 :"memory");
00545
00546
00547
00548
00549 }
00550
00551
/**
 * Horizontal half-pel "put" for an 8-byte-wide block, with a bias applied
 * before averaging: each output row is PAVGB(sat_sub(pixels[x], mm6), pixels[x + 1]).
 *
 * mm6 is prepared by MOVQ_BONE(mm6) in a separate statement and is read by
 * this asm block without being listed as an operand. MOVQ_BONE and PAVGB are
 * macros defined outside this chunk; presumably mm6 holds 0x01 in every byte
 * and PAVGB is a round-up byte average, making this an approximation of a
 * round-down average (see the sibling _exact variant, which uses bitwise
 * complements instead) -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are produced per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00552 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00553 {
00554 MOVQ_BONE(mm6);
00555 __asm__ volatile(
/* REG_a = 2 * line_size */
00556 "lea (%3, %3), %%"REG_a" \n\t"
00557 "1: \n\t"
/* rows 0 and 1: mm0/mm2 = pixels[x], mm1/mm3 = pixels[x + 1] */
00558 "movq (%1), %%mm0 \n\t"
00559 "movq (%1, %3), %%mm2 \n\t"
00560 "movq 1(%1), %%mm1 \n\t"
00561 "movq 1(%1, %3), %%mm3 \n\t"
00562 "add %%"REG_a", %1 \n\t"
/* saturating subtract of mm6 from the left sample only */
00563 "psubusb %%mm6, %%mm0 \n\t"
00564 "psubusb %%mm6, %%mm2 \n\t"
00565 PAVGB" %%mm1, %%mm0 \n\t"
00566 PAVGB" %%mm3, %%mm2 \n\t"
00567 "movq %%mm0, (%2) \n\t"
00568 "movq %%mm2, (%2, %3) \n\t"
/* rows 2 and 3 */
00569 "movq (%1), %%mm0 \n\t"
00570 "movq 1(%1), %%mm1 \n\t"
00571 "movq (%1, %3), %%mm2 \n\t"
00572 "movq 1(%1, %3), %%mm3 \n\t"
00573 "add %%"REG_a", %2 \n\t"
00574 "add %%"REG_a", %1 \n\t"
00575 "psubusb %%mm6, %%mm0 \n\t"
00576 "psubusb %%mm6, %%mm2 \n\t"
00577 PAVGB" %%mm1, %%mm0 \n\t"
00578 PAVGB" %%mm3, %%mm2 \n\t"
00579 "movq %%mm0, (%2) \n\t"
00580 "movq %%mm2, (%2, %3) \n\t"
00581 "add %%"REG_a", %2 \n\t"
00582 "subl $4, %0 \n\t"
00583 "jnz 1b \n\t"
00584 :"+g"(h), "+S"(pixels), "+D"(block)
00585 :"r" ((x86_reg)line_size)
00586 :"%"REG_a, "memory");
00587 }
00588
/**
 * Horizontal half-pel "put" for an 8-byte-wide block using the complement
 * trick: each output row is ~PAVGB(~pixels[x], ~pixels[x + 1]).
 *
 * mm6 is set to all ones inside the asm and XORed into both inputs and the
 * result. PAVGB is a macro defined outside this chunk; if it is a round-up
 * byte average, the result is an average that rounds down -- confirm.
 *
 * Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size,
 * %4 = 3 * line_size (used to address the fourth row of each group).
 *
 * @param h row count; four rows are produced per iteration and the loop
 *          continues while the decremented counter is still greater than 0
 *          (subl $4 / jg), so a non-multiple of 4 is rounded up to the next
 *          group of four rows
 */
00589 static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00590 {
00591 __asm__ volatile (
/* mm6 = 0xFF in every byte; XOR with it is a bitwise complement */
00592 "pcmpeqb %%mm6, %%mm6 \n\t"
00593 "1: \n\t"
/* rows 0 and 1 (offsets 0 and line_size) */
00594 "movq (%1), %%mm0 \n\t"
00595 "movq (%1, %3), %%mm2 \n\t"
00596 "movq 1(%1), %%mm1 \n\t"
00597 "movq 1(%1, %3), %%mm3 \n\t"
00598 "pxor %%mm6, %%mm0 \n\t"
00599 "pxor %%mm6, %%mm2 \n\t"
00600 "pxor %%mm6, %%mm1 \n\t"
00601 "pxor %%mm6, %%mm3 \n\t"
00602 PAVGB" %%mm1, %%mm0 \n\t"
00603 PAVGB" %%mm3, %%mm2 \n\t"
00604 "pxor %%mm6, %%mm0 \n\t"
00605 "pxor %%mm6, %%mm2 \n\t"
00606 "movq %%mm0, (%2) \n\t"
00607 "movq %%mm2, (%2, %3) \n\t"
/* rows 2 and 3 (offsets 2 * line_size and 3 * line_size) */
00608 "movq (%1, %3,2), %%mm0 \n\t"
00609 "movq 1(%1, %3,2), %%mm1 \n\t"
00610 "movq (%1, %4), %%mm2 \n\t"
00611 "movq 1(%1, %4), %%mm3 \n\t"
00612 "pxor %%mm6, %%mm0 \n\t"
00613 "pxor %%mm6, %%mm1 \n\t"
00614 "pxor %%mm6, %%mm2 \n\t"
00615 "pxor %%mm6, %%mm3 \n\t"
00616 PAVGB" %%mm1, %%mm0 \n\t"
00617 PAVGB" %%mm3, %%mm2 \n\t"
00618 "pxor %%mm6, %%mm0 \n\t"
00619 "pxor %%mm6, %%mm2 \n\t"
00620 "movq %%mm0, (%2, %3,2) \n\t"
00621 "movq %%mm2, (%2, %4) \n\t"
/* advance source and destination by four rows */
00622 "lea (%1, %3,4), %1 \n\t"
00623 "lea (%2, %3,4), %2 \n\t"
00624 "subl $4, %0 \n\t"
00625 "jg 1b \n\t"
00626 : "+g"(h), "+r"(pixels), "+r"(block)
00627 : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
00628 : "memory"
00629 );
00630 }
00631
/**
 * Vertical half-pel "put" for an 8-byte-wide block: output row r is
 * PAVGB(pixels row r, pixels row r + 1), so h + 1 source rows are read.
 *
 * The last source row loaded in one iteration stays in mm0 and becomes the
 * first row of the next iteration, so each source row is loaded only once.
 * PAVGB is a macro defined outside this chunk; presumably a per-byte unsigned
 * average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are produced per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00632 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00633 {
00634 __asm__ volatile(
00635 "lea (%3, %3), %%"REG_a" \n\t"
/* mm0 = source row 0 */
00636 "movq (%1), %%mm0 \n\t"
/* bias block back one row so stores can use the (%2,%3) / (%2,REG_a) forms */
00637 "sub %3, %2 \n\t"
00638 "1: \n\t"
/* mm1 = row 1, mm2 = row 2 */
00639 "movq (%1, %3), %%mm1 \n\t"
00640 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00641 "add %%"REG_a", %1 \n\t"
/* out 0 = avg(row 0, row 1), out 1 = avg(row 1, row 2) */
00642 PAVGB" %%mm1, %%mm0 \n\t"
00643 PAVGB" %%mm2, %%mm1 \n\t"
00644 "movq %%mm0, (%2, %3) \n\t"
00645 "movq %%mm1, (%2, %%"REG_a") \n\t"
/* mm1 = row 3, mm0 = row 4 (carried into the next iteration) */
00646 "movq (%1, %3), %%mm1 \n\t"
00647 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00648 "add %%"REG_a", %2 \n\t"
00649 "add %%"REG_a", %1 \n\t"
/* out 2 = avg(row 2, row 3), out 3 = avg(row 3, row 4) */
00650 PAVGB" %%mm1, %%mm2 \n\t"
00651 PAVGB" %%mm0, %%mm1 \n\t"
00652 "movq %%mm2, (%2, %3) \n\t"
00653 "movq %%mm1, (%2, %%"REG_a") \n\t"
00654 "add %%"REG_a", %2 \n\t"
00655 "subl $4, %0 \n\t"
00656 "jnz 1b \n\t"
00657 :"+g"(h), "+S"(pixels), "+D" (block)
00658 :"r" ((x86_reg)line_size)
00659 :"%"REG_a, "memory");
00660 }
00661
00662
/**
 * Vertical half-pel "put" for an 8-byte-wide block with a bias applied before
 * averaging: output row r is PAVGB of source rows r and r + 1, where every
 * odd-numbered source row (1, 3, ...) first has mm6 subtracted with unsigned
 * saturation. h + 1 source rows are read.
 *
 * mm6 is prepared by MOVQ_BONE(mm6) in a separate statement and is read by
 * this asm block without being listed as an operand. MOVQ_BONE and PAVGB are
 * macros defined outside this chunk; presumably mm6 holds 0x01 in every byte
 * and PAVGB is a round-up byte average, making this an approximation of a
 * round-down average (see the sibling _exact variant) -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are produced per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00663 static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00664 {
00665 MOVQ_BONE(mm6);
00666 __asm__ volatile(
00667 "lea (%3, %3), %%"REG_a" \n\t"
/* mm0 = source row 0 */
00668 "movq (%1), %%mm0 \n\t"
/* bias block back one row so stores can use the (%2,%3) / (%2,REG_a) forms */
00669 "sub %3, %2 \n\t"
00670 "1: \n\t"
/* mm1 = row 1, mm2 = row 2 */
00671 "movq (%1, %3), %%mm1 \n\t"
00672 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00673 "add %%"REG_a", %1 \n\t"
/* row 1 is biased once and then takes part in both averages below */
00674 "psubusb %%mm6, %%mm1 \n\t"
00675 PAVGB" %%mm1, %%mm0 \n\t"
00676 PAVGB" %%mm2, %%mm1 \n\t"
00677 "movq %%mm0, (%2, %3) \n\t"
00678 "movq %%mm1, (%2, %%"REG_a") \n\t"
/* mm1 = row 3, mm0 = row 4 (carried into the next iteration) */
00679 "movq (%1, %3), %%mm1 \n\t"
00680 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00681 "add %%"REG_a", %2 \n\t"
00682 "add %%"REG_a", %1 \n\t"
/* row 3 is biased once and then takes part in both averages below */
00683 "psubusb %%mm6, %%mm1 \n\t"
00684 PAVGB" %%mm1, %%mm2 \n\t"
00685 PAVGB" %%mm0, %%mm1 \n\t"
00686 "movq %%mm2, (%2, %3) \n\t"
00687 "movq %%mm1, (%2, %%"REG_a") \n\t"
00688 "add %%"REG_a", %2 \n\t"
00689 "subl $4, %0 \n\t"
00690 "jnz 1b \n\t"
00691 :"+g"(h), "+S"(pixels), "+D" (block)
00692 :"r" ((x86_reg)line_size)
00693 :"%"REG_a, "memory");
00694 }
00695
/**
 * Vertical half-pel "put" for an 8-byte-wide block using the complement
 * trick: output row r is ~PAVGB(~row r, ~row r + 1). h + 1 source rows are
 * read (more if h is not a multiple of 4, see below).
 *
 * mm6 is set to all ones inside the asm. Source rows are kept in complemented
 * form in registers; the complemented last row of an iteration stays in mm0
 * for the next one. PAVGB is a macro defined outside this chunk; if it is a
 * round-up byte average, the result is an average that rounds down -- confirm.
 *
 * Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size,
 * %4 = 3 * line_size.
 *
 * @param h row count; four rows are produced per iteration and the loop
 *          continues while the decremented counter is still greater than 0
 *          (subl $4 / jg)
 */
00696 static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00697 {
00698 __asm__ volatile (
/* mm0 = ~row 0; pixels then points at row 1 */
00699 "movq (%1), %%mm0 \n\t"
00700 "pcmpeqb %%mm6, %%mm6 \n\t"
00701 "add %3, %1 \n\t"
00702 "pxor %%mm6, %%mm0 \n\t"
00703 "1: \n\t"
/* mm1 = ~row 1, mm2 = ~row 2 */
00704 "movq (%1), %%mm1 \n\t"
00705 "movq (%1, %3), %%mm2 \n\t"
00706 "pxor %%mm6, %%mm1 \n\t"
00707 "pxor %%mm6, %%mm2 \n\t"
00708 PAVGB" %%mm1, %%mm0 \n\t"
00709 PAVGB" %%mm2, %%mm1 \n\t"
/* undo the complement on the two results only; mm2 stays complemented */
00710 "pxor %%mm6, %%mm0 \n\t"
00711 "pxor %%mm6, %%mm1 \n\t"
00712 "movq %%mm0, (%2) \n\t"
00713 "movq %%mm1, (%2, %3) \n\t"
/* mm1 = ~row 3, mm0 = ~row 4 (carried into the next iteration) */
00714 "movq (%1, %3,2), %%mm1 \n\t"
00715 "movq (%1, %4), %%mm0 \n\t"
00716 "pxor %%mm6, %%mm1 \n\t"
00717 "pxor %%mm6, %%mm0 \n\t"
00718 PAVGB" %%mm1, %%mm2 \n\t"
00719 PAVGB" %%mm0, %%mm1 \n\t"
00720 "pxor %%mm6, %%mm2 \n\t"
00721 "pxor %%mm6, %%mm1 \n\t"
00722 "movq %%mm2, (%2, %3,2) \n\t"
00723 "movq %%mm1, (%2, %4) \n\t"
/* advance source and destination by four rows */
00724 "lea (%1, %3,4), %1 \n\t"
00725 "lea (%2, %3,4), %2 \n\t"
00726 "subl $4, %0 \n\t"
00727 "jg 1b \n\t"
00728 :"+g"(h), "+r"(pixels), "+r" (block)
00729 :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
00730 :"memory"
00731 );
00732 }
00733
/**
 * Full-pel "avg" for an 8-byte-wide block: block row = PAVGB(block row, pixels row).
 *
 * PAVGB is a macro defined outside this chunk; presumably a per-byte unsigned
 * average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param block     destination, read and rewritten in place
 * @param pixels    source
 * @param line_size row stride shared by source and destination
 * @param h         row count; four rows are processed per iteration and the
 *                  loop only exits when h reaches exactly 0 (subl $4 / jnz),
 *                  so h is expected to be a positive multiple of 4
 */
00734 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00735 {
00736 __asm__ volatile(
/* REG_a = 2 * line_size */
00737 "lea (%3, %3), %%"REG_a" \n\t"
00738 "1: \n\t"
/* rows 0 and 1: load existing destination, average with source, store */
00739 "movq (%2), %%mm0 \n\t"
00740 "movq (%2, %3), %%mm1 \n\t"
00741 PAVGB" (%1), %%mm0 \n\t"
00742 PAVGB" (%1, %3), %%mm1 \n\t"
00743 "movq %%mm0, (%2) \n\t"
00744 "movq %%mm1, (%2, %3) \n\t"
00745 "add %%"REG_a", %1 \n\t"
00746 "add %%"REG_a", %2 \n\t"
/* rows 2 and 3 */
00747 "movq (%2), %%mm0 \n\t"
00748 "movq (%2, %3), %%mm1 \n\t"
00749 PAVGB" (%1), %%mm0 \n\t"
00750 PAVGB" (%1, %3), %%mm1 \n\t"
00751 "add %%"REG_a", %1 \n\t"
00752 "movq %%mm0, (%2) \n\t"
00753 "movq %%mm1, (%2, %3) \n\t"
00754 "add %%"REG_a", %2 \n\t"
00755 "subl $4, %0 \n\t"
00756 "jnz 1b \n\t"
00757 :"+g"(h), "+S"(pixels), "+D"(block)
00758 :"r" ((x86_reg)line_size)
00759 :"%"REG_a, "memory");
00760 }
00761
/**
 * Horizontal half-pel "avg" for an 8-byte-wide block:
 * block row = PAVGB(PAVGB(pixels[x], pixels[x + 1]), block row).
 *
 * 9 source bytes are read per row. PAVGB is a macro defined outside this
 * chunk; presumably a per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are processed per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00762 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00763 {
00764 __asm__ volatile(
/* REG_a = 2 * line_size */
00765 "lea (%3, %3), %%"REG_a" \n\t"
00766 "1: \n\t"
/* rows 0 and 1: horizontal average, then blend with the destination */
00767 "movq (%1), %%mm0 \n\t"
00768 "movq (%1, %3), %%mm2 \n\t"
00769 PAVGB" 1(%1), %%mm0 \n\t"
00770 PAVGB" 1(%1, %3), %%mm2 \n\t"
00771 PAVGB" (%2), %%mm0 \n\t"
00772 PAVGB" (%2, %3), %%mm2 \n\t"
00773 "add %%"REG_a", %1 \n\t"
00774 "movq %%mm0, (%2) \n\t"
00775 "movq %%mm2, (%2, %3) \n\t"
/* rows 2 and 3; block is advanced before its rows are read for blending */
00776 "movq (%1), %%mm0 \n\t"
00777 "movq (%1, %3), %%mm2 \n\t"
00778 PAVGB" 1(%1), %%mm0 \n\t"
00779 PAVGB" 1(%1, %3), %%mm2 \n\t"
00780 "add %%"REG_a", %2 \n\t"
00781 "add %%"REG_a", %1 \n\t"
00782 PAVGB" (%2), %%mm0 \n\t"
00783 PAVGB" (%2, %3), %%mm2 \n\t"
00784 "movq %%mm0, (%2) \n\t"
00785 "movq %%mm2, (%2, %3) \n\t"
00786 "add %%"REG_a", %2 \n\t"
00787 "subl $4, %0 \n\t"
00788 "jnz 1b \n\t"
00789 :"+g"(h), "+S"(pixels), "+D"(block)
00790 :"r" ((x86_reg)line_size)
00791 :"%"REG_a, "memory");
00792 }
00793
/**
 * Vertical half-pel "avg" for an 8-byte-wide block:
 * block row r = PAVGB(PAVGB(pixels row r, pixels row r + 1), block row r).
 *
 * h + 1 source rows are read; the last source row of an iteration stays in
 * mm0 for the next one. PAVGB is a macro defined outside this chunk;
 * presumably a per-byte unsigned average that rounds up -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are processed per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00794 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00795 {
00796 __asm__ volatile(
00797 "lea (%3, %3), %%"REG_a" \n\t"
/* mm0 = source row 0 */
00798 "movq (%1), %%mm0 \n\t"
/* bias block back one row so it is addressed as (%2,%3) / (%2,REG_a) */
00799 "sub %3, %2 \n\t"
00800 "1: \n\t"
/* mm1 = row 1, mm2 = row 2 */
00801 "movq (%1, %3), %%mm1 \n\t"
00802 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00803 "add %%"REG_a", %1 \n\t"
00804 PAVGB" %%mm1, %%mm0 \n\t"
00805 PAVGB" %%mm2, %%mm1 \n\t"
/* blend the two vertical averages with destination rows 0 and 1 */
00806 "movq (%2, %3), %%mm3 \n\t"
00807 "movq (%2, %%"REG_a"), %%mm4 \n\t"
00808 PAVGB" %%mm3, %%mm0 \n\t"
00809 PAVGB" %%mm4, %%mm1 \n\t"
00810 "movq %%mm0, (%2, %3) \n\t"
00811 "movq %%mm1, (%2, %%"REG_a") \n\t"
/* mm1 = row 3, mm0 = row 4 (carried into the next iteration) */
00812 "movq (%1, %3), %%mm1 \n\t"
00813 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00814 PAVGB" %%mm1, %%mm2 \n\t"
00815 PAVGB" %%mm0, %%mm1 \n\t"
00816 "add %%"REG_a", %2 \n\t"
00817 "add %%"REG_a", %1 \n\t"
/* blend with destination rows 2 and 3 */
00818 "movq (%2, %3), %%mm3 \n\t"
00819 "movq (%2, %%"REG_a"), %%mm4 \n\t"
00820 PAVGB" %%mm3, %%mm2 \n\t"
00821 PAVGB" %%mm4, %%mm1 \n\t"
00822 "movq %%mm2, (%2, %3) \n\t"
00823 "movq %%mm1, (%2, %%"REG_a") \n\t"
00824 "add %%"REG_a", %2 \n\t"
00825 "subl $4, %0 \n\t"
00826 "jnz 1b \n\t"
00827 :"+g"(h), "+S"(pixels), "+D"(block)
00828 :"r" ((x86_reg)line_size)
00829 :"%"REG_a, "memory");
00830 }
00831
00832
00833
/**
 * Diagonal (x and y half-pel) "avg" for an 8-byte-wide block.
 *
 * Each source row is first averaged horizontally (PAVGB of pixels[x] and
 * pixels[x + 1]); adjacent horizontally-averaged rows are then averaged
 * vertically, and the result is averaged with the existing block row. The
 * four-sample interpolation is therefore built from cascaded pairwise PAVGB
 * operations rather than a single four-sample sum. On every second source row
 * (rows 2, 6, ... relative to the start) mm6 is subtracted with unsigned
 * saturation before the horizontal average. h + 1 source rows of 9 bytes are
 * read.
 *
 * mm6 is prepared by MOVQ_BONE(mm6) in a separate statement and is read by
 * this asm block without being listed as an operand. MOVQ_BONE and PAVGB are
 * macros defined outside this chunk; presumably mm6 holds 0x01 in every byte
 * (a rounding correction) and PAVGB is a round-up byte average -- confirm.
 *
 * Operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size; REG_a is clobbered and holds 2 * line_size.
 *
 * @param h row count; four rows are processed per iteration and the loop only
 *          exits when h reaches exactly 0 (subl $4 / jnz), so h is expected
 *          to be a positive multiple of 4
 */
00834 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00835 {
00836 MOVQ_BONE(mm6);
00837 __asm__ volatile(
00838 "lea (%3, %3), %%"REG_a" \n\t"
/* mm0 = horizontal average of source row 0 */
00839 "movq (%1), %%mm0 \n\t"
00840 PAVGB" 1(%1), %%mm0 \n\t"
00841 ".p2align 3 \n\t"
00842 "1: \n\t"
/* mm1 = h-avg of row 1; mm2 = h-avg of row 2 with mm6 subtracted first */
00843 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00844 "movq (%1, %3), %%mm1 \n\t"
00845 "psubusb %%mm6, %%mm2 \n\t"
00846 PAVGB" 1(%1, %3), %%mm1 \n\t"
00847 PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t"
00848 "add %%"REG_a", %1 \n\t"
/* vertical averages for output rows 0 and 1, then blend with block */
00849 PAVGB" %%mm1, %%mm0 \n\t"
00850 PAVGB" %%mm2, %%mm1 \n\t"
00851 PAVGB" (%2), %%mm0 \n\t"
00852 PAVGB" (%2, %3), %%mm1 \n\t"
00853 "movq %%mm0, (%2) \n\t"
00854 "movq %%mm1, (%2, %3) \n\t"
/* mm1 = h-avg of row 3; mm0 = h-avg of row 4 (carried into next iteration) */
00855 "movq (%1, %3), %%mm1 \n\t"
00856 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00857 PAVGB" 1(%1, %3), %%mm1 \n\t"
00858 PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t"
00859 "add %%"REG_a", %2 \n\t"
00860 "add %%"REG_a", %1 \n\t"
/* vertical averages for output rows 2 and 3, then blend with block */
00861 PAVGB" %%mm1, %%mm2 \n\t"
00862 PAVGB" %%mm0, %%mm1 \n\t"
00863 PAVGB" (%2), %%mm2 \n\t"
00864 PAVGB" (%2, %3), %%mm1 \n\t"
00865 "movq %%mm2, (%2) \n\t"
00866 "movq %%mm1, (%2, %3) \n\t"
00867 "add %%"REG_a", %2 \n\t"
00868 "subl $4, %0 \n\t"
00869 "jnz 1b \n\t"
00870 :"+g"(h), "+S"(pixels), "+D"(block)
00871 :"r" ((x86_reg)line_size)
00872 :"%"REG_a, "memory");
00873 }
00874
/**
 * Full-pel "avg" for a 4-byte-wide block: block row = PAVGB(block row, pixels row).
 *
 * Unlike its siblings, the row loop is written in C: each pass of the
 * do/while runs one asm statement that handles four rows (offsets 0,
 * line_size, 2 * line_size and 3 * line_size), then the pointers are advanced
 * by four rows in C. The loop runs at least once and continues while h > 0
 * after subtracting 4.
 *
 * Note the operand order inside the asm: %0 = pixels (esi/rsi),
 * %1 = block (edi/rdi), %2 = line_size, %3 = 3 * line_size.
 *
 * PAVGB is a macro defined outside this chunk; presumably a per-byte unsigned
 * average that rounds up -- confirm.
 *
 * NOTE(review): block rows are loaded and stored with 32-bit movd, but the
 * pixels rows are consumed as PAVGB memory operands; if PAVGB is the 64-bit
 * MMX form, each of those reads touches 8 bytes, i.e. 4 bytes beyond the
 * 4-pixel row -- confirm the source buffer tolerates this.
 */
00875 static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00876 {
00877 do {
00878 __asm__ volatile(
/* load four destination rows */
00879 "movd (%1), %%mm0 \n\t"
00880 "movd (%1, %2), %%mm1 \n\t"
00881 "movd (%1, %2, 2), %%mm2 \n\t"
00882 "movd (%1, %3), %%mm3 \n\t"
/* average each with the matching source row */
00883 PAVGB" (%0), %%mm0 \n\t"
00884 PAVGB" (%0, %2), %%mm1 \n\t"
00885 PAVGB" (%0, %2, 2), %%mm2 \n\t"
00886 PAVGB" (%0, %3), %%mm3 \n\t"
/* write the four rows back to the destination */
00887 "movd %%mm0, (%1) \n\t"
00888 "movd %%mm1, (%1, %2) \n\t"
00889 "movd %%mm2, (%1, %2, 2) \n\t"
00890 "movd %%mm3, (%1, %3) \n\t"
00891 ::"S"(pixels), "D"(block),
00892 "r" ((x86_reg)line_size), "r"((x86_reg)3L*line_size)
00893 :"memory");
00894 block += 4*line_size;
00895 pixels += 4*line_size;
00896 h -= 4;
00897 } while(h > 0);
00898 }
00899
00900
00901 static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00902 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
00903 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
00904 }
00905 static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00906 DEF(put_pixels8_y2)(block , pixels , line_size, h);
00907 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
00908 }
00909 static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00910 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
00911 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
00912 }
00913 static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00914 DEF(avg_pixels8)(block , pixels , line_size, h);
00915 DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
00916 }
00917 static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00918 DEF(avg_pixels8_x2)(block , pixels , line_size, h);
00919 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
00920 }
00921 static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00922 DEF(avg_pixels8_y2)(block , pixels , line_size, h);
00923 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
00924 }
00925 static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00926 DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
00927 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
00928 }
00929
/*
 * QPEL_2TAP_L3(OPNAME) defines two functions, <OPNAME>2tap_qpel16_l3 and
 * <OPNAME>2tap_qpel8_l3 (16 and 8 bytes wide respectively).
 *
 * For every row, the generated code computes
 *     t = PAVGB(PAVGB(src[off1 + x], src[off2 + x]), src[x])
 * then expands STORE_OP on the destination address and t, and finally stores
 * t to dst. STORE_OP must be defined by the user of this macro before it is
 * expanded: it is either empty (plain store) or an extra instruction applied
 * to t and the current dst contents.
 *
 * dst is addressed relative to src: operand %4 is (dst - src), so (%1,%4) is
 * the destination row that corresponds to the current source row, and a
 * single "add stride" on %1 advances both. This means src and dst share the
 * same stride.
 *
 * Operands: %0 = h, %1 = src, %2 = off1, %3 = off2, %4 = dst - src,
 * %5 = stride.
 *
 * One row is processed per iteration and the loop exits when h reaches
 * exactly 0 (decl / jnz), so h is expected to be positive.
 *
 * PAVGB is a macro defined outside this chunk; presumably a per-byte unsigned
 * average that rounds up -- confirm.
 */
00930 #define QPEL_2TAP_L3(OPNAME) \
00931 static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
00932 __asm__ volatile(\
00933 "1: \n\t"\
00934 "movq (%1,%2), %%mm0 \n\t"\
00935 "movq 8(%1,%2), %%mm1 \n\t"\
00936 PAVGB" (%1,%3), %%mm0 \n\t"\
00937 PAVGB" 8(%1,%3), %%mm1 \n\t"\
00938 PAVGB" (%1), %%mm0 \n\t"\
00939 PAVGB" 8(%1), %%mm1 \n\t"\
00940 STORE_OP( (%1,%4),%%mm0)\
00941 STORE_OP(8(%1,%4),%%mm1)\
00942 "movq %%mm0, (%1,%4) \n\t"\
00943 "movq %%mm1, 8(%1,%4) \n\t"\
00944 "add %5, %1 \n\t"\
00945 "decl %0 \n\t"\
00946 "jnz 1b \n\t"\
00947 :"+g"(h), "+r"(src)\
00948 :"r"((x86_reg)off1), "r"((x86_reg)off2),\
00949 "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
00950 :"memory"\
00951 );\
00952 }\
00953 static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
00954 __asm__ volatile(\
00955 "1: \n\t"\
00956 "movq (%1,%2), %%mm0 \n\t"\
00957 PAVGB" (%1,%3), %%mm0 \n\t"\
00958 PAVGB" (%1), %%mm0 \n\t"\
00959 STORE_OP((%1,%4),%%mm0)\
00960 "movq %%mm0, (%1,%4) \n\t"\
00961 "add %5, %1 \n\t"\
00962 "decl %0 \n\t"\
00963 "jnz 1b \n\t"\
00964 :"+g"(h), "+r"(src)\
00965 :"r"((x86_reg)off1), "r"((x86_reg)off2),\
00966 "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
00967 :"memory"\
00968 );\
00969 }
00970
/*
 * Instantiate the 2-tap qpel helpers twice, switching STORE_OP in between.
 *
 * avg_ variant: STORE_OP(a,b) expands to a PAVGB of memory operand a into
 * register b, i.e. the computed row is additionally averaged with the
 * current destination contents before being stored.
 */
00971 #define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
00972 QPEL_2TAP_L3(avg_)
00973 #undef STORE_OP
/* put_ variant: STORE_OP expands to nothing, so the computed row is stored as is. */
00974 #define STORE_OP(a,b)
00975 QPEL_2TAP_L3(put_)
00976 #undef STORE_OP
/* The generator macro is not needed past this point. */
00977 #undef QPEL_2TAP_L3