00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <stddef.h>
00028
/*
 * Per-instantiation instruction-name macros used by the inline assembly below.
 * They are undefined first, presumably because this template is included
 * several times with different COMPILE_TEMPLATE_* settings -- confirm.
 */
00029 #undef PREFETCH
00030 #undef MOVNTQ
00031 #undef EMMS
00032 #undef SFENCE
00033 #undef PAVGB
00034
/*
 * PREFETCH/PAVGB: "prefetch"/"pavgusb" for 3DNow!, "prefetchnta"/"pavgb" for
 * MMX2. Otherwise PREFETCH expands to the text " # nop" (an assembler comment
 * in the asm strings) and PAVGB is left undefined.
 * NOTE(review): any use of PAVGB in the plain-MMX case would not expand --
 * confirm that all users are guarded.
 */
00035 #if COMPILE_TEMPLATE_AMD3DNOW
00036 #define PREFETCH "prefetch"
00037 #define PAVGB "pavgusb"
00038 #elif COMPILE_TEMPLATE_MMX2
00039 #define PREFETCH "prefetchnta"
00040 #define PAVGB "pavgb"
00041 #else
00042 #define PREFETCH " # nop"
00043 #endif
00044
/* EMMS: instruction issued after each MMX loop; "femms" on 3DNow!, "emms" otherwise. */
00045 #if COMPILE_TEMPLATE_AMD3DNOW
00046
00047 #define EMMS "femms"
00048 #else
00049 #define EMMS "emms"
00050 #endif
00051
/*
 * MOVNTQ/SFENCE: with MMX2 the stores use "movntq" and are followed by
 * "sfence"; otherwise a plain "movq" is used and SFENCE is the text " # nop".
 */
00052 #if COMPILE_TEMPLATE_MMX2
00053 #define MOVNTQ "movntq"
00054 #define SFENCE "sfence"
00055 #else
00056 #define MOVNTQ "movq"
00057 #define SFENCE " # nop"
00058 #endif
00059
00060 #if !COMPILE_TEMPLATE_SSE2
00061
00062 #if !COMPILE_TEMPLATE_AMD3DNOW
00063
/**
 * Expand packed 3-byte pixels to 4-byte pixels.
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; 4 bytes are written per source pixel
 * @param src_size number of source bytes (presumably a multiple of 3, since
 *                 the scalar tail always consumes 3 bytes -- TODO confirm)
 *
 * The MMX loop converts 24 source bytes into 32 destination bytes per
 * iteration; the scalar tail copies the three bytes of each remaining pixel in
 * order and appends a fourth byte of 255.
 */
00064 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
00065 {
00066 uint8_t *dest = dst;
00067 const uint8_t *s = src;
00068 const uint8_t *end;
00069 const uint8_t *mm_end;
00070 end = s + src_size;
00071 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* The MMX loop runs only while at least 24 source bytes remain. */
00072 mm_end = end - 23;
/* mm7 holds mask32a for the whole loop. The mask is defined outside this
 * chunk; presumably it forces the 4th byte of each pixel to 0xFF, matching
 * the scalar tail -- confirm. */
00073 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
00074 while (s < mm_end) {
00075 __asm__ volatile(
00076 PREFETCH" 32%1 \n\t"
/* Each movd/punpckldq loads 4 bytes starting at a pixel boundary (stride 3),
 * so every 32-bit lane holds one pixel plus the first byte of the next one.
 * NOTE(review): the load at offset 21 covers bytes 21..24 while the loop
 * guard only guarantees 24 bytes -- confirm callers pad the source. */
00077 "movd %1, %%mm0 \n\t"
00078 "punpckldq 3%1, %%mm0 \n\t"
00079 "movd 6%1, %%mm1 \n\t"
00080 "punpckldq 9%1, %%mm1 \n\t"
00081 "movd 12%1, %%mm2 \n\t"
00082 "punpckldq 15%1, %%mm2 \n\t"
00083 "movd 18%1, %%mm3 \n\t"
00084 "punpckldq 21%1, %%mm3 \n\t"
00085 "por %%mm7, %%mm0 \n\t"
00086 "por %%mm7, %%mm1 \n\t"
00087 "por %%mm7, %%mm2 \n\t"
00088 "por %%mm7, %%mm3 \n\t"
00089 MOVNTQ" %%mm0, %0 \n\t"
00090 MOVNTQ" %%mm1, 8%0 \n\t"
00091 MOVNTQ" %%mm2, 16%0 \n\t"
00092 MOVNTQ" %%mm3, 24%0"
00093 :"=m"(*dest)
00094 :"m"(*s)
00095 :"memory");
00096 dest += 32;
00097 s += 24;
00098 }
00099 __asm__ volatile(SFENCE:::"memory");
00100 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00101 while (s < end) {
00102 *dest++ = *s++;
00103 *dest++ = *s++;
00104 *dest++ = *s++;
00105 *dest++ = 255;
00106 }
00107 }
00108
/*
 * Assembly fragment that packs four MMX registers of 4-byte pixels into 24
 * bytes of 3-byte pixels and stores them to operand %0 (offsets 0, 8 and 16).
 *
 * Expected on entry (as set up by the users in this file): mm0, mm1, mm4 and
 * mm5 each hold two 32-bit pixels, and mm2, mm3, mm6 and mm7 hold copies of
 * mm0, mm1, mm4 and mm5 respectively. All eight registers are overwritten.
 *
 * The mask24* constants are defined outside this chunk; presumably mask24l
 * and mask24h select the 3 payload bytes of the low and high pixel, and
 * mask24hh/hhh/hhhh select the upper 2/4/6 bytes of a quadword -- confirm.
 */
00109 #define STORE_BGR24_MMX \
00110 "psrlq $8, %%mm2 \n\t" \
00111 "psrlq $8, %%mm3 \n\t" \
00112 "psrlq $8, %%mm6 \n\t" \
00113 "psrlq $8, %%mm7 \n\t" \
00114 "pand "MANGLE(mask24l)", %%mm0\n\t" \
00115 "pand "MANGLE(mask24l)", %%mm1\n\t" \
00116 "pand "MANGLE(mask24l)", %%mm4\n\t" \
00117 "pand "MANGLE(mask24l)", %%mm5\n\t" \
00118 "pand "MANGLE(mask24h)", %%mm2\n\t" \
00119 "pand "MANGLE(mask24h)", %%mm3\n\t" \
00120 "pand "MANGLE(mask24h)", %%mm6\n\t" \
00121 "pand "MANGLE(mask24h)", %%mm7\n\t" \
00122 "por %%mm2, %%mm0 \n\t" \
00123 "por %%mm3, %%mm1 \n\t" \
00124 "por %%mm6, %%mm4 \n\t" \
00125 "por %%mm7, %%mm5 \n\t" \
00126 \
00127 "movq %%mm1, %%mm2 \n\t" \
00128 "movq %%mm4, %%mm3 \n\t" \
00129 "psllq $48, %%mm2 \n\t" \
00130 "psllq $32, %%mm3 \n\t" \
00131 "pand "MANGLE(mask24hh)", %%mm2\n\t" \
00132 "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
00133 "por %%mm2, %%mm0 \n\t" \
00134 "psrlq $16, %%mm1 \n\t" \
00135 "psrlq $32, %%mm4 \n\t" \
00136 "psllq $16, %%mm5 \n\t" \
00137 "por %%mm3, %%mm1 \n\t" \
00138 "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
00139 "por %%mm5, %%mm4 \n\t" \
00140 \
00141 MOVNTQ" %%mm0, %0 \n\t" \
00142 MOVNTQ" %%mm1, 8%0 \n\t" \
00143 MOVNTQ" %%mm4, 16%0"
00144
00145
/**
 * Reduce 4-byte pixels to packed 3-byte pixels by dropping every fourth byte.
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes (presumably a multiple of 4 --
 *                 TODO confirm)
 *
 * The MMX loop converts 32 source bytes into 24 destination bytes per
 * iteration via STORE_BGR24_MMX. The scalar tail copies the first three bytes
 * of each pixel in order and skips the fourth.
 */
00146 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00147 {
00148 uint8_t *dest = dst;
00149 const uint8_t *s = src;
00150 const uint8_t *end;
00151 const uint8_t *mm_end;
00152 end = s + src_size;
00153 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* The MMX loop runs only while at least 32 source bytes remain. */
00154 mm_end = end - 31;
00155 while (s < mm_end) {
00156 __asm__ volatile(
00157 PREFETCH" 32%1 \n\t"
/* Load 8 pixels into mm0/mm1/mm4/mm5 and duplicate them into mm2/mm3/mm6/mm7,
 * which is the register layout STORE_BGR24_MMX consumes. */
00158 "movq %1, %%mm0 \n\t"
00159 "movq 8%1, %%mm1 \n\t"
00160 "movq 16%1, %%mm4 \n\t"
00161 "movq 24%1, %%mm5 \n\t"
00162 "movq %%mm0, %%mm2 \n\t"
00163 "movq %%mm1, %%mm3 \n\t"
00164 "movq %%mm4, %%mm6 \n\t"
00165 "movq %%mm5, %%mm7 \n\t"
00166 STORE_BGR24_MMX
00167 :"=m"(*dest)
00168 :"m"(*s)
00169 :"memory");
00170 dest += 24;
00171 s += 32;
00172 }
00173 __asm__ volatile(SFENCE:::"memory");
00174 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: copy three bytes, skip the fourth. */
00175 while (s < end) {
00176 *dest++ = *s++;
00177 *dest++ = *s++;
00178 *dest++ = *s++;
00179 s++;
00180 }
00181 }
00182
00183
00184
00185
00186
00187
00188
/**
 * Convert 16-bit pixels from a 15-bit layout to a 16-bit layout.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes
 *
 * In the scalar paths each 16-bit word x becomes (x & 0x7FFF) + (x & 0x7FE0),
 * i.e. bits 5..14 move up by one position while bits 0..4 stay in place.
 * Processing happens in three stages: 16 bytes per MMX iteration, then 4 bytes
 * (two pixels) at a time, then one final pixel if any bytes are left.
 */
00189 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
00190 {
00191 register const uint8_t* s=src;
00192 register uint8_t* d=dst;
00193 register const uint8_t *end;
00194 const uint8_t *mm_end;
00195 end = s + src_size;
00196 __asm__ volatile(PREFETCH" %0"::"m"(*s));
/* mm4 = mask15s (defined outside this chunk; presumably selects the bits that
 * have to be doubled -- confirm). */
00197 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
00198 mm_end = end - 15;
00199 while (s<mm_end) {
00200 __asm__ volatile(
00201 PREFETCH" 32%1 \n\t"
00202 "movq %1, %%mm0 \n\t"
00203 "movq 8%1, %%mm2 \n\t"
00204 "movq %%mm0, %%mm1 \n\t"
00205 "movq %%mm2, %%mm3 \n\t"
/* result = original + (original & mask), computed per 16-bit word */
00206 "pand %%mm4, %%mm0 \n\t"
00207 "pand %%mm4, %%mm2 \n\t"
00208 "paddw %%mm1, %%mm0 \n\t"
00209 "paddw %%mm3, %%mm2 \n\t"
00210 MOVNTQ" %%mm0, %0 \n\t"
00211 MOVNTQ" %%mm2, 8%0"
00212 :"=m"(*d)
00213 :"m"(*s)
00214 );
00215 d+=16;
00216 s+=16;
00217 }
00218 __asm__ volatile(SFENCE:::"memory");
00219 __asm__ volatile(EMMS:::"memory");
/* Two pixels at a time through 32-bit accesses.
 * NOTE(review): s and d are accessed through uint32_t/uint16_t pointers;
 * assumes unaligned access is acceptable on the target -- confirm. */
00220 mm_end = end - 3;
00221 while (s < mm_end) {
00222 register unsigned x= *((const uint32_t *)s);
00223 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
00224 d+=4;
00225 s+=4;
00226 }
/* At most one pixel is left. */
00227 if (s < end) {
00228 register unsigned short x= *((const uint16_t *)s);
00229 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
00230 }
00231 }
00232
/**
 * Convert 16-bit pixels from a 16-bit layout to a 15-bit layout.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes
 *
 * In the scalar paths each 16-bit word x becomes
 * ((x >> 1) & 0x7FE0) | (x & 0x001F): the upper fields move down by one bit
 * and the low 5 bits are kept. Processing happens in three stages: 16 bytes
 * per MMX iteration, then 4 bytes at a time, then one final pixel if needed.
 */
00233 static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
00234 {
00235 register const uint8_t* s=src;
00236 register uint8_t* d=dst;
00237 register const uint8_t *end;
00238 const uint8_t *mm_end;
00239 end = s + src_size;
00240 __asm__ volatile(PREFETCH" %0"::"m"(*s));
/* mm7 = mask15rg, mm6 = mask15b (both defined outside this chunk). */
00241 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
00242 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
00243 mm_end = end - 15;
00244 while (s<mm_end) {
00245 __asm__ volatile(
00246 PREFETCH" 32%1 \n\t"
00247 "movq %1, %%mm0 \n\t"
00248 "movq 8%1, %%mm2 \n\t"
00249 "movq %%mm0, %%mm1 \n\t"
00250 "movq %%mm2, %%mm3 \n\t"
/* (x >> 1) & mask15rg combined with x & mask15b */
00251 "psrlq $1, %%mm0 \n\t"
00252 "psrlq $1, %%mm2 \n\t"
00253 "pand %%mm7, %%mm0 \n\t"
00254 "pand %%mm7, %%mm2 \n\t"
00255 "pand %%mm6, %%mm1 \n\t"
00256 "pand %%mm6, %%mm3 \n\t"
00257 "por %%mm1, %%mm0 \n\t"
00258 "por %%mm3, %%mm2 \n\t"
00259 MOVNTQ" %%mm0, %0 \n\t"
00260 MOVNTQ" %%mm2, 8%0"
00261 :"=m"(*d)
00262 :"m"(*s)
00263 );
00264 d+=16;
00265 s+=16;
00266 }
00267 __asm__ volatile(SFENCE:::"memory");
00268 __asm__ volatile(EMMS:::"memory");
/* Two pixels at a time through 32-bit accesses. */
00269 mm_end = end - 3;
00270 while (s < mm_end) {
00271 register uint32_t x= *((const uint32_t*)s);
00272 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
00273 s+=4;
00274 d+=4;
00275 }
/* At most one pixel is left. */
00276 if (s < end) {
00277 register uint16_t x= *((const uint16_t*)s);
00278 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
00279 }
00280 }
00281
/**
 * Convert 4-byte pixels to 16-bit pixels (5/6/5 bit fields).
 *
 * @param src      source buffer of 32-bit pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes
 *
 * Per the scalar tail, for a pixel read as a 32-bit value: the top 5 bits of
 * the low byte go to bits 0..4, the top 6 bits of the second byte to bits
 * 5..10, and the top 5 bits of the third byte to bits 11..15.
 *
 * The active (#if 1) MMX path is a single asm statement containing its own
 * loop; it converts 16 source bytes into 8 destination bytes per iteration
 * and advances s and d itself. The #else path is compiled out.
 */
00282 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
00283 {
00284 const uint8_t *s = src;
00285 const uint8_t *end;
00286 const uint8_t *mm_end;
00287 uint16_t *d = (uint16_t *)dst;
00288 end = s + src_size;
00289 mm_end = end - 15;
00290 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
/* NOTE(review): this asm stores through d but declares no "memory" clobber
 * and no memory output operand -- confirm this is intentional. */
00291 __asm__ volatile(
00292 "movq %3, %%mm5 \n\t"
00293 "movq %4, %%mm6 \n\t"
00294 "movq %5, %%mm7 \n\t"
/* Jump to the loop test first so the body is skipped when s >= mm_end. */
00295 "jmp 2f \n\t"
00296 ".p2align 4 \n\t"
00297 "1: \n\t"
00298 PREFETCH" 32(%1) \n\t"
00299 "movd (%1), %%mm0 \n\t"
00300 "movd 4(%1), %%mm3 \n\t"
00301 "punpckldq 8(%1), %%mm0 \n\t"
00302 "punpckldq 12(%1), %%mm3 \n\t"
00303 "movq %%mm0, %%mm1 \n\t"
00304 "movq %%mm3, %%mm4 \n\t"
00305 "pand %%mm6, %%mm0 \n\t"
00306 "pand %%mm6, %%mm3 \n\t"
00307 "pmaddwd %%mm7, %%mm0 \n\t"
00308 "pmaddwd %%mm7, %%mm3 \n\t"
00309 "pand %%mm5, %%mm1 \n\t"
00310 "pand %%mm5, %%mm4 \n\t"
00311 "por %%mm1, %%mm0 \n\t"
00312 "por %%mm4, %%mm3 \n\t"
00313 "psrld $5, %%mm0 \n\t"
00314 "pslld $11, %%mm3 \n\t"
00315 "por %%mm3, %%mm0 \n\t"
00316 MOVNTQ" %%mm0, (%0) \n\t"
00317 "add $16, %1 \n\t"
00318 "add $8, %0 \n\t"
00319 "2: \n\t"
/* Loop while s < mm_end (unsigned compare). */
00320 "cmp %2, %1 \n\t"
00321 " jb 1b \n\t"
00322 : "+r" (d), "+r"(s)
00323 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
00324 );
00325 #else
/* Alternative shift-and-mask implementation, currently disabled. */
00326 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00327 __asm__ volatile(
00328 "movq %0, %%mm7 \n\t"
00329 "movq %1, %%mm6 \n\t"
00330 ::"m"(red_16mask),"m"(green_16mask));
00331 while (s < mm_end) {
00332 __asm__ volatile(
00333 PREFETCH" 32%1 \n\t"
00334 "movd %1, %%mm0 \n\t"
00335 "movd 4%1, %%mm3 \n\t"
00336 "punpckldq 8%1, %%mm0 \n\t"
00337 "punpckldq 12%1, %%mm3 \n\t"
00338 "movq %%mm0, %%mm1 \n\t"
00339 "movq %%mm0, %%mm2 \n\t"
00340 "movq %%mm3, %%mm4 \n\t"
00341 "movq %%mm3, %%mm5 \n\t"
00342 "psrlq $3, %%mm0 \n\t"
00343 "psrlq $3, %%mm3 \n\t"
00344 "pand %2, %%mm0 \n\t"
00345 "pand %2, %%mm3 \n\t"
00346 "psrlq $5, %%mm1 \n\t"
00347 "psrlq $5, %%mm4 \n\t"
00348 "pand %%mm6, %%mm1 \n\t"
00349 "pand %%mm6, %%mm4 \n\t"
00350 "psrlq $8, %%mm2 \n\t"
00351 "psrlq $8, %%mm5 \n\t"
00352 "pand %%mm7, %%mm2 \n\t"
00353 "pand %%mm7, %%mm5 \n\t"
00354 "por %%mm1, %%mm0 \n\t"
00355 "por %%mm4, %%mm3 \n\t"
00356 "por %%mm2, %%mm0 \n\t"
00357 "por %%mm5, %%mm3 \n\t"
00358 "psllq $16, %%mm3 \n\t"
00359 "por %%mm3, %%mm0 \n\t"
00360 MOVNTQ" %%mm0, %0 \n\t"
00361 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00362 d += 4;
00363 s += 16;
00364 }
00365 #endif
00366 __asm__ volatile(SFENCE:::"memory");
00367 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00368 while (s < end) {
00369 register int rgb = *(const uint32_t*)s; s += 4;
00370 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
00371 }
00372 }
00373
/**
 * Convert 4-byte pixels to 16-bit pixels (5/6/5 bit fields) with the first
 * and third source bytes exchanged relative to rgb32to16.
 *
 * @param src      source buffer of 32-bit pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes
 *
 * Per the scalar tail, for a pixel read as a 32-bit value: the top 5 bits of
 * the low byte go to bits 11..15, the top 6 bits of the second byte to bits
 * 5..10, and the top 5 bits of the third byte to bits 0..4.
 * The MMX loop converts 4 pixels (16 bytes in, 8 bytes out) per iteration.
 */
00374 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
00375 {
00376 const uint8_t *s = src;
00377 const uint8_t *end;
00378 const uint8_t *mm_end;
00379 uint16_t *d = (uint16_t *)dst;
00380 end = s + src_size;
00381 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_16mask and mm6 = green_16mask stay loaded for the whole loop;
 * blue_16mask is passed to the loop body as memory operand %2. */
00382 __asm__ volatile(
00383 "movq %0, %%mm7 \n\t"
00384 "movq %1, %%mm6 \n\t"
00385 ::"m"(red_16mask),"m"(green_16mask));
00386 mm_end = end - 15;
00387 while (s < mm_end) {
00388 __asm__ volatile(
00389 PREFETCH" 32%1 \n\t"
/* mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00390 "movd %1, %%mm0 \n\t"
00391 "movd 4%1, %%mm3 \n\t"
00392 "punpckldq 8%1, %%mm0 \n\t"
00393 "punpckldq 12%1, %%mm3 \n\t"
00394 "movq %%mm0, %%mm1 \n\t"
00395 "movq %%mm0, %%mm2 \n\t"
00396 "movq %%mm3, %%mm4 \n\t"
00397 "movq %%mm3, %%mm5 \n\t"
00398 "psllq $8, %%mm0 \n\t"
00399 "psllq $8, %%mm3 \n\t"
00400 "pand %%mm7, %%mm0 \n\t"
00401 "pand %%mm7, %%mm3 \n\t"
00402 "psrlq $5, %%mm1 \n\t"
00403 "psrlq $5, %%mm4 \n\t"
00404 "pand %%mm6, %%mm1 \n\t"
00405 "pand %%mm6, %%mm4 \n\t"
00406 "psrlq $19, %%mm2 \n\t"
00407 "psrlq $19, %%mm5 \n\t"
00408 "pand %2, %%mm2 \n\t"
00409 "pand %2, %%mm5 \n\t"
00410 "por %%mm1, %%mm0 \n\t"
00411 "por %%mm4, %%mm3 \n\t"
00412 "por %%mm2, %%mm0 \n\t"
00413 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00414 "psllq $16, %%mm3 \n\t"
00415 "por %%mm3, %%mm0 \n\t"
00416 MOVNTQ" %%mm0, %0 \n\t"
00417 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00418 d += 4;
00419 s += 16;
00420 }
00421 __asm__ volatile(SFENCE:::"memory");
00422 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00423 while (s < end) {
00424 register int rgb = *(const uint32_t*)s; s += 4;
00425 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
00426 }
00427 }
00428
/**
 * Convert 4-byte pixels to 16-bit pixels with three 5-bit fields.
 *
 * @param src      source buffer of 32-bit pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes
 *
 * Per the scalar tail, for a pixel read as a 32-bit value: the top 5 bits of
 * the low byte go to bits 0..4, the top 5 bits of the second byte to bits
 * 5..9, and the top 5 bits of the third byte to bits 10..14.
 *
 * The active (#if 1) MMX path is a single asm statement containing its own
 * loop (16 source bytes to 8 destination bytes per iteration) that advances
 * s and d itself. The #else path is compiled out.
 */
00429 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
00430 {
00431 const uint8_t *s = src;
00432 const uint8_t *end;
00433 const uint8_t *mm_end;
00434 uint16_t *d = (uint16_t *)dst;
00435 end = s + src_size;
00436 mm_end = end - 15;
00437 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
/* NOTE(review): this asm stores through d but declares no "memory" clobber
 * and no memory output operand -- confirm this is intentional. */
00438 __asm__ volatile(
00439 "movq %3, %%mm5 \n\t"
00440 "movq %4, %%mm6 \n\t"
00441 "movq %5, %%mm7 \n\t"
/* Jump to the loop test first so the body is skipped when s >= mm_end. */
00442 "jmp 2f \n\t"
00443 ".p2align 4 \n\t"
00444 "1: \n\t"
00445 PREFETCH" 32(%1) \n\t"
00446 "movd (%1), %%mm0 \n\t"
00447 "movd 4(%1), %%mm3 \n\t"
00448 "punpckldq 8(%1), %%mm0 \n\t"
00449 "punpckldq 12(%1), %%mm3 \n\t"
00450 "movq %%mm0, %%mm1 \n\t"
00451 "movq %%mm3, %%mm4 \n\t"
00452 "pand %%mm6, %%mm0 \n\t"
00453 "pand %%mm6, %%mm3 \n\t"
00454 "pmaddwd %%mm7, %%mm0 \n\t"
00455 "pmaddwd %%mm7, %%mm3 \n\t"
00456 "pand %%mm5, %%mm1 \n\t"
00457 "pand %%mm5, %%mm4 \n\t"
00458 "por %%mm1, %%mm0 \n\t"
00459 "por %%mm4, %%mm3 \n\t"
00460 "psrld $6, %%mm0 \n\t"
00461 "pslld $10, %%mm3 \n\t"
00462 "por %%mm3, %%mm0 \n\t"
00463 MOVNTQ" %%mm0, (%0) \n\t"
00464 "add $16, %1 \n\t"
00465 "add $8, %0 \n\t"
00466 "2: \n\t"
/* Loop while s < mm_end (unsigned compare). */
00467 "cmp %2, %1 \n\t"
00468 " jb 1b \n\t"
00469 : "+r" (d), "+r"(s)
00470 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
00471 );
00472 #else
/* Alternative shift-and-mask implementation, currently disabled. */
00473 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00474 __asm__ volatile(
00475 "movq %0, %%mm7 \n\t"
00476 "movq %1, %%mm6 \n\t"
00477 ::"m"(red_15mask),"m"(green_15mask));
00478 while (s < mm_end) {
00479 __asm__ volatile(
00480 PREFETCH" 32%1 \n\t"
00481 "movd %1, %%mm0 \n\t"
00482 "movd 4%1, %%mm3 \n\t"
00483 "punpckldq 8%1, %%mm0 \n\t"
00484 "punpckldq 12%1, %%mm3 \n\t"
00485 "movq %%mm0, %%mm1 \n\t"
00486 "movq %%mm0, %%mm2 \n\t"
00487 "movq %%mm3, %%mm4 \n\t"
00488 "movq %%mm3, %%mm5 \n\t"
00489 "psrlq $3, %%mm0 \n\t"
00490 "psrlq $3, %%mm3 \n\t"
00491 "pand %2, %%mm0 \n\t"
00492 "pand %2, %%mm3 \n\t"
00493 "psrlq $6, %%mm1 \n\t"
00494 "psrlq $6, %%mm4 \n\t"
00495 "pand %%mm6, %%mm1 \n\t"
00496 "pand %%mm6, %%mm4 \n\t"
00497 "psrlq $9, %%mm2 \n\t"
00498 "psrlq $9, %%mm5 \n\t"
00499 "pand %%mm7, %%mm2 \n\t"
00500 "pand %%mm7, %%mm5 \n\t"
00501 "por %%mm1, %%mm0 \n\t"
00502 "por %%mm4, %%mm3 \n\t"
00503 "por %%mm2, %%mm0 \n\t"
00504 "por %%mm5, %%mm3 \n\t"
00505 "psllq $16, %%mm3 \n\t"
00506 "por %%mm3, %%mm0 \n\t"
00507 MOVNTQ" %%mm0, %0 \n\t"
00508 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00509 d += 4;
00510 s += 16;
00511 }
00512 #endif
00513 __asm__ volatile(SFENCE:::"memory");
00514 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00515 while (s < end) {
00516 register int rgb = *(const uint32_t*)s; s += 4;
00517 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
00518 }
00519 }
00520
/**
 * Convert 4-byte pixels to 16-bit pixels with three 5-bit fields, with the
 * first and third source bytes exchanged relative to rgb32to15.
 *
 * @param src      source buffer of 32-bit pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes
 *
 * Per the scalar tail, for a pixel read as a 32-bit value: the top 5 bits of
 * the low byte go to bits 10..14, the top 5 bits of the second byte to bits
 * 5..9, and the top 5 bits of the third byte to bits 0..4.
 * The MMX loop converts 4 pixels (16 bytes in, 8 bytes out) per iteration.
 */
00521 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
00522 {
00523 const uint8_t *s = src;
00524 const uint8_t *end;
00525 const uint8_t *mm_end;
00526 uint16_t *d = (uint16_t *)dst;
00527 end = s + src_size;
00528 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_15mask and mm6 = green_15mask stay loaded for the whole loop;
 * blue_15mask is passed to the loop body as memory operand %2. */
00529 __asm__ volatile(
00530 "movq %0, %%mm7 \n\t"
00531 "movq %1, %%mm6 \n\t"
00532 ::"m"(red_15mask),"m"(green_15mask));
00533 mm_end = end - 15;
00534 while (s < mm_end) {
00535 __asm__ volatile(
00536 PREFETCH" 32%1 \n\t"
/* mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00537 "movd %1, %%mm0 \n\t"
00538 "movd 4%1, %%mm3 \n\t"
00539 "punpckldq 8%1, %%mm0 \n\t"
00540 "punpckldq 12%1, %%mm3 \n\t"
00541 "movq %%mm0, %%mm1 \n\t"
00542 "movq %%mm0, %%mm2 \n\t"
00543 "movq %%mm3, %%mm4 \n\t"
00544 "movq %%mm3, %%mm5 \n\t"
00545 "psllq $7, %%mm0 \n\t"
00546 "psllq $7, %%mm3 \n\t"
00547 "pand %%mm7, %%mm0 \n\t"
00548 "pand %%mm7, %%mm3 \n\t"
00549 "psrlq $6, %%mm1 \n\t"
00550 "psrlq $6, %%mm4 \n\t"
00551 "pand %%mm6, %%mm1 \n\t"
00552 "pand %%mm6, %%mm4 \n\t"
00553 "psrlq $19, %%mm2 \n\t"
00554 "psrlq $19, %%mm5 \n\t"
00555 "pand %2, %%mm2 \n\t"
00556 "pand %2, %%mm5 \n\t"
00557 "por %%mm1, %%mm0 \n\t"
00558 "por %%mm4, %%mm3 \n\t"
00559 "por %%mm2, %%mm0 \n\t"
00560 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00561 "psllq $16, %%mm3 \n\t"
00562 "por %%mm3, %%mm0 \n\t"
00563 MOVNTQ" %%mm0, %0 \n\t"
00564 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00565 d += 4;
00566 s += 16;
00567 }
00568 __asm__ volatile(SFENCE:::"memory");
00569 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00570 while (s < end) {
00571 register int rgb = *(const uint32_t*)s; s += 4;
00572 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
00573 }
00574 }
00575
/**
 * Convert packed 3-byte pixels to 16-bit pixels (5/6/5 bit fields).
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes (presumably a multiple of 3 --
 *                 TODO confirm)
 *
 * Per the scalar tail: the first byte of each pixel (named b) supplies bits
 * 0..4, the second (g) bits 5..10, and the third (r) bits 11..15.
 * The MMX loop converts 4 pixels (12 bytes in, 8 bytes out) per iteration.
 */
00576 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
00577 {
00578 const uint8_t *s = src;
00579 const uint8_t *end;
00580 const uint8_t *mm_end;
00581 uint16_t *d = (uint16_t *)dst;
00582 end = s + src_size;
00583 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_16mask and mm6 = green_16mask stay loaded for the whole loop;
 * blue_16mask is passed to the loop body as memory operand %2. */
00584 __asm__ volatile(
00585 "movq %0, %%mm7 \n\t"
00586 "movq %1, %%mm6 \n\t"
00587 ::"m"(red_16mask),"m"(green_16mask));
/* The MMX loop runs while at least 12 source bytes remain.
 * NOTE(review): the 4-byte load at offset 9 below covers bytes 9..12, one
 * more than this guard guarantees; rgb24to16 uses end - 15 instead --
 * confirm callers pad the source. */
00588 mm_end = end - 11;
00589 while (s < mm_end) {
00590 __asm__ volatile(
00591 PREFETCH" 32%1 \n\t"
/* 4-byte loads at a stride of 3: mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00592 "movd %1, %%mm0 \n\t"
00593 "movd 3%1, %%mm3 \n\t"
00594 "punpckldq 6%1, %%mm0 \n\t"
00595 "punpckldq 9%1, %%mm3 \n\t"
00596 "movq %%mm0, %%mm1 \n\t"
00597 "movq %%mm0, %%mm2 \n\t"
00598 "movq %%mm3, %%mm4 \n\t"
00599 "movq %%mm3, %%mm5 \n\t"
00600 "psrlq $3, %%mm0 \n\t"
00601 "psrlq $3, %%mm3 \n\t"
00602 "pand %2, %%mm0 \n\t"
00603 "pand %2, %%mm3 \n\t"
00604 "psrlq $5, %%mm1 \n\t"
00605 "psrlq $5, %%mm4 \n\t"
00606 "pand %%mm6, %%mm1 \n\t"
00607 "pand %%mm6, %%mm4 \n\t"
00608 "psrlq $8, %%mm2 \n\t"
00609 "psrlq $8, %%mm5 \n\t"
00610 "pand %%mm7, %%mm2 \n\t"
00611 "pand %%mm7, %%mm5 \n\t"
00612 "por %%mm1, %%mm0 \n\t"
00613 "por %%mm4, %%mm3 \n\t"
00614 "por %%mm2, %%mm0 \n\t"
00615 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00616 "psllq $16, %%mm3 \n\t"
00617 "por %%mm3, %%mm0 \n\t"
00618 MOVNTQ" %%mm0, %0 \n\t"
00619 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00620 d += 4;
00621 s += 12;
00622 }
00623 __asm__ volatile(SFENCE:::"memory");
00624 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00625 while (s < end) {
00626 const int b = *s++;
00627 const int g = *s++;
00628 const int r = *s++;
00629 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00630 }
00631 }
00632
/**
 * Convert packed 3-byte pixels to 16-bit pixels (5/6/5 bit fields), with the
 * first and third source bytes exchanged relative to rgb24tobgr16.
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes (presumably a multiple of 3 --
 *                 TODO confirm)
 *
 * Per the scalar tail: the first byte of each pixel (named r) supplies bits
 * 11..15, the second (g) bits 5..10, and the third (b) bits 0..4.
 * The MMX loop converts 4 pixels (12 bytes in, 8 bytes out) per iteration.
 */
00633 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
00634 {
00635 const uint8_t *s = src;
00636 const uint8_t *end;
00637 const uint8_t *mm_end;
00638 uint16_t *d = (uint16_t *)dst;
00639 end = s + src_size;
00640 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_16mask and mm6 = green_16mask stay loaded for the whole loop;
 * blue_16mask is passed to the loop body as memory operand %2. */
00641 __asm__ volatile(
00642 "movq %0, %%mm7 \n\t"
00643 "movq %1, %%mm6 \n\t"
00644 ::"m"(red_16mask),"m"(green_16mask));
/* The MMX loop requires at least 16 remaining source bytes even though each
 * iteration consumes only 12; the remainder goes through the scalar tail. */
00645 mm_end = end - 15;
00646 while (s < mm_end) {
00647 __asm__ volatile(
00648 PREFETCH" 32%1 \n\t"
/* 4-byte loads at a stride of 3: mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00649 "movd %1, %%mm0 \n\t"
00650 "movd 3%1, %%mm3 \n\t"
00651 "punpckldq 6%1, %%mm0 \n\t"
00652 "punpckldq 9%1, %%mm3 \n\t"
00653 "movq %%mm0, %%mm1 \n\t"
00654 "movq %%mm0, %%mm2 \n\t"
00655 "movq %%mm3, %%mm4 \n\t"
00656 "movq %%mm3, %%mm5 \n\t"
00657 "psllq $8, %%mm0 \n\t"
00658 "psllq $8, %%mm3 \n\t"
00659 "pand %%mm7, %%mm0 \n\t"
00660 "pand %%mm7, %%mm3 \n\t"
00661 "psrlq $5, %%mm1 \n\t"
00662 "psrlq $5, %%mm4 \n\t"
00663 "pand %%mm6, %%mm1 \n\t"
00664 "pand %%mm6, %%mm4 \n\t"
00665 "psrlq $19, %%mm2 \n\t"
00666 "psrlq $19, %%mm5 \n\t"
00667 "pand %2, %%mm2 \n\t"
00668 "pand %2, %%mm5 \n\t"
00669 "por %%mm1, %%mm0 \n\t"
00670 "por %%mm4, %%mm3 \n\t"
00671 "por %%mm2, %%mm0 \n\t"
00672 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00673 "psllq $16, %%mm3 \n\t"
00674 "por %%mm3, %%mm0 \n\t"
00675 MOVNTQ" %%mm0, %0 \n\t"
00676 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00677 d += 4;
00678 s += 12;
00679 }
00680 __asm__ volatile(SFENCE:::"memory");
00681 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00682 while (s < end) {
00683 const int r = *s++;
00684 const int g = *s++;
00685 const int b = *s++;
00686 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00687 }
00688 }
00689
/**
 * Convert packed 3-byte pixels to 16-bit pixels with three 5-bit fields.
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes (presumably a multiple of 3 --
 *                 TODO confirm)
 *
 * Per the scalar tail: the first byte of each pixel (named b) supplies bits
 * 0..4, the second (g) bits 5..9, and the third (r) bits 10..14.
 * The MMX loop converts 4 pixels (12 bytes in, 8 bytes out) per iteration.
 */
00690 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
00691 {
00692 const uint8_t *s = src;
00693 const uint8_t *end;
00694 const uint8_t *mm_end;
00695 uint16_t *d = (uint16_t *)dst;
00696 end = s + src_size;
00697 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_15mask and mm6 = green_15mask stay loaded for the whole loop;
 * blue_15mask is passed to the loop body as memory operand %2. */
00698 __asm__ volatile(
00699 "movq %0, %%mm7 \n\t"
00700 "movq %1, %%mm6 \n\t"
00701 ::"m"(red_15mask),"m"(green_15mask));
/* The MMX loop runs while at least 12 source bytes remain.
 * NOTE(review): the 4-byte load at offset 9 below covers bytes 9..12, one
 * more than this guard guarantees; rgb24to15 uses end - 15 instead --
 * confirm callers pad the source. */
00702 mm_end = end - 11;
00703 while (s < mm_end) {
00704 __asm__ volatile(
00705 PREFETCH" 32%1 \n\t"
/* 4-byte loads at a stride of 3: mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00706 "movd %1, %%mm0 \n\t"
00707 "movd 3%1, %%mm3 \n\t"
00708 "punpckldq 6%1, %%mm0 \n\t"
00709 "punpckldq 9%1, %%mm3 \n\t"
00710 "movq %%mm0, %%mm1 \n\t"
00711 "movq %%mm0, %%mm2 \n\t"
00712 "movq %%mm3, %%mm4 \n\t"
00713 "movq %%mm3, %%mm5 \n\t"
00714 "psrlq $3, %%mm0 \n\t"
00715 "psrlq $3, %%mm3 \n\t"
00716 "pand %2, %%mm0 \n\t"
00717 "pand %2, %%mm3 \n\t"
00718 "psrlq $6, %%mm1 \n\t"
00719 "psrlq $6, %%mm4 \n\t"
00720 "pand %%mm6, %%mm1 \n\t"
00721 "pand %%mm6, %%mm4 \n\t"
00722 "psrlq $9, %%mm2 \n\t"
00723 "psrlq $9, %%mm5 \n\t"
00724 "pand %%mm7, %%mm2 \n\t"
00725 "pand %%mm7, %%mm5 \n\t"
00726 "por %%mm1, %%mm0 \n\t"
00727 "por %%mm4, %%mm3 \n\t"
00728 "por %%mm2, %%mm0 \n\t"
00729 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00730 "psllq $16, %%mm3 \n\t"
00731 "por %%mm3, %%mm0 \n\t"
00732 MOVNTQ" %%mm0, %0 \n\t"
00733 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00734 d += 4;
00735 s += 12;
00736 }
00737 __asm__ volatile(SFENCE:::"memory");
00738 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00739 while (s < end) {
00740 const int b = *s++;
00741 const int g = *s++;
00742 const int r = *s++;
00743 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00744 }
00745 }
00746
/**
 * Convert packed 3-byte pixels to 16-bit pixels with three 5-bit fields, with
 * the first and third source bytes exchanged relative to rgb24tobgr15.
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; one 16-bit word is written per pixel
 * @param src_size number of source bytes (presumably a multiple of 3 --
 *                 TODO confirm)
 *
 * Per the scalar tail: the first byte of each pixel (named r) supplies bits
 * 10..14, the second (g) bits 5..9, and the third (b) bits 0..4.
 * The MMX loop converts 4 pixels (12 bytes in, 8 bytes out) per iteration.
 */
00747 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
00748 {
00749 const uint8_t *s = src;
00750 const uint8_t *end;
00751 const uint8_t *mm_end;
00752 uint16_t *d = (uint16_t *)dst;
00753 end = s + src_size;
00754 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
/* mm7 = red_15mask and mm6 = green_15mask stay loaded for the whole loop;
 * blue_15mask is passed to the loop body as memory operand %2. */
00755 __asm__ volatile(
00756 "movq %0, %%mm7 \n\t"
00757 "movq %1, %%mm6 \n\t"
00758 ::"m"(red_15mask),"m"(green_15mask));
/* The MMX loop requires at least 16 remaining source bytes even though each
 * iteration consumes only 12; the remainder goes through the scalar tail. */
00759 mm_end = end - 15;
00760 while (s < mm_end) {
00761 __asm__ volatile(
00762 PREFETCH" 32%1 \n\t"
/* 4-byte loads at a stride of 3: mm0 = pixels 0 and 2, mm3 = pixels 1 and 3 */
00763 "movd %1, %%mm0 \n\t"
00764 "movd 3%1, %%mm3 \n\t"
00765 "punpckldq 6%1, %%mm0 \n\t"
00766 "punpckldq 9%1, %%mm3 \n\t"
00767 "movq %%mm0, %%mm1 \n\t"
00768 "movq %%mm0, %%mm2 \n\t"
00769 "movq %%mm3, %%mm4 \n\t"
00770 "movq %%mm3, %%mm5 \n\t"
00771 "psllq $7, %%mm0 \n\t"
00772 "psllq $7, %%mm3 \n\t"
00773 "pand %%mm7, %%mm0 \n\t"
00774 "pand %%mm7, %%mm3 \n\t"
00775 "psrlq $6, %%mm1 \n\t"
00776 "psrlq $6, %%mm4 \n\t"
00777 "pand %%mm6, %%mm1 \n\t"
00778 "pand %%mm6, %%mm4 \n\t"
00779 "psrlq $19, %%mm2 \n\t"
00780 "psrlq $19, %%mm5 \n\t"
00781 "pand %2, %%mm2 \n\t"
00782 "pand %2, %%mm5 \n\t"
00783 "por %%mm1, %%mm0 \n\t"
00784 "por %%mm4, %%mm3 \n\t"
00785 "por %%mm2, %%mm0 \n\t"
00786 "por %%mm5, %%mm3 \n\t"
/* Interleave: odd pixels move into the upper 16 bits of each 32-bit lane. */
00787 "psllq $16, %%mm3 \n\t"
00788 "por %%mm3, %%mm0 \n\t"
00789 MOVNTQ" %%mm0, %0 \n\t"
00790 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00791 d += 4;
00792 s += 12;
00793 }
00794 __asm__ volatile(SFENCE:::"memory");
00795 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00796 while (s < end) {
00797 const int r = *s++;
00798 const int g = *s++;
00799 const int b = *s++;
00800 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00801 }
00802 }
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
/**
 * Expand 16-bit pixels with three 5-bit fields to packed 3-byte pixels.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes (src_size/2 pixels are processed)
 *
 * Per the scalar tail, each pixel produces, in order: (bits 0..4) << 3,
 * (bits 5..9) >> 2 and (bits 10..14) >> 7, so every field ends up in the top
 * 5 bits of its output byte.
 *
 * The MMX loop handles 8 pixels (16 bytes in, 24 bytes out) per iteration,
 * split across two asm statements that pass intermediate results to each
 * other in MMX registers.
 */
00825 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00826 {
00827 const uint16_t *end;
00828 const uint16_t *mm_end;
00829 uint8_t *d = dst;
00830 const uint16_t *s = (const uint16_t*)src;
00831 end = s + src_size/2;
00832 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* The MMX loop runs only while at least 8 pixels remain. */
00833 mm_end = end - 7;
00834 while (s < mm_end) {
/* Stage 1: expand pixels 0..3 into 32-bit pixels and park them in mm6/mm7,
 * then expand pixels 4..7 and leave them in mm0/mm3. Operand %5 (mmx_null)
 * supplies the upper half when the 16-bit fields are widened. */
00835 __asm__ volatile(
00836 PREFETCH" 32%1 \n\t"
00837 "movq %1, %%mm0 \n\t"
00838 "movq %1, %%mm1 \n\t"
00839 "movq %1, %%mm2 \n\t"
00840 "pand %2, %%mm0 \n\t"
00841 "pand %3, %%mm1 \n\t"
00842 "pand %4, %%mm2 \n\t"
00843 "psllq $3, %%mm0 \n\t"
00844 "psrlq $2, %%mm1 \n\t"
00845 "psrlq $7, %%mm2 \n\t"
00846 "movq %%mm0, %%mm3 \n\t"
00847 "movq %%mm1, %%mm4 \n\t"
00848 "movq %%mm2, %%mm5 \n\t"
00849 "punpcklwd %5, %%mm0 \n\t"
00850 "punpcklwd %5, %%mm1 \n\t"
00851 "punpcklwd %5, %%mm2 \n\t"
00852 "punpckhwd %5, %%mm3 \n\t"
00853 "punpckhwd %5, %%mm4 \n\t"
00854 "punpckhwd %5, %%mm5 \n\t"
00855 "psllq $8, %%mm1 \n\t"
00856 "psllq $16, %%mm2 \n\t"
00857 "por %%mm1, %%mm0 \n\t"
00858 "por %%mm2, %%mm0 \n\t"
00859 "psllq $8, %%mm4 \n\t"
00860 "psllq $16, %%mm5 \n\t"
00861 "por %%mm4, %%mm3 \n\t"
00862 "por %%mm5, %%mm3 \n\t"
00863
00864 "movq %%mm0, %%mm6 \n\t"
00865 "movq %%mm3, %%mm7 \n\t"
00866
00867 "movq 8%1, %%mm0 \n\t"
00868 "movq 8%1, %%mm1 \n\t"
00869 "movq 8%1, %%mm2 \n\t"
00870 "pand %2, %%mm0 \n\t"
00871 "pand %3, %%mm1 \n\t"
00872 "pand %4, %%mm2 \n\t"
00873 "psllq $3, %%mm0 \n\t"
00874 "psrlq $2, %%mm1 \n\t"
00875 "psrlq $7, %%mm2 \n\t"
00876 "movq %%mm0, %%mm3 \n\t"
00877 "movq %%mm1, %%mm4 \n\t"
00878 "movq %%mm2, %%mm5 \n\t"
00879 "punpcklwd %5, %%mm0 \n\t"
00880 "punpcklwd %5, %%mm1 \n\t"
00881 "punpcklwd %5, %%mm2 \n\t"
00882 "punpckhwd %5, %%mm3 \n\t"
00883 "punpckhwd %5, %%mm4 \n\t"
00884 "punpckhwd %5, %%mm5 \n\t"
00885 "psllq $8, %%mm1 \n\t"
00886 "psllq $16, %%mm2 \n\t"
00887 "por %%mm1, %%mm0 \n\t"
00888 "por %%mm2, %%mm0 \n\t"
00889 "psllq $8, %%mm4 \n\t"
00890 "psllq $16, %%mm5 \n\t"
00891 "por %%mm4, %%mm3 \n\t"
00892 "por %%mm5, %%mm3 \n\t"
00893
00894 :"=m"(*d)
00895 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
00896 :"memory");
00897
/* Stage 2: rearrange so that mm0/mm1 hold pixels 0..3 and mm4/mm5 hold pixels
 * 4..7, with copies in mm2/mm3/mm6/mm7, then pack and store 24 bytes. */
00898 __asm__ volatile(
00899 "movq %%mm0, %%mm4 \n\t"
00900 "movq %%mm3, %%mm5 \n\t"
00901 "movq %%mm6, %%mm0 \n\t"
00902 "movq %%mm7, %%mm1 \n\t"
00903
00904 "movq %%mm4, %%mm6 \n\t"
00905 "movq %%mm5, %%mm7 \n\t"
00906 "movq %%mm0, %%mm2 \n\t"
00907 "movq %%mm1, %%mm3 \n\t"
00908
00909 STORE_BGR24_MMX
00910
00911 :"=m"(*d)
00912 :"m"(*s)
00913 :"memory");
00914 d += 24;
00915 s += 8;
00916 }
00917 __asm__ volatile(SFENCE:::"memory");
00918 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
00919 while (s < end) {
00920 register uint16_t bgr;
00921 bgr = *s++;
00922 *d++ = (bgr&0x1F)<<3;
00923 *d++ = (bgr&0x3E0)>>2;
00924 *d++ = (bgr&0x7C00)>>7;
00925 }
00926 }
00927
/**
 * Expand 16-bit pixels (5/6/5 bit fields) to packed 3-byte pixels.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes (src_size/2 pixels are processed)
 *
 * Per the scalar tail, each pixel produces, in order: (bits 0..4) << 3,
 * (bits 5..10) >> 3 and (bits 11..15) >> 8, so every field ends up in the top
 * bits of its output byte.
 *
 * The MMX loop handles 8 pixels (16 bytes in, 24 bytes out) per iteration,
 * split across two asm statements that pass intermediate results to each
 * other in MMX registers.
 */
00928 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00929 {
00930 const uint16_t *end;
00931 const uint16_t *mm_end;
00932 uint8_t *d = (uint8_t *)dst;
00933 const uint16_t *s = (const uint16_t *)src;
00934 end = s + src_size/2;
00935 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* The MMX loop runs only while at least 8 pixels remain. */
00936 mm_end = end - 7;
00937 while (s < mm_end) {
/* Stage 1: expand pixels 0..3 into 32-bit pixels and park them in mm6/mm7,
 * then expand pixels 4..7 and leave them in mm0/mm3. Operand %5 (mmx_null)
 * supplies the upper half when the 16-bit fields are widened. */
00938 __asm__ volatile(
00939 PREFETCH" 32%1 \n\t"
00940 "movq %1, %%mm0 \n\t"
00941 "movq %1, %%mm1 \n\t"
00942 "movq %1, %%mm2 \n\t"
00943 "pand %2, %%mm0 \n\t"
00944 "pand %3, %%mm1 \n\t"
00945 "pand %4, %%mm2 \n\t"
00946 "psllq $3, %%mm0 \n\t"
00947 "psrlq $3, %%mm1 \n\t"
00948 "psrlq $8, %%mm2 \n\t"
00949 "movq %%mm0, %%mm3 \n\t"
00950 "movq %%mm1, %%mm4 \n\t"
00951 "movq %%mm2, %%mm5 \n\t"
00952 "punpcklwd %5, %%mm0 \n\t"
00953 "punpcklwd %5, %%mm1 \n\t"
00954 "punpcklwd %5, %%mm2 \n\t"
00955 "punpckhwd %5, %%mm3 \n\t"
00956 "punpckhwd %5, %%mm4 \n\t"
00957 "punpckhwd %5, %%mm5 \n\t"
00958 "psllq $8, %%mm1 \n\t"
00959 "psllq $16, %%mm2 \n\t"
00960 "por %%mm1, %%mm0 \n\t"
00961 "por %%mm2, %%mm0 \n\t"
00962 "psllq $8, %%mm4 \n\t"
00963 "psllq $16, %%mm5 \n\t"
00964 "por %%mm4, %%mm3 \n\t"
00965 "por %%mm5, %%mm3 \n\t"
00966
00967 "movq %%mm0, %%mm6 \n\t"
00968 "movq %%mm3, %%mm7 \n\t"
00969
00970 "movq 8%1, %%mm0 \n\t"
00971 "movq 8%1, %%mm1 \n\t"
00972 "movq 8%1, %%mm2 \n\t"
00973 "pand %2, %%mm0 \n\t"
00974 "pand %3, %%mm1 \n\t"
00975 "pand %4, %%mm2 \n\t"
00976 "psllq $3, %%mm0 \n\t"
00977 "psrlq $3, %%mm1 \n\t"
00978 "psrlq $8, %%mm2 \n\t"
00979 "movq %%mm0, %%mm3 \n\t"
00980 "movq %%mm1, %%mm4 \n\t"
00981 "movq %%mm2, %%mm5 \n\t"
00982 "punpcklwd %5, %%mm0 \n\t"
00983 "punpcklwd %5, %%mm1 \n\t"
00984 "punpcklwd %5, %%mm2 \n\t"
00985 "punpckhwd %5, %%mm3 \n\t"
00986 "punpckhwd %5, %%mm4 \n\t"
00987 "punpckhwd %5, %%mm5 \n\t"
00988 "psllq $8, %%mm1 \n\t"
00989 "psllq $16, %%mm2 \n\t"
00990 "por %%mm1, %%mm0 \n\t"
00991 "por %%mm2, %%mm0 \n\t"
00992 "psllq $8, %%mm4 \n\t"
00993 "psllq $16, %%mm5 \n\t"
00994 "por %%mm4, %%mm3 \n\t"
00995 "por %%mm5, %%mm3 \n\t"
00996 :"=m"(*d)
00997 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
00998 :"memory");
00999
/* Stage 2: rearrange so that mm0/mm1 hold pixels 0..3 and mm4/mm5 hold pixels
 * 4..7, with copies in mm2/mm3/mm6/mm7, then pack and store 24 bytes. */
01000 __asm__ volatile(
01001 "movq %%mm0, %%mm4 \n\t"
01002 "movq %%mm3, %%mm5 \n\t"
01003 "movq %%mm6, %%mm0 \n\t"
01004 "movq %%mm7, %%mm1 \n\t"
01005
01006 "movq %%mm4, %%mm6 \n\t"
01007 "movq %%mm5, %%mm7 \n\t"
01008 "movq %%mm0, %%mm2 \n\t"
01009 "movq %%mm1, %%mm3 \n\t"
01010
01011 STORE_BGR24_MMX
01012
01013 :"=m"(*d)
01014 :"m"(*s)
01015 :"memory");
01016 d += 24;
01017 s += 8;
01018 }
01019 __asm__ volatile(SFENCE:::"memory");
01020 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
01021 while (s < end) {
01022 register uint16_t bgr;
01023 bgr = *s++;
01024 *d++ = (bgr&0x1F)<<3;
01025 *d++ = (bgr&0x7E0)>>3;
01026 *d++ = (bgr&0xF800)>>8;
01027 }
01028 }
01029
01030
01031
01032
01033
01034
01035
01036
/*
 * Assembly fragment that turns three registers of per-channel 16-bit values
 * (mm0, mm1, mm2; four pixels each) into four 4-byte pixels and stores the 16
 * resulting bytes to operand %0 (offsets 0 and 8).
 *
 * The users in this file set mm7 to zero (pxor) and mm6 to all ones (pcmpeqd)
 * before their loops, so packuswb narrows each channel to bytes and the
 * fourth byte of every output pixel comes from mm6. mm0..mm3 are overwritten.
 */
01037 #define PACK_RGB32 \
01038 "packuswb %%mm7, %%mm0 \n\t" \
01039 "packuswb %%mm7, %%mm1 \n\t" \
01040 "packuswb %%mm7, %%mm2 \n\t" \
01041 "punpcklbw %%mm1, %%mm0 \n\t" \
01042 "punpcklbw %%mm6, %%mm2 \n\t" \
01043 "movq %%mm0, %%mm3 \n\t" \
01044 "punpcklwd %%mm2, %%mm0 \n\t" \
01045 "punpckhwd %%mm2, %%mm3 \n\t" \
01046 MOVNTQ" %%mm0, %0 \n\t" \
01047 MOVNTQ" %%mm3, 8%0 \n\t" \
01048
/**
 * Expand 16-bit pixels with three 5-bit fields to 4-byte pixels.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer; 4 bytes are written per source pixel
 * @param src_size number of source bytes (src_size/2 pixels are processed)
 *
 * Per the scalar tail, each pixel produces, in order: (bits 0..4) << 3,
 * (bits 5..9) >> 2, (bits 10..14) >> 7 and a constant 255.
 * The MMX loop converts 4 pixels (8 bytes in, 16 bytes out) per iteration.
 */
01049 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
01050 {
01051 const uint16_t *end;
01052 const uint16_t *mm_end;
01053 uint8_t *d = dst;
01054 const uint16_t *s = (const uint16_t *)src;
01055 end = s + src_size/2;
01056 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* Constants consumed by PACK_RGB32: mm7 = all zero bits, mm6 = all one bits. */
01057 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01058 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
/* The MMX loop runs only while at least 4 pixels remain. */
01059 mm_end = end - 3;
01060 while (s < mm_end) {
01061 __asm__ volatile(
01062 PREFETCH" 32%1 \n\t"
/* Isolate each field with its mask and shift it into the top bits of a byte. */
01063 "movq %1, %%mm0 \n\t"
01064 "movq %1, %%mm1 \n\t"
01065 "movq %1, %%mm2 \n\t"
01066 "pand %2, %%mm0 \n\t"
01067 "pand %3, %%mm1 \n\t"
01068 "pand %4, %%mm2 \n\t"
01069 "psllq $3, %%mm0 \n\t"
01070 "psrlq $2, %%mm1 \n\t"
01071 "psrlq $7, %%mm2 \n\t"
01072 PACK_RGB32
01073 :"=m"(*d)
01074 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01075 :"memory");
01076 d += 16;
01077 s += 4;
01078 }
01079 __asm__ volatile(SFENCE:::"memory");
01080 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
01081 while (s < end) {
01082 register uint16_t bgr;
01083 bgr = *s++;
01084 *d++ = (bgr&0x1F)<<3;
01085 *d++ = (bgr&0x3E0)>>2;
01086 *d++ = (bgr&0x7C00)>>7;
01087 *d++ = 255;
01088 }
01089 }
01090
/**
 * Expand 16-bit pixels (5/6/5 bit fields) to 4-byte pixels.
 *
 * @param src      source buffer of 16-bit pixels
 * @param dst      destination buffer; 4 bytes are written per source pixel
 * @param src_size number of source bytes (src_size/2 pixels are processed)
 *
 * Per the scalar tail, each pixel produces, in order: (bits 0..4) << 3,
 * (bits 5..10) >> 3, (bits 11..15) >> 8 and a constant 255.
 * The MMX loop converts 4 pixels (8 bytes in, 16 bytes out) per iteration.
 */
01091 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
01092 {
01093 const uint16_t *end;
01094 const uint16_t *mm_end;
01095 uint8_t *d = dst;
01096 const uint16_t *s = (const uint16_t*)src;
01097 end = s + src_size/2;
01098 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
/* Constants consumed by PACK_RGB32: mm7 = all zero bits, mm6 = all one bits. */
01099 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01100 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
/* The MMX loop runs only while at least 4 pixels remain. */
01101 mm_end = end - 3;
01102 while (s < mm_end) {
01103 __asm__ volatile(
01104 PREFETCH" 32%1 \n\t"
/* Isolate each field with its mask and shift it into the top bits of a byte. */
01105 "movq %1, %%mm0 \n\t"
01106 "movq %1, %%mm1 \n\t"
01107 "movq %1, %%mm2 \n\t"
01108 "pand %2, %%mm0 \n\t"
01109 "pand %3, %%mm1 \n\t"
01110 "pand %4, %%mm2 \n\t"
01111 "psllq $3, %%mm0 \n\t"
01112 "psrlq $3, %%mm1 \n\t"
01113 "psrlq $8, %%mm2 \n\t"
01114 PACK_RGB32
01115 :"=m"(*d)
01116 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01117 :"memory");
01118 d += 16;
01119 s += 4;
01120 }
01121 __asm__ volatile(SFENCE:::"memory");
01122 __asm__ volatile(EMMS:::"memory");
/* Scalar tail: one pixel per iteration. */
01123 while (s < end) {
01124 register uint16_t bgr;
01125 bgr = *s++;
01126 *d++ = (bgr&0x1F)<<3;
01127 *d++ = (bgr&0x7E0)>>3;
01128 *d++ = (bgr&0xF800)>>8;
01129 *d++ = 255;
01130 }
01131 }
01132
/**
 * Swap bytes 0 and 2 of every 4-byte group, leaving bytes 1 and 3 in place.
 *
 * @param src      source buffer of 4-byte groups
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes
 *
 * Indexing uses a negative offset that counts up towards zero: idx starts at
 * 15 - src_size and s/d are biased so that s[idx] is src[0]. The MMX loop
 * handles 16 bytes per iteration while idx is negative, and the scalar loop
 * finishes the remaining 4-byte groups while idx < 15.
 */
01133 static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
01134 {
01135 x86_reg idx = 15 - src_size;
01136 const uint8_t *s = src-idx;
01137 uint8_t *d = dst-idx;
01138 __asm__ volatile(
/* Skip the whole MMX section (including SFENCE/EMMS) when idx is not
 * negative, i.e. when fewer than 16 bytes are available. */
01139 "test %0, %0 \n\t"
01140 "jns 2f \n\t"
01141 PREFETCH" (%1, %0) \n\t"
/* Build the two selection masks in mm6 and mm7 from mask32b, mask32r and
 * mmx_one (all defined outside this chunk). */
01142 "movq %3, %%mm7 \n\t"
01143 "pxor %4, %%mm7 \n\t"
01144 "movq %%mm7, %%mm6 \n\t"
01145 "pxor %5, %%mm7 \n\t"
01146 ".p2align 4 \n\t"
01147 "1: \n\t"
01148 PREFETCH" 32(%1, %0) \n\t"
01149 "movq (%1, %0), %%mm0 \n\t"
01150 "movq 8(%1, %0), %%mm1 \n\t"
01151 # if COMPILE_TEMPLATE_MMX2
/* pshufw $177 exchanges the two 16-bit words inside each 32-bit half. */
01152 "pshufw $177, %%mm0, %%mm3 \n\t"
01153 "pshufw $177, %%mm1, %%mm5 \n\t"
01154 "pand %%mm7, %%mm0 \n\t"
01155 "pand %%mm6, %%mm3 \n\t"
01156 "pand %%mm7, %%mm1 \n\t"
01157 "pand %%mm6, %%mm5 \n\t"
01158 "por %%mm3, %%mm0 \n\t"
01159 "por %%mm5, %%mm1 \n\t"
01160 # else
/* Without pshufw: mask out the bytes to move and shift them 16 bits in both
 * directions within each 32-bit lane. */
01161 "movq %%mm0, %%mm2 \n\t"
01162 "movq %%mm1, %%mm4 \n\t"
01163 "pand %%mm7, %%mm0 \n\t"
01164 "pand %%mm6, %%mm2 \n\t"
01165 "pand %%mm7, %%mm1 \n\t"
01166 "pand %%mm6, %%mm4 \n\t"
01167 "movq %%mm2, %%mm3 \n\t"
01168 "movq %%mm4, %%mm5 \n\t"
01169 "pslld $16, %%mm2 \n\t"
01170 "psrld $16, %%mm3 \n\t"
01171 "pslld $16, %%mm4 \n\t"
01172 "psrld $16, %%mm5 \n\t"
01173 "por %%mm2, %%mm0 \n\t"
01174 "por %%mm4, %%mm1 \n\t"
01175 "por %%mm3, %%mm0 \n\t"
01176 "por %%mm5, %%mm1 \n\t"
01177 # endif
01178 MOVNTQ" %%mm0, (%2, %0) \n\t"
01179 MOVNTQ" %%mm1, 8(%2, %0) \n\t"
/* Advance by 16 bytes and loop while idx is still negative. */
01180 "add $16, %0 \n\t"
01181 "js 1b \n\t"
01182 SFENCE" \n\t"
01183 EMMS" \n\t"
01184 "2: \n\t"
01185 : "+&r"(idx)
01186 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01187 : "memory");
/* Scalar tail: g keeps bytes 1 and 3, v keeps bytes 0 and 2, which are then
 * exchanged by shifting 16 bits in each direction. */
01188 for (; idx<15; idx+=4) {
01189 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
01190 v &= 0xff00ff;
01191 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01192 }
01193 }
01194
/**
 * Exchange the first and third byte of every 3-byte pixel
 * (dst[0] = src[2], dst[1] = src[1], dst[2] = src[0]).
 *
 * The MMX loop converts 24 bytes (8 pixels) per iteration while at least
 * 24 bytes remain; the scalar loop converts whatever is left.
 *
 * NOTE(review): the last load of each MMX iteration ("movq 18(...)") covers
 * bytes 18..25 of the current 24-byte group, i.e. it can read up to 2 bytes
 * past the end of src -- confirm that callers provide that slack.
 *
 * @param src      source pixels
 * @param dst      destination pixels
 * @param src_size number of source bytes
 */
01195 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
01196 {
01197 unsigned i;
      /* Negative while at least 24 bytes remain; src/dst are biased by the
       * same amount so that (ptr, REG_a) addresses the current position. */
01198 x86_reg mmx_size= 23 - src_size;
01199 __asm__ volatile (
01200 "test %%"REG_a", %%"REG_a" \n\t"
01201 "jns 2f \n\t"
01202 "movq "MANGLE(mask24r)", %%mm5 \n\t"
01203 "movq "MANGLE(mask24g)", %%mm6 \n\t"
01204 "movq "MANGLE(mask24b)", %%mm7 \n\t"
01205 ".p2align 4 \n\t"
01206 "1: \n\t"
      /* Each 8-byte output quadword is the OR of three masked views of the
       * source shifted by -2, 0 and +2 bytes. For the first quadword the -2
       * view is produced with a 16-bit left shift instead of a load. The
       * roles of the three masks rotate from quadword to quadword. */
01207 PREFETCH" 32(%1, %%"REG_a") \n\t"
01208 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01209 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01210 "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
01211 "psllq $16, %%mm0 \n\t"
01212 "pand %%mm5, %%mm0 \n\t"
01213 "pand %%mm6, %%mm1 \n\t"
01214 "pand %%mm7, %%mm2 \n\t"
01215 "por %%mm0, %%mm1 \n\t"
01216 "por %%mm2, %%mm1 \n\t"
01217 "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
01218 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
01219 "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
01220 "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
01221 "pand %%mm7, %%mm0 \n\t"
01222 "pand %%mm5, %%mm1 \n\t"
01223 "pand %%mm6, %%mm2 \n\t"
01224 "por %%mm0, %%mm1 \n\t"
01225 "por %%mm2, %%mm1 \n\t"
01226 "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
01227 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
01228 "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
01229 "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
01230 "pand %%mm6, %%mm0 \n\t"
01231 "pand %%mm7, %%mm1 \n\t"
01232 "pand %%mm5, %%mm2 \n\t"
01233 "por %%mm0, %%mm1 \n\t"
01234 "por %%mm2, %%mm1 \n\t"
01235 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
01236 "add $24, %%"REG_a" \n\t"
01237 " js 1b \n\t"
01238 "2: \n\t"
01239 : "+a" (mmx_size)
01240 : "r" (src-mmx_size), "r"(dst-mmx_size)
      /* The asm reads *src and writes *dst through plain register operands,
       * so the compiler must be told that memory is accessed. */
01241 : "memory");
01242
01243 __asm__ volatile(SFENCE:::"memory");
01244 __asm__ volatile(EMMS:::"memory");
01245
      /* mmx_size ends in 0..23; 23 means every byte was consumed above. */
01246 if (mmx_size==23) return;
01247
      /* Point src/dst at the unconverted tail of (23 - mmx_size) bytes. */
01248 src+= src_size;
01249 dst+= src_size;
01250 src_size= 23-mmx_size;
01251 src-= src_size;
01252 dst-= src_size;
01253 for (i=0; i<src_size; i+=3) {
01254 register uint8_t x;
      /* Read src[i+2] first so the swap also works when dst aliases src. */
01255 x = src[i + 2];
01256 dst[i + 1] = src[i + 1];
01257 dst[i + 2] = src[i + 0];
01258 dst[i + 0] = x;
01259 }
01260 }
01261
/**
 * Interleave planar Y, U and V into packed output with byte order Y U Y V.
 *
 * One output line is produced per luma line. The chroma pointers advance
 * once every vertLumPerChroma lines; the test uses a bit mask, so
 * vertLumPerChroma must be a power of two.
 *
 * The asm loop handles 8 chroma samples (16 luma samples, 32 output bytes)
 * per iteration and always runs at least once, so width is expected to be a
 * positive multiple of 16; otherwise the loop runs past the end of the line.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed destination
 * @param width            luma width in samples
 * @param height           number of luma lines
 * @param lumStride        byte stride of ysrc
 * @param chromStride      byte stride of usrc and vsrc
 * @param dstStride        byte stride of dst
 * @param vertLumPerChroma luma lines per chroma line (power of two)
 */
01262 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01263 int width, int height,
01264 int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
01265 {
01266 int y;
01267 const x86_reg chromWidth= width>>1;
01268 for (y=0; y<height; y++) {
01269
01270 __asm__ volatile(
01271 "xor %%"REG_a", %%"REG_a" \n\t"
01272 ".p2align 4 \n\t"
01273 "1: \n\t"
01274 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01275 PREFETCH" 32(%2, %%"REG_a") \n\t"
01276 PREFETCH" 32(%3, %%"REG_a") \n\t"
      /* mm0/mm2 = U and V interleaved (U0 V0 U1 V1 ...), low and high half. */
01277 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01278 "movq %%mm0, %%mm2 \n\t"
01279 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01280 "punpcklbw %%mm1, %%mm0 \n\t"
01281 "punpckhbw %%mm1, %%mm2 \n\t"
01282
      /* Interleave 16 luma bytes with the UV pairs: Y0 U0 Y1 V0 ... */
01283 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01284 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01285 "movq %%mm3, %%mm4 \n\t"
01286 "movq %%mm5, %%mm6 \n\t"
01287 "punpcklbw %%mm0, %%mm3 \n\t"
01288 "punpckhbw %%mm0, %%mm4 \n\t"
01289 "punpcklbw %%mm2, %%mm5 \n\t"
01290 "punpckhbw %%mm2, %%mm6 \n\t"
01291
01292 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
01293 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01294 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
01295 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01296
01297 "add $8, %%"REG_a" \n\t"
01298 "cmp %4, %%"REG_a" \n\t"
01299 " jb 1b \n\t"
01300 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
      /* "memory": the asm loads from the three planes and stores to dst
       * through register operands only. */
01301 : "memory", "%"REG_a
01302 );
      /* Advance chroma after the last luma line that shares it. */
01303 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
01304 usrc += chromStride;
01305 vsrc += chromStride;
01306 }
01307 ysrc += lumStride;
01308 dst += dstStride;
01309 }
01310 __asm__(EMMS" \n\t"
01311 SFENCE" \n\t"
01312 :::"memory");
01313 }
01314
/**
 * Convert planar YUV with one chroma line per two luma lines to packed
 * Y U Y V output.
 *
 * Thin wrapper around yuvPlanartoyuy2() with vertLumPerChroma = 2; see that
 * function for the constraints on width.
 */
01319 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01320 int width, int height,
01321 int lumStride, int chromStride, int dstStride)
01322 {
01323
01324 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01325 }
01326
/**
 * Interleave planar Y, U and V into packed output with byte order U Y V Y.
 *
 * One output line is produced per luma line. The chroma pointers advance
 * once every vertLumPerChroma lines; the test uses a bit mask, so
 * vertLumPerChroma must be a power of two.
 *
 * The asm loop handles 8 chroma samples (16 luma samples, 32 output bytes)
 * per iteration and always runs at least once, so width is expected to be a
 * positive multiple of 16; otherwise the loop runs past the end of the line.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed destination
 * @param width            luma width in samples
 * @param height           number of luma lines
 * @param lumStride        byte stride of ysrc
 * @param chromStride      byte stride of usrc and vsrc
 * @param dstStride        byte stride of dst
 * @param vertLumPerChroma luma lines per chroma line (power of two)
 */
01327 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01328 int width, int height,
01329 int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
01330 {
01331 int y;
01332 const x86_reg chromWidth= width>>1;
01333 for (y=0; y<height; y++) {
01334
01335 __asm__ volatile(
01336 "xor %%"REG_a", %%"REG_a" \n\t"
01337 ".p2align 4 \n\t"
01338 "1: \n\t"
01339 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01340 PREFETCH" 32(%2, %%"REG_a") \n\t"
01341 PREFETCH" 32(%3, %%"REG_a") \n\t"
      /* mm0/mm2 = U and V interleaved (U0 V0 U1 V1 ...), low and high half. */
01342 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01343 "movq %%mm0, %%mm2 \n\t"
01344 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01345 "punpcklbw %%mm1, %%mm0 \n\t"
01346 "punpckhbw %%mm1, %%mm2 \n\t"
01347
      /* Interleave the UV pairs with 16 luma bytes, chroma first:
       * U0 Y0 V0 Y1 ... */
01348 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01349 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01350 "movq %%mm0, %%mm4 \n\t"
01351 "movq %%mm2, %%mm6 \n\t"
01352 "punpcklbw %%mm3, %%mm0 \n\t"
01353 "punpckhbw %%mm3, %%mm4 \n\t"
01354 "punpcklbw %%mm5, %%mm2 \n\t"
01355 "punpckhbw %%mm5, %%mm6 \n\t"
01356
01357 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
01358 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01359 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
01360 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01361
01362 "add $8, %%"REG_a" \n\t"
01363 "cmp %4, %%"REG_a" \n\t"
01364 " jb 1b \n\t"
01365 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
      /* "memory": the asm loads from the three planes and stores to dst
       * through register operands only. */
01366 : "memory", "%"REG_a
01367 );
      /* Advance chroma after the last luma line that shares it. */
01368 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
01369 usrc += chromStride;
01370 vsrc += chromStride;
01371 }
01372 ysrc += lumStride;
01373 dst += dstStride;
01374 }
01375 __asm__(EMMS" \n\t"
01376 SFENCE" \n\t"
01377 :::"memory");
01378 }
01379
/**
 * Convert planar YUV with one chroma line per two luma lines to packed
 * U Y V Y output.
 *
 * Thin wrapper around yuvPlanartouyvy() with vertLumPerChroma = 2; see that
 * function for the constraints on width.
 */
01384 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01385 int width, int height,
01386 int lumStride, int chromStride, int dstStride)
01387 {
01388
01389 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01390 }
01391
/**
 * Convert planar YUV with one chroma line per luma line to packed
 * U Y V Y output.
 *
 * Thin wrapper around yuvPlanartouyvy() with vertLumPerChroma = 1; see that
 * function for the constraints on width.
 */
01395 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01396 int width, int height,
01397 int lumStride, int chromStride, int dstStride)
01398 {
01399 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01400 }
01401
/**
 * Convert planar YUV with one chroma line per luma line to packed
 * Y U Y V output.
 *
 * Thin wrapper around yuvPlanartoyuy2() with vertLumPerChroma = 1; see that
 * function for the constraints on width.
 */
01405 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01406 int width, int height,
01407 int lumStride, int chromStride, int dstStride)
01408 {
01409 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01410 }
01411
/**
 * Split packed data with byte order Y U Y V into separate Y, U and V planes.
 *
 * Lines are handled in pairs: luma is extracted from both lines, chroma is
 * taken from the first line of each pair only (the chroma bytes of the second
 * line are dropped, not averaged). Because the loop steps by two lines and
 * always processes both, height is expected to be even.
 *
 * Each asm loop handles 8 chroma samples (32 source bytes) per iteration and
 * always runs at least once, so width is expected to be a positive multiple
 * of 16; otherwise the loops run past the end of the line.
 *
 * @param src         packed source
 * @param ydst        luma destination plane
 * @param udst        U destination plane
 * @param vdst        V destination plane
 * @param width       luma width in samples
 * @param height      number of lines
 * @param lumStride   byte stride of ydst
 * @param chromStride byte stride of udst and vdst
 * @param srcStride   byte stride of src
 */
01416 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01417 int width, int height,
01418 int lumStride, int chromStride, int srcStride)
01419 {
01420 int y;
01421 const x86_reg chromWidth= width>>1;
01422 for (y=0; y<height; y+=2) {
      /* First line of the pair: luma and chroma. */
01423 __asm__ volatile(
01424 "xor %%"REG_a", %%"REG_a" \n\t"
      /* mm7 = 0x00FF in every 16-bit word (selects the even bytes). */
01425 "pcmpeqw %%mm7, %%mm7 \n\t"
01426 "psrlw $8, %%mm7 \n\t"
01427 ".p2align 4 \n\t"
01428 "1: \n\t"
01429 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
      /* Bytes 0..15: odd bytes (chroma) -> mm0, even bytes (luma) -> mm2. */
01430 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01431 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01432 "movq %%mm0, %%mm2 \n\t"
01433 "movq %%mm1, %%mm3 \n\t"
01434 "psrlw $8, %%mm0 \n\t"
01435 "psrlw $8, %%mm1 \n\t"
01436 "pand %%mm7, %%mm2 \n\t"
01437 "pand %%mm7, %%mm3 \n\t"
01438 "packuswb %%mm1, %%mm0 \n\t"
01439 "packuswb %%mm3, %%mm2 \n\t"
01440
01441 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01442
      /* Bytes 16..31: odd bytes (chroma) -> mm1, even bytes (luma) -> mm3. */
01443 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01444 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01445 "movq %%mm1, %%mm3 \n\t"
01446 "movq %%mm2, %%mm4 \n\t"
01447 "psrlw $8, %%mm1 \n\t"
01448 "psrlw $8, %%mm2 \n\t"
01449 "pand %%mm7, %%mm3 \n\t"
01450 "pand %%mm7, %%mm4 \n\t"
01451 "packuswb %%mm2, %%mm1 \n\t"
01452 "packuswb %%mm4, %%mm3 \n\t"
01453
01454 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01455
      /* Split the 16 interleaved chroma bytes in mm0/mm1: the odd ones end
       * up in mm0 (stored to vdst), the even ones in mm2 (stored to udst). */
01456 "movq %%mm0, %%mm2 \n\t"
01457 "movq %%mm1, %%mm3 \n\t"
01458 "psrlw $8, %%mm0 \n\t"
01459 "psrlw $8, %%mm1 \n\t"
01460 "pand %%mm7, %%mm2 \n\t"
01461 "pand %%mm7, %%mm3 \n\t"
01462 "packuswb %%mm1, %%mm0 \n\t"
01463 "packuswb %%mm3, %%mm2 \n\t"
01464
01465 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01466 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01467
01468 "add $8, %%"REG_a" \n\t"
01469 "cmp %4, %%"REG_a" \n\t"
01470 " jb 1b \n\t"
01471 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01472 : "memory", "%"REG_a
01473 );
01474
01475 ydst += lumStride;
01476 src += srcStride;
01477
      /* Second line of the pair: luma only. This statement does not reload
       * mm7; it relies on the 0x00FF mask set up by the previous asm
       * statement still being in the register. */
01478 __asm__ volatile(
01479 "xor %%"REG_a", %%"REG_a" \n\t"
01480 ".p2align 4 \n\t"
01481 "1: \n\t"
01482 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01483 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01484 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01485 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01486 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01487 "pand %%mm7, %%mm0 \n\t"
01488 "pand %%mm7, %%mm1 \n\t"
01489 "pand %%mm7, %%mm2 \n\t"
01490 "pand %%mm7, %%mm3 \n\t"
01491 "packuswb %%mm1, %%mm0 \n\t"
01492 "packuswb %%mm3, %%mm2 \n\t"
01493
01494 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01495 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01496
01497 "add $8, %%"REG_a" \n\t"
01498 "cmp %4, %%"REG_a" \n\t"
01499 " jb 1b \n\t"
01500
01501 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01502 : "memory", "%"REG_a
01503 );
01504 udst += chromStride;
01505 vdst += chromStride;
01506 ydst += lumStride;
01507 src += srcStride;
01508 }
01509 __asm__ volatile(EMMS" \n\t"
01510 SFENCE" \n\t"
01511 :::"memory");
01512 }
01513 #endif
01514
01515 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
/**
 * Upscale one plane by a factor of two horizontally and vertically using
 * 3:1 weighted interpolation between neighbouring samples.
 *
 * The first and the last output row are interpolated horizontally only from
 * the first / last source row. Every other pair of output rows is built from
 * two adjacent source rows, each output sample being (3*a + b) >> 2 of a
 * sample and its diagonal neighbour in the other row. In total 2*srcHeight
 * rows of 2*srcWidth samples are written.
 *
 * @param src       source plane
 * @param dst       destination plane
 * @param srcWidth  source width in samples
 * @param srcHeight source height in rows
 * @param srcStride byte stride of src
 * @param dstStride byte stride of dst
 */
01516 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
01517 {
01518 int x,y;
01519
      /* First output row: horizontal interpolation of the first source row. */
01520 dst[0]= src[0];
01521
01522
01523 for (x=0; x<srcWidth-1; x++) {
01524 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01525 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01526 }
01527 dst[2*srcWidth-1]= src[srcWidth-1];
01528
01529 dst+= dstStride;
01530
01531 for (y=1; y<srcHeight; y++) {
      /* Number of leading samples handled by the SIMD loop. */
01532 x86_reg mmxSize= srcWidth&~15;
      /* Only enter the SIMD loop when it has work to do. It always executes
       * at least one 8-sample iteration, so with mmxSize == 0 it would touch
       * samples beyond a narrow row, and the scalar loop below would start
       * at x = -1 and access src[-1] / dst[-1]. */
      if (mmxSize) {
01533 __asm__ volatile(
01534 "mov %4, %%"REG_a" \n\t"
      /* Build mm4/mm5 = first 8 samples of each row shifted up by one byte
       * with sample 0 duplicated, i.e. the "x-1" neighbours at the left edge
       * (assuming mmx_ff masks the lowest byte -- defined outside this
       * chunk, TODO confirm). */
01535 "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
01536 "movq (%0, %%"REG_a"), %%mm4 \n\t"
01537 "movq %%mm4, %%mm2 \n\t"
01538 "psllq $8, %%mm4 \n\t"
01539 "pand %%mm0, %%mm2 \n\t"
01540 "por %%mm2, %%mm4 \n\t"
01541 "movq (%1, %%"REG_a"), %%mm5 \n\t"
01542 "movq %%mm5, %%mm3 \n\t"
01543 "psllq $8, %%mm5 \n\t"
01544 "pand %%mm0, %%mm3 \n\t"
01545 "por %%mm3, %%mm5 \n\t"
01546 "1: \n\t"
01547 "movq (%0, %%"REG_a"), %%mm0 \n\t"
01548 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01549 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
01550 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
      /* Averaging twice with the same operand approximates (a + 3*b) / 4. */
01551 PAVGB" %%mm0, %%mm5 \n\t"
01552 PAVGB" %%mm0, %%mm3 \n\t"
01553 PAVGB" %%mm0, %%mm5 \n\t"
01554 PAVGB" %%mm0, %%mm3 \n\t"
01555 PAVGB" %%mm1, %%mm4 \n\t"
01556 PAVGB" %%mm1, %%mm2 \n\t"
01557 PAVGB" %%mm1, %%mm4 \n\t"
01558 PAVGB" %%mm1, %%mm2 \n\t"
01559 "movq %%mm5, %%mm7 \n\t"
01560 "movq %%mm4, %%mm6 \n\t"
01561 "punpcklbw %%mm3, %%mm5 \n\t"
01562 "punpckhbw %%mm3, %%mm7 \n\t"
01563 "punpcklbw %%mm2, %%mm4 \n\t"
01564 "punpckhbw %%mm2, %%mm6 \n\t"
01565 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
01566 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01567 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
01568 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01569 "add $8, %%"REG_a" \n\t"
01570 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
01571 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
01572 " js 1b \n\t"
01573 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
01574 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01575 "g" (-mmxSize)
      /* "memory": both source rows are read and both output rows written
       * through register operands only. */
01576 : "memory", "%"REG_a
01577 );
      } else {
      /* Narrow row: produce the left edge in C and let the scalar loop
       * below start at x = 0. */
      mmxSize = 1;
      dst[0        ]= (3*src[0] +   src[srcStride])>>2;
      dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
      }
01578
01579 for (x=mmxSize-1; x<srcWidth-1; x++) {
01580 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
01581 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
01582 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
01583 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
01584 }
      /* Right edge: vertical interpolation only. */
01585 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
01586 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01587
01588 dst+=dstStride*2;
01589 src+=srcStride;
01590 }
01591
01592
      /* Last output row: horizontal interpolation of the last source row. */
01593 dst[0]= src[0];
01594
01595 for (x=0; x<srcWidth-1; x++) {
01596 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01597 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01598 }
01599 dst[2*srcWidth-1]= src[srcWidth-1];
01600
01601 __asm__ volatile(EMMS" \n\t"
01602 SFENCE" \n\t"
01603 :::"memory");
01604 }
01605 #endif
01606
01607 #if !COMPILE_TEMPLATE_AMD3DNOW
01608
/**
 * Split packed data with byte order U Y V Y into separate Y, U and V planes.
 *
 * Lines are handled in pairs: luma is extracted from both lines, chroma is
 * taken from the first line of each pair only (the chroma bytes of the second
 * line are dropped, not averaged). Because the loop steps by two lines and
 * always processes both, height is expected to be even.
 *
 * Each asm loop handles 8 chroma samples (32 source bytes) per iteration and
 * always runs at least once, so width is expected to be a positive multiple
 * of 16; otherwise the loops run past the end of the line.
 *
 * @param src         packed source
 * @param ydst        luma destination plane
 * @param udst        U destination plane
 * @param vdst        V destination plane
 * @param width       luma width in samples
 * @param height      number of lines
 * @param lumStride   byte stride of ydst
 * @param chromStride byte stride of udst and vdst
 * @param srcStride   byte stride of src
 */
01614 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01615 int width, int height,
01616 int lumStride, int chromStride, int srcStride)
01617 {
01618 int y;
01619 const x86_reg chromWidth= width>>1;
01620 for (y=0; y<height; y+=2) {
      /* First line of the pair: luma and chroma. */
01621 __asm__ volatile(
01622 "xor %%"REG_a", %%"REG_a" \n\t"
      /* mm7 = 0x00FF in every 16-bit word (selects the even bytes). */
01623 "pcmpeqw %%mm7, %%mm7 \n\t"
01624 "psrlw $8, %%mm7 \n\t"
01625 ".p2align 4 \n\t"
01626 "1: \n\t"
01627 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
      /* Bytes 0..15: even bytes (chroma) -> mm0, odd bytes (luma) -> mm2. */
01628 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01629 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01630 "movq %%mm0, %%mm2 \n\t"
01631 "movq %%mm1, %%mm3 \n\t"
01632 "pand %%mm7, %%mm0 \n\t"
01633 "pand %%mm7, %%mm1 \n\t"
01634 "psrlw $8, %%mm2 \n\t"
01635 "psrlw $8, %%mm3 \n\t"
01636 "packuswb %%mm1, %%mm0 \n\t"
01637 "packuswb %%mm3, %%mm2 \n\t"
01638
01639 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01640
      /* Bytes 16..31: even bytes (chroma) -> mm1, odd bytes (luma) -> mm3. */
01641 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01642 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01643 "movq %%mm1, %%mm3 \n\t"
01644 "movq %%mm2, %%mm4 \n\t"
01645 "pand %%mm7, %%mm1 \n\t"
01646 "pand %%mm7, %%mm2 \n\t"
01647 "psrlw $8, %%mm3 \n\t"
01648 "psrlw $8, %%mm4 \n\t"
01649 "packuswb %%mm2, %%mm1 \n\t"
01650 "packuswb %%mm4, %%mm3 \n\t"
01651
01652 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01653
      /* Split the 16 interleaved chroma bytes in mm0/mm1: the odd ones end
       * up in mm0 (stored to vdst), the even ones in mm2 (stored to udst). */
01654 "movq %%mm0, %%mm2 \n\t"
01655 "movq %%mm1, %%mm3 \n\t"
01656 "psrlw $8, %%mm0 \n\t"
01657 "psrlw $8, %%mm1 \n\t"
01658 "pand %%mm7, %%mm2 \n\t"
01659 "pand %%mm7, %%mm3 \n\t"
01660 "packuswb %%mm1, %%mm0 \n\t"
01661 "packuswb %%mm3, %%mm2 \n\t"
01662
01663 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01664 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01665
01666 "add $8, %%"REG_a" \n\t"
01667 "cmp %4, %%"REG_a" \n\t"
01668 " jb 1b \n\t"
01669 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01670 : "memory", "%"REG_a
01671 );
01672
01673 ydst += lumStride;
01674 src += srcStride;
01675
      /* Second line of the pair: luma only (the odd bytes, obtained with a
       * plain 8-bit right shift, so no mask register is needed here). */
01676 __asm__ volatile(
01677 "xor %%"REG_a", %%"REG_a" \n\t"
01678 ".p2align 4 \n\t"
01679 "1: \n\t"
01680 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01681 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01682 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01683 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01684 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01685 "psrlw $8, %%mm0 \n\t"
01686 "psrlw $8, %%mm1 \n\t"
01687 "psrlw $8, %%mm2 \n\t"
01688 "psrlw $8, %%mm3 \n\t"
01689 "packuswb %%mm1, %%mm0 \n\t"
01690 "packuswb %%mm3, %%mm2 \n\t"
01691
01692 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01693 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01694
01695 "add $8, %%"REG_a" \n\t"
01696 "cmp %4, %%"REG_a" \n\t"
01697 " jb 1b \n\t"
01698
01699 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01700 : "memory", "%"REG_a
01701 );
01702 udst += chromStride;
01703 vdst += chromStride;
01704 ydst += lumStride;
01705 src += srcStride;
01706 }
01707 __asm__ volatile(EMMS" \n\t"
01708 SFENCE" \n\t"
01709 :::"memory");
01710 }
01711 #endif
01712
/**
 * Convert packed 24-bit (3 bytes per pixel) colour data to planar luma plus
 * 2x2 subsampled U and V planes.
 *
 * All lines except at least the last two are converted with MMX code, two
 * lines at a time: luma is computed for both lines, then one line of U and V
 * is computed from the average of each 2x2 pixel block (PAVGB based for
 * MMX2/3DNow, sum of four followed by >>2 for plain MMX). The remaining
 * lines are handed to rgb24toyv12_c().
 *
 * The conversion coefficients and offsets come from ff_bgr2YCoeff,
 * ff_bgr2UCoeff, ff_bgr2VCoeff, ff_bgr2YOffset and ff_bgr2UVOffset, which are
 * defined outside this chunk.
 *
 * The luma loop handles 8 pixels and the chroma loop 4 chroma samples
 * (8 pixels) per iteration; both always run at least once and have no scalar
 * tail, so width is expected to be a positive multiple of 8. Some loads
 * extend 1-2 bytes beyond the 8-pixel group being processed.
 *
 * @param src         packed source
 * @param ydst        luma destination plane
 * @param udst        U destination plane
 * @param vdst        V destination plane
 * @param width       width in pixels
 * @param height      number of lines
 * @param lumStride   byte stride of ydst
 * @param chromStride byte stride of udst and vdst
 * @param srcStride   byte stride of src
 */
01720 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01721 int width, int height,
01722 int lumStride, int chromStride, int srcStride)
01723 {
01724 int y;
01725 const x86_reg chromWidth= width>>1;
01726 for (y=0; y<height-2; y+=2) {
01727 int i;
      /* Luma for the two lines of the pair. REG_a counts pixels from -width
       * up to 0, REG_d is the matching byte offset (3 * REG_a). */
01728 for (i=0; i<2; i++) {
01729 __asm__ volatile(
01730 "mov %2, %%"REG_a" \n\t"
01731 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
01732 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01733 "pxor %%mm7, %%mm7 \n\t"
01734 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01735 ".p2align 4 \n\t"
01736 "1: \n\t"
01737 PREFETCH" 64(%0, %%"REG_d") \n\t"
      /* Pixels 0..3: widen to 16 bit, multiply-accumulate with the luma
       * coefficients, reduce to one 16-bit value per pixel. */
01738 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01739 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
01740 "punpcklbw %%mm7, %%mm0 \n\t"
01741 "punpcklbw %%mm7, %%mm1 \n\t"
01742 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
01743 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
01744 "punpcklbw %%mm7, %%mm2 \n\t"
01745 "punpcklbw %%mm7, %%mm3 \n\t"
01746 "pmaddwd %%mm6, %%mm0 \n\t"
01747 "pmaddwd %%mm6, %%mm1 \n\t"
01748 "pmaddwd %%mm6, %%mm2 \n\t"
01749 "pmaddwd %%mm6, %%mm3 \n\t"
01750 #ifndef FAST_BGR2YV12
01751 "psrad $8, %%mm0 \n\t"
01752 "psrad $8, %%mm1 \n\t"
01753 "psrad $8, %%mm2 \n\t"
01754 "psrad $8, %%mm3 \n\t"
01755 #endif
01756 "packssdw %%mm1, %%mm0 \n\t"
01757 "packssdw %%mm3, %%mm2 \n\t"
01758 "pmaddwd %%mm5, %%mm0 \n\t"
01759 "pmaddwd %%mm5, %%mm2 \n\t"
01760 "packssdw %%mm2, %%mm0 \n\t"
01761 "psraw $7, %%mm0 \n\t"
01762
      /* Pixels 4..7, same computation into mm4. */
01763 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
01764 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
01765 "punpcklbw %%mm7, %%mm4 \n\t"
01766 "punpcklbw %%mm7, %%mm1 \n\t"
01767 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
01768 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
01769 "punpcklbw %%mm7, %%mm2 \n\t"
01770 "punpcklbw %%mm7, %%mm3 \n\t"
01771 "pmaddwd %%mm6, %%mm4 \n\t"
01772 "pmaddwd %%mm6, %%mm1 \n\t"
01773 "pmaddwd %%mm6, %%mm2 \n\t"
01774 "pmaddwd %%mm6, %%mm3 \n\t"
01775 #ifndef FAST_BGR2YV12
01776 "psrad $8, %%mm4 \n\t"
01777 "psrad $8, %%mm1 \n\t"
01778 "psrad $8, %%mm2 \n\t"
01779 "psrad $8, %%mm3 \n\t"
01780 #endif
01781 "packssdw %%mm1, %%mm4 \n\t"
01782 "packssdw %%mm3, %%mm2 \n\t"
01783 "pmaddwd %%mm5, %%mm4 \n\t"
01784 "pmaddwd %%mm5, %%mm2 \n\t"
01785 "add $24, %%"REG_d" \n\t"
01786 "packssdw %%mm2, %%mm4 \n\t"
01787 "psraw $7, %%mm4 \n\t"
01788
      /* Pack the 8 results to bytes, add the luma offset and store. */
01789 "packuswb %%mm4, %%mm0 \n\t"
01790 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
01791
01792 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
01793 "add $8, %%"REG_a" \n\t"
01794 " js 1b \n\t"
01795 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
      /* "memory": src is read and ydst written through register operands
       * only. */
01796 : "memory", "%"REG_a, "%"REG_d
01797 );
01798 ydst += lumStride;
01799 src += srcStride;
01800 }
      /* Rewind to the first line of the pair for the chroma pass. */
01801 src -= srcStride*2;
      /* Chroma: REG_a counts chroma samples from -chromWidth up to 0, REG_d is
       * the matching byte offset (6 * REG_a); %0 and %1 are the two lines. */
01802 __asm__ volatile(
01803 "mov %4, %%"REG_a" \n\t"
01804 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01805 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
01806 "pxor %%mm7, %%mm7 \n\t"
01807 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01808 "add %%"REG_d", %%"REG_d" \n\t"
01809 ".p2align 4 \n\t"
01810 "1: \n\t"
01811 PREFETCH" 64(%0, %%"REG_d") \n\t"
01812 PREFETCH" 64(%1, %%"REG_d") \n\t"
01813 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
      /* Average the two lines, then average each pixel with its right-hand
       * neighbour (psrlq $24 shifts by one 3-byte pixel). */
01814 "movq (%0, %%"REG_d"), %%mm0 \n\t"
01815 "movq (%1, %%"REG_d"), %%mm1 \n\t"
01816 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
01817 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
01818 PAVGB" %%mm1, %%mm0 \n\t"
01819 PAVGB" %%mm3, %%mm2 \n\t"
01820 "movq %%mm0, %%mm1 \n\t"
01821 "movq %%mm2, %%mm3 \n\t"
01822 "psrlq $24, %%mm0 \n\t"
01823 "psrlq $24, %%mm2 \n\t"
01824 PAVGB" %%mm1, %%mm0 \n\t"
01825 PAVGB" %%mm3, %%mm2 \n\t"
01826 "punpcklbw %%mm7, %%mm0 \n\t"
01827 "punpcklbw %%mm7, %%mm2 \n\t"
01828 #else
      /* Plain MMX: sum the four pixels of each 2x2 block as 16-bit values
       * and divide by 4. */
01829 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01830 "movd (%1, %%"REG_d"), %%mm1 \n\t"
01831 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
01832 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
01833 "punpcklbw %%mm7, %%mm0 \n\t"
01834 "punpcklbw %%mm7, %%mm1 \n\t"
01835 "punpcklbw %%mm7, %%mm2 \n\t"
01836 "punpcklbw %%mm7, %%mm3 \n\t"
01837 "paddw %%mm1, %%mm0 \n\t"
01838 "paddw %%mm3, %%mm2 \n\t"
01839 "paddw %%mm2, %%mm0 \n\t"
01840 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
01841 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
01842 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
01843 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
01844 "punpcklbw %%mm7, %%mm4 \n\t"
01845 "punpcklbw %%mm7, %%mm1 \n\t"
01846 "punpcklbw %%mm7, %%mm2 \n\t"
01847 "punpcklbw %%mm7, %%mm3 \n\t"
01848 "paddw %%mm1, %%mm4 \n\t"
01849 "paddw %%mm3, %%mm2 \n\t"
01850 "paddw %%mm4, %%mm2 \n\t"
01851 "psrlw $2, %%mm0 \n\t"
01852 "psrlw $2, %%mm2 \n\t"
01853 #endif
      /* mm0/mm2 hold two averaged pixels; apply the V coefficients into
       * mm1/mm3 and the U coefficients (mm6) in place. */
01854 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
01855 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
01856
01857 "pmaddwd %%mm0, %%mm1 \n\t"
01858 "pmaddwd %%mm2, %%mm3 \n\t"
01859 "pmaddwd %%mm6, %%mm0 \n\t"
01860 "pmaddwd %%mm6, %%mm2 \n\t"
01861 #ifndef FAST_BGR2YV12
01862 "psrad $8, %%mm0 \n\t"
01863 "psrad $8, %%mm1 \n\t"
01864 "psrad $8, %%mm2 \n\t"
01865 "psrad $8, %%mm3 \n\t"
01866 #endif
01867 "packssdw %%mm2, %%mm0 \n\t"
01868 "packssdw %%mm3, %%mm1 \n\t"
01869 "pmaddwd %%mm5, %%mm0 \n\t"
01870 "pmaddwd %%mm5, %%mm1 \n\t"
01871 "packssdw %%mm1, %%mm0 \n\t"
01872 "psraw $7, %%mm0 \n\t"
01873
      /* Same again for the next two averaged pixels (byte offsets 12..). */
01874 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
01875 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
01876 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
01877 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
01878 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
01879 PAVGB" %%mm1, %%mm4 \n\t"
01880 PAVGB" %%mm3, %%mm2 \n\t"
01881 "movq %%mm4, %%mm1 \n\t"
01882 "movq %%mm2, %%mm3 \n\t"
01883 "psrlq $24, %%mm4 \n\t"
01884 "psrlq $24, %%mm2 \n\t"
01885 PAVGB" %%mm1, %%mm4 \n\t"
01886 PAVGB" %%mm3, %%mm2 \n\t"
01887 "punpcklbw %%mm7, %%mm4 \n\t"
01888 "punpcklbw %%mm7, %%mm2 \n\t"
01889 #else
01890 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
01891 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
01892 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
01893 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
01894 "punpcklbw %%mm7, %%mm4 \n\t"
01895 "punpcklbw %%mm7, %%mm1 \n\t"
01896 "punpcklbw %%mm7, %%mm2 \n\t"
01897 "punpcklbw %%mm7, %%mm3 \n\t"
01898 "paddw %%mm1, %%mm4 \n\t"
01899 "paddw %%mm3, %%mm2 \n\t"
01900 "paddw %%mm2, %%mm4 \n\t"
      /* mm5 is borrowed as a scratch register here and reloaded with
       * ff_w1111 a few lines further down. */
01901 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
01902 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
01903 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
01904 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
01905 "punpcklbw %%mm7, %%mm5 \n\t"
01906 "punpcklbw %%mm7, %%mm1 \n\t"
01907 "punpcklbw %%mm7, %%mm2 \n\t"
01908 "punpcklbw %%mm7, %%mm3 \n\t"
01909 "paddw %%mm1, %%mm5 \n\t"
01910 "paddw %%mm3, %%mm2 \n\t"
01911 "paddw %%mm5, %%mm2 \n\t"
01912 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01913 "psrlw $2, %%mm4 \n\t"
01914 "psrlw $2, %%mm2 \n\t"
01915 #endif
01916 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
01917 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
01918
01919 "pmaddwd %%mm4, %%mm1 \n\t"
01920 "pmaddwd %%mm2, %%mm3 \n\t"
01921 "pmaddwd %%mm6, %%mm4 \n\t"
01922 "pmaddwd %%mm6, %%mm2 \n\t"
01923 #ifndef FAST_BGR2YV12
01924 "psrad $8, %%mm4 \n\t"
01925 "psrad $8, %%mm1 \n\t"
01926 "psrad $8, %%mm2 \n\t"
01927 "psrad $8, %%mm3 \n\t"
01928 #endif
01929 "packssdw %%mm2, %%mm4 \n\t"
01930 "packssdw %%mm3, %%mm1 \n\t"
01931 "pmaddwd %%mm5, %%mm4 \n\t"
01932 "pmaddwd %%mm5, %%mm1 \n\t"
01933 "add $24, %%"REG_d" \n\t"
01934 "packssdw %%mm1, %%mm4 \n\t"
01935 "psraw $7, %%mm4 \n\t"
01936
      /* Gather the four U and four V results, pack to bytes, add the chroma
       * offset; the low dword goes to udst (%2), the high dword to vdst (%3). */
01937 "movq %%mm0, %%mm1 \n\t"
01938 "punpckldq %%mm4, %%mm0 \n\t"
01939 "punpckhdq %%mm4, %%mm1 \n\t"
01940 "packsswb %%mm1, %%mm0 \n\t"
01941 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
01942 "movd %%mm0, (%2, %%"REG_a") \n\t"
01943 "punpckhdq %%mm0, %%mm0 \n\t"
01944 "movd %%mm0, (%3, %%"REG_a") \n\t"
01945 "add $4, %%"REG_a" \n\t"
01946 " js 1b \n\t"
01947 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
      /* "memory": both source lines are read and udst/vdst written through
       * register operands only. */
01948 : "memory", "%"REG_a, "%"REG_d
01949 );
01950
01951 udst += chromStride;
01952 vdst += chromStride;
01953 src += srcStride*2;
01954 }
01955
01956 __asm__ volatile(EMMS" \n\t"
01957 SFENCE" \n\t"
01958 :::"memory");
01959
      /* Convert the remaining (height - y) lines with the C implementation. */
01960 rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
01961 }
01962 #endif
01963
01964 #if !COMPILE_TEMPLATE_AMD3DNOW
/**
 * Interleave two byte planes line by line:
 * dest[2*i] = src1[i], dest[2*i+1] = src2[i] for i in [0, width).
 *
 * Whole groups of 16 source bytes are handled by the SIMD loop, the
 * remaining width % 16 bytes (or the whole line when width < 16) in C.
 *
 * NOTE(review): the SSE2 variant uses movdqa / movntdq, which fault on
 * addresses that are not 16-byte aligned; nothing here checks the alignment
 * of src1, src2 or dest -- confirm that callers guarantee it.
 *
 * @param src1       first source plane (goes to even destination bytes)
 * @param src2       second source plane (goes to odd destination bytes)
 * @param dest       destination plane, 2*width bytes per line
 * @param width      bytes per line in each source plane
 * @param height     number of lines
 * @param src1Stride byte stride of src1
 * @param src2Stride byte stride of src2
 * @param dstStride  byte stride of dest
 */
01965 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
01966 int width, int height, int src1Stride,
01967 int src2Stride, int dstStride)
01968 {
01969 int h;
01970
01971 for (h=0; h < height; h++) {
01972 int w;
01973
      /* The SIMD loops execute at least one 16-byte iteration and compare the
       * counter against (width - 15) as an unsigned value ("jb"). For
       * width < 15 that bound is negative, i.e. huge when seen as unsigned,
       * and the loop would run far beyond the buffers; for width == 15 the
       * single iteration would still overrun the line. Skip the SIMD part
       * for such lines and let the scalar loop below do all the work. */
      if (width >= 16) {
01974 #if COMPILE_TEMPLATE_SSE2
01975 __asm__(
01976 "xor %%"REG_a", %%"REG_a" \n\t"
01977 "1: \n\t"
01978 PREFETCH" 64(%1, %%"REG_a") \n\t"
01979 PREFETCH" 64(%2, %%"REG_a") \n\t"
01980 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
01981 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
01982 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
01983 "punpcklbw %%xmm2, %%xmm0 \n\t"
01984 "punpckhbw %%xmm2, %%xmm1 \n\t"
01985 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
01986 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
01987 "add $16, %%"REG_a" \n\t"
01988 "cmp %3, %%"REG_a" \n\t"
01989 " jb 1b \n\t"
01990 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
01991 : "memory", "%"REG_a""
01992 );
01993 #else
01994 __asm__(
01995 "xor %%"REG_a", %%"REG_a" \n\t"
01996 "1: \n\t"
01997 PREFETCH" 64(%1, %%"REG_a") \n\t"
01998 PREFETCH" 64(%2, %%"REG_a") \n\t"
01999 "movq (%1, %%"REG_a"), %%mm0 \n\t"
02000 "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
02001 "movq %%mm0, %%mm1 \n\t"
02002 "movq %%mm2, %%mm3 \n\t"
02003 "movq (%2, %%"REG_a"), %%mm4 \n\t"
02004 "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
02005 "punpcklbw %%mm4, %%mm0 \n\t"
02006 "punpckhbw %%mm4, %%mm1 \n\t"
02007 "punpcklbw %%mm5, %%mm2 \n\t"
02008 "punpckhbw %%mm5, %%mm3 \n\t"
02009 MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
02010 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
02011 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
02012 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
02013 "add $16, %%"REG_a" \n\t"
02014 "cmp %3, %%"REG_a" \n\t"
02015 " jb 1b \n\t"
02016 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
02017 : "memory", "%"REG_a
02018 );
02019 #endif
      }
      /* Scalar tail: everything after the last complete group of 16. */
02020 for (w= (width&(~15)); w < width; w++) {
02021 dest[2*w+0] = src1[w];
02022 dest[2*w+1] = src2[w];
02023 }
02024 dest += dstStride;
02025 src1 += src1Stride;
02026 src2 += src2Stride;
02027 }
02028 __asm__(
02029 EMMS" \n\t"
02030 SFENCE" \n\t"
02031 ::: "memory"
02032 );
02033 }
02034 #endif
02035
02036 #if !COMPILE_TEMPLATE_SSE2
02037 #if !COMPILE_TEMPLATE_AMD3DNOW
/**
 * Upscale two planes by a factor of two in both directions using sample
 * duplication.
 *
 * For each plane, output row y is built from source row y>>1 and every
 * source byte is written twice (d[2*x] = d[2*x+1] = s[x]). width/2 source
 * bytes are read per row and height/2 output rows are written per plane.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for the upscaled first plane
 * @param dst2       destination for the upscaled second plane
 * @param width      see above: width/2 source bytes are consumed per row
 * @param height     see above: height/2 output rows are produced
 * @param srcStride1 byte stride of src1
 * @param srcStride2 byte stride of src2
 * @param dstStride1 byte stride of dst1
 * @param dstStride2 byte stride of dst2
 */
02038 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
02039 uint8_t *dst1, uint8_t *dst2,
02040 int width, int height,
02041 int srcStride1, int srcStride2,
02042 int dstStride1, int dstStride2)
02043 {
02044 x86_reg y;
02045 int x,w,h;
02046 w=width/2; h=height/2;
      /* Prefetch the second row of both source planes. */
02047 __asm__ volatile(
02048 PREFETCH" %0 \n\t"
02049 PREFETCH" %1 \n\t"
02050 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
      /* First plane. */
02051 for (y=0;y<h;y++) {
02052 const uint8_t* s1=src1+srcStride1*(y>>1);
02053 uint8_t* d=dst1+dstStride1*y;
02054 x=0;
      /* 32 source bytes -> 64 destination bytes per iteration;
       * punpck{l,h}bw of a register with itself doubles every byte. */
02055 for (;x<w-31;x+=32) {
02056 __asm__ volatile(
02057 PREFETCH" 32%1 \n\t"
02058 "movq %1, %%mm0 \n\t"
02059 "movq 8%1, %%mm2 \n\t"
02060 "movq 16%1, %%mm4 \n\t"
02061 "movq 24%1, %%mm6 \n\t"
02062 "movq %%mm0, %%mm1 \n\t"
02063 "movq %%mm2, %%mm3 \n\t"
02064 "movq %%mm4, %%mm5 \n\t"
02065 "movq %%mm6, %%mm7 \n\t"
02066 "punpcklbw %%mm0, %%mm0 \n\t"
02067 "punpckhbw %%mm1, %%mm1 \n\t"
02068 "punpcklbw %%mm2, %%mm2 \n\t"
02069 "punpckhbw %%mm3, %%mm3 \n\t"
02070 "punpcklbw %%mm4, %%mm4 \n\t"
02071 "punpckhbw %%mm5, %%mm5 \n\t"
02072 "punpcklbw %%mm6, %%mm6 \n\t"
02073 "punpckhbw %%mm7, %%mm7 \n\t"
02074 MOVNTQ" %%mm0, %0 \n\t"
02075 MOVNTQ" %%mm1, 8%0 \n\t"
02076 MOVNTQ" %%mm2, 16%0 \n\t"
02077 MOVNTQ" %%mm3, 24%0 \n\t"
02078 MOVNTQ" %%mm4, 32%0 \n\t"
02079 MOVNTQ" %%mm5, 40%0 \n\t"
02080 MOVNTQ" %%mm6, 48%0 \n\t"
02081 MOVNTQ" %%mm7, 56%0"
02082 :"=m"(d[2*x])
02083 :"m"(s1[x])
02084 :"memory");
02085 }
      /* Scalar tail for the last (w % 32) source bytes. */
02086 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
02087 }
      /* Second plane, same procedure. */
02088 for (y=0;y<h;y++) {
02089 const uint8_t* s2=src2+srcStride2*(y>>1);
02090 uint8_t* d=dst2+dstStride2*y;
02091 x=0;
02092 for (;x<w-31;x+=32) {
02093 __asm__ volatile(
02094 PREFETCH" 32%1 \n\t"
02095 "movq %1, %%mm0 \n\t"
02096 "movq 8%1, %%mm2 \n\t"
02097 "movq 16%1, %%mm4 \n\t"
02098 "movq 24%1, %%mm6 \n\t"
02099 "movq %%mm0, %%mm1 \n\t"
02100 "movq %%mm2, %%mm3 \n\t"
02101 "movq %%mm4, %%mm5 \n\t"
02102 "movq %%mm6, %%mm7 \n\t"
02103 "punpcklbw %%mm0, %%mm0 \n\t"
02104 "punpckhbw %%mm1, %%mm1 \n\t"
02105 "punpcklbw %%mm2, %%mm2 \n\t"
02106 "punpckhbw %%mm3, %%mm3 \n\t"
02107 "punpcklbw %%mm4, %%mm4 \n\t"
02108 "punpckhbw %%mm5, %%mm5 \n\t"
02109 "punpcklbw %%mm6, %%mm6 \n\t"
02110 "punpckhbw %%mm7, %%mm7 \n\t"
02111 MOVNTQ" %%mm0, %0 \n\t"
02112 MOVNTQ" %%mm1, 8%0 \n\t"
02113 MOVNTQ" %%mm2, 16%0 \n\t"
02114 MOVNTQ" %%mm3, 24%0 \n\t"
02115 MOVNTQ" %%mm4, 32%0 \n\t"
02116 MOVNTQ" %%mm5, 40%0 \n\t"
02117 MOVNTQ" %%mm6, 48%0 \n\t"
02118 MOVNTQ" %%mm7, 56%0"
02119 :"=m"(d[2*x])
02120 :"m"(s2[x])
02121 :"memory");
02122 }
02123 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
02124 }
02125 __asm__(
02126 EMMS" \n\t"
02127 SFENCE" \n\t"
02128 ::: "memory"
02129 );
02130 }
02131
/**
 * Pack three planes into interleaved output with byte order
 * (src1, src2, src1, src3), sharing every src2/src3 sample between four
 * consecutive src1 samples and every src2/src3 row between four rows.
 *
 * For row y the code reads src1 row y and src2/src3 row y>>2. For each index
 * x in [0, width/2) it consumes 4 bytes of src1 and one byte each of src2
 * and src3 and emits 8 bytes:
 *   yp[4x], up[x], yp[4x+1], vp[x], yp[4x+2], up[x], yp[4x+3], vp[x].
 *
 * NOTE(review): with x running up to width/2 this reads 2*width bytes of
 * src1 and writes 4*width bytes of dst per row -- verify against the callers
 * that this is the intended meaning of width.
 *
 * @param src1       luma plane
 * @param src2       first chroma plane (written to output bytes 1 and 5)
 * @param src3       second chroma plane (written to output bytes 3 and 7)
 * @param dst        packed destination
 * @param width      see above
 * @param height     number of output rows
 * @param srcStride1 byte stride of src1
 * @param srcStride2 byte stride of src2
 * @param srcStride3 byte stride of src3
 * @param dstStride  byte stride of dst
 */
02132 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
02133 uint8_t *dst,
02134 int width, int height,
02135 int srcStride1, int srcStride2,
02136 int srcStride3, int dstStride)
02137 {
02138 x86_reg x;
02139 int y,w,h;
02140 w=width/2; h=height;
02141 for (y=0;y<h;y++) {
02142 const uint8_t* yp=src1+srcStride1*y;
02143 const uint8_t* up=src2+srcStride2*(y>>2);
02144 const uint8_t* vp=src3+srcStride3*(y>>2);
02145 uint8_t* d=dst+dstStride*y;
02146 x=0;
      /* Per iteration: 8 chroma samples from each chroma plane, 32 luma
       * bytes, 64 output bytes. */
02147 for (;x<w-7;x+=8) {
02148 __asm__ volatile(
02149 PREFETCH" 32(%1, %0) \n\t"
02150 PREFETCH" 32(%2, %0) \n\t"
02151 PREFETCH" 32(%3, %0) \n\t"
02152 "movq (%1, %0, 4), %%mm0 \n\t"
02153 "movq (%2, %0), %%mm1 \n\t"
02154 "movq (%3, %0), %%mm2 \n\t"
02155 "movq %%mm0, %%mm3 \n\t"
02156 "movq %%mm1, %%mm4 \n\t"
02157 "movq %%mm2, %%mm5 \n\t"
      /* Double every chroma byte: mm1/mm2 = low halves, mm4/mm5 = high
       * halves of the two chroma registers. */
02158 "punpcklbw %%mm1, %%mm1 \n\t"
02159 "punpcklbw %%mm2, %%mm2 \n\t"
02160 "punpckhbw %%mm4, %%mm4 \n\t"
02161 "punpckhbw %%mm5, %%mm5 \n\t"
02162
      /* Luma bytes 0..7 with the first two chroma pairs. */
02163 "movq %%mm1, %%mm6 \n\t"
02164 "punpcklbw %%mm2, %%mm1 \n\t"
02165 "punpcklbw %%mm1, %%mm0 \n\t"
02166 "punpckhbw %%mm1, %%mm3 \n\t"
02167 MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
02168 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"
02169
      /* Luma bytes 8..15. */
02170 "punpckhbw %%mm2, %%mm6 \n\t"
02171 "movq 8(%1, %0, 4), %%mm0 \n\t"
02172 "movq %%mm0, %%mm3 \n\t"
02173 "punpcklbw %%mm6, %%mm0 \n\t"
02174 "punpckhbw %%mm6, %%mm3 \n\t"
02175 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
02176 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"
02177
      /* Luma bytes 16..23. */
02178 "movq %%mm4, %%mm6 \n\t"
02179 "movq 16(%1, %0, 4), %%mm0 \n\t"
02180 "movq %%mm0, %%mm3 \n\t"
02181 "punpcklbw %%mm5, %%mm4 \n\t"
02182 "punpcklbw %%mm4, %%mm0 \n\t"
02183 "punpckhbw %%mm4, %%mm3 \n\t"
02184 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
02185 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"
02186
      /* Luma bytes 24..31. */
02187 "punpckhbw %%mm5, %%mm6 \n\t"
02188 "movq 24(%1, %0, 4), %%mm0 \n\t"
02189 "movq %%mm0, %%mm3 \n\t"
02190 "punpcklbw %%mm6, %%mm0 \n\t"
02191 "punpckhbw %%mm6, %%mm3 \n\t"
02192 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
02193 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"
02194
02195 : "+r" (x)
02196 : "r"(yp), "r" (up), "r"(vp), "r"(d)
02197 :"memory");
02198 }
      /* Scalar tail for the last (w % 8) chroma samples. */
02199 for (; x<w; x++) {
02200 const int x2 = x<<2;
02201 d[8*x+0] = yp[x2];
02202 d[8*x+1] = up[x];
02203 d[8*x+2] = yp[x2+1];
02204 d[8*x+3] = vp[x];
02205 d[8*x+4] = yp[x2+2];
02206 d[8*x+5] = up[x];
02207 d[8*x+6] = yp[x2+3];
02208 d[8*x+7] = vp[x];
02209 }
02210 }
02211 __asm__(
02212 EMMS" \n\t"
02213 SFENCE" \n\t"
02214 ::: "memory"
02215 );
02216 }
02217 #endif
02218
02219 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
02220 {
02221 dst += count;
02222 src += 2*count;
02223 count= - count;
02224
02225 if(count <= -16) {
02226 count += 15;
02227 __asm__ volatile(
02228 "pcmpeqw %%mm7, %%mm7 \n\t"
02229 "psrlw $8, %%mm7 \n\t"
02230 "1: \n\t"
02231 "movq -30(%1, %0, 2), %%mm0 \n\t"
02232 "movq -22(%1, %0, 2), %%mm1 \n\t"
02233 "movq -14(%1, %0, 2), %%mm2 \n\t"
02234 "movq -6(%1, %0, 2), %%mm3 \n\t"
02235 "pand %%mm7, %%mm0 \n\t"
02236 "pand %%mm7, %%mm1 \n\t"
02237 "pand %%mm7, %%mm2 \n\t"
02238 "pand %%mm7, %%mm3 \n\t"
02239 "packuswb %%mm1, %%mm0 \n\t"
02240 "packuswb %%mm3, %%mm2 \n\t"
02241 MOVNTQ" %%mm0,-15(%2, %0) \n\t"
02242 MOVNTQ" %%mm2,- 7(%2, %0) \n\t"
02243 "add $16, %0 \n\t"
02244 " js 1b \n\t"
02245 : "+r"(count)
02246 : "r"(src), "r"(dst)
02247 );
02248 count -= 15;
02249 }
02250 while(count<0) {
02251 dst[count]= src[2*count];
02252 count++;
02253 }
02254 }
02255
02256 #if !COMPILE_TEMPLATE_AMD3DNOW
02257 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02258 {
02259 dst0+= count;
02260 dst1+= count;
02261 src += 4*count;
02262 count= - count;
02263 if(count <= -8) {
02264 count += 7;
02265 __asm__ volatile(
02266 "pcmpeqw %%mm7, %%mm7 \n\t"
02267 "psrlw $8, %%mm7 \n\t"
02268 "1: \n\t"
02269 "movq -28(%1, %0, 4), %%mm0 \n\t"
02270 "movq -20(%1, %0, 4), %%mm1 \n\t"
02271 "movq -12(%1, %0, 4), %%mm2 \n\t"
02272 "movq -4(%1, %0, 4), %%mm3 \n\t"
02273 "pand %%mm7, %%mm0 \n\t"
02274 "pand %%mm7, %%mm1 \n\t"
02275 "pand %%mm7, %%mm2 \n\t"
02276 "pand %%mm7, %%mm3 \n\t"
02277 "packuswb %%mm1, %%mm0 \n\t"
02278 "packuswb %%mm3, %%mm2 \n\t"
02279 "movq %%mm0, %%mm1 \n\t"
02280 "movq %%mm2, %%mm3 \n\t"
02281 "psrlw $8, %%mm0 \n\t"
02282 "psrlw $8, %%mm2 \n\t"
02283 "pand %%mm7, %%mm1 \n\t"
02284 "pand %%mm7, %%mm3 \n\t"
02285 "packuswb %%mm2, %%mm0 \n\t"
02286 "packuswb %%mm3, %%mm1 \n\t"
02287 MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
02288 MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
02289 "add $8, %0 \n\t"
02290 " js 1b \n\t"
02291 : "+r"(count)
02292 : "r"(src), "r"(dst0), "r"(dst1)
02293 );
02294 count -= 7;
02295 }
02296 while(count<0) {
02297 dst0[count]= src[4*count+0];
02298 dst1[count]= src[4*count+2];
02299 count++;
02300 }
02301 }
02302 #endif
02303
02304 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02305 {
02306 dst0 += count;
02307 dst1 += count;
02308 src0 += 4*count;
02309 src1 += 4*count;
02310 count= - count;
02311 #ifdef PAVGB
02312 if(count <= -8) {
02313 count += 7;
02314 __asm__ volatile(
02315 "pcmpeqw %%mm7, %%mm7 \n\t"
02316 "psrlw $8, %%mm7 \n\t"
02317 "1: \n\t"
02318 "movq -28(%1, %0, 4), %%mm0 \n\t"
02319 "movq -20(%1, %0, 4), %%mm1 \n\t"
02320 "movq -12(%1, %0, 4), %%mm2 \n\t"
02321 "movq -4(%1, %0, 4), %%mm3 \n\t"
02322 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
02323 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
02324 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
02325 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
02326 "pand %%mm7, %%mm0 \n\t"
02327 "pand %%mm7, %%mm1 \n\t"
02328 "pand %%mm7, %%mm2 \n\t"
02329 "pand %%mm7, %%mm3 \n\t"
02330 "packuswb %%mm1, %%mm0 \n\t"
02331 "packuswb %%mm3, %%mm2 \n\t"
02332 "movq %%mm0, %%mm1 \n\t"
02333 "movq %%mm2, %%mm3 \n\t"
02334 "psrlw $8, %%mm0 \n\t"
02335 "psrlw $8, %%mm2 \n\t"
02336 "pand %%mm7, %%mm1 \n\t"
02337 "pand %%mm7, %%mm3 \n\t"
02338 "packuswb %%mm2, %%mm0 \n\t"
02339 "packuswb %%mm3, %%mm1 \n\t"
02340 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
02341 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
02342 "add $8, %0 \n\t"
02343 " js 1b \n\t"
02344 : "+r"(count)
02345 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
02346 );
02347 count -= 7;
02348 }
02349 #endif
02350 while(count<0) {
02351 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
02352 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
02353 count++;
02354 }
02355 }
02356
02357 #if !COMPILE_TEMPLATE_AMD3DNOW
02358 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02359 {
02360 dst0+= count;
02361 dst1+= count;
02362 src += 4*count;
02363 count= - count;
02364 if(count <= -8) {
02365 count += 7;
02366 __asm__ volatile(
02367 "pcmpeqw %%mm7, %%mm7 \n\t"
02368 "psrlw $8, %%mm7 \n\t"
02369 "1: \n\t"
02370 "movq -28(%1, %0, 4), %%mm0 \n\t"
02371 "movq -20(%1, %0, 4), %%mm1 \n\t"
02372 "movq -12(%1, %0, 4), %%mm2 \n\t"
02373 "movq -4(%1, %0, 4), %%mm3 \n\t"
02374 "psrlw $8, %%mm0 \n\t"
02375 "psrlw $8, %%mm1 \n\t"
02376 "psrlw $8, %%mm2 \n\t"
02377 "psrlw $8, %%mm3 \n\t"
02378 "packuswb %%mm1, %%mm0 \n\t"
02379 "packuswb %%mm3, %%mm2 \n\t"
02380 "movq %%mm0, %%mm1 \n\t"
02381 "movq %%mm2, %%mm3 \n\t"
02382 "psrlw $8, %%mm0 \n\t"
02383 "psrlw $8, %%mm2 \n\t"
02384 "pand %%mm7, %%mm1 \n\t"
02385 "pand %%mm7, %%mm3 \n\t"
02386 "packuswb %%mm2, %%mm0 \n\t"
02387 "packuswb %%mm3, %%mm1 \n\t"
02388 MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
02389 MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
02390 "add $8, %0 \n\t"
02391 " js 1b \n\t"
02392 : "+r"(count)
02393 : "r"(src), "r"(dst0), "r"(dst1)
02394 );
02395 count -= 7;
02396 }
02397 src++;
02398 while(count<0) {
02399 dst0[count]= src[4*count+0];
02400 dst1[count]= src[4*count+2];
02401 count++;
02402 }
02403 }
02404 #endif
02405
02406 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02407 {
02408 dst0 += count;
02409 dst1 += count;
02410 src0 += 4*count;
02411 src1 += 4*count;
02412 count= - count;
02413 #ifdef PAVGB
02414 if(count <= -8) {
02415 count += 7;
02416 __asm__ volatile(
02417 "pcmpeqw %%mm7, %%mm7 \n\t"
02418 "psrlw $8, %%mm7 \n\t"
02419 "1: \n\t"
02420 "movq -28(%1, %0, 4), %%mm0 \n\t"
02421 "movq -20(%1, %0, 4), %%mm1 \n\t"
02422 "movq -12(%1, %0, 4), %%mm2 \n\t"
02423 "movq -4(%1, %0, 4), %%mm3 \n\t"
02424 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
02425 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
02426 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
02427 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
02428 "psrlw $8, %%mm0 \n\t"
02429 "psrlw $8, %%mm1 \n\t"
02430 "psrlw $8, %%mm2 \n\t"
02431 "psrlw $8, %%mm3 \n\t"
02432 "packuswb %%mm1, %%mm0 \n\t"
02433 "packuswb %%mm3, %%mm2 \n\t"
02434 "movq %%mm0, %%mm1 \n\t"
02435 "movq %%mm2, %%mm3 \n\t"
02436 "psrlw $8, %%mm0 \n\t"
02437 "psrlw $8, %%mm2 \n\t"
02438 "pand %%mm7, %%mm1 \n\t"
02439 "pand %%mm7, %%mm3 \n\t"
02440 "packuswb %%mm2, %%mm0 \n\t"
02441 "packuswb %%mm3, %%mm1 \n\t"
02442 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
02443 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
02444 "add $8, %0 \n\t"
02445 " js 1b \n\t"
02446 : "+r"(count)
02447 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
02448 );
02449 count -= 7;
02450 }
02451 #endif
02452 src0++;
02453 src1++;
02454 while(count<0) {
02455 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
02456 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
02457 count++;
02458 }
02459 }
02460
02461 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02462 int width, int height,
02463 int lumStride, int chromStride, int srcStride)
02464 {
02465 int y;
02466 const int chromWidth= -((-width)>>1);
02467
02468 for (y=0; y<height; y++) {
02469 RENAME(extract_even)(src, ydst, width);
02470 if(y&1) {
02471 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
02472 udst+= chromStride;
02473 vdst+= chromStride;
02474 }
02475
02476 src += srcStride;
02477 ydst+= lumStride;
02478 }
02479 __asm__(
02480 EMMS" \n\t"
02481 SFENCE" \n\t"
02482 ::: "memory"
02483 );
02484 }
02485
02486 #if !COMPILE_TEMPLATE_AMD3DNOW
02487 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02488 int width, int height,
02489 int lumStride, int chromStride, int srcStride)
02490 {
02491 int y;
02492 const int chromWidth= -((-width)>>1);
02493
02494 for (y=0; y<height; y++) {
02495 RENAME(extract_even)(src, ydst, width);
02496 RENAME(extract_odd2)(src, udst, vdst, chromWidth);
02497
02498 src += srcStride;
02499 ydst+= lumStride;
02500 udst+= chromStride;
02501 vdst+= chromStride;
02502 }
02503 __asm__(
02504 EMMS" \n\t"
02505 SFENCE" \n\t"
02506 ::: "memory"
02507 );
02508 }
02509 #endif
02510
02511 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02512 int width, int height,
02513 int lumStride, int chromStride, int srcStride)
02514 {
02515 int y;
02516 const int chromWidth= -((-width)>>1);
02517
02518 for (y=0; y<height; y++) {
02519 RENAME(extract_even)(src+1, ydst, width);
02520 if(y&1) {
02521 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
02522 udst+= chromStride;
02523 vdst+= chromStride;
02524 }
02525
02526 src += srcStride;
02527 ydst+= lumStride;
02528 }
02529 __asm__(
02530 EMMS" \n\t"
02531 SFENCE" \n\t"
02532 ::: "memory"
02533 );
02534 }
02535
02536 #if !COMPILE_TEMPLATE_AMD3DNOW
02537 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02538 int width, int height,
02539 int lumStride, int chromStride, int srcStride)
02540 {
02541 int y;
02542 const int chromWidth= -((-width)>>1);
02543
02544 for (y=0; y<height; y++) {
02545 RENAME(extract_even)(src+1, ydst, width);
02546 RENAME(extract_even2)(src, udst, vdst, chromWidth);
02547
02548 src += srcStride;
02549 ydst+= lumStride;
02550 udst+= chromStride;
02551 vdst+= chromStride;
02552 }
02553 __asm__(
02554 EMMS" \n\t"
02555 SFENCE" \n\t"
02556 ::: "memory"
02557 );
02558 }
02559 #endif
02560 #endif
02561
02562 static inline void RENAME(rgb2rgb_init)(void)
02563 {
02564 #if !COMPILE_TEMPLATE_SSE2
02565 #if !COMPILE_TEMPLATE_AMD3DNOW
02566 rgb15to16 = RENAME(rgb15to16);
02567 rgb15tobgr24 = RENAME(rgb15tobgr24);
02568 rgb15to32 = RENAME(rgb15to32);
02569 rgb16tobgr24 = RENAME(rgb16tobgr24);
02570 rgb16to32 = RENAME(rgb16to32);
02571 rgb16to15 = RENAME(rgb16to15);
02572 rgb24tobgr16 = RENAME(rgb24tobgr16);
02573 rgb24tobgr15 = RENAME(rgb24tobgr15);
02574 rgb24tobgr32 = RENAME(rgb24tobgr32);
02575 rgb32to16 = RENAME(rgb32to16);
02576 rgb32to15 = RENAME(rgb32to15);
02577 rgb32tobgr24 = RENAME(rgb32tobgr24);
02578 rgb24to15 = RENAME(rgb24to15);
02579 rgb24to16 = RENAME(rgb24to16);
02580 rgb24tobgr24 = RENAME(rgb24tobgr24);
02581 shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
02582 rgb32tobgr16 = RENAME(rgb32tobgr16);
02583 rgb32tobgr15 = RENAME(rgb32tobgr15);
02584 yv12toyuy2 = RENAME(yv12toyuy2);
02585 yv12touyvy = RENAME(yv12touyvy);
02586 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02587 yuv422ptouyvy = RENAME(yuv422ptouyvy);
02588 yuy2toyv12 = RENAME(yuy2toyv12);
02589 vu9_to_vu12 = RENAME(vu9_to_vu12);
02590 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02591 uyvytoyuv422 = RENAME(uyvytoyuv422);
02592 yuyvtoyuv422 = RENAME(yuyvtoyuv422);
02593 #endif
02594
02595 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
02596 planar2x = RENAME(planar2x);
02597 #endif
02598 rgb24toyv12 = RENAME(rgb24toyv12);
02599
02600 yuyvtoyuv420 = RENAME(yuyvtoyuv420);
02601 uyvytoyuv420 = RENAME(uyvytoyuv420);
02602 #endif
02603
02604 #if !COMPILE_TEMPLATE_AMD3DNOW
02605 interleaveBytes = RENAME(interleaveBytes);
02606 #endif
02607 }