00024 #undef REAL_MOVNTQ
00025 #undef MOVNTQ
00026 #undef PAVGB
00027 #undef PREFETCH
00028 #undef PREFETCHW
00029 #undef EMMS
00030 #undef SFENCE
00031
00032 #if HAVE_AMD3DNOW
00033
00034 #define EMMS "femms"
00035 #else
00036 #define EMMS "emms"
00037 #endif
00038
00039 #if HAVE_AMD3DNOW
00040 #define PREFETCH "prefetch"
00041 #define PREFETCHW "prefetchw"
00042 #elif HAVE_MMX2
00043 #define PREFETCH "prefetchnta"
00044 #define PREFETCHW "prefetcht0"
00045 #else
00046 #define PREFETCH " # nop"
00047 #define PREFETCHW " # nop"
00048 #endif
00049
00050 #if HAVE_MMX2
00051 #define SFENCE "sfence"
00052 #else
00053 #define SFENCE " # nop"
00054 #endif
00055
00056 #if HAVE_MMX2
00057 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00058 #elif HAVE_AMD3DNOW
00059 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00060 #endif
00061
00062 #if HAVE_MMX2
00063 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00064 #else
00065 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00066 #endif
00067 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00068
00069 #if HAVE_ALTIVEC
00070 #include "swscale_altivec_template.c"
00071 #endif
00072
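/* Vertical scaling to planar YV12: walk the MMX filter list, accumulate the pmulhw-weighted source lines on top of the rounder, then shift by 3, pack to unsigned bytes and store 8 pixels per iteration. */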
00073 #define YSCALEYUV2YV12X(x, offset, dest, width) \
00074 __asm__ volatile(\
00075 "xor %%"REG_a", %%"REG_a" \n\t"\
00076 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00077 "movq %%mm3, %%mm4 \n\t"\
00078 "lea " offset "(%0), %%"REG_d" \n\t"\
00079 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00080 ASMALIGN(4) \
00081 "1: \n\t"\
00082 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00083 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00084 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" \
00085 "add $16, %%"REG_d" \n\t"\
00086 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00087 "test %%"REG_S", %%"REG_S" \n\t"\
00088 "pmulhw %%mm0, %%mm2 \n\t"\
00089 "pmulhw %%mm0, %%mm5 \n\t"\
00090 "paddw %%mm2, %%mm3 \n\t"\
00091 "paddw %%mm5, %%mm4 \n\t"\
00092 " jnz 1b \n\t"\
00093 "psraw $3, %%mm3 \n\t"\
00094 "psraw $3, %%mm4 \n\t"\
00095 "packuswb %%mm4, %%mm3 \n\t"\
00096 MOVNTQ(%%mm3, (%1, %%REGa))\
00097 "add $8, %%"REG_a" \n\t"\
00098 "cmp %2, %%"REG_a" \n\t"\
00099 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00100 "movq %%mm3, %%mm4 \n\t"\
00101 "lea " offset "(%0), %%"REG_d" \n\t"\
00102 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00103 "jb 1b \n\t"\
00104 :: "r" (&c->redDither),\
00105 "r" (dest), "g" (width)\
00106 : "%"REG_a, "%"REG_d, "%"REG_S\
00107 );
00108
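/* Higher-precision variant used for SWS_ACCURATE_RND: coefficients are applied two lines at a time with punpcklwd/punpckhwd + pmaddwd into 32-bit accumulators before rounding, shifting and packing. */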
00109 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
00110 __asm__ volatile(\
00111 "lea " offset "(%0), %%"REG_d" \n\t"\
00112 "xor %%"REG_a", %%"REG_a" \n\t"\
00113 "pxor %%mm4, %%mm4 \n\t"\
00114 "pxor %%mm5, %%mm5 \n\t"\
00115 "pxor %%mm6, %%mm6 \n\t"\
00116 "pxor %%mm7, %%mm7 \n\t"\
00117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00118 ASMALIGN(4) \
00119 "1: \n\t"\
00120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00122 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" \
00124 "movq %%mm0, %%mm3 \n\t"\
00125 "punpcklwd %%mm1, %%mm0 \n\t"\
00126 "punpckhwd %%mm1, %%mm3 \n\t"\
00127 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" \
00128 "pmaddwd %%mm1, %%mm0 \n\t"\
00129 "pmaddwd %%mm1, %%mm3 \n\t"\
00130 "paddd %%mm0, %%mm4 \n\t"\
00131 "paddd %%mm3, %%mm5 \n\t"\
00132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00133 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00134 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00135 "test %%"REG_S", %%"REG_S" \n\t"\
00136 "movq %%mm2, %%mm0 \n\t"\
00137 "punpcklwd %%mm3, %%mm2 \n\t"\
00138 "punpckhwd %%mm3, %%mm0 \n\t"\
00139 "pmaddwd %%mm1, %%mm2 \n\t"\
00140 "pmaddwd %%mm1, %%mm0 \n\t"\
00141 "paddd %%mm2, %%mm6 \n\t"\
00142 "paddd %%mm0, %%mm7 \n\t"\
00143 " jnz 1b \n\t"\
00144 "psrad $16, %%mm4 \n\t"\
00145 "psrad $16, %%mm5 \n\t"\
00146 "psrad $16, %%mm6 \n\t"\
00147 "psrad $16, %%mm7 \n\t"\
00148 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00149 "packssdw %%mm5, %%mm4 \n\t"\
00150 "packssdw %%mm7, %%mm6 \n\t"\
00151 "paddw %%mm0, %%mm4 \n\t"\
00152 "paddw %%mm0, %%mm6 \n\t"\
00153 "psraw $3, %%mm4 \n\t"\
00154 "psraw $3, %%mm6 \n\t"\
00155 "packuswb %%mm6, %%mm4 \n\t"\
00156 MOVNTQ(%%mm4, (%1, %%REGa))\
00157 "add $8, %%"REG_a" \n\t"\
00158 "cmp %2, %%"REG_a" \n\t"\
00159 "lea " offset "(%0), %%"REG_d" \n\t"\
00160 "pxor %%mm4, %%mm4 \n\t"\
00161 "pxor %%mm5, %%mm5 \n\t"\
00162 "pxor %%mm6, %%mm6 \n\t"\
00163 "pxor %%mm7, %%mm7 \n\t"\
00164 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00165 "jb 1b \n\t"\
00166 :: "r" (&c->redDither),\
00167 "r" (dest), "g" (width)\
00168 : "%"REG_a, "%"REG_d, "%"REG_S\
00169 );
00170
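/* Unscaled single-line output: the 16-bit intermediate is simply shifted down by 7 and packed to bytes. */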
00171 #define YSCALEYUV2YV121 \
00172 "mov %2, %%"REG_a" \n\t"\
00173 ASMALIGN(4) \
00174 "1: \n\t"\
00175 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
00176 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
00177 "psraw $7, %%mm0 \n\t"\
00178 "psraw $7, %%mm1 \n\t"\
00179 "packuswb %%mm1, %%mm0 \n\t"\
00180 MOVNTQ(%%mm0, (%1, %%REGa))\
00181 "add $8, %%"REG_a" \n\t"\
00182 "jnc 1b \n\t"
00183
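/* Same as YSCALEYUV2YV121 but adds a rounding constant of 64 (built in %%mm7) before the shift. */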
00184 #define YSCALEYUV2YV121_ACCURATE \
00185 "mov %2, %%"REG_a" \n\t"\
00186 "pcmpeqw %%mm7, %%mm7 \n\t"\
00187 "psrlw $15, %%mm7 \n\t"\
00188 "psllw $6, %%mm7 \n\t"\
00189 ASMALIGN(4) \
00190 "1: \n\t"\
00191 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
00192 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
00193 "paddsw %%mm7, %%mm0 \n\t"\
00194 "paddsw %%mm7, %%mm1 \n\t"\
00195 "psraw $7, %%mm0 \n\t"\
00196 "psraw $7, %%mm1 \n\t"\
00197 "packuswb %%mm1, %%mm0 \n\t"\
00198 MOVNTQ(%%mm0, (%1, %%REGa))\
00199 "add $8, %%"REG_a" \n\t"\
00200 "jnc 1b \n\t"
00201
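/* Vertical scaling for packed output: the UV part sums the chroma filter taps into %%mm3 (U) and %%mm4 (V); the YA part below does the same for luma into %%mm1 and %%mm7. */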
00209 #define YSCALEYUV2PACKEDX_UV \
00210 __asm__ volatile(\
00211 "xor %%"REG_a", %%"REG_a" \n\t"\
00212 ASMALIGN(4)\
00213 "nop \n\t"\
00214 "1: \n\t"\
00215 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00216 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00217 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00218 "movq %%mm3, %%mm4 \n\t"\
00219 ASMALIGN(4)\
00220 "2: \n\t"\
00221 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00222 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00223 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00224 "add $16, %%"REG_d" \n\t"\
00225 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00226 "pmulhw %%mm0, %%mm2 \n\t"\
00227 "pmulhw %%mm0, %%mm5 \n\t"\
00228 "paddw %%mm2, %%mm3 \n\t"\
00229 "paddw %%mm5, %%mm4 \n\t"\
00230 "test %%"REG_S", %%"REG_S" \n\t"\
00231 " jnz 2b \n\t"\
00232
00233 #define YSCALEYUV2PACKEDX_YA(offset) \
00234 "lea "offset"(%0), %%"REG_d" \n\t"\
00235 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00236 "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
00237 "movq %%mm1, %%mm7 \n\t"\
00238 ASMALIGN(4)\
00239 "2: \n\t"\
00240 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00241 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00242 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" \
00243 "add $16, %%"REG_d" \n\t"\
00244 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00245 "pmulhw %%mm0, %%mm2 \n\t"\
00246 "pmulhw %%mm0, %%mm5 \n\t"\
00247 "paddw %%mm2, %%mm1 \n\t"\
00248 "paddw %%mm5, %%mm7 \n\t"\
00249 "test %%"REG_S", %%"REG_S" \n\t"\
00250 " jnz 2b \n\t"\
00251
00252 #define YSCALEYUV2PACKEDX \
00253 YSCALEYUV2PACKEDX_UV \
00254 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET) \
00255
00256 #define YSCALEYUV2PACKEDX_END \
00257 :: "r" (&c->redDither), \
00258 "m" (dummy), "m" (dummy), "m" (dummy),\
00259 "r" (dest), "m" (dstW) \
00260 : "%"REG_a, "%"REG_d, "%"REG_S \
00261 );
00262
00263 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00264 __asm__ volatile(\
00265 "xor %%"REG_a", %%"REG_a" \n\t"\
00266 ASMALIGN(4)\
00267 "nop \n\t"\
00268 "1: \n\t"\
00269 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00270 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00271 "pxor %%mm4, %%mm4 \n\t"\
00272 "pxor %%mm5, %%mm5 \n\t"\
00273 "pxor %%mm6, %%mm6 \n\t"\
00274 "pxor %%mm7, %%mm7 \n\t"\
00275 ASMALIGN(4)\
00276 "2: \n\t"\
00277 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00278 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00279 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00280 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00281 "movq %%mm0, %%mm3 \n\t"\
00282 "punpcklwd %%mm1, %%mm0 \n\t"\
00283 "punpckhwd %%mm1, %%mm3 \n\t"\
00284 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00285 "pmaddwd %%mm1, %%mm0 \n\t"\
00286 "pmaddwd %%mm1, %%mm3 \n\t"\
00287 "paddd %%mm0, %%mm4 \n\t"\
00288 "paddd %%mm3, %%mm5 \n\t"\
00289 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00290 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00291 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00292 "test %%"REG_S", %%"REG_S" \n\t"\
00293 "movq %%mm2, %%mm0 \n\t"\
00294 "punpcklwd %%mm3, %%mm2 \n\t"\
00295 "punpckhwd %%mm3, %%mm0 \n\t"\
00296 "pmaddwd %%mm1, %%mm2 \n\t"\
00297 "pmaddwd %%mm1, %%mm0 \n\t"\
00298 "paddd %%mm2, %%mm6 \n\t"\
00299 "paddd %%mm0, %%mm7 \n\t"\
00300 " jnz 2b \n\t"\
00301 "psrad $16, %%mm4 \n\t"\
00302 "psrad $16, %%mm5 \n\t"\
00303 "psrad $16, %%mm6 \n\t"\
00304 "psrad $16, %%mm7 \n\t"\
00305 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00306 "packssdw %%mm5, %%mm4 \n\t"\
00307 "packssdw %%mm7, %%mm6 \n\t"\
00308 "paddw %%mm0, %%mm4 \n\t"\
00309 "paddw %%mm0, %%mm6 \n\t"\
00310 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00311 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00312
00313 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00314 "lea "offset"(%0), %%"REG_d" \n\t"\
00315 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00316 "pxor %%mm1, %%mm1 \n\t"\
00317 "pxor %%mm5, %%mm5 \n\t"\
00318 "pxor %%mm7, %%mm7 \n\t"\
00319 "pxor %%mm6, %%mm6 \n\t"\
00320 ASMALIGN(4)\
00321 "2: \n\t"\
00322 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00323 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00324 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00325 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00326 "movq %%mm0, %%mm3 \n\t"\
00327 "punpcklwd %%mm4, %%mm0 \n\t"\
00328 "punpckhwd %%mm4, %%mm3 \n\t"\
00329 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00330 "pmaddwd %%mm4, %%mm0 \n\t"\
00331 "pmaddwd %%mm4, %%mm3 \n\t"\
00332 "paddd %%mm0, %%mm1 \n\t"\
00333 "paddd %%mm3, %%mm5 \n\t"\
00334 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00335 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00336 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00337 "test %%"REG_S", %%"REG_S" \n\t"\
00338 "movq %%mm2, %%mm0 \n\t"\
00339 "punpcklwd %%mm3, %%mm2 \n\t"\
00340 "punpckhwd %%mm3, %%mm0 \n\t"\
00341 "pmaddwd %%mm4, %%mm2 \n\t"\
00342 "pmaddwd %%mm4, %%mm0 \n\t"\
00343 "paddd %%mm2, %%mm7 \n\t"\
00344 "paddd %%mm0, %%mm6 \n\t"\
00345 " jnz 2b \n\t"\
00346 "psrad $16, %%mm1 \n\t"\
00347 "psrad $16, %%mm5 \n\t"\
00348 "psrad $16, %%mm7 \n\t"\
00349 "psrad $16, %%mm6 \n\t"\
00350 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00351 "packssdw %%mm5, %%mm1 \n\t"\
00352 "packssdw %%mm6, %%mm7 \n\t"\
00353 "paddw %%mm0, %%mm1 \n\t"\
00354 "paddw %%mm0, %%mm7 \n\t"\
00355 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00356 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00357
00358 #define YSCALEYUV2PACKEDX_ACCURATE \
00359 YSCALEYUV2PACKEDX_ACCURATE_UV \
00360 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00361
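/* Convert the accumulated Y (%%mm1/%%mm7) and U/V (%%mm3/%%mm4) to RGB: subtract the offsets, apply the per-context coefficients and add the luma, leaving 8 bytes each of B in %%mm2, G in %%mm4 and R in %%mm5. */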
00362 #define YSCALEYUV2RGBX \
00363 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00364 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00365 "movq %%mm3, %%mm2 \n\t" \
00366 "movq %%mm4, %%mm5 \n\t" \
00367 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00368 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00369 \
00370 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00371 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00372 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00373 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00374 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00375 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00376 \
00377 "paddw %%mm3, %%mm4 \n\t"\
00378 "movq %%mm2, %%mm0 \n\t"\
00379 "movq %%mm5, %%mm6 \n\t"\
00380 "movq %%mm4, %%mm3 \n\t"\
00381 "punpcklwd %%mm2, %%mm2 \n\t"\
00382 "punpcklwd %%mm5, %%mm5 \n\t"\
00383 "punpcklwd %%mm4, %%mm4 \n\t"\
00384 "paddw %%mm1, %%mm2 \n\t"\
00385 "paddw %%mm1, %%mm5 \n\t"\
00386 "paddw %%mm1, %%mm4 \n\t"\
00387 "punpckhwd %%mm0, %%mm0 \n\t"\
00388 "punpckhwd %%mm6, %%mm6 \n\t"\
00389 "punpckhwd %%mm3, %%mm3 \n\t"\
00390 "paddw %%mm7, %%mm0 \n\t"\
00391 "paddw %%mm7, %%mm6 \n\t"\
00392 "paddw %%mm7, %%mm3 \n\t"\
00393 \
00394 "packuswb %%mm0, %%mm2 \n\t"\
00395 "packuswb %%mm6, %%mm5 \n\t"\
00396 "packuswb %%mm3, %%mm4 \n\t"\
00397
00398 #define REAL_YSCALEYUV2PACKED(index, c) \
00399 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00400 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00401 "psraw $3, %%mm0 \n\t"\
00402 "psraw $3, %%mm1 \n\t"\
00403 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00404 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00405 "xor "#index", "#index" \n\t"\
00406 ASMALIGN(4)\
00407 "1: \n\t"\
00408 "movq (%2, "#index"), %%mm2 \n\t" \
00409 "movq (%3, "#index"), %%mm3 \n\t" \
00410 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00411 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00412 "psubw %%mm3, %%mm2 \n\t" \
00413 "psubw %%mm4, %%mm5 \n\t" \
00414 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00415 "pmulhw %%mm0, %%mm2 \n\t" \
00416 "pmulhw %%mm0, %%mm5 \n\t" \
00417 "psraw $7, %%mm3 \n\t" \
00418 "psraw $7, %%mm4 \n\t" \
00419 "paddw %%mm2, %%mm3 \n\t" \
00420 "paddw %%mm5, %%mm4 \n\t" \
00421 "movq (%0, "#index", 2), %%mm0 \n\t" \
00422 "movq (%1, "#index", 2), %%mm1 \n\t" \
00423 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00424 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00425 "psubw %%mm1, %%mm0 \n\t" \
00426 "psubw %%mm7, %%mm6 \n\t" \
00427 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00428 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00429 "psraw $7, %%mm1 \n\t" \
00430 "psraw $7, %%mm7 \n\t" \
00431 "paddw %%mm0, %%mm1 \n\t" \
00432 "paddw %%mm6, %%mm7 \n\t" \
00433
00434 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
00435
00436 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00437 "xor "#index", "#index" \n\t"\
00438 ASMALIGN(4)\
00439 "1: \n\t"\
00440 "movq (%2, "#index"), %%mm2 \n\t" \
00441 "movq (%3, "#index"), %%mm3 \n\t" \
00442 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00443 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00444 "psubw %%mm3, %%mm2 \n\t" \
00445 "psubw %%mm4, %%mm5 \n\t" \
00446 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00447 "pmulhw %%mm0, %%mm2 \n\t" \
00448 "pmulhw %%mm0, %%mm5 \n\t" \
00449 "psraw $4, %%mm3 \n\t" \
00450 "psraw $4, %%mm4 \n\t" \
00451 "paddw %%mm2, %%mm3 \n\t" \
00452 "paddw %%mm5, %%mm4 \n\t" \
00453 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00454 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00455 "movq %%mm3, %%mm2 \n\t" \
00456 "movq %%mm4, %%mm5 \n\t" \
00457 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00458 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00459 \
00460
00461 #define REAL_YSCALEYUV2RGB_YA(index, c) \
00462 "movq (%0, "#index", 2), %%mm0 \n\t" \
00463 "movq (%1, "#index", 2), %%mm1 \n\t" \
00464 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00465 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00466 "psubw %%mm1, %%mm0 \n\t" \
00467 "psubw %%mm7, %%mm6 \n\t" \
00468 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00469 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00470 "psraw $4, %%mm1 \n\t" \
00471 "psraw $4, %%mm7 \n\t" \
00472 "paddw %%mm0, %%mm1 \n\t" \
00473 "paddw %%mm6, %%mm7 \n\t" \
00474
00475 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00476 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00477 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00478 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00479 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00480 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00481 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00482 \
00483 "paddw %%mm3, %%mm4 \n\t"\
00484 "movq %%mm2, %%mm0 \n\t"\
00485 "movq %%mm5, %%mm6 \n\t"\
00486 "movq %%mm4, %%mm3 \n\t"\
00487 "punpcklwd %%mm2, %%mm2 \n\t"\
00488 "punpcklwd %%mm5, %%mm5 \n\t"\
00489 "punpcklwd %%mm4, %%mm4 \n\t"\
00490 "paddw %%mm1, %%mm2 \n\t"\
00491 "paddw %%mm1, %%mm5 \n\t"\
00492 "paddw %%mm1, %%mm4 \n\t"\
00493 "punpckhwd %%mm0, %%mm0 \n\t"\
00494 "punpckhwd %%mm6, %%mm6 \n\t"\
00495 "punpckhwd %%mm3, %%mm3 \n\t"\
00496 "paddw %%mm7, %%mm0 \n\t"\
00497 "paddw %%mm7, %%mm6 \n\t"\
00498 "paddw %%mm7, %%mm3 \n\t"\
00499 \
00500 "packuswb %%mm0, %%mm2 \n\t"\
00501 "packuswb %%mm6, %%mm5 \n\t"\
00502 "packuswb %%mm3, %%mm4 \n\t"\
00503
00504 #define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c)
00505
00506 #define YSCALEYUV2RGB(index, c) \
00507 REAL_YSCALEYUV2RGB_UV(index, c) \
00508 REAL_YSCALEYUV2RGB_YA(index, c) \
00509 REAL_YSCALEYUV2RGB_COEFF(c)
00510
00511 #define REAL_YSCALEYUV2PACKED1(index, c) \
00512 "xor "#index", "#index" \n\t"\
00513 ASMALIGN(4)\
00514 "1: \n\t"\
00515 "movq (%2, "#index"), %%mm3 \n\t" \
00516 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" \
00517 "psraw $7, %%mm3 \n\t" \
00518 "psraw $7, %%mm4 \n\t" \
00519 "movq (%0, "#index", 2), %%mm1 \n\t" \
00520 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00521 "psraw $7, %%mm1 \n\t" \
00522 "psraw $7, %%mm7 \n\t" \
00523
00524 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
00525
00526 #define REAL_YSCALEYUV2RGB1(index, c) \
00527 "xor "#index", "#index" \n\t"\
00528 ASMALIGN(4)\
00529 "1: \n\t"\
00530 "movq (%2, "#index"), %%mm3 \n\t" \
00531 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" \
00532 "psraw $4, %%mm3 \n\t" \
00533 "psraw $4, %%mm4 \n\t" \
00534 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00535 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00536 "movq %%mm3, %%mm2 \n\t" \
00537 "movq %%mm4, %%mm5 \n\t" \
00538 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00539 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00540 \
00541 "movq (%0, "#index", 2), %%mm1 \n\t" \
00542 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00543 "psraw $4, %%mm1 \n\t" \
00544 "psraw $4, %%mm7 \n\t" \
00545 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00546 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00547 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00548 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00549 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00550 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00551 \
00552 "paddw %%mm3, %%mm4 \n\t"\
00553 "movq %%mm2, %%mm0 \n\t"\
00554 "movq %%mm5, %%mm6 \n\t"\
00555 "movq %%mm4, %%mm3 \n\t"\
00556 "punpcklwd %%mm2, %%mm2 \n\t"\
00557 "punpcklwd %%mm5, %%mm5 \n\t"\
00558 "punpcklwd %%mm4, %%mm4 \n\t"\
00559 "paddw %%mm1, %%mm2 \n\t"\
00560 "paddw %%mm1, %%mm5 \n\t"\
00561 "paddw %%mm1, %%mm4 \n\t"\
00562 "punpckhwd %%mm0, %%mm0 \n\t"\
00563 "punpckhwd %%mm6, %%mm6 \n\t"\
00564 "punpckhwd %%mm3, %%mm3 \n\t"\
00565 "paddw %%mm7, %%mm0 \n\t"\
00566 "paddw %%mm7, %%mm6 \n\t"\
00567 "paddw %%mm7, %%mm3 \n\t"\
00568 \
00569 "packuswb %%mm0, %%mm2 \n\t"\
00570 "packuswb %%mm6, %%mm5 \n\t"\
00571 "packuswb %%mm3, %%mm4 \n\t"\
00572
00573 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
00574
00575 #define REAL_YSCALEYUV2PACKED1b(index, c) \
00576 "xor "#index", "#index" \n\t"\
00577 ASMALIGN(4)\
00578 "1: \n\t"\
00579 "movq (%2, "#index"), %%mm2 \n\t" \
00580 "movq (%3, "#index"), %%mm3 \n\t" \
00581 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00582 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00583 "paddw %%mm2, %%mm3 \n\t" \
00584 "paddw %%mm5, %%mm4 \n\t" \
00585 "psrlw $8, %%mm3 \n\t" \
00586 "psrlw $8, %%mm4 \n\t" \
00587 "movq (%0, "#index", 2), %%mm1 \n\t" \
00588 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00589 "psraw $7, %%mm1 \n\t" \
00590 "psraw $7, %%mm7 \n\t"
00591 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
00592
00593
00594 #define REAL_YSCALEYUV2RGB1b(index, c) \
00595 "xor "#index", "#index" \n\t"\
00596 ASMALIGN(4)\
00597 "1: \n\t"\
00598 "movq (%2, "#index"), %%mm2 \n\t" \
00599 "movq (%3, "#index"), %%mm3 \n\t" \
00600 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00601 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00602 "paddw %%mm2, %%mm3 \n\t" \
00603 "paddw %%mm5, %%mm4 \n\t" \
00604 "psrlw $5, %%mm3 \n\t" \
00605 "psrlw $5, %%mm4 \n\t" \
00606 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00607 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00608 "movq %%mm3, %%mm2 \n\t" \
00609 "movq %%mm4, %%mm5 \n\t" \
00610 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00611 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00612 \
00613 "movq (%0, "#index", 2), %%mm1 \n\t" \
00614 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00615 "psraw $4, %%mm1 \n\t" \
00616 "psraw $4, %%mm7 \n\t" \
00617 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00618 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00619 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00620 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00621 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00622 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00623 \
00624 "paddw %%mm3, %%mm4 \n\t"\
00625 "movq %%mm2, %%mm0 \n\t"\
00626 "movq %%mm5, %%mm6 \n\t"\
00627 "movq %%mm4, %%mm3 \n\t"\
00628 "punpcklwd %%mm2, %%mm2 \n\t"\
00629 "punpcklwd %%mm5, %%mm5 \n\t"\
00630 "punpcklwd %%mm4, %%mm4 \n\t"\
00631 "paddw %%mm1, %%mm2 \n\t"\
00632 "paddw %%mm1, %%mm5 \n\t"\
00633 "paddw %%mm1, %%mm4 \n\t"\
00634 "punpckhwd %%mm0, %%mm0 \n\t"\
00635 "punpckhwd %%mm6, %%mm6 \n\t"\
00636 "punpckhwd %%mm3, %%mm3 \n\t"\
00637 "paddw %%mm7, %%mm0 \n\t"\
00638 "paddw %%mm7, %%mm6 \n\t"\
00639 "paddw %%mm7, %%mm3 \n\t"\
00640 \
00641 "packuswb %%mm0, %%mm2 \n\t"\
00642 "packuswb %%mm6, %%mm5 \n\t"\
00643 "packuswb %%mm3, %%mm4 \n\t"\
00644
00645 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
00646
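/* Interleave the byte-packed b, g, r and a arguments into 8 4-byte pixels (32 bytes) and store them. */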
00647 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00648 "movq "#b", "#q2" \n\t" \
00649 "movq "#r", "#t" \n\t" \
00650 "punpcklbw "#g", "#b" \n\t" \
00651 "punpcklbw "#a", "#r" \n\t" \
00652 "punpckhbw "#g", "#q2" \n\t" \
00653 "punpckhbw "#a", "#t" \n\t" \
00654 "movq "#b", "#q0" \n\t" \
00655 "movq "#q2", "#q3" \n\t" \
00656 "punpcklwd "#r", "#q0" \n\t" \
00657 "punpckhwd "#r", "#b" \n\t" \
00658 "punpcklwd "#t", "#q2" \n\t" \
00659 "punpckhwd "#t", "#q3" \n\t" \
00660 \
00661 MOVNTQ( q0, (dst, index, 4))\
00662 MOVNTQ( b, 8(dst, index, 4))\
00663 MOVNTQ( q2, 16(dst, index, 4))\
00664 MOVNTQ( q3, 24(dst, index, 4))\
00665 \
00666 "add $8, "#index" \n\t"\
00667 "cmp "#dstw", "#index" \n\t"\
00668 " jb 1b \n\t"
00669 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00670
00671 #define REAL_WRITERGB16(dst, dstw, index) \
00672 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00673 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00674 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00675 "psrlq $3, %%mm2 \n\t"\
00676 \
00677 "movq %%mm2, %%mm1 \n\t"\
00678 "movq %%mm4, %%mm3 \n\t"\
00679 \
00680 "punpcklbw %%mm7, %%mm3 \n\t"\
00681 "punpcklbw %%mm5, %%mm2 \n\t"\
00682 "punpckhbw %%mm7, %%mm4 \n\t"\
00683 "punpckhbw %%mm5, %%mm1 \n\t"\
00684 \
00685 "psllq $3, %%mm3 \n\t"\
00686 "psllq $3, %%mm4 \n\t"\
00687 \
00688 "por %%mm3, %%mm2 \n\t"\
00689 "por %%mm4, %%mm1 \n\t"\
00690 \
00691 MOVNTQ(%%mm2, (dst, index, 2))\
00692 MOVNTQ(%%mm1, 8(dst, index, 2))\
00693 \
00694 "add $8, "#index" \n\t"\
00695 "cmp "#dstw", "#index" \n\t"\
00696 " jb 1b \n\t"
00697 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00698
00699 #define REAL_WRITERGB15(dst, dstw, index) \
00700 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00701 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00702 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00703 "psrlq $3, %%mm2 \n\t"\
00704 "psrlq $1, %%mm5 \n\t"\
00705 \
00706 "movq %%mm2, %%mm1 \n\t"\
00707 "movq %%mm4, %%mm3 \n\t"\
00708 \
00709 "punpcklbw %%mm7, %%mm3 \n\t"\
00710 "punpcklbw %%mm5, %%mm2 \n\t"\
00711 "punpckhbw %%mm7, %%mm4 \n\t"\
00712 "punpckhbw %%mm5, %%mm1 \n\t"\
00713 \
00714 "psllq $2, %%mm3 \n\t"\
00715 "psllq $2, %%mm4 \n\t"\
00716 \
00717 "por %%mm3, %%mm2 \n\t"\
00718 "por %%mm4, %%mm1 \n\t"\
00719 \
00720 MOVNTQ(%%mm2, (dst, index, 2))\
00721 MOVNTQ(%%mm1, 8(dst, index, 2))\
00722 \
00723 "add $8, "#index" \n\t"\
00724 "cmp "#dstw", "#index" \n\t"\
00725 " jb 1b \n\t"
00726 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00727
00728 #define WRITEBGR24OLD(dst, dstw, index) \
00729 \
00730 "movq %%mm2, %%mm1 \n\t" \
00731 "movq %%mm5, %%mm6 \n\t" \
00732 "punpcklbw %%mm4, %%mm2 \n\t" \
00733 "punpcklbw %%mm7, %%mm5 \n\t" \
00734 "punpckhbw %%mm4, %%mm1 \n\t" \
00735 "punpckhbw %%mm7, %%mm6 \n\t" \
00736 "movq %%mm2, %%mm0 \n\t" \
00737 "movq %%mm1, %%mm3 \n\t" \
00738 "punpcklwd %%mm5, %%mm0 \n\t" \
00739 "punpckhwd %%mm5, %%mm2 \n\t" \
00740 "punpcklwd %%mm6, %%mm1 \n\t" \
00741 "punpckhwd %%mm6, %%mm3 \n\t" \
00742 \
00743 "movq %%mm0, %%mm4 \n\t" \
00744 "psrlq $8, %%mm0 \n\t" \
00745 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00746 "pand "MANGLE(bm11111000)", %%mm0 \n\t" \
00747 "por %%mm4, %%mm0 \n\t" \
00748 "movq %%mm2, %%mm4 \n\t" \
00749 "psllq $48, %%mm2 \n\t" \
00750 "por %%mm2, %%mm0 \n\t" \
00751 \
00752 "movq %%mm4, %%mm2 \n\t" \
00753 "psrld $16, %%mm4 \n\t" \
00754 "psrlq $24, %%mm2 \n\t" \
00755 "por %%mm4, %%mm2 \n\t" \
00756 "pand "MANGLE(bm00001111)", %%mm2 \n\t" \
00757 "movq %%mm1, %%mm4 \n\t" \
00758 "psrlq $8, %%mm1 \n\t" \
00759 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00760 "pand "MANGLE(bm11111000)", %%mm1 \n\t" \
00761 "por %%mm4, %%mm1 \n\t" \
00762 "movq %%mm1, %%mm4 \n\t" \
00763 "psllq $32, %%mm1 \n\t" \
00764 "por %%mm1, %%mm2 \n\t" \
00765 \
00766 "psrlq $32, %%mm4 \n\t" \
00767 "movq %%mm3, %%mm5 \n\t" \
00768 "psrlq $8, %%mm3 \n\t" \
00769 "pand "MANGLE(bm00000111)", %%mm5 \n\t" \
00770 "pand "MANGLE(bm11111000)", %%mm3 \n\t" \
00771 "por %%mm5, %%mm3 \n\t" \
00772 "psllq $16, %%mm3 \n\t" \
00773 "por %%mm4, %%mm3 \n\t" \
00774 \
00775 MOVNTQ(%%mm0, (dst))\
00776 MOVNTQ(%%mm2, 8(dst))\
00777 MOVNTQ(%%mm3, 16(dst))\
00778 "add $24, "#dst" \n\t"\
00779 \
00780 "add $8, "#index" \n\t"\
00781 "cmp "#dstw", "#index" \n\t"\
00782 " jb 1b \n\t"
00783
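/* Plain MMX 24-bit writer: build 4-byte groups first, then shift/or the pieces together to drop the padding byte, writing 24 bytes (8 pixels) per iteration. */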
00784 #define WRITEBGR24MMX(dst, dstw, index) \
00785 \
00786 "movq %%mm2, %%mm1 \n\t" \
00787 "movq %%mm5, %%mm6 \n\t" \
00788 "punpcklbw %%mm4, %%mm2 \n\t" \
00789 "punpcklbw %%mm7, %%mm5 \n\t" \
00790 "punpckhbw %%mm4, %%mm1 \n\t" \
00791 "punpckhbw %%mm7, %%mm6 \n\t" \
00792 "movq %%mm2, %%mm0 \n\t" \
00793 "movq %%mm1, %%mm3 \n\t" \
00794 "punpcklwd %%mm5, %%mm0 \n\t" \
00795 "punpckhwd %%mm5, %%mm2 \n\t" \
00796 "punpcklwd %%mm6, %%mm1 \n\t" \
00797 "punpckhwd %%mm6, %%mm3 \n\t" \
00798 \
00799 "movq %%mm0, %%mm4 \n\t" \
00800 "movq %%mm2, %%mm6 \n\t" \
00801 "movq %%mm1, %%mm5 \n\t" \
00802 "movq %%mm3, %%mm7 \n\t" \
00803 \
00804 "psllq $40, %%mm0 \n\t" \
00805 "psllq $40, %%mm2 \n\t" \
00806 "psllq $40, %%mm1 \n\t" \
00807 "psllq $40, %%mm3 \n\t" \
00808 \
00809 "punpckhdq %%mm4, %%mm0 \n\t" \
00810 "punpckhdq %%mm6, %%mm2 \n\t" \
00811 "punpckhdq %%mm5, %%mm1 \n\t" \
00812 "punpckhdq %%mm7, %%mm3 \n\t" \
00813 \
00814 "psrlq $8, %%mm0 \n\t" \
00815 "movq %%mm2, %%mm6 \n\t" \
00816 "psllq $40, %%mm2 \n\t" \
00817 "por %%mm2, %%mm0 \n\t" \
00818 MOVNTQ(%%mm0, (dst))\
00819 \
00820 "psrlq $24, %%mm6 \n\t" \
00821 "movq %%mm1, %%mm5 \n\t" \
00822 "psllq $24, %%mm1 \n\t" \
00823 "por %%mm1, %%mm6 \n\t" \
00824 MOVNTQ(%%mm6, 8(dst))\
00825 \
00826 "psrlq $40, %%mm5 \n\t" \
00827 "psllq $8, %%mm3 \n\t" \
00828 "por %%mm3, %%mm5 \n\t" \
00829 MOVNTQ(%%mm5, 16(dst))\
00830 \
00831 "add $24, "#dst" \n\t"\
00832 \
00833 "add $8, "#index" \n\t"\
00834 "cmp "#dstw", "#index" \n\t"\
00835 " jb 1b \n\t"
00836
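/* MMX2 24-bit writer: pshufw replicates the needed bytes and the ff_M24A/B/C masks select them, again producing 24 bytes (8 pixels) per iteration. */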
00837 #define WRITEBGR24MMX2(dst, dstw, index) \
00838 \
00839 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00840 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00841 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00842 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00843 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00844 \
00845 "pand %%mm0, %%mm1 \n\t" \
00846 "pand %%mm0, %%mm3 \n\t" \
00847 "pand %%mm7, %%mm6 \n\t" \
00848 \
00849 "psllq $8, %%mm3 \n\t" \
00850 "por %%mm1, %%mm6 \n\t"\
00851 "por %%mm3, %%mm6 \n\t"\
00852 MOVNTQ(%%mm6, (dst))\
00853 \
00854 "psrlq $8, %%mm4 \n\t" \
00855 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00856 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00857 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00858 \
00859 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00860 "pand %%mm7, %%mm3 \n\t" \
00861 "pand %%mm0, %%mm6 \n\t" \
00862 \
00863 "por %%mm1, %%mm3 \n\t" \
00864 "por %%mm3, %%mm6 \n\t"\
00865 MOVNTQ(%%mm6, 8(dst))\
00866 \
00867 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00868 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00869 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00870 \
00871 "pand %%mm7, %%mm1 \n\t" \
00872 "pand %%mm0, %%mm3 \n\t" \
00873 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00874 \
00875 "por %%mm1, %%mm3 \n\t"\
00876 "por %%mm3, %%mm6 \n\t"\
00877 MOVNTQ(%%mm6, 16(dst))\
00878 \
00879 "add $24, "#dst" \n\t"\
00880 \
00881 "add $8, "#index" \n\t"\
00882 "cmp "#dstw", "#index" \n\t"\
00883 " jb 1b \n\t"
00884
00885 #if HAVE_MMX2
00886 #undef WRITEBGR24
00887 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00888 #else
00889 #undef WRITEBGR24
00890 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00891 #endif
00892
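/* Pack Y (%%mm1/%%mm7), U (%%mm3) and V (%%mm4) into YUYV order and store 16 bytes (8 pixels). */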
00893 #define REAL_WRITEYUY2(dst, dstw, index) \
00894 "packuswb %%mm3, %%mm3 \n\t"\
00895 "packuswb %%mm4, %%mm4 \n\t"\
00896 "packuswb %%mm7, %%mm1 \n\t"\
00897 "punpcklbw %%mm4, %%mm3 \n\t"\
00898 "movq %%mm1, %%mm7 \n\t"\
00899 "punpcklbw %%mm3, %%mm1 \n\t"\
00900 "punpckhbw %%mm3, %%mm7 \n\t"\
00901 \
00902 MOVNTQ(%%mm1, (dst, index, 2))\
00903 MOVNTQ(%%mm7, 8(dst, index, 2))\
00904 \
00905 "add $8, "#index" \n\t"\
00906 "cmp "#dstw", "#index" \n\t"\
00907 " jb 1b \n\t"
00908 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00909
00910
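/* yuv2yuvX: vertically scale and output one planar YV12 line, using the MMX path where available unless SWS_BITEXACT is set, otherwise AltiVec or plain C. */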
00911 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00912 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00913 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
00914 {
00915 #if HAVE_MMX
00916 if(!(c->flags & SWS_BITEXACT)){
00917 if (c->flags & SWS_ACCURATE_RND){
00918 if (uDest){
00919 YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00920 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00921 }
00922
00923 YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
00924 }else{
00925 if (uDest){
00926 YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00927 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00928 }
00929
00930 YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
00931 }
00932 return;
00933 }
00934 #endif
00935 #if HAVE_ALTIVEC
00936 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
00937 chrFilter, chrSrc, chrFilterSize,
00938 dest, uDest, vDest, dstW, chrDstW);
00939 #else //HAVE_ALTIVEC
00940 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
00941 chrFilter, chrSrc, chrFilterSize,
00942 dest, uDest, vDest, dstW, chrDstW);
00943 #endif
00944 }
00945
00946 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00947 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00948 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
00949 {
00950 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
00951 chrFilter, chrSrc, chrFilterSize,
00952 dest, uDest, dstW, chrDstW, dstFormat);
00953 }
00954
00955 static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
00956 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
00957 {
00958 int i;
00959 #if HAVE_MMX
00960 if(!(c->flags & SWS_BITEXACT)){
00961 long p= uDest ? 3 : 1;
00962 uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
00963 uint8_t *dst[3]= {dest, uDest, vDest};
00964 long counter[3] = {dstW, chrDstW, chrDstW};
00965
00966 if (c->flags & SWS_ACCURATE_RND){
00967 while(p--){
00968 __asm__ volatile(
00969 YSCALEYUV2YV121_ACCURATE
00970 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00971 "g" (-counter[p])
00972 : "%"REG_a
00973 );
00974 }
00975 }else{
00976 while(p--){
00977 __asm__ volatile(
00978 YSCALEYUV2YV121
00979 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00980 "g" (-counter[p])
00981 : "%"REG_a
00982 );
00983 }
00984 }
00985 return;
00986 }
00987 #endif
00988 for (i=0; i<dstW; i++)
00989 {
00990 int val= (lumSrc[i]+64)>>7;
00991
00992 if (val&256){
00993 if (val<0) val=0;
00994 else val=255;
00995 }
00996
00997 dest[i]= val;
00998 }
00999
01000 if (uDest)
01001 for (i=0; i<chrDstW; i++)
01002 {
01003 int u=(chrSrc[i ]+64)>>7;
01004 int v=(chrSrc[i + VOFW]+64)>>7;
01005
01006 if ((u|v)&256){
01007 if (u<0) u=0;
01008 else if (u>255) u=255;
01009 if (v<0) v=0;
01010 else if (v>255) v=255;
01011 }
01012
01013 uDest[i]= u;
01014 vDest[i]= v;
01015 }
01016 }
01017
01018
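/* yuv2packedX: vertically scale YV12 and convert to a packed output format (RGB32, BGR24, RGB555/565 or YUYV). */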
01022 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
01023 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
01024 uint8_t *dest, long dstW, long dstY)
01025 {
01026 #if HAVE_MMX
01027 long dummy=0;
01028 if(!(c->flags & SWS_BITEXACT)){
01029 if (c->flags & SWS_ACCURATE_RND){
01030 switch(c->dstFormat){
01031 case PIX_FMT_RGB32:
01032 YSCALEYUV2PACKEDX_ACCURATE
01033 YSCALEYUV2RGBX
01034 "pcmpeqd %%mm7, %%mm7 \n\t"
01035 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01036
01037 YSCALEYUV2PACKEDX_END
01038 return;
01039 case PIX_FMT_BGR24:
01040 YSCALEYUV2PACKEDX_ACCURATE
01041 YSCALEYUV2RGBX
01042 "pxor %%mm7, %%mm7 \n\t"
01043 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
01044 "add %4, %%"REG_c" \n\t"
01045 WRITEBGR24(%%REGc, %5, %%REGa)
01046
01047
01048 :: "r" (&c->redDither),
01049 "m" (dummy), "m" (dummy), "m" (dummy),
01050 "r" (dest), "m" (dstW)
01051 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
01052 );
01053 return;
01054 case PIX_FMT_RGB555:
01055 YSCALEYUV2PACKEDX_ACCURATE
01056 YSCALEYUV2RGBX
01057 "pxor %%mm7, %%mm7 \n\t"
01058
01059 #ifdef DITHER1XBPP
01060 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
01061 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
01062 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
01063 #endif
01064
01065 WRITERGB15(%4, %5, %%REGa)
01066 YSCALEYUV2PACKEDX_END
01067 return;
01068 case PIX_FMT_RGB565:
01069 YSCALEYUV2PACKEDX_ACCURATE
01070 YSCALEYUV2RGBX
01071 "pxor %%mm7, %%mm7 \n\t"
01072
01073 #ifdef DITHER1XBPP
01074 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
01075 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
01076 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
01077 #endif
01078
01079 WRITERGB16(%4, %5, %%REGa)
01080 YSCALEYUV2PACKEDX_END
01081 return;
01082 case PIX_FMT_YUYV422:
01083 YSCALEYUV2PACKEDX_ACCURATE
01084
01085
01086 "psraw $3, %%mm3 \n\t"
01087 "psraw $3, %%mm4 \n\t"
01088 "psraw $3, %%mm1 \n\t"
01089 "psraw $3, %%mm7 \n\t"
01090 WRITEYUY2(%4, %5, %%REGa)
01091 YSCALEYUV2PACKEDX_END
01092 return;
01093 }
01094 }else{
01095 switch(c->dstFormat)
01096 {
01097 case PIX_FMT_RGB32:
01098 YSCALEYUV2PACKEDX
01099 YSCALEYUV2RGBX
01100 "pcmpeqd %%mm7, %%mm7 \n\t"
01101 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01102 YSCALEYUV2PACKEDX_END
01103 return;
01104 case PIX_FMT_BGR24:
01105 YSCALEYUV2PACKEDX
01106 YSCALEYUV2RGBX
01107 "pxor %%mm7, %%mm7 \n\t"
01108 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
01109 "add %4, %%"REG_c" \n\t"
01110 WRITEBGR24(%%REGc, %5, %%REGa)
01111
01112 :: "r" (&c->redDither),
01113 "m" (dummy), "m" (dummy), "m" (dummy),
01114 "r" (dest), "m" (dstW)
01115 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
01116 );
01117 return;
01118 case PIX_FMT_RGB555:
01119 YSCALEYUV2PACKEDX
01120 YSCALEYUV2RGBX
01121 "pxor %%mm7, %%mm7 \n\t"
01122
01123 #ifdef DITHER1XBPP
01124 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
01125 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
01126 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
01127 #endif
01128
01129 WRITERGB15(%4, %5, %%REGa)
01130 YSCALEYUV2PACKEDX_END
01131 return;
01132 case PIX_FMT_RGB565:
01133 YSCALEYUV2PACKEDX
01134 YSCALEYUV2RGBX
01135 "pxor %%mm7, %%mm7 \n\t"
01136
01137 #ifdef DITHER1XBPP
01138 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
01139 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
01140 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
01141 #endif
01142
01143 WRITERGB16(%4, %5, %%REGa)
01144 YSCALEYUV2PACKEDX_END
01145 return;
01146 case PIX_FMT_YUYV422:
01147 YSCALEYUV2PACKEDX
01148
01149
01150 "psraw $3, %%mm3 \n\t"
01151 "psraw $3, %%mm4 \n\t"
01152 "psraw $3, %%mm1 \n\t"
01153 "psraw $3, %%mm7 \n\t"
01154 WRITEYUY2(%4, %5, %%REGa)
01155 YSCALEYUV2PACKEDX_END
01156 return;
01157 }
01158 }
01159 }
01160 #endif
01161 #if HAVE_ALTIVEC
01162
01163
01164 if (!(c->flags & SWS_BITEXACT) &&
01165 (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
01166 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
01167 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
01168 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
01169 chrFilter, chrSrc, chrFilterSize,
01170 dest, dstW, dstY);
01171 else
01172 #endif
01173 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
01174 chrFilter, chrSrc, chrFilterSize,
01175 dest, dstW, dstY);
01176 }
01177
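/* yuv2packed2: bilinear vertical blend of two source lines (yalpha/uvalpha weights), then packed-format output. */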
01181 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
01182 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
01183 {
01184 int yalpha1=4095- yalpha;
01185 int uvalpha1=4095-uvalpha;
01186 int i;
01187
01188 #if HAVE_MMX
01189 if(!(c->flags & SWS_BITEXACT)){
01190 switch(c->dstFormat)
01191 {
01192
01193 case PIX_FMT_RGB32:
01194 __asm__ volatile(
01195 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01196 "mov %4, %%"REG_b" \n\t"
01197 "push %%"REG_BP" \n\t"
01198 YSCALEYUV2RGB(%%REGBP, %5)
01199 "pcmpeqd %%mm7, %%mm7 \n\t"
01200 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01201 "pop %%"REG_BP" \n\t"
01202 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01203
01204 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01205 "a" (&c->redDither)
01206 );
01207 return;
01208 case PIX_FMT_BGR24:
01209 __asm__ volatile(
01210 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01211 "mov %4, %%"REG_b" \n\t"
01212 "push %%"REG_BP" \n\t"
01213 YSCALEYUV2RGB(%%REGBP, %5)
01214 "pxor %%mm7, %%mm7 \n\t"
01215 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01216 "pop %%"REG_BP" \n\t"
01217 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01218 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01219 "a" (&c->redDither)
01220 );
01221 return;
01222 case PIX_FMT_RGB555:
01223 __asm__ volatile(
01224 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01225 "mov %4, %%"REG_b" \n\t"
01226 "push %%"REG_BP" \n\t"
01227 YSCALEYUV2RGB(%%REGBP, %5)
01228 "pxor %%mm7, %%mm7 \n\t"
01229
01230 #ifdef DITHER1XBPP
01231 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01232 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01233 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01234 #endif
01235
01236 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01237 "pop %%"REG_BP" \n\t"
01238 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01239
01240 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01241 "a" (&c->redDither)
01242 );
01243 return;
01244 case PIX_FMT_RGB565:
01245 __asm__ volatile(
01246 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01247 "mov %4, %%"REG_b" \n\t"
01248 "push %%"REG_BP" \n\t"
01249 YSCALEYUV2RGB(%%REGBP, %5)
01250 "pxor %%mm7, %%mm7 \n\t"
01251
01252 #ifdef DITHER1XBPP
01253 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01254 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01255 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01256 #endif
01257
01258 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01259 "pop %%"REG_BP" \n\t"
01260 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01261 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01262 "a" (&c->redDither)
01263 );
01264 return;
01265 case PIX_FMT_YUYV422:
01266 __asm__ volatile(
01267 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01268 "mov %4, %%"REG_b" \n\t"
01269 "push %%"REG_BP" \n\t"
01270 YSCALEYUV2PACKED(%%REGBP, %5)
01271 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01272 "pop %%"REG_BP" \n\t"
01273 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01274 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01275 "a" (&c->redDither)
01276 );
01277 return;
01278 default: break;
01279 }
01280 }
01281 #endif //HAVE_MMX
01282 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
01283 }
01284
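/* yuv2packed1: packed output from a single pre-scaled line; when uvalpha >= 2048 the two chroma buffers are averaged (the *1b macros), otherwise only uvbuf0 is used. */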
01288 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
01289 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
01290 {
01291 const int yalpha1=0;
01292 int i;
01293
01294 uint16_t *buf1= buf0;
01295 const int yalpha= 4096;
01296
01297 if (flags&SWS_FULL_CHR_H_INT)
01298 {
01299 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
01300 return;
01301 }
01302
01303 #if HAVE_MMX
01304 if(!(flags & SWS_BITEXACT)){
01305 if (uvalpha < 2048)
01306 {
01307 switch(dstFormat)
01308 {
01309 case PIX_FMT_RGB32:
01310 __asm__ volatile(
01311 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01312 "mov %4, %%"REG_b" \n\t"
01313 "push %%"REG_BP" \n\t"
01314 YSCALEYUV2RGB1(%%REGBP, %5)
01315 "pcmpeqd %%mm7, %%mm7 \n\t"
01316 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01317 "pop %%"REG_BP" \n\t"
01318 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01319
01320 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01321 "a" (&c->redDither)
01322 );
01323 return;
01324 case PIX_FMT_BGR24:
01325 __asm__ volatile(
01326 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01327 "mov %4, %%"REG_b" \n\t"
01328 "push %%"REG_BP" \n\t"
01329 YSCALEYUV2RGB1(%%REGBP, %5)
01330 "pxor %%mm7, %%mm7 \n\t"
01331 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01332 "pop %%"REG_BP" \n\t"
01333 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01334
01335 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01336 "a" (&c->redDither)
01337 );
01338 return;
01339 case PIX_FMT_RGB555:
01340 __asm__ volatile(
01341 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01342 "mov %4, %%"REG_b" \n\t"
01343 "push %%"REG_BP" \n\t"
01344 YSCALEYUV2RGB1(%%REGBP, %5)
01345 "pxor %%mm7, %%mm7 \n\t"
01346
01347 #ifdef DITHER1XBPP
01348 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01349 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01350 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01351 #endif
01352 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01353 "pop %%"REG_BP" \n\t"
01354 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01355
01356 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01357 "a" (&c->redDither)
01358 );
01359 return;
01360 case PIX_FMT_RGB565:
01361 __asm__ volatile(
01362 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01363 "mov %4, %%"REG_b" \n\t"
01364 "push %%"REG_BP" \n\t"
01365 YSCALEYUV2RGB1(%%REGBP, %5)
01366 "pxor %%mm7, %%mm7 \n\t"
01367
01368 #ifdef DITHER1XBPP
01369 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01370 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01371 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01372 #endif
01373
01374 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01375 "pop %%"REG_BP" \n\t"
01376 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01377
01378 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01379 "a" (&c->redDither)
01380 );
01381 return;
01382 case PIX_FMT_YUYV422:
01383 __asm__ volatile(
01384 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01385 "mov %4, %%"REG_b" \n\t"
01386 "push %%"REG_BP" \n\t"
01387 YSCALEYUV2PACKED1(%%REGBP, %5)
01388 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01389 "pop %%"REG_BP" \n\t"
01390 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01391
01392 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01393 "a" (&c->redDither)
01394 );
01395 return;
01396 }
01397 }
01398 else
01399 {
01400 switch(dstFormat)
01401 {
01402 case PIX_FMT_RGB32:
01403 __asm__ volatile(
01404 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01405 "mov %4, %%"REG_b" \n\t"
01406 "push %%"REG_BP" \n\t"
01407 YSCALEYUV2RGB1b(%%REGBP, %5)
01408 "pcmpeqd %%mm7, %%mm7 \n\t"
01409 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01410 "pop %%"REG_BP" \n\t"
01411 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01412
01413 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01414 "a" (&c->redDither)
01415 );
01416 return;
01417 case PIX_FMT_BGR24:
01418 __asm__ volatile(
01419 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01420 "mov %4, %%"REG_b" \n\t"
01421 "push %%"REG_BP" \n\t"
01422 YSCALEYUV2RGB1b(%%REGBP, %5)
01423 "pxor %%mm7, %%mm7 \n\t"
01424 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01425 "pop %%"REG_BP" \n\t"
01426 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01427
01428 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01429 "a" (&c->redDither)
01430 );
01431 return;
01432 case PIX_FMT_RGB555:
01433 __asm__ volatile(
01434 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01435 "mov %4, %%"REG_b" \n\t"
01436 "push %%"REG_BP" \n\t"
01437 YSCALEYUV2RGB1b(%%REGBP, %5)
01438 "pxor %%mm7, %%mm7 \n\t"
01439
01440 #ifdef DITHER1XBPP
01441 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01442 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01443 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01444 #endif
01445 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01446 "pop %%"REG_BP" \n\t"
01447 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01448
01449 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01450 "a" (&c->redDither)
01451 );
01452 return;
01453 case PIX_FMT_RGB565:
01454 __asm__ volatile(
01455 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01456 "mov %4, %%"REG_b" \n\t"
01457 "push %%"REG_BP" \n\t"
01458 YSCALEYUV2RGB1b(%%REGBP, %5)
01459 "pxor %%mm7, %%mm7 \n\t"
01460
01461 #ifdef DITHER1XBPP
01462 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01463 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01464 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01465 #endif
01466
01467 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01468 "pop %%"REG_BP" \n\t"
01469 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01470
01471 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01472 "a" (&c->redDither)
01473 );
01474 return;
01475 case PIX_FMT_YUYV422:
01476 __asm__ volatile(
01477 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01478 "mov %4, %%"REG_b" \n\t"
01479 "push %%"REG_BP" \n\t"
01480 YSCALEYUV2PACKED1b(%%REGBP, %5)
01481 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01482 "pop %%"REG_BP" \n\t"
01483 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01484
01485 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
01486 "a" (&c->redDither)
01487 );
01488 return;
01489 }
01490 }
01491 }
01492 #endif
01493 if (uvalpha < 2048)
01494 {
01495 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
01496 }else{
01497 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
01498 }
01499 }
01500
01501
01502
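/* Input conversion helpers: extract Y (and subsampled U/V) from packed YUYV/UYVY input, with MMX fast paths. */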
01503 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01504 {
01505 #if HAVE_MMX
01506 __asm__ volatile(
01507 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
01508 "mov %0, %%"REG_a" \n\t"
01509 "1: \n\t"
01510 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01511 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01512 "pand %%mm2, %%mm0 \n\t"
01513 "pand %%mm2, %%mm1 \n\t"
01514 "packuswb %%mm1, %%mm0 \n\t"
01515 "movq %%mm0, (%2, %%"REG_a") \n\t"
01516 "add $8, %%"REG_a" \n\t"
01517 " js 1b \n\t"
01518 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
01519 : "%"REG_a
01520 );
01521 #else
01522 int i;
01523 for (i=0; i<width; i++)
01524 dst[i]= src[2*i];
01525 #endif
01526 }
01527
01528 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01529 {
01530 #if HAVE_MMX
01531 __asm__ volatile(
01532 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01533 "mov %0, %%"REG_a" \n\t"
01534 "1: \n\t"
01535 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01536 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01537 "psrlw $8, %%mm0 \n\t"
01538 "psrlw $8, %%mm1 \n\t"
01539 "packuswb %%mm1, %%mm0 \n\t"
01540 "movq %%mm0, %%mm1 \n\t"
01541 "psrlw $8, %%mm0 \n\t"
01542 "pand %%mm4, %%mm1 \n\t"
01543 "packuswb %%mm0, %%mm0 \n\t"
01544 "packuswb %%mm1, %%mm1 \n\t"
01545 "movd %%mm0, (%3, %%"REG_a") \n\t"
01546 "movd %%mm1, (%2, %%"REG_a") \n\t"
01547 "add $4, %%"REG_a" \n\t"
01548 " js 1b \n\t"
01549 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01550 : "%"REG_a
01551 );
01552 #else
01553 int i;
01554 for (i=0; i<width; i++)
01555 {
01556 dstU[i]= src1[4*i + 1];
01557 dstV[i]= src1[4*i + 3];
01558 }
01559 #endif
01560 assert(src1 == src2);
01561 }
01562
01563
01564
01565 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01566 {
01567 #if HAVE_MMX
01568 __asm__ volatile(
01569 "mov %0, %%"REG_a" \n\t"
01570 "1: \n\t"
01571 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01572 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01573 "psrlw $8, %%mm0 \n\t"
01574 "psrlw $8, %%mm1 \n\t"
01575 "packuswb %%mm1, %%mm0 \n\t"
01576 "movq %%mm0, (%2, %%"REG_a") \n\t"
01577 "add $8, %%"REG_a" \n\t"
01578 " js 1b \n\t"
01579 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
01580 : "%"REG_a
01581 );
01582 #else
01583 int i;
01584 for (i=0; i<width; i++)
01585 dst[i]= src[2*i+1];
01586 #endif
01587 }
01588
01589 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01590 {
01591 #if HAVE_MMX
01592 __asm__ volatile(
01593 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01594 "mov %0, %%"REG_a" \n\t"
01595 "1: \n\t"
01596 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01597 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01598 "pand %%mm4, %%mm0 \n\t"
01599 "pand %%mm4, %%mm1 \n\t"
01600 "packuswb %%mm1, %%mm0 \n\t"
01601 "movq %%mm0, %%mm1 \n\t"
01602 "psrlw $8, %%mm0 \n\t"
01603 "pand %%mm4, %%mm1 \n\t"
01604 "packuswb %%mm0, %%mm0 \n\t"
01605 "packuswb %%mm1, %%mm1 \n\t"
01606 "movd %%mm0, (%3, %%"REG_a") \n\t"
01607 "movd %%mm1, (%2, %%"REG_a") \n\t"
01608 "add $4, %%"REG_a" \n\t"
01609 " js 1b \n\t"
01610 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01611 : "%"REG_a
01612 );
01613 #else
01614 int i;
01615 for (i=0; i<width; i++)
01616 {
01617 dstU[i]= src1[4*i + 0];
01618 dstV[i]= src1[4*i + 2];
01619 }
01620 #endif
01621 assert(src1 == src2);
01622 }
01623
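/* BGR2Y/BGR2UV: C templates that unpack a packed RGB/BGR sample with the given shifts and masks and apply the RGB->YUV coefficients; the BGR2UV *_half variants average two horizontally adjacent pixels. */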
01624 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
01625 static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
01626 {\
01627 int i;\
01628 for (i=0; i<width; i++)\
01629 {\
01630 int b= (((type*)src)[i]>>shb)&maskb;\
01631 int g= (((type*)src)[i]>>shg)&maskg;\
01632 int r= (((type*)src)[i]>>shr)&maskr;\
01633 \
01634 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
01635 }\
01636 }
01637
01638 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
01639 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
01640 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
01641 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
01642 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
01643 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
01644
01645 #define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
01646 static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
01647 {\
01648 int i;\
01649 for (i=0; i<width; i++)\
01650 {\
01651 int b= (((type*)src)[i]&maskb)>>shb;\
01652 int g= (((type*)src)[i]&maskg)>>shg;\
01653 int r= (((type*)src)[i]&maskr)>>shr;\
01654 \
01655 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
01656 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
01657 }\
01658 }\
01659 static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
01660 {\
01661 int i;\
01662 for (i=0; i<width; i++)\
01663 {\
01664 int pix0= ((type*)src)[2*i+0];\
01665 int pix1= ((type*)src)[2*i+1];\
01666 int g= (pix0&(maskg|maska))+(pix1&(maskg|maska));\
01667 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
01668 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
01669 g&= maskg|(2*maskg);\
01670 \
01671 g>>=shg;\
01672 \
01673 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
01674 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
01675 }\
01676 }
01677
01678 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
01679 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
01680 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
01681 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
01682 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
01683 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
01684
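/* MMX versions of the 24-bit RGB/BGR to Y and to UV conversions, using pmaddwd with the ff_bgr24to.../ff_rgb24to... coefficient tables. */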
01685 #if HAVE_MMX
01686 static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
01687 {
01688
01689 if(srcFormat == PIX_FMT_BGR24){
01690 __asm__ volatile(
01691 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
01692 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
01693 :
01694 );
01695 }else{
01696 __asm__ volatile(
01697 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
01698 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
01699 :
01700 );
01701 }
01702
01703 __asm__ volatile(
01704 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
01705 "mov %2, %%"REG_a" \n\t"
01706 "pxor %%mm7, %%mm7 \n\t"
01707 "1: \n\t"
01708 PREFETCH" 64(%0) \n\t"
01709 "movd (%0), %%mm0 \n\t"
01710 "movd 2(%0), %%mm1 \n\t"
01711 "movd 6(%0), %%mm2 \n\t"
01712 "movd 8(%0), %%mm3 \n\t"
01713 "add $12, %0 \n\t"
01714 "punpcklbw %%mm7, %%mm0 \n\t"
01715 "punpcklbw %%mm7, %%mm1 \n\t"
01716 "punpcklbw %%mm7, %%mm2 \n\t"
01717 "punpcklbw %%mm7, %%mm3 \n\t"
01718 "pmaddwd %%mm5, %%mm0 \n\t"
01719 "pmaddwd %%mm6, %%mm1 \n\t"
01720 "pmaddwd %%mm5, %%mm2 \n\t"
01721 "pmaddwd %%mm6, %%mm3 \n\t"
01722 "paddd %%mm1, %%mm0 \n\t"
01723 "paddd %%mm3, %%mm2 \n\t"
01724 "paddd %%mm4, %%mm0 \n\t"
01725 "paddd %%mm4, %%mm2 \n\t"
01726 "psrad $15, %%mm0 \n\t"
01727 "psrad $15, %%mm2 \n\t"
01728 "packssdw %%mm2, %%mm0 \n\t"
01729 "packuswb %%mm0, %%mm0 \n\t"
01730 "movd %%mm0, (%1, %%"REG_a") \n\t"
01731 "add $4, %%"REG_a" \n\t"
01732 " js 1b \n\t"
01733 : "+r" (src)
01734 : "r" (dst+width), "g" (-width)
01735 : "%"REG_a
01736 );
01737 }
01738
01739 static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
01740 {
01741 __asm__ volatile(
01742 "movq 24(%4), %%mm6 \n\t"
01743 "mov %3, %%"REG_a" \n\t"
01744 "pxor %%mm7, %%mm7 \n\t"
01745 "1: \n\t"
01746 PREFETCH" 64(%0) \n\t"
01747 "movd (%0), %%mm0 \n\t"
01748 "movd 2(%0), %%mm1 \n\t"
01749 "punpcklbw %%mm7, %%mm0 \n\t"
01750 "punpcklbw %%mm7, %%mm1 \n\t"
01751 "movq %%mm0, %%mm2 \n\t"
01752 "movq %%mm1, %%mm3 \n\t"
01753 "pmaddwd (%4), %%mm0 \n\t"
01754 "pmaddwd 8(%4), %%mm1 \n\t"
01755 "pmaddwd 16(%4), %%mm2 \n\t"
01756 "pmaddwd %%mm6, %%mm3 \n\t"
01757 "paddd %%mm1, %%mm0 \n\t"
01758 "paddd %%mm3, %%mm2 \n\t"
01759
01760 "movd 6(%0), %%mm1 \n\t"
01761 "movd 8(%0), %%mm3 \n\t"
01762 "add $12, %0 \n\t"
01763 "punpcklbw %%mm7, %%mm1 \n\t"
01764 "punpcklbw %%mm7, %%mm3 \n\t"
01765 "movq %%mm1, %%mm4 \n\t"
01766 "movq %%mm3, %%mm5 \n\t"
01767 "pmaddwd (%4), %%mm1 \n\t"
01768 "pmaddwd 8(%4), %%mm3 \n\t"
01769 "pmaddwd 16(%4), %%mm4 \n\t"
01770 "pmaddwd %%mm6, %%mm5 \n\t"
01771 "paddd %%mm3, %%mm1 \n\t"
01772 "paddd %%mm5, %%mm4 \n\t"
01773
01774 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
01775 "paddd %%mm3, %%mm0 \n\t"
01776 "paddd %%mm3, %%mm2 \n\t"
01777 "paddd %%mm3, %%mm1 \n\t"
01778 "paddd %%mm3, %%mm4 \n\t"
01779 "psrad $15, %%mm0 \n\t"
01780 "psrad $15, %%mm2 \n\t"
01781 "psrad $15, %%mm1 \n\t"
01782 "psrad $15, %%mm4 \n\t"
01783 "packssdw %%mm1, %%mm0 \n\t"
01784 "packssdw %%mm4, %%mm2 \n\t"
01785 "packuswb %%mm0, %%mm0 \n\t"
01786 "packuswb %%mm2, %%mm2 \n\t"
01787 "movd %%mm0, (%1, %%"REG_a") \n\t"
01788 "movd %%mm2, (%2, %%"REG_a") \n\t"
01789 "add $4, %%"REG_a" \n\t"
01790 " js 1b \n\t"
01791 : "+r" (src)
01792 : "r" (dstU+width), "r" (dstV+width), "g" (-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
01793 : "%"REG_a
01794 );
01795 }
01796 #endif
01797
01798 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01799 {
01800 #if HAVE_MMX
01801 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
01802 #else
01803 int i;
01804 for (i=0; i<width; i++)
01805 {
01806 int b= src[i*3+0];
01807 int g= src[i*3+1];
01808 int r= src[i*3+2];
01809
01810 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01811 }
01812 #endif
01813 }
01814
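/* BGR24 -> U/V wrapper. In the C fallback the 257<<(RGB2YUV_SHIFT-1) constant
 * is the +128 chroma bias plus 0.5 for rounding (257/2 = 128.5). */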
01815 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01816 {
01817 #if HAVE_MMX
01818 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
01819 #else
01820 int i;
01821 for (i=0; i<width; i++)
01822 {
01823 int b= src1[3*i + 0];
01824 int g= src1[3*i + 1];
01825 int r= src1[3*i + 2];
01826
01827 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01828 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01829 }
01830 #endif
01831 assert(src1 == src2);
01832 }
01833
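/* Like bgr24ToUV, but for horizontally subsampled chroma: each output sample
 * averages two adjacent input pixels (the pair is summed, so the bias is
 * doubled and the final shift becomes RGB2YUV_SHIFT+1). */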
01834 static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01835 {
01836 int i;
01837 for (i=0; i<width; i++)
01838 {
01839 int b= src1[6*i + 0] + src1[6*i + 3];
01840 int g= src1[6*i + 1] + src1[6*i + 4];
01841 int r= src1[6*i + 2] + src1[6*i + 5];
01842
01843 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01844 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01845 }
01846 assert(src1 == src2);
01847 }
01848
01849 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01850 {
01851 #if HAVE_MMX
01852 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
01853 #else
01854 int i;
01855 for (i=0; i<width; i++)
01856 {
01857 int r= src[i*3+0];
01858 int g= src[i*3+1];
01859 int b= src[i*3+2];
01860
01861 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01862 }
01863 #endif
01864 }
01865
01866 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01867 {
01868 #if HAVE_MMX
01869 assert(src1==src2);
01870 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
01871 #else
01872 int i;
01873 assert(src1==src2);
01874 for (i=0; i<width; i++)
01875 {
01876 int r= src1[3*i + 0];
01877 int g= src1[3*i + 1];
01878 int b= src1[3*i + 2];
01879
01880 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01881 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01882 }
01883 #endif
01884 }
01885
01886 static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
01887 {
01888 int i;
01889 assert(src1==src2);
01890 for (i=0; i<width; i++)
01891 {
01892 int r= src1[6*i + 0] + src1[6*i + 3];
01893 int g= src1[6*i + 1] + src1[6*i + 4];
01894 int b= src1[6*i + 2] + src1[6*i + 5];
01895
01896 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01897 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01898 }
01899 }
01900
01901
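/* Paletted/byte formats: pal[] holds one packed YUV value per index, with
 * Y in bits 0-7, U in bits 8-15 and V in bits 16-23. */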
01902 static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
01903 {
01904 int i;
01905 for (i=0; i<width; i++)
01906 {
01907 int d= src[i];
01908
01909 dst[i]= pal[d] & 0xFF;
01910 }
01911 }
01912
01913 static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
01914 {
01915 int i;
01916 assert(src1 == src2);
01917 for (i=0; i<width; i++)
01918 {
01919 int p= pal[src1[i]];
01920
01921 dstU[i]= p>>8;
01922 dstV[i]= p>>16;
01923 }
01924 }
01925
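/* 1 bpp -> 8-bit luma, MSB first. monowhite2Y inverts the bits (a set bit
 * means black), monoblack2Y does not. Only whole input bytes are expanded,
 * i.e. width is effectively treated as a multiple of 8. */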
01926 static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01927 {
01928 int i, j;
01929 for (i=0; i<width/8; i++){
01930 int d= ~src[i];
01931 for(j=0; j<8; j++)
01932 dst[8*i+j]= ((d>>(7-j))&1)*255;
01933 }
01934 }
01935
01936 static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
01937 {
01938 int i, j;
01939 for (i=0; i<width/8; i++){
01940 int d= src[i];
01941 for(j=0; j<8; j++)
01942 dst[8*i+j]= ((d>>(7-j))&1)*255;
01943 }
01944 }
01945
01946
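/* Horizontal scaler: for each output sample, apply an FIR filter of
 * filterSize taps starting at filterPos[i] and store the result scaled to
 * the 15-bit intermediate range (val>>7, clipped to at most (1<<15)-1).
 * MMX has dedicated paths for filterSize 4 and 8 plus a generic loop;
 * otherwise AltiVec or plain C is used. */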
01947 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
01948 int16_t *filter, int16_t *filterPos, long filterSize)
01949 {
01950 #if HAVE_MMX
01951 assert(filterSize % 4 == 0 && filterSize>0);
01952 if (filterSize==4)
01953 {
01954 long counter= -2*dstW;
01955 filter-= counter*2;
01956 filterPos-= counter/2;
01957 dst-= counter/2;
01958 __asm__ volatile(
01959 #if defined(PIC)
01960 "push %%"REG_b" \n\t"
01961 #endif
01962 "pxor %%mm7, %%mm7 \n\t"
01963 "push %%"REG_BP" \n\t"
01964 "mov %%"REG_a", %%"REG_BP" \n\t"
01965 ASMALIGN(4)
01966 "1: \n\t"
01967 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
01968 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
01969 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
01970 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
01971 "movd (%3, %%"REG_a"), %%mm0 \n\t"
01972 "movd (%3, %%"REG_b"), %%mm2 \n\t"
01973 "punpcklbw %%mm7, %%mm0 \n\t"
01974 "punpcklbw %%mm7, %%mm2 \n\t"
01975 "pmaddwd %%mm1, %%mm0 \n\t"
01976 "pmaddwd %%mm2, %%mm3 \n\t"
01977 "movq %%mm0, %%mm4 \n\t"
01978 "punpckldq %%mm3, %%mm0 \n\t"
01979 "punpckhdq %%mm3, %%mm4 \n\t"
01980 "paddd %%mm4, %%mm0 \n\t"
01981 "psrad $7, %%mm0 \n\t"
01982 "packssdw %%mm0, %%mm0 \n\t"
01983 "movd %%mm0, (%4, %%"REG_BP") \n\t"
01984 "add $4, %%"REG_BP" \n\t"
01985 " jnc 1b \n\t"
01986
01987 "pop %%"REG_BP" \n\t"
01988 #if defined(PIC)
01989 "pop %%"REG_b" \n\t"
01990 #endif
01991 : "+a" (counter)
01992 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
01993 #if !defined(PIC)
01994 : "%"REG_b
01995 #endif
01996 );
01997 }
01998 else if (filterSize==8)
01999 {
02000 long counter= -2*dstW;
02001 filter-= counter*4;
02002 filterPos-= counter/2;
02003 dst-= counter/2;
02004 __asm__ volatile(
02005 #if defined(PIC)
02006 "push %%"REG_b" \n\t"
02007 #endif
02008 "pxor %%mm7, %%mm7 \n\t"
02009 "push %%"REG_BP" \n\t"
02010 "mov %%"REG_a", %%"REG_BP" \n\t"
02011 ASMALIGN(4)
02012 "1: \n\t"
02013 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
02014 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
02015 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
02016 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
02017 "movd (%3, %%"REG_a"), %%mm0 \n\t"
02018 "movd (%3, %%"REG_b"), %%mm2 \n\t"
02019 "punpcklbw %%mm7, %%mm0 \n\t"
02020 "punpcklbw %%mm7, %%mm2 \n\t"
02021 "pmaddwd %%mm1, %%mm0 \n\t"
02022 "pmaddwd %%mm2, %%mm3 \n\t"
02023
02024 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
02025 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
02026 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
02027 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
02028 "punpcklbw %%mm7, %%mm4 \n\t"
02029 "punpcklbw %%mm7, %%mm2 \n\t"
02030 "pmaddwd %%mm1, %%mm4 \n\t"
02031 "pmaddwd %%mm2, %%mm5 \n\t"
02032 "paddd %%mm4, %%mm0 \n\t"
02033 "paddd %%mm5, %%mm3 \n\t"
02034 "movq %%mm0, %%mm4 \n\t"
02035 "punpckldq %%mm3, %%mm0 \n\t"
02036 "punpckhdq %%mm3, %%mm4 \n\t"
02037 "paddd %%mm4, %%mm0 \n\t"
02038 "psrad $7, %%mm0 \n\t"
02039 "packssdw %%mm0, %%mm0 \n\t"
02040 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02041 "add $4, %%"REG_BP" \n\t"
02042 " jnc 1b \n\t"
02043
02044 "pop %%"REG_BP" \n\t"
02045 #if defined(PIC)
02046 "pop %%"REG_b" \n\t"
02047 #endif
02048 : "+a" (counter)
02049 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
02050 #if !defined(PIC)
02051 : "%"REG_b
02052 #endif
02053 );
02054 }
02055 else
02056 {
02057 uint8_t *offset = src+filterSize;
02058 long counter= -2*dstW;
02059
02060 filterPos-= counter/2;
02061 dst-= counter/2;
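/* Generic filter size: two output samples per outer iteration, four taps per
 * inner iteration; %6 holds filterSize*2, the byte size of one coefficient
 * row. */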
02062 __asm__ volatile(
02063 "pxor %%mm7, %%mm7 \n\t"
02064 ASMALIGN(4)
02065 "1: \n\t"
02066 "mov %2, %%"REG_c" \n\t"
02067 "movzwl (%%"REG_c", %0), %%eax \n\t"
02068 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
02069 "mov %5, %%"REG_c" \n\t"
02070 "pxor %%mm4, %%mm4 \n\t"
02071 "pxor %%mm5, %%mm5 \n\t"
02072 "2: \n\t"
02073 "movq (%1), %%mm1 \n\t"
02074 "movq (%1, %6), %%mm3 \n\t"
02075 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
02076 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
02077 "punpcklbw %%mm7, %%mm0 \n\t"
02078 "punpcklbw %%mm7, %%mm2 \n\t"
02079 "pmaddwd %%mm1, %%mm0 \n\t"
02080 "pmaddwd %%mm2, %%mm3 \n\t"
02081 "paddd %%mm3, %%mm5 \n\t"
02082 "paddd %%mm0, %%mm4 \n\t"
02083 "add $8, %1 \n\t"
02084 "add $4, %%"REG_c" \n\t"
02085 "cmp %4, %%"REG_c" \n\t"
02086 " jb 2b \n\t"
02087 "add %6, %1 \n\t"
02088 "movq %%mm4, %%mm0 \n\t"
02089 "punpckldq %%mm5, %%mm4 \n\t"
02090 "punpckhdq %%mm5, %%mm0 \n\t"
02091 "paddd %%mm0, %%mm4 \n\t"
02092 "psrad $7, %%mm4 \n\t"
02093 "packssdw %%mm4, %%mm4 \n\t"
02094 "mov %3, %%"REG_a" \n\t"
02095 "movd %%mm4, (%%"REG_a", %0) \n\t"
02096 "add $4, %0 \n\t"
02097 " jnc 1b \n\t"
02098
02099 : "+r" (counter), "+r" (filter)
02100 : "m" (filterPos), "m" (dst), "m"(offset),
02101 "m" (src), "r" (filterSize*2)
02102 : "%"REG_a, "%"REG_c, "%"REG_d
02103 );
02104 }
02105 #else
02106 #if HAVE_ALTIVEC
02107 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
02108 #else
02109 int i;
02110 for (i=0; i<dstW; i++)
02111 {
02112 int j;
02113 int srcPos= filterPos[i];
02114 int val=0;
02115
02116 for (j=0; j<filterSize; j++)
02117 {
02118
02119 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02120 }
02121
02122 dst[i] = FFMIN(val>>7, (1<<15)-1);
02123
02124 }
02125 #endif
02126 #endif
02127 }
02128
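/* hyscale: produce one horizontally scaled luma line. Inputs that are not
 * already plain 8-bit luma are first converted into formatConvBuffer, then
 * either the generic hScale filter or the fast-bilinear path below is
 * applied, and finally luma range conversion is done if srcRange differs
 * from dstRange. */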
02129 static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
02130 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
02131 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
02132 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
02133 int32_t *mmx2FilterPos, uint32_t *pal)
02134 {
02135 if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
02136 {
02137 RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal);
02138 src= formatConvBuffer;
02139 }
02140 else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
02141 {
02142 RENAME(uyvyToY)(formatConvBuffer, src, srcW, pal);
02143 src= formatConvBuffer;
02144 }
02145 else if (srcFormat==PIX_FMT_RGB32)
02146 {
02147 RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
02148 src= formatConvBuffer;
02149 }
02150 else if (srcFormat==PIX_FMT_RGB32_1)
02151 {
02152 RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
02153 src= formatConvBuffer;
02154 }
02155 else if (srcFormat==PIX_FMT_BGR24)
02156 {
02157 RENAME(bgr24ToY)(formatConvBuffer, src, srcW, pal);
02158 src= formatConvBuffer;
02159 }
02160 else if (srcFormat==PIX_FMT_BGR565)
02161 {
02162 RENAME(bgr16ToY)(formatConvBuffer, src, srcW, pal);
02163 src= formatConvBuffer;
02164 }
02165 else if (srcFormat==PIX_FMT_BGR555)
02166 {
02167 RENAME(bgr15ToY)(formatConvBuffer, src, srcW, pal);
02168 src= formatConvBuffer;
02169 }
02170 else if (srcFormat==PIX_FMT_BGR32)
02171 {
02172 RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
02173 src= formatConvBuffer;
02174 }
02175 else if (srcFormat==PIX_FMT_BGR32_1)
02176 {
02177 RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
02178 src= formatConvBuffer;
02179 }
02180 else if (srcFormat==PIX_FMT_RGB24)
02181 {
02182 RENAME(rgb24ToY)(formatConvBuffer, src, srcW, pal);
02183 src= formatConvBuffer;
02184 }
02185 else if (srcFormat==PIX_FMT_RGB565)
02186 {
02187 RENAME(rgb16ToY)(formatConvBuffer, src, srcW, pal);
02188 src= formatConvBuffer;
02189 }
02190 else if (srcFormat==PIX_FMT_RGB555)
02191 {
02192 RENAME(rgb15ToY)(formatConvBuffer, src, srcW, pal);
02193 src= formatConvBuffer;
02194 }
02195 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
02196 {
02197 RENAME(palToY)(formatConvBuffer, src, srcW, pal);
02198 src= formatConvBuffer;
02199 }
02200 else if (srcFormat==PIX_FMT_MONOBLACK)
02201 {
02202 RENAME(monoblack2Y)(formatConvBuffer, src, srcW, pal);
02203 src= formatConvBuffer;
02204 }
02205 else if (srcFormat==PIX_FMT_MONOWHITE)
02206 {
02207 RENAME(monowhite2Y)(formatConvBuffer, src, srcW, pal);
02208 src= formatConvBuffer;
02209 }
02210
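/* Use the generic hScale filter unless SWS_FAST_BILINEAR was requested
 * (and, when built with MMX, only if the MMX2 scaler can actually be used). */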
02211 #if HAVE_MMX
02212
02213 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
02214 #else
02215 if (!(flags&SWS_FAST_BILINEAR))
02216 #endif
02217 {
02218 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
02219 }
02220 else
02221 {
02222 #if ARCH_X86 && CONFIG_GPL
02223 #if HAVE_MMX2
02224 int i;
02225 #if defined(PIC)
02226 uint64_t ebxsave __attribute__((aligned(8)));
02227 #endif
02228 if (canMMX2BeUsed)
02229 {
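/* MMX2 fast path: call into the scaler code supplied via funnyYCode
 * ("call *%4"); each FUNNY_Y_CODE block handles one segment and re-bases the
 * source pointer from the mmx2FilterPos table. REG_b is saved and restored
 * around the block when compiling PIC code, where ebx is reserved. */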
02230 __asm__ volatile(
02231 #if defined(PIC)
02232 "mov %%"REG_b", %5 \n\t"
02233 #endif
02234 "pxor %%mm7, %%mm7 \n\t"
02235 "mov %0, %%"REG_c" \n\t"
02236 "mov %1, %%"REG_D" \n\t"
02237 "mov %2, %%"REG_d" \n\t"
02238 "mov %3, %%"REG_b" \n\t"
02239 "xor %%"REG_a", %%"REG_a" \n\t"
02240 PREFETCH" (%%"REG_c") \n\t"
02241 PREFETCH" 32(%%"REG_c") \n\t"
02242 PREFETCH" 64(%%"REG_c") \n\t"
02243
02244 #if ARCH_X86_64
02245
02246 #define FUNNY_Y_CODE \
02247 "movl (%%"REG_b"), %%esi \n\t"\
02248 "call *%4 \n\t"\
02249 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
02250 "add %%"REG_S", %%"REG_c" \n\t"\
02251 "add %%"REG_a", %%"REG_D" \n\t"\
02252 "xor %%"REG_a", %%"REG_a" \n\t"\
02253
02254 #else
02255
02256 #define FUNNY_Y_CODE \
02257 "movl (%%"REG_b"), %%esi \n\t"\
02258 "call *%4 \n\t"\
02259 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
02260 "add %%"REG_a", %%"REG_D" \n\t"\
02261 "xor %%"REG_a", %%"REG_a" \n\t"\
02262
02263 #endif
02264
02265 FUNNY_Y_CODE
02266 FUNNY_Y_CODE
02267 FUNNY_Y_CODE
02268 FUNNY_Y_CODE
02269 FUNNY_Y_CODE
02270 FUNNY_Y_CODE
02271 FUNNY_Y_CODE
02272 FUNNY_Y_CODE
02273
02274 #if defined(PIC)
02275 "mov %5, %%"REG_b" \n\t"
02276 #endif
02277 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
02278 "m" (funnyYCode)
02279 #if defined(PIC)
02280 ,"m" (ebxsave)
02281 #endif
02282 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02283 #if !defined(PIC)
02284 ,"%"REG_b
02285 #endif
02286 );
02287 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
02288 }
02289 else
02290 {
02291 #endif
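/* Plain bilinear scaling in 16.16 fixed point: REG_d is the integer source
 * position and ecx the fraction; each output sample is
 * (src[x]<<16) + (src[x+1]-src[x])*frac, shifted right by 9 to land in the
 * 15-bit intermediate format. Two samples are produced per loop iteration. */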
02292 long xInc_shr16 = xInc >> 16;
02293 uint16_t xInc_mask = xInc & 0xffff;
02294
02295 __asm__ volatile(
02296 "xor %%"REG_a", %%"REG_a" \n\t"
02297 "xor %%"REG_d", %%"REG_d" \n\t"
02298 "xorl %%ecx, %%ecx \n\t"
02299 ASMALIGN(4)
02300 "1: \n\t"
02301 "movzbl (%0, %%"REG_d"), %%edi \n\t"
02302 "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
02303 "subl %%edi, %%esi \n\t"
02304 "imull %%ecx, %%esi \n\t"
02305 "shll $16, %%edi \n\t"
02306 "addl %%edi, %%esi \n\t"
02307 "mov %1, %%"REG_D" \n\t"
02308 "shrl $9, %%esi \n\t"
02309 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
02310 "addw %4, %%cx \n\t"
02311 "adc %3, %%"REG_d" \n\t"
02312
02313 "movzbl (%0, %%"REG_d"), %%edi \n\t"
02314 "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
02315 "subl %%edi, %%esi \n\t"
02316 "imull %%ecx, %%esi \n\t"
02317 "shll $16, %%edi \n\t"
02318 "addl %%edi, %%esi \n\t"
02319 "mov %1, %%"REG_D" \n\t"
02320 "shrl $9, %%esi \n\t"
02321 "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
02322 "addw %4, %%cx \n\t"
02323 "adc %3, %%"REG_d" \n\t"
02324
02325
02326 "add $2, %%"REG_a" \n\t"
02327 "cmp %2, %%"REG_a" \n\t"
02328 " jb 1b \n\t"
02329
02330
02331 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
02332 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
02333 );
02334 #if HAVE_MMX2
02335 }
02336 #endif
02337 #else
02338 int i;
02339 unsigned int xpos=0;
02340 for (i=0;i<dstWidth;i++)
02341 {
02342 register unsigned int xx=xpos>>16;
02343 register unsigned int xalpha=(xpos&0xFFFF)>>9;
02344 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02345 xpos+=xInc;
02346 }
02347 #endif
02348 }
02349
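/* Luma range conversion on the 15-bit intermediate values: going from full
 * (JPEG) to limited (MPEG) range scales by ~219/255 and adds 16<<7; the
 * reverse direction scales by ~255/219 with clipping. The constants are
 * rounded fixed-point equivalents of those factors. */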
02350 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
02351 int i;
02352
02353
02354 if(c->srcRange){
02355 for (i=0; i<dstWidth; i++)
02356 dst[i]= (dst[i]*14071 + 33561947)>>14;
02357 }else{
02358 for (i=0; i<dstWidth; i++)
02359 dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
02360 }
02361 }
02362 }
02363
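/* hcscale: chroma counterpart of hyscale; produces two horizontally scaled
 * lines, U at dst[0..] and V at dst[VOFW..]. For RGB-style inputs the *_half
 * variants are used when c->chrSrcHSubSample is set. */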
02364 inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
02365 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
02366 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
02367 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
02368 int32_t *mmx2FilterPos, uint32_t *pal)
02369 {
02370 if (srcFormat==PIX_FMT_YUYV422)
02371 {
02372 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02373 src1= formatConvBuffer;
02374 src2= formatConvBuffer+VOFW;
02375 }
02376 else if (srcFormat==PIX_FMT_UYVY422)
02377 {
02378 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02379 src1= formatConvBuffer;
02380 src2= formatConvBuffer+VOFW;
02381 }
02382 else if (srcFormat==PIX_FMT_RGB32)
02383 {
02384 if(c->chrSrcHSubSample)
02385 RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02386 else
02387 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02388 src1= formatConvBuffer;
02389 src2= formatConvBuffer+VOFW;
02390 }
02391 else if (srcFormat==PIX_FMT_RGB32_1)
02392 {
02393 if(c->chrSrcHSubSample)
02394 RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
02395 else
02396 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
02397 src1= formatConvBuffer;
02398 src2= formatConvBuffer+VOFW;
02399 }
02400 else if (srcFormat==PIX_FMT_BGR24)
02401 {
02402 if(c->chrSrcHSubSample)
02403 RENAME(bgr24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02404 else
02405 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02406 src1= formatConvBuffer;
02407 src2= formatConvBuffer+VOFW;
02408 }
02409 else if (srcFormat==PIX_FMT_BGR565)
02410 {
02411 if(c->chrSrcHSubSample)
02412 RENAME(bgr16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02413 else
02414 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02415 src1= formatConvBuffer;
02416 src2= formatConvBuffer+VOFW;
02417 }
02418 else if (srcFormat==PIX_FMT_BGR555)
02419 {
02420 if(c->chrSrcHSubSample)
02421 RENAME(bgr15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02422 else
02423 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02424 src1= formatConvBuffer;
02425 src2= formatConvBuffer+VOFW;
02426 }
02427 else if (srcFormat==PIX_FMT_BGR32)
02428 {
02429 if(c->chrSrcHSubSample)
02430 RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02431 else
02432 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02433 src1= formatConvBuffer;
02434 src2= formatConvBuffer+VOFW;
02435 }
02436 else if (srcFormat==PIX_FMT_BGR32_1)
02437 {
02438 if(c->chrSrcHSubSample)
02439 RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
02440 else
02441 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
02442 src1= formatConvBuffer;
02443 src2= formatConvBuffer+VOFW;
02444 }
02445 else if (srcFormat==PIX_FMT_RGB24)
02446 {
02447 if(c->chrSrcHSubSample)
02448 RENAME(rgb24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02449 else
02450 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02451 src1= formatConvBuffer;
02452 src2= formatConvBuffer+VOFW;
02453 }
02454 else if (srcFormat==PIX_FMT_RGB565)
02455 {
02456 if(c->chrSrcHSubSample)
02457 RENAME(rgb16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02458 else
02459 RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02460 src1= formatConvBuffer;
02461 src2= formatConvBuffer+VOFW;
02462 }
02463 else if (srcFormat==PIX_FMT_RGB555)
02464 {
02465 if(c->chrSrcHSubSample)
02466 RENAME(rgb15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02467 else
02468 RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02469 src1= formatConvBuffer;
02470 src2= formatConvBuffer+VOFW;
02471 }
02472 else if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
02473 {
02474 return;
02475 }
02476 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
02477 {
02478 RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02479 src1= formatConvBuffer;
02480 src2= formatConvBuffer+VOFW;
02481 }
02482
02483 #if HAVE_MMX
02484
02485 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
02486 #else
02487 if (!(flags&SWS_FAST_BILINEAR))
02488 #endif
02489 {
02490 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02491 RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02492 }
02493 else
02494 {
02495 #if ARCH_X86 && CONFIG_GPL
02496 #if HAVE_MMX2
02497 int i;
02498 #if defined(PIC)
02499 uint64_t ebxsave __attribute__((aligned(8)));
02500 #endif
02501 if (canMMX2BeUsed)
02502 {
02503 __asm__ volatile(
02504 #if defined(PIC)
02505 "mov %%"REG_b", %6 \n\t"
02506 #endif
02507 "pxor %%mm7, %%mm7 \n\t"
02508 "mov %0, %%"REG_c" \n\t"
02509 "mov %1, %%"REG_D" \n\t"
02510 "mov %2, %%"REG_d" \n\t"
02511 "mov %3, %%"REG_b" \n\t"
02512 "xor %%"REG_a", %%"REG_a" \n\t"
02513 PREFETCH" (%%"REG_c") \n\t"
02514 PREFETCH" 32(%%"REG_c") \n\t"
02515 PREFETCH" 64(%%"REG_c") \n\t"
02516
02517 #if ARCH_X86_64
02518
02519 #define FUNNY_UV_CODE \
02520 "movl (%%"REG_b"), %%esi \n\t"\
02521 "call *%4 \n\t"\
02522 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
02523 "add %%"REG_S", %%"REG_c" \n\t"\
02524 "add %%"REG_a", %%"REG_D" \n\t"\
02525 "xor %%"REG_a", %%"REG_a" \n\t"\
02526
02527 #else
02528
02529 #define FUNNY_UV_CODE \
02530 "movl (%%"REG_b"), %%esi \n\t"\
02531 "call *%4 \n\t"\
02532 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
02533 "add %%"REG_a", %%"REG_D" \n\t"\
02534 "xor %%"REG_a", %%"REG_a" \n\t"\
02535
02536 #endif
02537
02538 FUNNY_UV_CODE
02539 FUNNY_UV_CODE
02540 FUNNY_UV_CODE
02541 FUNNY_UV_CODE
02542 "xor %%"REG_a", %%"REG_a" \n\t"
02543 "mov %5, %%"REG_c" \n\t"
02544 "mov %1, %%"REG_D" \n\t"
02545 "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
02546 PREFETCH" (%%"REG_c") \n\t"
02547 PREFETCH" 32(%%"REG_c") \n\t"
02548 PREFETCH" 64(%%"REG_c") \n\t"
02549
02550 FUNNY_UV_CODE
02551 FUNNY_UV_CODE
02552 FUNNY_UV_CODE
02553 FUNNY_UV_CODE
02554
02555 #if defined(PIC)
02556 "mov %6, %%"REG_b" \n\t"
02557 #endif
02558 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
02559 "m" (funnyUVCode), "m" (src2)
02560 #if defined(PIC)
02561 ,"m" (ebxsave)
02562 #endif
02563 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02564 #if !defined(PIC)
02565 ,"%"REG_b
02566 #endif
02567 );
02568 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02569 {
02570
02571 dst[i] = src1[srcW-1]*128;
02572 dst[i+VOFW] = src2[srcW-1]*128;
02573 }
02574 }
02575 else
02576 {
02577 #endif
02578 long xInc_shr16 = (long) (xInc >> 16);
02579 uint16_t xInc_mask = xInc & 0xffff;
02580 __asm__ volatile(
02581 "xor %%"REG_a", %%"REG_a" \n\t"
02582 "xor %%"REG_d", %%"REG_d" \n\t"
02583 "xorl %%ecx, %%ecx \n\t"
02584 ASMALIGN(4)
02585 "1: \n\t"
02586 "mov %0, %%"REG_S" \n\t"
02587 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t"
02588 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t"
02589 "subl %%edi, %%esi \n\t"
02590 "imull %%ecx, %%esi \n\t"
02591 "shll $16, %%edi \n\t"
02592 "addl %%edi, %%esi \n\t"
02593 "mov %1, %%"REG_D" \n\t"
02594 "shrl $9, %%esi \n\t"
02595 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
02596
02597 "movzbl (%5, %%"REG_d"), %%edi \n\t"
02598 "movzbl 1(%5, %%"REG_d"), %%esi \n\t"
02599 "subl %%edi, %%esi \n\t"
02600 "imull %%ecx, %%esi \n\t"
02601 "shll $16, %%edi \n\t"
02602 "addl %%edi, %%esi \n\t"
02603 "mov %1, %%"REG_D" \n\t"
02604 "shrl $9, %%esi \n\t"
02605 "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
02606
02607 "addw %4, %%cx \n\t"
02608 "adc %3, %%"REG_d" \n\t"
02609 "add $1, %%"REG_a" \n\t"
02610 "cmp %2, %%"REG_a" \n\t"
02611 " jb 1b \n\t"
02612
02613
02614
02615 #if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
02616 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
02617 #else
02618 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
02619 #endif
02620 "r" (src2)
02621 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
02622 );
02623 #if HAVE_MMX2
02624 }
02625 #endif
02626 #else
02627 int i;
02628 unsigned int xpos=0;
02629 for (i=0;i<dstWidth;i++)
02630 {
02631 register unsigned int xx=xpos>>16;
02632 register unsigned int xalpha=(xpos&0xFFFF)>>9;
02633 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02634 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02635
02636
02637
02638
02639 xpos+=xInc;
02640 }
02641 #endif
02642 }
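/* Chroma range conversion: U/V are centred on 128<<7; the constants scale by
 * ~224/255 (towards limited range) or ~255/224 with clipping (towards full
 * range). */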
02643 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
02644 int i;
02645
02646
02647 if(c->srcRange){
02648 for (i=0; i<dstWidth; i++){
02649 dst[i ]= (dst[i ]*1799 + 4081085)>>11;
02650 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11;
02651 }
02652 }else{
02653 for (i=0; i<dstWidth; i++){
02654 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12;
02655 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12;
02656 }
02657 }
02658 }
02659 }
02660
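/* Main scaling entry point for one input slice. Horizontally scaled lines
 * are kept in the lumPixBuf/chrPixBuf ring buffers; for every output line
 * the required input lines are scaled on demand and then combined by the
 * vertical filter into the destination picture. Returns the number of
 * output lines written (dstY - lastDstY). */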
02661 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
02662 int srcSliceH, uint8_t* dst[], int dstStride[]){
02663
02664
02665 const int srcW= c->srcW;
02666 const int dstW= c->dstW;
02667 const int dstH= c->dstH;
02668 const int chrDstW= c->chrDstW;
02669 const int chrSrcW= c->chrSrcW;
02670 const int lumXInc= c->lumXInc;
02671 const int chrXInc= c->chrXInc;
02672 const int dstFormat= c->dstFormat;
02673 const int srcFormat= c->srcFormat;
02674 const int flags= c->flags;
02675 const int canMMX2BeUsed= c->canMMX2BeUsed;
02676 int16_t *vLumFilterPos= c->vLumFilterPos;
02677 int16_t *vChrFilterPos= c->vChrFilterPos;
02678 int16_t *hLumFilterPos= c->hLumFilterPos;
02679 int16_t *hChrFilterPos= c->hChrFilterPos;
02680 int16_t *vLumFilter= c->vLumFilter;
02681 int16_t *vChrFilter= c->vChrFilter;
02682 int16_t *hLumFilter= c->hLumFilter;
02683 int16_t *hChrFilter= c->hChrFilter;
02684 int32_t *lumMmxFilter= c->lumMmxFilter;
02685 int32_t *chrMmxFilter= c->chrMmxFilter;
02686 const int vLumFilterSize= c->vLumFilterSize;
02687 const int vChrFilterSize= c->vChrFilterSize;
02688 const int hLumFilterSize= c->hLumFilterSize;
02689 const int hChrFilterSize= c->hChrFilterSize;
02690 int16_t **lumPixBuf= c->lumPixBuf;
02691 int16_t **chrPixBuf= c->chrPixBuf;
02692 const int vLumBufSize= c->vLumBufSize;
02693 const int vChrBufSize= c->vChrBufSize;
02694 uint8_t *funnyYCode= c->funnyYCode;
02695 uint8_t *funnyUVCode= c->funnyUVCode;
02696 uint8_t *formatConvBuffer= c->formatConvBuffer;
02697 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02698 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); /* = ceil(srcSliceH / 2^chrSrcVSubSample) */
02699 int lastDstY;
02700 uint32_t *pal=c->pal_yuv;
02701
02702
02703 int dstY= c->dstY;
02704 int lumBufIndex= c->lumBufIndex;
02705 int chrBufIndex= c->chrBufIndex;
02706 int lastInLumBuf= c->lastInLumBuf;
02707 int lastInChrBuf= c->lastInChrBuf;
02708
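/* Packed input formats keep everything in plane 0: duplicate the pointer and
 * stride so the chroma code can index planes 1/2 uniformly. */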
02709 if (isPacked(c->srcFormat)){
02710 src[0]=
02711 src[1]=
02712 src[2]= src[0];
02713 srcStride[0]=
02714 srcStride[1]=
02715 srcStride[2]= srcStride[0];
02716 }
02717 srcStride[1]<<= c->vChrDrop;
02718 srcStride[2]<<= c->vChrDrop;
02719
02720
02721
02722
02723 #if 0 //self test FIXME move to a vfilter or something
02724 {
02725 static volatile int i=0;
02726 i++;
02727 if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
02728 selfTest(src, srcStride, c->srcW, c->srcH);
02729 i--;
02730 }
02731 #endif
02732
02733
02734
02735
02736 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
02737 {
02738 static int warnedAlready=0;
02739 if (flags & SWS_PRINT_INFO && !warnedAlready)
02740 {
02741 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02742 " -> cannot do aligned memory accesses anymore\n");
02743 warnedAlready=1;
02744 }
02745 }
02746
02747
02748
02749
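/* First slice of a new frame: reset the ring-buffer state. */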
02750 if (srcSliceY ==0){
02751 lumBufIndex=0;
02752 chrBufIndex=0;
02753 dstY=0;
02754 lastInLumBuf= -1;
02755 lastInChrBuf= -1;
02756 }
02757
02758 lastDstY= dstY;
02759
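/* For each output line: determine which input lines it needs, horizontally
 * scale any that are not buffered yet, then vertically scale/convert into
 * the destination. */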
02760 for (;dstY < dstH; dstY++){
02761 unsigned char *dest =dst[0]+dstStride[0]*dstY;
02762 const int chrDstY= dstY>>c->chrDstVSubSample;
02763 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
02764 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
02765
02766 const int firstLumSrcY= vLumFilterPos[dstY];
02767 const int firstChrSrcY= vChrFilterPos[chrDstY];
02768 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1;
02769 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1;
02770
02771
02772
02773
02774 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02775 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02776
02777 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02778 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02779
02780
02781 if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
02782 {
02783
02784 while(lastInLumBuf < lastLumSrcY)
02785 {
02786 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02787 lumBufIndex++;
02788
02789 assert(lumBufIndex < 2*vLumBufSize);
02790 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02791 assert(lastInLumBuf + 1 - srcSliceY >= 0);
02792
02793 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
02794 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
02795 funnyYCode, c->srcFormat, formatConvBuffer,
02796 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
02797 lastInLumBuf++;
02798 }
02799 while(lastInChrBuf < lastChrSrcY)
02800 {
02801 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02802 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02803 chrBufIndex++;
02804 assert(chrBufIndex < 2*vChrBufSize);
02805 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02806 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02807
02808
02809 if (!(isGray(srcFormat) || isGray(dstFormat)))
02810 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
02811 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
02812 funnyUVCode, c->srcFormat, formatConvBuffer,
02813 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
02814 lastInChrBuf++;
02815 }
02816
02817 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02818 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02819 }
02820 else
02821 {
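/* Not all input lines needed for the next output line are in this slice:
 * buffer whatever the slice still provides, then stop and wait for the
 * next slice. */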
02822
02828 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
02829 {
02830 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02831 lumBufIndex++;
02832 assert(lumBufIndex < 2*vLumBufSize);
02833 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02834 assert(lastInLumBuf + 1 - srcSliceY >= 0);
02835 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
02836 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
02837 funnyYCode, c->srcFormat, formatConvBuffer,
02838 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
02839 lastInLumBuf++;
02840 }
02841 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
02842 {
02843 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02844 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02845 chrBufIndex++;
02846 assert(chrBufIndex < 2*vChrBufSize);
02847 assert(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH);
02848 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02849
02850 if (!(isGray(srcFormat) || isGray(dstFormat)))
02851 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
02852 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
02853 funnyUVCode, c->srcFormat, formatConvBuffer,
02854 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
02855 lastInChrBuf++;
02856 }
02857
02858 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02859 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02860 break;
02861 }
02862
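/* Per-line dither offsets for the packed 15/16 bpp RGB output code. */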
02863 #if HAVE_MMX
02864 c->blueDither= ff_dither8[dstY&1];
02865 if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
02866 c->greenDither= ff_dither8[dstY&1];
02867 else
02868 c->greenDither= ff_dither4[dstY&1];
02869 c->redDither= ff_dither8[(dstY+1)&1];
02870 #endif
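/* The last two output lines are handled separately below with the plain C
 * output functions, presumably because the MMX writers can store a few bytes
 * past the end of a line. */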
02871 if (dstY < dstH-2)
02872 {
02873 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02874 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02875 #if HAVE_MMX
02876 int i;
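/* Prepare the per-line vertical filter data for the MMX code. With
 * SWS_ACCURATE_RND the layout matches the APCK_* offsets used by the
 * *_ACCURATE asm (source pointers stored in pairs, the two 16-bit
 * coefficients packed into one dword); otherwise each tap stores the source
 * pointer split into two 32-bit halves followed by the coefficient
 * duplicated across two dwords. */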
02877 if (flags & SWS_ACCURATE_RND){
02878 int s= APCK_SIZE / 8;
02879 for (i=0; i<vLumFilterSize; i+=2){
02880 *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
02881 *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
02882 lumMmxFilter[s*i+APCK_COEF/4 ]=
02883 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
02884 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
02885 }
02886 for (i=0; i<vChrFilterSize; i+=2){
02887 *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
02888 *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
02889 chrMmxFilter[s*i+APCK_COEF/4 ]=
02890 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
02891 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
02892 }
02893 }else{
02894 for (i=0; i<vLumFilterSize; i++)
02895 {
02896 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
02897 lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
02898 lumMmxFilter[4*i+2]=
02899 lumMmxFilter[4*i+3]=
02900 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
02901 }
02902 for (i=0; i<vChrFilterSize; i++)
02903 {
02904 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
02905 chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
02906 chrMmxFilter[4*i+2]=
02907 chrMmxFilter[4*i+3]=
02908 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
02909 }
02910 }
02911 #endif
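/* Pick the output path: NV12/NV21 (interleaved chroma), planar YUV/gray, or
 * the packed RGB/YUV writers; the packed writers have fast cases for 1- and
 * 2-tap vertical filters. */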
02912 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
02913 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02914 if (dstY&chrSkipMask) uDest= NULL;
02915 RENAME(yuv2nv12X)(c,
02916 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
02917 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02918 dest, uDest, dstW, chrDstW, dstFormat);
02919 }
02920 else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8)
02921 {
02922 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02923 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
02924 if (vLumFilterSize == 1 && vChrFilterSize == 1)
02925 {
02926 int16_t *lumBuf = lumPixBuf[0];
02927 int16_t *chrBuf= chrPixBuf[0];
02928 RENAME(yuv2yuv1)(c, lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
02929 }
02930 else
02931 {
02932 RENAME(yuv2yuvX)(c,
02933 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
02934 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02935 dest, uDest, vDest, dstW, chrDstW);
02936 }
02937 }
02938 else
02939 {
02940 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
02941 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
02942 if (vLumFilterSize == 1 && vChrFilterSize == 2)
02943 {
02944 int chrAlpha= vChrFilter[2*dstY+1];
02945 if(flags & SWS_FULL_CHR_H_INT){
02946 yuv2rgbXinC_full(c,
02947 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02948 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02949 dest, dstW, dstY);
02950 }else{
02951 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
02952 dest, dstW, chrAlpha, dstFormat, flags, dstY);
02953 }
02954 }
02955 else if (vLumFilterSize == 2 && vChrFilterSize == 2)
02956 {
02957 int lumAlpha= vLumFilter[2*dstY+1];
02958 int chrAlpha= vChrFilter[2*dstY+1];
02959 lumMmxFilter[2]=
02960 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
02961 chrMmxFilter[2]=
02962 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
02963 if(flags & SWS_FULL_CHR_H_INT){
02964 yuv2rgbXinC_full(c,
02965 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02966 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02967 dest, dstW, dstY);
02968 }else{
02969 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
02970 dest, dstW, lumAlpha, chrAlpha, dstY);
02971 }
02972 }
02973 else
02974 {
02975 if(flags & SWS_FULL_CHR_H_INT){
02976 yuv2rgbXinC_full(c,
02977 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02978 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02979 dest, dstW, dstY);
02980 }else{
02981 RENAME(yuv2packedX)(c,
02982 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02983 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02984 dest, dstW, dstY);
02985 }
02986 }
02987 }
02988 }
02989 else
02990 {
02991 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02992 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02993 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
02994 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02995 if (dstY&chrSkipMask) uDest= NULL;
02996 yuv2nv12XinC(
02997 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
02998 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02999 dest, uDest, dstW, chrDstW, dstFormat);
03000 }
03001 else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8)
03002 {
03003 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
03004 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
03005 yuv2yuvXinC(
03006 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
03007 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03008 dest, uDest, vDest, dstW, chrDstW);
03009 }
03010 else
03011 {
03012 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
03013 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
03014 if(flags & SWS_FULL_CHR_H_INT){
03015 yuv2rgbXinC_full(c,
03016 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
03017 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03018 dest, dstW, dstY);
03019 }else{
03020 yuv2packedXinC(c,
03021 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
03022 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
03023 dest, dstW, dstY);
03024 }
03025 }
03026 }
03027 }
03028
03029 #if HAVE_MMX
03030 __asm__ volatile(SFENCE:::"memory");
03031 __asm__ volatile(EMMS:::"memory");
03032 #endif
03033
03034 c->dstY= dstY;
03035 c->lumBufIndex= lumBufIndex;
03036 c->chrBufIndex= chrBufIndex;
03037 c->lastInLumBuf= lastInLumBuf;
03038 c->lastInChrBuf= lastInChrBuf;
03039
03040 return dstY - lastDstY;
03041 }