00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <assert.h>
00022 #include <inttypes.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include <string.h>
00026
00027 #include "libavutil/avassert.h"
00028 #include "libavutil/avutil.h"
00029 #include "libavutil/bswap.h"
00030 #include "libavutil/cpu.h"
00031 #include "libavutil/intreadwrite.h"
00032 #include "libavutil/mathematics.h"
00033 #include "libavutil/pixdesc.h"
00034 #include "config.h"
00035 #include "rgb2rgb.h"
00036 #include "swscale_internal.h"
00037 #include "swscale.h"
00038
/* 8x8 table of dither values (all entries lie in 0..126), declared through
 * DECLARE_ALIGNED with an alignment argument of 8.
 * swScale() selects one row per output line (row index = line & 7) and
 * installs it as c->lumDither8 / c->chrDither8 when dithering is enabled. */
00039 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00040 { 36, 68, 60, 92, 34, 66, 58, 90, },
00041 { 100, 4, 124, 28, 98, 2, 122, 26, },
00042 { 52, 84, 44, 76, 50, 82, 42, 74, },
00043 { 116, 20, 108, 12, 114, 18, 106, 10, },
00044 { 32, 64, 56, 88, 38, 70, 62, 94, },
00045 { 96, 0, 120, 24, 102, 6, 126, 30, },
00046 { 48, 80, 40, 72, 54, 86, 46, 78, },
00047 { 112, 16, 104, 8, 118, 22, 110, 14, },
00048 };
00049
/* Eight bytes, all equal to 64. swScale() installs this row as both
 * c->lumDither8 and c->chrDither8 when the source format is neither
 * 9/10-bit nor 16-bit, i.e. when no per-line dither row is selected. */
00050 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
00051 64, 64, 64, 64, 64, 64, 64, 64
00052 };
00053
00054 static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
00055 int height, int y, uint8_t val)
00056 {
00057 int i;
00058 uint8_t *ptr = plane + stride * y;
00059 for (i = 0; i < height; i++) {
00060 memset(ptr, val, width);
00061 ptr += stride;
00062 }
00063 }
00064
00065 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
00066 const uint8_t *_src, const int16_t *filter,
00067 const int32_t *filterPos, int filterSize)
00068 {
00069 int i;
00070 int32_t *dst = (int32_t *) _dst;
00071 const uint16_t *src = (const uint16_t *) _src;
00072 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00073 int sh = bits - 4;
00074
00075 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
00076 sh= 9;
00077
00078 for (i = 0; i < dstW; i++) {
00079 int j;
00080 int srcPos = filterPos[i];
00081 int val = 0;
00082
00083 for (j = 0; j < filterSize; j++) {
00084 val += src[srcPos + j] * filter[filterSize * i + j];
00085 }
00086
00087 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
00088 }
00089 }
00090
00091 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
00092 const uint8_t *_src, const int16_t *filter,
00093 const int32_t *filterPos, int filterSize)
00094 {
00095 int i;
00096 const uint16_t *src = (const uint16_t *) _src;
00097 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00098
00099 if(sh<15)
00100 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00101
00102 for (i = 0; i < dstW; i++) {
00103 int j;
00104 int srcPos = filterPos[i];
00105 int val = 0;
00106
00107 for (j = 0; j < filterSize; j++) {
00108 val += src[srcPos + j] * filter[filterSize * i + j];
00109 }
00110
00111 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
00112 }
00113 }
00114
00115
00116 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
00117 const uint8_t *src, const int16_t *filter,
00118 const int32_t *filterPos, int filterSize)
00119 {
00120 int i;
00121 for (i = 0; i < dstW; i++) {
00122 int j;
00123 int srcPos = filterPos[i];
00124 int val = 0;
00125 for (j = 0; j < filterSize; j++) {
00126 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00127 }
00128 dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
00129 }
00130 }
00131
00132 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
00133 const uint8_t *src, const int16_t *filter,
00134 const int32_t *filterPos, int filterSize)
00135 {
00136 int i;
00137 int32_t *dst = (int32_t *) _dst;
00138 for (i = 0; i < dstW; i++) {
00139 int j;
00140 int srcPos = filterPos[i];
00141 int val = 0;
00142 for (j = 0; j < filterSize; j++) {
00143 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00144 }
00145 dst[i] = FFMIN(val >> 3, (1 << 19) - 1);
00146 }
00147 }
00148
00149
00150
/**
 * In-place range expansion of two chroma lines (15-bit intermediates).
 *
 * Each sample is clamped to at most 30775 and then mapped through
 * (x * 4663 - 9289992) >> 12, which sends 30775 to 32767.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    const int ceiling = 30775;
    const int gain    = 4663;
    const int bias    = 9289992;
    int x;

    for (x = 0; x < width; x++) {
        int u, v;

        u       = dstU[x] < ceiling ? dstU[x] : ceiling;
        dstU[x] = (u * gain - bias) >> 12;
        v       = dstV[x] < ceiling ? dstV[x] : ceiling;
        dstV[x] = (v * gain - bias) >> 12;
    }
}
00159
/**
 * In-place range compression of two chroma lines (15-bit intermediates).
 *
 * Each sample x becomes (x * 1799 + 4081085) >> 11, which sends 32767
 * to 30775.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    const int gain = 1799;
    const int bias = 4081085;
    int16_t *u = dstU;
    int16_t *v = dstV;
    int remaining;

    for (remaining = width; remaining > 0; remaining--, u++, v++) {
        *u = (*u * gain + bias) >> 11;
        *v = (*v * gain + bias) >> 11;
    }
}
00168
/**
 * In-place range expansion of one luma line (15-bit intermediates).
 *
 * Each sample is clamped to at most 30189 and then mapped through
 * (x * 19077 - 39057361) >> 14, which sends 30189 to 32767.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    const int ceiling = 30189;
    int x;

    for (x = 0; x < width; x++) {
        const int y = dst[x] < ceiling ? dst[x] : ceiling;

        dst[x] = (y * 19077 - 39057361) >> 14;
    }
}
00175
/**
 * In-place range compression of one luma line (15-bit intermediates).
 *
 * Each sample x becomes (x * 14071 + 33561947) >> 14, which sends 0 to
 * 2048 and 32767 to 30189.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p         = dst;
    int16_t *const end = dst + (width > 0 ? width : 0);

    while (p < end) {
        *p = (*p * 14071 + 33561947) >> 14;
        p++;
    }
}
00182
/**
 * In-place range expansion of two chroma lines held as 32-bit
 * intermediates (19-bit scale).
 *
 * Each sample is clamped to at most 30775 << 4 and then mapped through
 * (x * 4663 - (9289992 << 4)) >> 12.
 *
 * The product is computed in 64 bits: (30775 << 4) * 4663 is 2296061200,
 * which does not fit in a 32-bit int, so the plain int expression
 * overflowed (undefined behaviour) for every clamped sample. The final
 * result always fits in int32_t for inputs at or below the clamp.
 *
 * @param _dstU first chroma line; the storage actually holds int32_t samples
 * @param _dstV second chroma line; the storage actually holds int32_t samples
 * @param width number of samples in each line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU         = (int32_t *) _dstU;
    int32_t *dstV         = (int32_t *) _dstV;
    const int32_t ceiling = 30775 << 4;
    const int64_t bias    = 9289992 << 4;
    int i;

    for (i = 0; i < width; i++) {
        int64_t u, v;

        u       = dstU[i] < ceiling ? dstU[i] : ceiling;
        dstU[i] = (int32_t) ((u * 4663 - bias) >> 12);
        v       = dstV[i] < ceiling ? dstV[i] : ceiling;
        dstV[i] = (int32_t) ((v * 4663 - bias) >> 12);
    }
}
00193
/**
 * In-place range compression of two chroma lines held as 32-bit
 * intermediates (19-bit scale).
 *
 * Each sample x becomes (x * 1799 + (4081085 << 4)) >> 11.
 *
 * @param _dstU first chroma line; the storage actually holds int32_t samples
 * @param _dstV second chroma line; the storage actually holds int32_t samples
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u     = (int32_t *) _dstU;
    int32_t *v     = (int32_t *) _dstV;
    const int gain = 1799;
    const int bias = 4081085 << 4;
    int x;

    for (x = 0; x < width; x++) {
        u[x] = (u[x] * gain + bias) >> 11;
        v[x] = (v[x] * gain + bias) >> 11;
    }
}
00204
/**
 * In-place range expansion of one luma line held as 32-bit intermediates
 * (19-bit scale).
 *
 * Each sample is clamped to at most 30189 << 4 and then mapped through
 * (x * 4769 - (39057361 << 2)) >> 12.
 *
 * The product is computed in 64 bits: (30189 << 4) * 4769 is 2303541456,
 * which does not fit in a 32-bit int, so the plain int expression
 * overflowed (undefined behaviour) for every clamped sample. The final
 * result always fits in int32_t for inputs at or below the clamp.
 *
 * @param _dst  luma line; the storage actually holds int32_t samples
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst          = (int32_t *) _dst;
    const int32_t ceiling = 30189 << 4;
    const int64_t bias    = 39057361 << 2;
    int i;

    for (i = 0; i < width; i++) {
        const int64_t y = dst[i] < ceiling ? dst[i] : ceiling;

        dst[i] = (int32_t) ((y * 4769 - bias) >> 12);
    }
}
00212
/**
 * In-place range compression of one luma line held as 32-bit intermediates
 * (19-bit scale).
 *
 * Each sample x becomes (x * (14071 / 4) + (33561947 << 4) / 4) >> 12;
 * the divisions are integer divisions of the constants.
 *
 * @param _dst  luma line; the storage actually holds int32_t samples
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *line  = (int32_t *) _dst;
    const int gain = 14071 / 4;
    const int bias = (33561947 << 4) / 4;
    int x;

    for (x = 0; x < width; x++) {
        const int scaled = line[x] * gain + bias;

        line[x] = scaled >> 12;
    }
}
00220
00221 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
00222 const uint8_t *src, int srcW, int xInc)
00223 {
00224 int i;
00225 unsigned int xpos = 0;
00226 for (i = 0; i < dstWidth; i++) {
00227 register unsigned int xx = xpos >> 16;
00228 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00229 dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
00230 xpos += xInc;
00231 }
00232 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
00233 dst[i] = src[srcW-1]*128;
00234 }
00235
00236
00237 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
00238 const uint8_t *src_in[4],
00239 int srcW, int xInc,
00240 const int16_t *hLumFilter,
00241 const int32_t *hLumFilterPos,
00242 int hLumFilterSize,
00243 uint8_t *formatConvBuffer,
00244 uint32_t *pal, int isAlpha)
00245 {
00246 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
00247 isAlpha ? c->alpToYV12 : c->lumToYV12;
00248 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
00249 const uint8_t *src = src_in[isAlpha ? 3 : 0];
00250
00251 if (toYV12) {
00252 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
00253 src = formatConvBuffer;
00254 } else if (c->readLumPlanar && !isAlpha) {
00255 c->readLumPlanar(formatConvBuffer, src_in, srcW);
00256 src = formatConvBuffer;
00257 }
00258
00259 if (!c->hyscale_fast) {
00260 c->hyScale(c, dst, dstWidth, src, hLumFilter,
00261 hLumFilterPos, hLumFilterSize);
00262 } else {
00263 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
00264 }
00265
00266 if (convertRange)
00267 convertRange(dst, dstWidth);
00268 }
00269
00270 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
00271 int dstWidth, const uint8_t *src1,
00272 const uint8_t *src2, int srcW, int xInc)
00273 {
00274 int i;
00275 unsigned int xpos = 0;
00276 for (i = 0; i < dstWidth; i++) {
00277 register unsigned int xx = xpos >> 16;
00278 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00279 dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
00280 dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
00281 xpos += xInc;
00282 }
00283 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
00284 dst1[i] = src1[srcW-1]*128;
00285 dst2[i] = src2[srcW-1]*128;
00286 }
00287 }
00288
00289 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
00290 int16_t *dst2, int dstWidth,
00291 const uint8_t *src_in[4],
00292 int srcW, int xInc,
00293 const int16_t *hChrFilter,
00294 const int32_t *hChrFilterPos,
00295 int hChrFilterSize,
00296 uint8_t *formatConvBuffer, uint32_t *pal)
00297 {
00298 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
00299 if (c->chrToYV12) {
00300 uint8_t *buf2 = formatConvBuffer +
00301 FFALIGN(srcW*2+78, 16);
00302 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
00303 src1= formatConvBuffer;
00304 src2= buf2;
00305 } else if (c->readChrPlanar) {
00306 uint8_t *buf2 = formatConvBuffer +
00307 FFALIGN(srcW*2+78, 16);
00308 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
00309 src1 = formatConvBuffer;
00310 src2 = buf2;
00311 }
00312
00313 if (!c->hcscale_fast) {
00314 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
00315 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
00316 } else {
00317 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
00318 }
00319
00320 if (c->chrConvertRange)
00321 c->chrConvertRange(dst1, dst2, dstWidth);
00322 }
00323
/* Set to 1 to make DEBUG_BUFFERS() emit its messages. */
#define DEBUG_SWSCALE_BUFFERS 0

/*
 * Log a debug message through av_log() when DEBUG_SWSCALE_BUFFERS is
 * nonzero. Expects a variable named `c` in scope at the call site, which
 * is passed as the logging context.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare `if` expansion would silently capture an `else`
 * that follows the macro call in the caller's code.
 */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
00328
00329 static int swScale(SwsContext *c, const uint8_t *src[],
00330 int srcStride[], int srcSliceY,
00331 int srcSliceH, uint8_t *dst[], int dstStride[])
00332 {
00333
00334
00335 const int srcW = c->srcW;
00336 const int dstW = c->dstW;
00337 const int dstH = c->dstH;
00338 const int chrDstW = c->chrDstW;
00339 const int chrSrcW = c->chrSrcW;
00340 const int lumXInc = c->lumXInc;
00341 const int chrXInc = c->chrXInc;
00342 const enum PixelFormat dstFormat = c->dstFormat;
00343 const int flags = c->flags;
00344 int32_t *vLumFilterPos = c->vLumFilterPos;
00345 int32_t *vChrFilterPos = c->vChrFilterPos;
00346 int32_t *hLumFilterPos = c->hLumFilterPos;
00347 int32_t *hChrFilterPos = c->hChrFilterPos;
00348 int16_t *vLumFilter = c->vLumFilter;
00349 int16_t *vChrFilter = c->vChrFilter;
00350 int16_t *hLumFilter = c->hLumFilter;
00351 int16_t *hChrFilter = c->hChrFilter;
00352 int32_t *lumMmxFilter = c->lumMmxFilter;
00353 int32_t *chrMmxFilter = c->chrMmxFilter;
00354 const int vLumFilterSize = c->vLumFilterSize;
00355 const int vChrFilterSize = c->vChrFilterSize;
00356 const int hLumFilterSize = c->hLumFilterSize;
00357 const int hChrFilterSize = c->hChrFilterSize;
00358 int16_t **lumPixBuf = c->lumPixBuf;
00359 int16_t **chrUPixBuf = c->chrUPixBuf;
00360 int16_t **chrVPixBuf = c->chrVPixBuf;
00361 int16_t **alpPixBuf = c->alpPixBuf;
00362 const int vLumBufSize = c->vLumBufSize;
00363 const int vChrBufSize = c->vChrBufSize;
00364 uint8_t *formatConvBuffer = c->formatConvBuffer;
00365 uint32_t *pal = c->pal_yuv;
00366 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
00367 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
00368 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
00369 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
00370 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
00371 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
00372 const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
00373 const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample);
00374 int should_dither = is9_OR_10BPS(c->srcFormat) ||
00375 is16BPS(c->srcFormat);
00376 int lastDstY;
00377
00378
00379 int dstY = c->dstY;
00380 int lumBufIndex = c->lumBufIndex;
00381 int chrBufIndex = c->chrBufIndex;
00382 int lastInLumBuf = c->lastInLumBuf;
00383 int lastInChrBuf = c->lastInChrBuf;
00384
00385 if (isPacked(c->srcFormat)) {
00386 src[0] =
00387 src[1] =
00388 src[2] =
00389 src[3] = src[0];
00390 srcStride[0] =
00391 srcStride[1] =
00392 srcStride[2] =
00393 srcStride[3] = srcStride[0];
00394 }
00395 srcStride[1] <<= c->vChrDrop;
00396 srcStride[2] <<= c->vChrDrop;
00397
00398 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
00399 src[0], srcStride[0], src[1], srcStride[1],
00400 src[2], srcStride[2], src[3], srcStride[3],
00401 dst[0], dstStride[0], dst[1], dstStride[1],
00402 dst[2], dstStride[2], dst[3], dstStride[3]);
00403 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
00404 srcSliceY, srcSliceH, dstY, dstH);
00405 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
00406 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
00407
00408 if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
00409 dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
00410 static int warnedAlready = 0;
00411 if (flags & SWS_PRINT_INFO && !warnedAlready) {
00412 av_log(c, AV_LOG_WARNING,
00413 "Warning: dstStride is not aligned!\n"
00414 " ->cannot do aligned memory accesses anymore\n");
00415 warnedAlready = 1;
00416 }
00417 }
00418
00419 if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
00420 || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
00421 || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
00422 ) {
00423 static int warnedAlready=0;
00424 int cpu_flags = av_get_cpu_flags();
00425 if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
00426 av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
00427 warnedAlready=1;
00428 }
00429 }
00430
00431
00432
00433
00434 if (srcSliceY == 0) {
00435 lumBufIndex = -1;
00436 chrBufIndex = -1;
00437 dstY = 0;
00438 lastInLumBuf = -1;
00439 lastInChrBuf = -1;
00440 }
00441
00442 if (!should_dither) {
00443 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
00444 }
00445 lastDstY = dstY;
00446
00447 for (; dstY < dstH; dstY++) {
00448 const int chrDstY = dstY >> c->chrDstVSubSample;
00449 uint8_t *dest[4] = {
00450 dst[0] + dstStride[0] * dstY,
00451 dst[1] + dstStride[1] * chrDstY,
00452 dst[2] + dstStride[2] * chrDstY,
00453 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
00454 };
00455 int use_mmx_vfilter= c->use_mmx_vfilter;
00456
00457
00458 const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
00459 const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
00460
00461 const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
00462
00463
00464 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
00465 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
00466 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
00467 int enough_lines;
00468
00469
00470 if (firstLumSrcY > lastInLumBuf)
00471 lastInLumBuf = firstLumSrcY - 1;
00472 if (firstChrSrcY > lastInChrBuf)
00473 lastInChrBuf = firstChrSrcY - 1;
00474 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
00475 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
00476
00477 DEBUG_BUFFERS("dstY: %d\n", dstY);
00478 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
00479 firstLumSrcY, lastLumSrcY, lastInLumBuf);
00480 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
00481 firstChrSrcY, lastChrSrcY, lastInChrBuf);
00482
00483
00484 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
00485 lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
00486
00487 if (!enough_lines) {
00488 lastLumSrcY = srcSliceY + srcSliceH - 1;
00489 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
00490 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
00491 lastLumSrcY, lastChrSrcY);
00492 }
00493
00494
00495 while (lastInLumBuf < lastLumSrcY) {
00496 const uint8_t *src1[4] = {
00497 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
00498 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
00499 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
00500 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
00501 };
00502 lumBufIndex++;
00503 assert(lumBufIndex < 2 * vLumBufSize);
00504 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
00505 assert(lastInLumBuf + 1 - srcSliceY >= 0);
00506 hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
00507 hLumFilter, hLumFilterPos, hLumFilterSize,
00508 formatConvBuffer, pal, 0);
00509 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
00510 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
00511 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
00512 formatConvBuffer, pal, 1);
00513 lastInLumBuf++;
00514 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
00515 lumBufIndex, lastInLumBuf);
00516 }
00517 while (lastInChrBuf < lastChrSrcY) {
00518 const uint8_t *src1[4] = {
00519 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
00520 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
00521 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
00522 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
00523 };
00524 chrBufIndex++;
00525 assert(chrBufIndex < 2 * vChrBufSize);
00526 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
00527 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
00528
00529
00530 if (c->needs_hcscale)
00531 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
00532 chrDstW, src1, chrSrcW, chrXInc,
00533 hChrFilter, hChrFilterPos, hChrFilterSize,
00534 formatConvBuffer, pal);
00535 lastInChrBuf++;
00536 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
00537 chrBufIndex, lastInChrBuf);
00538 }
00539
00540 if (lumBufIndex >= vLumBufSize)
00541 lumBufIndex -= vLumBufSize;
00542 if (chrBufIndex >= vChrBufSize)
00543 chrBufIndex -= vChrBufSize;
00544 if (!enough_lines)
00545 break;
00546
00547 #if HAVE_MMX
00548 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
00549 lastInLumBuf, lastInChrBuf);
00550 #endif
00551 if (should_dither) {
00552 c->chrDither8 = dither_8x8_128[chrDstY & 7];
00553 c->lumDither8 = dither_8x8_128[dstY & 7];
00554 }
00555 if (dstY >= dstH - 2) {
00556
00557
00558 ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
00559 &yuv2packed1, &yuv2packed2, &yuv2packedX);
00560 use_mmx_vfilter= 0;
00561 }
00562
00563 {
00564 const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
00565 const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00566 const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00567 const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
00568 (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
00569 int16_t *vLumFilter = c->vLumFilter;
00570 int16_t *vChrFilter = c->vChrFilter;
00571
00572 if (isPlanarYUV(dstFormat) ||
00573 (isGray(dstFormat) && !isALPHA(dstFormat))) {
00574 const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
00575
00576 vLumFilter += dstY * vLumFilterSize;
00577 vChrFilter += chrDstY * vChrFilterSize;
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588 if(use_mmx_vfilter){
00589 vLumFilter= c->lumMmxFilter;
00590 vChrFilter= c->chrMmxFilter;
00591 }
00592
00593 if (vLumFilterSize == 1) {
00594 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
00595 } else {
00596 yuv2planeX(vLumFilter, vLumFilterSize,
00597 lumSrcPtr, dest[0],
00598 dstW, c->lumDither8, 0);
00599 }
00600
00601 if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
00602 if (yuv2nv12cX) {
00603 yuv2nv12cX(c, vChrFilter,
00604 vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
00605 dest[1], chrDstW);
00606 } else if (vChrFilterSize == 1) {
00607 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
00608 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
00609 } else {
00610 yuv2planeX(vChrFilter,
00611 vChrFilterSize, chrUSrcPtr, dest[1],
00612 chrDstW, c->chrDither8, 0);
00613 yuv2planeX(vChrFilter,
00614 vChrFilterSize, chrVSrcPtr, dest[2],
00615 chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
00616 }
00617 }
00618
00619 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
00620 if(use_mmx_vfilter){
00621 vLumFilter= c->alpMmxFilter;
00622 }
00623 if (vLumFilterSize == 1) {
00624 yuv2plane1(alpSrcPtr[0], dest[3], dstW,
00625 c->lumDither8, 0);
00626 } else {
00627 yuv2planeX(vLumFilter,
00628 vLumFilterSize, alpSrcPtr, dest[3],
00629 dstW, c->lumDither8, 0);
00630 }
00631 }
00632 } else {
00633 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize * 2);
00634 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize * 2);
00635 if (c->yuv2packed1 && vLumFilterSize == 1 &&
00636 vChrFilterSize <= 2) {
00637 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
00638 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00639 alpPixBuf ? *alpSrcPtr : NULL,
00640 dest[0], dstW, chrAlpha, dstY);
00641 } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
00642 vChrFilterSize == 2) {
00643 int lumAlpha = vLumFilter[2 * dstY + 1];
00644 int chrAlpha = vChrFilter[2 * dstY + 1];
00645 lumMmxFilter[2] =
00646 lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001;
00647 chrMmxFilter[2] =
00648 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
00649 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00650 alpPixBuf ? alpSrcPtr : NULL,
00651 dest[0], dstW, lumAlpha, chrAlpha, dstY);
00652 } else {
00653 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
00654 lumSrcPtr, vLumFilterSize,
00655 vChrFilter + dstY * vChrFilterSize,
00656 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00657 alpSrcPtr, dest[0], dstW, dstY);
00658 }
00659 }
00660 }
00661 }
00662
00663 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
00664 fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
00665
00666 #if HAVE_MMX2
00667 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
00668 __asm__ volatile ("sfence" ::: "memory");
00669 #endif
00670 emms_c();
00671
00672
00673 c->dstY = dstY;
00674 c->lumBufIndex = lumBufIndex;
00675 c->chrBufIndex = chrBufIndex;
00676 c->lastInLumBuf = lastInLumBuf;
00677 c->lastInChrBuf = lastInChrBuf;
00678
00679 return dstY - lastDstY;
00680 }
00681
00682 static av_cold void sws_init_swScale_c(SwsContext *c)
00683 {
00684 enum PixelFormat srcFormat = c->srcFormat;
00685
00686 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
00687 &c->yuv2nv12cX, &c->yuv2packed1,
00688 &c->yuv2packed2, &c->yuv2packedX);
00689
00690 ff_sws_init_input_funcs(c);
00691
00692
00693 if (c->srcBpc == 8) {
00694 if (c->dstBpc <= 10) {
00695 c->hyScale = c->hcScale = hScale8To15_c;
00696 if (c->flags & SWS_FAST_BILINEAR) {
00697 c->hyscale_fast = hyscale_fast_c;
00698 c->hcscale_fast = hcscale_fast_c;
00699 }
00700 } else {
00701 c->hyScale = c->hcScale = hScale8To19_c;
00702 }
00703 } else {
00704 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
00705 : hScale16To15_c;
00706 }
00707
00708 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
00709 if (c->dstBpc <= 10) {
00710 if (c->srcRange) {
00711 c->lumConvertRange = lumRangeFromJpeg_c;
00712 c->chrConvertRange = chrRangeFromJpeg_c;
00713 } else {
00714 c->lumConvertRange = lumRangeToJpeg_c;
00715 c->chrConvertRange = chrRangeToJpeg_c;
00716 }
00717 } else {
00718 if (c->srcRange) {
00719 c->lumConvertRange = lumRangeFromJpeg16_c;
00720 c->chrConvertRange = chrRangeFromJpeg16_c;
00721 } else {
00722 c->lumConvertRange = lumRangeToJpeg16_c;
00723 c->chrConvertRange = chrRangeToJpeg16_c;
00724 }
00725 }
00726 }
00727
00728 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
00729 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
00730 c->needs_hcscale = 1;
00731 }
00732
00733 SwsFunc ff_getSwsFunc(SwsContext *c)
00734 {
00735 sws_init_swScale_c(c);
00736
00737 if (HAVE_MMX)
00738 ff_sws_init_swScale_mmx(c);
00739 if (HAVE_ALTIVEC)
00740 ff_sws_init_swScale_altivec(c);
00741
00742 return swScale;
00743 }