35 0x0200020002000200LL,};
39 0x0004000400040004LL,};
60 #define COMPILE_TEMPLATE_MMXEXT 0 61 #define RENAME(a) a ## _mmx 66 #if HAVE_MMXEXT_INLINE 68 #undef COMPILE_TEMPLATE_MMXEXT 69 #define COMPILE_TEMPLATE_MMXEXT 1 70 #define RENAME(a) a ## _mmxext 94 const int firstLumSrcY= vLumFilterPos[
dstY];
95 const int firstChrSrcY= vChrFilterPos[chrDstY];
103 if (dstY < dstH - 2) {
104 const int16_t **lumSrcPtr = (
const int16_t **)(
void*) lumPlane->
line + firstLumSrcY - lumPlane->
sliceY;
105 const int16_t **chrUSrcPtr = (
const int16_t **)(
void*) chrUPlane->
line + firstChrSrcY - chrUPlane->
sliceY;
106 const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (
const int16_t **)(
void*) alpPlane->
line + firstLumSrcY - alpPlane->
sliceY :
NULL;
109 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->
srcH) {
110 const int16_t **tmpY = (
const int16_t **) lumPlane->
tmp;
112 int neg = -firstLumSrcY,
i,
end =
FFMIN(c->
srcH - firstLumSrcY, vLumFilterSize);
113 for (
i = 0;
i < neg;
i++)
114 tmpY[
i] = lumSrcPtr[neg];
115 for ( ;
i <
end;
i++)
116 tmpY[
i] = lumSrcPtr[
i];
122 const int16_t **tmpA = (
const int16_t **) alpPlane->
tmp;
124 tmpA[
i] = alpSrcPtr[neg];
125 for ( ;
i <
end;
i++)
126 tmpA[
i] = alpSrcPtr[
i];
128 tmpA[
i] = tmpA[
i - 1];
132 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->
chrSrcH) {
133 const int16_t **tmpU = (
const int16_t **) chrUPlane->
tmp;
134 int neg = -firstChrSrcY,
i,
end =
FFMIN(c->
chrSrcH - firstChrSrcY, vChrFilterSize);
135 for (
i = 0;
i < neg;
i++) {
136 tmpU[
i] = chrUSrcPtr[neg];
138 for ( ;
i <
end;
i++) {
139 tmpU[
i] = chrUSrcPtr[
i];
142 tmpU[
i] = tmpU[
i - 1];
150 *(
const void**)&lumMmxFilter[s*
i ]= lumSrcPtr[
i ];
151 *(
const void**)&lumMmxFilter[s*
i+
APCK_PTR2/4 ]= lumSrcPtr[
i+(vLumFilterSize>1)];
153 lumMmxFilter[s*
i+
APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize +
i ]
154 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize +
i + 1] * (1 << 16) : 0);
155 if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
156 *(
const void**)&alpMmxFilter[s*
i ]= alpSrcPtr[
i ];
157 *(
const void**)&alpMmxFilter[s*
i+
APCK_PTR2/4 ]= alpSrcPtr[
i+(vLumFilterSize>1)];
163 *(
const void**)&chrMmxFilter[s*
i ]= chrUSrcPtr[
i ];
164 *(
const void**)&chrMmxFilter[s*
i+
APCK_PTR2/4 ]= chrUSrcPtr[
i+(vChrFilterSize>1)];
166 chrMmxFilter[s*
i+
APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize +
i ]
167 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize +
i + 1] * (1 << 16) : 0);
171 *(
const void**)&lumMmxFilter[4*
i+0]= lumSrcPtr[
i];
174 ((uint16_t)vLumFilter[dstY*vLumFilterSize +
i])*0x10001
U;
175 if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
176 *(
const void**)&alpMmxFilter[4*
i+0]= alpSrcPtr[
i];
178 alpMmxFilter[4*
i+3]= lumMmxFilter[4*
i+2];
182 *(
const void**)&chrMmxFilter[4*
i+0]= chrUSrcPtr[
i];
185 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize +
i])*0x10001
U;
192 #define YUV2YUVX_FUNC_MMX(opt, step) \ 193 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \ 194 uint8_t *dest, int dstW, \ 195 const uint8_t *dither, int offset); \ 196 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \ 197 const int16_t **src, uint8_t *dest, int dstW, \ 198 const uint8_t *dither, int offset) \ 200 ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \ 204 #define YUV2YUVX_FUNC(opt, step) \ 205 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \ 206 uint8_t *dest, int dstW, \ 207 const uint8_t *dither, int offset); \ 208 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \ 209 const int16_t **src, uint8_t *dest, int dstW, \ 210 const uint8_t *dither, int offset) \ 212 int remainder = (dstW % step); \ 213 int pixelsProcessed = dstW - remainder; \ 214 if(((uintptr_t)dest) & 15){ \ 215 yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset); \ 218 ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \ 220 ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \ 225 #if HAVE_MMX_EXTERNAL 228 #if HAVE_MMXEXT_EXTERNAL 231 #if HAVE_SSE3_EXTERNAL 234 #if HAVE_AVX2_EXTERNAL 238 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ 239 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ 240 SwsContext *c, int16_t *data, \ 241 int dstW, const uint8_t *src, \ 242 const int16_t *filter, \ 243 const int32_t *filterPos, int filterSize) 245 #define SCALE_FUNCS(filter_n, opt) \ 246 SCALE_FUNC(filter_n, 8, 15, opt); \ 247 SCALE_FUNC(filter_n, 9, 15, opt); \ 248 SCALE_FUNC(filter_n, 10, 15, opt); \ 249 SCALE_FUNC(filter_n, 12, 15, opt); \ 250 SCALE_FUNC(filter_n, 14, 15, opt); \ 251 SCALE_FUNC(filter_n, 16, 15, opt); \ 252 SCALE_FUNC(filter_n, 8, 19, opt); \ 253 
SCALE_FUNC(filter_n, 9, 19, opt); \ 254 SCALE_FUNC(filter_n, 10, 19, opt); \ 255 SCALE_FUNC(filter_n, 12, 19, opt); \ 256 SCALE_FUNC(filter_n, 14, 19, opt); \ 257 SCALE_FUNC(filter_n, 16, 19, opt) 259 #define SCALE_FUNCS_MMX(opt) \ 260 SCALE_FUNCS(4, opt); \ 261 SCALE_FUNCS(8, opt); \ 264 #define SCALE_FUNCS_SSE(opt) \ 265 SCALE_FUNCS(4, opt); \ 266 SCALE_FUNCS(8, opt); \ 267 SCALE_FUNCS(X4, opt); \ 277 #define VSCALEX_FUNC(size, opt) \ 278 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ 279 const int16_t **src, uint8_t *dest, int dstW, \ 280 const uint8_t *dither, int offset) 281 #define VSCALEX_FUNCS(opt) \ 282 VSCALEX_FUNC(8, opt); \ 283 VSCALEX_FUNC(9, opt); \ 284 VSCALEX_FUNC(10, opt) 294 #define VSCALE_FUNC(size, opt) \ 295 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ 296 const uint8_t *dither, int offset) 297 #define VSCALE_FUNCS(opt1, opt2) \ 298 VSCALE_FUNC(8, opt1); \ 299 VSCALE_FUNC(9, opt2); \ 300 VSCALE_FUNC(10, opt2); \ 301 VSCALE_FUNC(16, opt1) 310 #define INPUT_Y_FUNC(fmt, opt) \ 311 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ 312 const uint8_t *unused1, const uint8_t *unused2, \ 313 int w, uint32_t *unused) 314 #define INPUT_UV_FUNC(fmt, opt) \ 315 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ 316 const uint8_t *unused0, \ 317 const uint8_t *src1, \ 318 const uint8_t *src2, \ 319 int w, uint32_t *unused) 320 #define INPUT_FUNC(fmt, opt) \ 321 INPUT_Y_FUNC(fmt, opt); \ 322 INPUT_UV_FUNC(fmt, opt) 323 #define INPUT_FUNCS(opt) \ 324 INPUT_FUNC(uyvy, opt); \ 325 INPUT_FUNC(yuyv, opt); \ 326 INPUT_UV_FUNC(nv12, opt); \ 327 INPUT_UV_FUNC(nv21, opt); \ 328 INPUT_FUNC(rgba, opt); \ 329 INPUT_FUNC(bgra, opt); \ 330 INPUT_FUNC(argb, opt); \ 331 INPUT_FUNC(abgr, opt); \ 332 INPUT_FUNC(rgb24, opt); \ 333 INPUT_FUNC(bgr24, opt) 343 #define YUV2NV_DECL(fmt, opt) \ 344 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t 
*dither, \ 345 const int16_t *filter, int filterSize, \ 346 const int16_t **u, const int16_t **v, \ 347 uint8_t *dst, int dstWidth) 349 YUV2NV_DECL(nv12, avx2);
350 YUV2NV_DECL(nv21, avx2);
359 sws_init_swscale_mmx(c);
361 #if HAVE_MMXEXT_INLINE 363 sws_init_swscale_mmxext(c);
366 #if HAVE_MMX_EXTERNAL 370 #if HAVE_MMXEXT_EXTERNAL 374 #if HAVE_SSE3_EXTERNAL 378 #if HAVE_AVX2_EXTERNAL 384 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ 385 if (c->srcBpc == 8) { \ 386 hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ 387 ff_hscale8to19_ ## filtersize ## _ ## opt1; \ 388 } else if (c->srcBpc == 9) { \ 389 hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ 390 ff_hscale9to19_ ## filtersize ## _ ## opt1; \ 391 } else if (c->srcBpc == 10) { \ 392 hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ 393 ff_hscale10to19_ ## filtersize ## _ ## opt1; \ 394 } else if (c->srcBpc == 12) { \ 395 hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \ 396 ff_hscale12to19_ ## filtersize ## _ ## opt1; \ 397 } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \ 398 hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ 399 ff_hscale14to19_ ## filtersize ## _ ## opt1; \ 401 av_assert0(c->srcBpc == 16);\ 402 hscalefn = c->dstBpc <= 14 ? 
ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ 403 ff_hscale16to19_ ## filtersize ## _ ## opt1; \ 406 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ 407 switch (filtersize) { \ 408 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ 409 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ 410 default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ 412 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ 414 case 16: do_16_case; break; \ 415 case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ 416 case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ 417 case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ 419 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ 421 case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ 422 case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ 423 case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ 424 case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ 425 default: av_assert0(c->dstBpc>8); \ 427 #define case_rgb(x, X, opt) \ 428 case AV_PIX_FMT_ ## X: \ 429 c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ 430 if (!c->chrSrcHSubSample) \ 431 c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ 473 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ 474 switch (filtersize) { \ 475 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ 476 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ 477 default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ 478 else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ 485 HAVE_ALIGNED_STACK || ARCH_X86_64);
534 HAVE_ALIGNED_STACK || ARCH_X86_64);
541 HAVE_ALIGNED_STACK || ARCH_X86_64);
#define EXTERNAL_MMX(flags)
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
#define YUV2YUVX_FUNC_MMX(opt, step)
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
int chrSrcH
Height of source chroma planes.
8 bits gray, 8 bits alpha
#define VSCALE_FUNC(size, opt)
#define SCALE_FUNCS_MMX(opt)
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) output line.
static atomic_int cpu_flags
int dstY
Last destination vertical line output from last slice.
#define case_rgb(x, X, opt)
Macro definitions for various function/variable attributes.
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
int srcH
Height of source luma/alpha planes.
#define VSCALE_FUNCS(opt1, opt2)
#define EXTERNAL_SSE4(flags)
int chrDstVSubSample
Binary logarithm of the vertical subsampling factor between the luma/alpha and chroma planes in the destination image.
uint8_t ** line
line buffer
int vChrFilterSize
Vertical filter size for chroma pixels.
static av_cold int end(AVCodecContext *avctx)
Undefined Behavior: in the C language, some operations are undefined — for example signed integer overflow, dereferencing freed pointers, and accessing memory outside the allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. This unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs, and optimizing code based on wrong assumptions can (and in some cases has) led to effects beyond the output of the computations. A common instance is the signed-integer-overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that the intermediate computation overflows even though the final output does not depend on the overflowed value.
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define EXTERNAL_SSE3(flags)
enum AVPixelFormat dstFormat
Destination pixel format.
#define EXTERNAL_SSE2(flags)
#define VSCALEX_FUNCS(opt)
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define EXTERNAL_AVX2_FAST(flags)
#define INLINE_MMX(flags)
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V).
simple assert() macros that are a bit more flexible than ISO C assert().
#define YUV2YUVX_FUNC(opt, step)
SwsPlane plane[MAX_SLICE_PLANES]
color planes
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V).
as above, but U and V bytes are swapped
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
yuv2planar1_fn yuv2plane1
#define SCALE_FUNCS_SSE(opt)
yuv2interleavedX_fn yuv2nv12cX
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
#define AV_PIX_FMT_BGR555
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
const uint64_t ff_dither4[2]
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
yuv2planarX_fn yuv2planeX
#define DECLARE_ASM_ALIGNED(n, t, v)
Declare an aligned variable appropriate for use in inline assembly code.
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
#define EXTERNAL_SSSE3(flags)
as above, but U and V bytes are swapped
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define flags(name, subs,...)
#define EXTERNAL_MMXEXT(flags)
#define VSCALEX_FUNC(size, opt)
#define INLINE_MMXEXT(flags)
enum AVPixelFormat srcFormat
Source pixel format.
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define AV_PIX_FMT_RGB555
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
uint8_t ** tmp
Tmp line buffer used by mmx code.
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
int sliceY
index of first line
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
#define EXTERNAL_AVX(flags)
const uint64_t ff_dither8[2]