FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/* Pair of 64-bit dither patterns, declared with 8-byte alignment. The dither
 * setup code further down in this file picks entry [dstY & 1] as the green
 * dither value when the destination format is neither RGB555 nor BGR555. */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/* Pair of 64-bit dither patterns, declared with 8-byte alignment. The dither
 * setup code further down in this file uses entry [dstY & 1] for blue (and
 * for green when the destination is RGB555 or BGR555) and entry
 * [(dstY + 1) & 1] for red. */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
46 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
49 
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
51 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
53 
54 
55 // MMXEXT versions
56 #if HAVE_MMXEXT_INLINE
57 #undef RENAME
58 #undef COMPILE_TEMPLATE_MMXEXT
59 #define COMPILE_TEMPLATE_MMXEXT 1
60 #define RENAME(a) a ## _mmxext
61 #include "swscale_template.c"
62 #endif
63 
65 {
66  const int dstH= c->dstH;
67  const int flags= c->flags;
68 
69  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
70  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
71  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
72 
73  int hasAlpha = c->needAlpha;
74  int32_t *vLumFilterPos= c->vLumFilterPos;
75  int32_t *vChrFilterPos= c->vChrFilterPos;
76  int16_t *vLumFilter= c->vLumFilter;
77  int16_t *vChrFilter= c->vChrFilter;
78  int32_t *lumMmxFilter= c->lumMmxFilter;
79  int32_t *chrMmxFilter= c->chrMmxFilter;
80  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
81  const int vLumFilterSize= c->vLumFilterSize;
82  const int vChrFilterSize= c->vChrFilterSize;
83  const int chrDstY= dstY>>c->chrDstVSubSample;
84  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
85  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
86 
87  c->blueDither= ff_dither8[dstY&1];
88  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
89  c->greenDither= ff_dither8[dstY&1];
90  else
91  c->greenDither= ff_dither4[dstY&1];
92  c->redDither= ff_dither8[(dstY+1)&1];
93  if (dstY < dstH - 2) {
94  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
95  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
96  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
97 
98  int i;
99  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
100  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
101 
102  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
103  for (i = 0; i < neg; i++)
104  tmpY[i] = lumSrcPtr[neg];
105  for ( ; i < end; i++)
106  tmpY[i] = lumSrcPtr[i];
107  for ( ; i < vLumFilterSize; i++)
108  tmpY[i] = tmpY[i-1];
109  lumSrcPtr = tmpY;
110 
111  if (alpSrcPtr) {
112  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
113  for (i = 0; i < neg; i++)
114  tmpA[i] = alpSrcPtr[neg];
115  for ( ; i < end; i++)
116  tmpA[i] = alpSrcPtr[i];
117  for ( ; i < vLumFilterSize; i++)
118  tmpA[i] = tmpA[i - 1];
119  alpSrcPtr = tmpA;
120  }
121  }
122  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
123  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
124  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
125  for (i = 0; i < neg; i++) {
126  tmpU[i] = chrUSrcPtr[neg];
127  }
128  for ( ; i < end; i++) {
129  tmpU[i] = chrUSrcPtr[i];
130  }
131  for ( ; i < vChrFilterSize; i++) {
132  tmpU[i] = tmpU[i - 1];
133  }
134  chrUSrcPtr = tmpU;
135  }
136 
137  if (flags & SWS_ACCURATE_RND) {
138  int s= APCK_SIZE / 8;
139  for (i=0; i<vLumFilterSize; i+=2) {
140  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
141  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
144  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
145  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
146  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
147  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
150  }
151  }
152  for (i=0; i<vChrFilterSize; i+=2) {
153  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
154  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
157  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
158  }
159  } else {
160  for (i=0; i<vLumFilterSize; i++) {
161  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
162  lumMmxFilter[4*i+2]=
163  lumMmxFilter[4*i+3]=
164  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
165  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
166  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
167  alpMmxFilter[4*i+2]=
168  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
169  }
170  }
171  for (i=0; i<vChrFilterSize; i++) {
172  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
173  chrMmxFilter[4*i+2]=
174  chrMmxFilter[4*i+3]=
175  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
176  }
177  }
178  }
179 }
180 #endif /* HAVE_INLINE_ASM */
181 
/*
 * Declares the external routine ff_yuv2yuvX_<opt> and defines the static
 * wrapper yuv2yuvX_<opt> around it.
 *
 * The wrapper does nothing when dstW is not positive. Otherwise it forwards
 * to the external routine with filterSize - 1, a source offset of 0, the
 * destination pointer moved back by `offset`, and a width of dstW + offset.
 * The wrapper's `src` argument and the macro's `step` argument are unused.
 */
#define YUV2YUVX_FUNC_MMX(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    if (dstW <= 0) \
        return; \
    ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, \
                       dstW + offset, dither, offset); \
}
194 
/*
 * Declares the external routine ff_yuv2yuvX_<opt> and defines the static
 * wrapper yuv2yuvX_<opt>.
 *
 * - When `dest` is not 16-byte aligned, the whole call is handed to
 *   yuv2yuvX_mmxext and nothing else happens.
 * - Otherwise the largest multiple of `step` pixels is passed to
 *   ff_yuv2yuvX_<opt>, and any leftover pixels are passed to
 *   ff_yuv2yuvX_mmxext with the already-covered pixel count as its source
 *   offset.
 *
 * Both external calls receive filterSize - 1 and the destination pointer
 * moved back by `offset`.
 */
#define YUV2YUVX_FUNC(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    int tail, bulk; \
    if (((uintptr_t)dest) & 15) { \
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
        return; \
    } \
    tail = (dstW % step); \
    bulk = dstW - tail; \
    if (bulk > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, \
                           bulk + offset, dither, offset); \
    if (tail > 0) \
        ff_yuv2yuvX_mmxext(filter, filterSize - 1, bulk, dest - offset, \
                           bulk + tail + offset, dither, offset); \
}
216 
217 #if HAVE_MMXEXT_EXTERNAL
218 YUV2YUVX_FUNC_MMX(mmxext, 16)
219 #endif
220 #if HAVE_SSE3_EXTERNAL
221 YUV2YUVX_FUNC(sse3, 32)
222 #endif
223 #if HAVE_AVX2_EXTERNAL
224 YUV2YUVX_FUNC(avx2, 64)
225 #endif
226 
/* Expands to the prototype of the external horizontal scaler
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. The expansion carries no
 * trailing semicolon; the user of the macro supplies it. */
227 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
228 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
229  SwsContext *c, int16_t *data, \
230  int dstW, const uint8_t *src, \
231  const int16_t *filter, \
232  const int32_t *filterPos, int filterSize)
233 
234 #define SCALE_FUNCS(filter_n, opt) \
235  SCALE_FUNC(filter_n, 8, 15, opt); \
236  SCALE_FUNC(filter_n, 9, 15, opt); \
237  SCALE_FUNC(filter_n, 10, 15, opt); \
238  SCALE_FUNC(filter_n, 12, 15, opt); \
239  SCALE_FUNC(filter_n, 14, 15, opt); \
240  SCALE_FUNC(filter_n, 16, 15, opt); \
241  SCALE_FUNC(filter_n, 8, 19, opt); \
242  SCALE_FUNC(filter_n, 9, 19, opt); \
243  SCALE_FUNC(filter_n, 10, 19, opt); \
244  SCALE_FUNC(filter_n, 12, 19, opt); \
245  SCALE_FUNC(filter_n, 14, 19, opt); \
246  SCALE_FUNC(filter_n, 16, 19, opt)
247 
248 #define SCALE_FUNCS_MMX(opt) \
249  SCALE_FUNCS(4, opt); \
250  SCALE_FUNCS(8, opt); \
251  SCALE_FUNCS(X, opt)
252 
253 #define SCALE_FUNCS_SSE(opt) \
254  SCALE_FUNCS(4, opt); \
255  SCALE_FUNCS(8, opt); \
256  SCALE_FUNCS(X4, opt); \
257  SCALE_FUNCS(X8, opt)
258 
259 SCALE_FUNCS_SSE(sse2);
260 SCALE_FUNCS_SSE(ssse3);
261 SCALE_FUNCS_SSE(sse4);
262 
263 SCALE_FUNC(4, 8, 15, avx2);
264 SCALE_FUNC(X4, 8, 15, avx2);
265 
/* Expands to the prototype of the external routine
 * ff_yuv2planeX_<size>_<opt>. No trailing semicolon is emitted. */
266 #define VSCALEX_FUNC(size, opt) \
267 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
268  const int16_t **src, uint8_t *dest, int dstW, \
269  const uint8_t *dither, int offset)
270 #define VSCALEX_FUNCS(opt) \
271  VSCALEX_FUNC(8, opt); \
272  VSCALEX_FUNC(9, opt); \
273  VSCALEX_FUNC(10, opt)
274 
275 VSCALEX_FUNC(8, mmxext);
276 VSCALEX_FUNCS(sse2);
277 VSCALEX_FUNCS(sse4);
278 VSCALEX_FUNC(16, sse4);
279 VSCALEX_FUNCS(avx);
280 
/* Expands to the prototype of the external routine
 * ff_yuv2plane1_<size>_<opt>. No trailing semicolon is emitted. */
281 #define VSCALE_FUNC(size, opt) \
282 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
283  const uint8_t *dither, int offset)
284 #define VSCALE_FUNCS(opt1, opt2) \
285  VSCALE_FUNC(8, opt1); \
286  VSCALE_FUNC(9, opt2); \
287  VSCALE_FUNC(10, opt2); \
288  VSCALE_FUNC(16, opt1)
289 
290 VSCALE_FUNCS(sse2, sse2);
291 VSCALE_FUNC(16, sse4);
292 VSCALE_FUNCS(avx, avx);
293 
/* Expands to the prototype of the external routine ff_<fmt>ToY_<opt>.
 * No trailing semicolon is emitted. */
294 #define INPUT_Y_FUNC(fmt, opt) \
295 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
296  const uint8_t *unused1, const uint8_t *unused2, \
297  int w, uint32_t *unused, void *opq)
/* Expands to the prototype of the external routine ff_<fmt>ToUV_<opt>, which
 * takes two destination pointers (dstU, dstV). No trailing semicolon is
 * emitted. */
298 #define INPUT_UV_FUNC(fmt, opt) \
299 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
300  const uint8_t *unused0, \
301  const uint8_t *src1, \
302  const uint8_t *src2, \
303  int w, uint32_t *unused, void *opq)
304 #define INPUT_FUNC(fmt, opt) \
305  INPUT_Y_FUNC(fmt, opt); \
306  INPUT_UV_FUNC(fmt, opt)
307 #define INPUT_FUNCS(opt) \
308  INPUT_FUNC(uyvy, opt); \
309  INPUT_FUNC(yuyv, opt); \
310  INPUT_UV_FUNC(nv12, opt); \
311  INPUT_UV_FUNC(nv21, opt); \
312  INPUT_FUNC(rgba, opt); \
313  INPUT_FUNC(bgra, opt); \
314  INPUT_FUNC(argb, opt); \
315  INPUT_FUNC(abgr, opt); \
316  INPUT_FUNC(rgb24, opt); \
317  INPUT_FUNC(bgr24, opt)
318 
319 INPUT_FUNCS(sse2);
320 INPUT_FUNCS(ssse3);
321 INPUT_FUNCS(avx);
322 INPUT_FUNC(rgba, avx2);
323 INPUT_FUNC(bgra, avx2);
324 INPUT_FUNC(argb, avx2);
325 INPUT_FUNC(abgr, avx2);
326 INPUT_FUNC(rgb24, avx2);
327 INPUT_FUNC(bgr24, avx2);
328 
329 #if ARCH_X86_64
/* Expands to the prototype of the external routine ff_yuv2<fmt>cX_<opt>.
 * No trailing semicolon is emitted. */
330 #define YUV2NV_DECL(fmt, opt) \
331 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
332  const int16_t *filter, int filterSize, \
333  const int16_t **u, const int16_t **v, \
334  uint8_t *dst, int dstWidth)
335 
336 YUV2NV_DECL(nv12, avx2);
337 YUV2NV_DECL(nv21, avx2);
338 
/* Expands to the prototype of the external routine
 * ff_yuv2<fmt>_full_X_<opt>. No trailing semicolon is emitted. */
339 #define YUV2GBRP_FN_DECL(fmt, opt) \
340 void ff_yuv2##fmt##_full_X_ ##opt(SwsContext *c, const int16_t *lumFilter, \
341  const int16_t **lumSrcx, int lumFilterSize, \
342  const int16_t *chrFilter, const int16_t **chrUSrcx, \
343  const int16_t **chrVSrcx, int chrFilterSize, \
344  const int16_t **alpSrcx, uint8_t **dest, \
345  int dstW, int y)
346 
347 #define YUV2GBRP_DECL(opt) \
348 YUV2GBRP_FN_DECL(gbrp, opt); \
349 YUV2GBRP_FN_DECL(gbrap, opt); \
350 YUV2GBRP_FN_DECL(gbrp9le, opt); \
351 YUV2GBRP_FN_DECL(gbrp10le, opt); \
352 YUV2GBRP_FN_DECL(gbrap10le, opt); \
353 YUV2GBRP_FN_DECL(gbrp12le, opt); \
354 YUV2GBRP_FN_DECL(gbrap12le, opt); \
355 YUV2GBRP_FN_DECL(gbrp14le, opt); \
356 YUV2GBRP_FN_DECL(gbrp16le, opt); \
357 YUV2GBRP_FN_DECL(gbrap16le, opt); \
358 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
359 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
360 YUV2GBRP_FN_DECL(gbrp9be, opt); \
361 YUV2GBRP_FN_DECL(gbrp10be, opt); \
362 YUV2GBRP_FN_DECL(gbrap10be, opt); \
363 YUV2GBRP_FN_DECL(gbrp12be, opt); \
364 YUV2GBRP_FN_DECL(gbrap12be, opt); \
365 YUV2GBRP_FN_DECL(gbrp14be, opt); \
366 YUV2GBRP_FN_DECL(gbrp16be, opt); \
367 YUV2GBRP_FN_DECL(gbrap16be, opt); \
368 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
369 YUV2GBRP_FN_DECL(gbrapf32be, opt)
370 
371 YUV2GBRP_DECL(sse2);
372 YUV2GBRP_DECL(sse4);
373 YUV2GBRP_DECL(avx2);
374 
375 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
376 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
377  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
378  void *opq)
379 
380 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
381 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
382  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
383  void *opq)
384 
385 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
386 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
387  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
388  void *opq)
389 
390 
391 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
392 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
393 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
394 
395 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
396 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
397 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
398 
399 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
400 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
401 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
402 
403 #define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
404 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
405 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
406 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
407 
408 #define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
409 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
410 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
411 
412 #define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
413 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
414 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
415 
416 #define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
417 INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
418 INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
419 INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
420 INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
421 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
422 
423 #define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
424 INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
425 INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
426 INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
427 INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
428 INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
429 INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
430 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
431 
432 #define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
433 INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
434 INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
435 INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
436 INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
437 INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
438 INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
439 INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
440 
441 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
442 INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
443 INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
444 
445 INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
446 INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
447 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
448 
449 INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
450 INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
451 INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
452 #endif
453 
/*
 * Installs the <opt> range-conversion routines into the context `c` that is
 * in scope at the expansion site. Nothing is assigned when c->dstBpc is
 * above 14. A non-zero c->srcRange selects the FromJpeg pair; otherwise the
 * ToJpeg pair is used.
 */
#define RANGE_CONVERT_FUNCS(opt) do { \
    if (c->dstBpc > 14) \
        break; /* leaves this do/while(0) without touching the context */ \
    if (!c->srcRange) { \
        c->lumConvertRange = ff_lumRangeToJpeg_ ##opt; \
        c->chrConvertRange = ff_chrRangeToJpeg_ ##opt; \
    } else { \
        c->lumConvertRange = ff_lumRangeFromJpeg_ ##opt; \
        c->chrConvertRange = ff_chrRangeFromJpeg_ ##opt; \
    } \
} while (0)
465 
/*
 * Declares the four external range-conversion routines for instruction set
 * <opt>: a luma and a chroma routine, each in a FromJpeg and a ToJpeg
 * variant. The luma routines take one int16_t buffer and a width; the chroma
 * routines take two int16_t buffers (dstU, dstV) and a width.
 *
 * The last line intentionally has no backslash, so the definition ends here
 * and cannot absorb whatever line happens to follow it.
 */
#define RANGE_CONVERT_FUNCS_DECL(opt) \
void ff_lumRangeFromJpeg_ ##opt(int16_t *dst, int width); \
void ff_chrRangeFromJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
void ff_lumRangeToJpeg_ ##opt(int16_t *dst, int width); \
void ff_chrRangeToJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width);
471 
474 
476 {
477  if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
478  int cpu_flags = av_get_cpu_flags();
480  RANGE_CONVERT_FUNCS(avx2);
481  } else if (EXTERNAL_SSE2(cpu_flags)) {
482  RANGE_CONVERT_FUNCS(sse2);
483  }
484  }
485 }
486 
488 {
489  int cpu_flags = av_get_cpu_flags();
490 
491 #if HAVE_MMXEXT_INLINE
493  sws_init_swscale_mmxext(c);
494 #endif
495  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
496 #if HAVE_MMXEXT_EXTERNAL
498  c->yuv2planeX = yuv2yuvX_mmxext;
499 #endif
500 #if HAVE_SSE3_EXTERNAL
502  c->yuv2planeX = yuv2yuvX_sse3;
503 #endif
504 #if HAVE_AVX2_EXTERNAL
506  c->yuv2planeX = yuv2yuvX_avx2;
507 #endif
508  }
509 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
510  // The better yuv2planeX_8 functions need aligned stack on x86-32,
511  // so we use MMXEXT in this case if they are not available.
512  if (EXTERNAL_MMXEXT(cpu_flags)) {
513  if (c->dstBpc == 8 && !c->use_mmx_vfilter)
514  c->yuv2planeX = ff_yuv2planeX_8_mmxext;
515  }
516 #endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
517 
/*
 * Assigns to `hscalefn` the external horizontal scaler for the given filter
 * size, chosen by c->srcBpc (`c` is taken from the enclosing scope).
 *
 * Branches are tested in order: 8, 9, 10, 12, then 14. The 14-bit branch is
 * also taken when the source format is PAL8 or any RGB format whose first
 * component depth is below 16. Everything else falls into the last branch,
 * which asserts that c->srcBpc is 16.
 *
 * Within each branch, c->dstBpc <= 14 selects the "to15" routine with suffix
 * `opt2`; otherwise the "to19" routine with suffix `opt1` is selected.
 */
518 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
519  if (c->srcBpc == 8) { \
520  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
521  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
522  } else if (c->srcBpc == 9) { \
523  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
524  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
525  } else if (c->srcBpc == 10) { \
526  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
527  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
528  } else if (c->srcBpc == 12) { \
529  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
530  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
531  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
532  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
533  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
534  } else { /* c->srcBpc == 16 */ \
535  av_assert0(c->srcBpc == 16);\
536  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
537  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
538  } \
539 } while (0)
/*
 * Switches on c->dstBpc (`c` from the enclosing scope) to pick an
 * ff_yuv2planeX_<bits>_<opt> routine for `vscalefn`.
 *
 *  16: runs the caller-supplied statement `do_16_case` (may be empty).
 *  10: assigned unless the destination is big-endian or semi-planar YUV.
 *   9: assigned unless the destination is big-endian.
 *   8: assigned only when `condition_8bit` holds and c->use_mmx_vfilter is
 *      not set.
 *
 * Any other depth leaves `vscalefn` untouched. The expansion is a bare
 * switch statement, not wrapped in do/while(0).
 */
540 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
541 switch(c->dstBpc){ \
542  case 16: do_16_case; break; \
543  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
544  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
545  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
546  }
/*
 * Switches on c->dstBpc (`c` from the enclosing scope) to pick an
 * ff_yuv2plane1_<bits>_<opt> routine for `vscalefn`.
 *
 *  16: assigned unless the destination is big-endian.
 *  10: assigned unless the destination is big-endian or semi-planar YUV.
 *   9: assigned unless the destination is big-endian.
 *   8: always assigned.
 *
 * Any other depth only asserts that c->dstBpc is greater than 8. The
 * expansion is a bare switch statement, not wrapped in do/while(0).
 */
547 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
548  switch(c->dstBpc){ \
549  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
550  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
551  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
552  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
553  default: av_assert0(c->dstBpc>8); \
554  }
/*
 * Expands to a `case AV_PIX_FMT_<X>:` arm for use inside a switch on the
 * source format. The arm sets c->lumToYV12 to ff_<x>ToY_<opt>, sets
 * c->chrToYV12 to ff_<x>ToUV_<opt> only when c->chrSrcHSubSample is zero,
 * and ends with `break` (the terminating semicolon comes from the use site).
 */
555 #define case_rgb(x, X, opt) \
556  case AV_PIX_FMT_ ## X: \
557  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
558  if (!c->chrSrcHSubSample) \
559  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
560  break
/*
 * Dispatches on `filtersize` and forwards to ASSIGN_SCALE_FUNC2 with the
 * matching filter-size tag: 4 and 8 use their dedicated tags; any other size
 * uses X4 when (filtersize & 4) is non-zero and X8 otherwise.
 */
561 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
562  switch (filtersize) { \
563  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
564  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
565  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
566  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
567  break; \
568  }
569  if (EXTERNAL_SSE2(cpu_flags)) {
570  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
571  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
572  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
573  HAVE_ALIGNED_STACK || ARCH_X86_64);
574  if (!(c->flags & SWS_ACCURATE_RND))
575  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
576 
577  switch (c->srcFormat) {
578  case AV_PIX_FMT_YA8:
579  c->lumToYV12 = ff_yuyvToY_sse2;
580  if (c->needAlpha)
581  c->alpToYV12 = ff_uyvyToY_sse2;
582  break;
583  case AV_PIX_FMT_YUYV422:
584  c->lumToYV12 = ff_yuyvToY_sse2;
585  c->chrToYV12 = ff_yuyvToUV_sse2;
586  break;
587  case AV_PIX_FMT_UYVY422:
588  c->lumToYV12 = ff_uyvyToY_sse2;
589  c->chrToYV12 = ff_uyvyToUV_sse2;
590  break;
591  case AV_PIX_FMT_NV12:
592  c->chrToYV12 = ff_nv12ToUV_sse2;
593  break;
594  case AV_PIX_FMT_NV21:
595  c->chrToYV12 = ff_nv21ToUV_sse2;
596  break;
597  case_rgb(rgb24, RGB24, sse2);
598  case_rgb(bgr24, BGR24, sse2);
599  case_rgb(bgra, BGRA, sse2);
600  case_rgb(rgba, RGBA, sse2);
601  case_rgb(abgr, ABGR, sse2);
602  case_rgb(argb, ARGB, sse2);
603  default:
604  break;
605  }
606  }
607  if (EXTERNAL_SSSE3(cpu_flags)) {
608  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
609  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
610  switch (c->srcFormat) {
611  case_rgb(rgb24, RGB24, ssse3);
612  case_rgb(bgr24, BGR24, ssse3);
613  default:
614  break;
615  }
616  }
617  if (EXTERNAL_SSE4(cpu_flags)) {
618  /* Xto15 don't need special sse4 functions */
619  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
620  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
621  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
622  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
623  HAVE_ALIGNED_STACK || ARCH_X86_64);
624  if (c->dstBpc == 16 && !isBE(c->dstFormat) && !(c->flags & SWS_ACCURATE_RND))
625  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
626  }
627 
628  if (EXTERNAL_AVX(cpu_flags)) {
629  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
630  HAVE_ALIGNED_STACK || ARCH_X86_64);
631  if (!(c->flags & SWS_ACCURATE_RND))
632  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
633 
634  switch (c->srcFormat) {
635  case AV_PIX_FMT_YUYV422:
636  c->chrToYV12 = ff_yuyvToUV_avx;
637  break;
638  case AV_PIX_FMT_UYVY422:
639  c->chrToYV12 = ff_uyvyToUV_avx;
640  break;
641  case AV_PIX_FMT_NV12:
642  c->chrToYV12 = ff_nv12ToUV_avx;
643  break;
644  case AV_PIX_FMT_NV21:
645  c->chrToYV12 = ff_nv21ToUV_avx;
646  break;
647  case_rgb(rgb24, RGB24, avx);
648  case_rgb(bgr24, BGR24, avx);
649  case_rgb(bgra, BGRA, avx);
650  case_rgb(rgba, RGBA, avx);
651  case_rgb(abgr, ABGR, avx);
652  case_rgb(argb, ARGB, avx);
653  default:
654  break;
655  }
656  }
657 
658 #if ARCH_X86_64
/*
 * Assigns to `hscalefn` the AVX2 8-bit-to-15-bit horizontal scaler for the
 * given filter size: ff_hscale8to15_4_avx2 when `filtersize` is 4, and
 * ff_hscale8to15_X4_avx2 for every other size.
 *
 * The expansion is a bare switch statement; each arm ends with exactly one
 * `break` (the former second `break` after the default arm was unreachable).
 */
#define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
    switch (filtersize) { \
    case 4:  hscalefn = ff_hscale8to15_4_avx2;  break; \
    default: hscalefn = ff_hscale8to15_X4_avx2; break; \
    }
665 
667  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
668  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
669  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
670  }
671  }
672 
674  if (ARCH_X86_64)
675  switch (c->srcFormat) {
676  case_rgb(rgb24, RGB24, avx2);
677  case_rgb(bgr24, BGR24, avx2);
678  case_rgb(bgra, BGRA, avx2);
679  case_rgb(rgba, RGBA, avx2);
680  case_rgb(abgr, ABGR, avx2);
681  case_rgb(argb, ARGB, avx2);
682  }
683  switch (c->dstFormat) {
684  case AV_PIX_FMT_NV12:
685  case AV_PIX_FMT_NV24:
686  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
687  break;
688  case AV_PIX_FMT_NV21:
689  case AV_PIX_FMT_NV42:
690  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
691  break;
692  default:
693  break;
694  }
695  }
696 
697 
698 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
699  case fmt: \
700  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
701 
702 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
703  case rgba_fmt: \
704  case rgb_fmt: \
705  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
706  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
707  break;
708 
709 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
710  case fmt: \
711  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
712  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
713  break;
714 
715 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
716  case fmt: \
717  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
718  break;
719 
720 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
721  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
722  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
723  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
724  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
725 
726 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
727  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
728  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
729  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
730  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
731 
732 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
733  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
734  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
735 
736 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
737  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
738  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
739 
740 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
741  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
742  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
743 
744 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
745  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
746  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
747  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
748  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
749  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
750  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
751  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
752  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
753 
754 
755  if (EXTERNAL_SSE2(cpu_flags)) {
756  switch (c->srcFormat) {
757  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
758  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
759  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
760  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
761  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
762  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
763  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
764  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
765  default:
766  break;
767  }
768  }
769 
770  if (EXTERNAL_SSE4(cpu_flags)) {
771  switch (c->srcFormat) {
772  case AV_PIX_FMT_GBRAP:
773  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
774  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
775  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
776  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
777  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
778  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
779  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
780  default:
781  break;
782  }
783  }
784 
786  switch (c->srcFormat) {
787  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
788  default:
789  break;
790  }
791  }
792 
793  if(c->flags & SWS_FULL_CHR_H_INT) {
794 
795  /* yuv2gbrp uses the SwsContext for yuv coefficients
796  if struct offsets change the asm needs to be updated too */
797  av_assert0(offsetof(SwsContext, yuv2rgb_y_offset) == 40292);
798 
799 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
800  case fmt: \
801  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
802  break;
803 
804 #define YUV2ANYX_GBRAP_CASES(opt) \
805  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
806  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
807  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
808  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
809  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
810  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
811  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
812  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
813  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
814  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
815  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
816  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
817  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
818  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
819  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
820  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
821  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
822  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
823  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
824  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
825  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
826  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
827 
828  if (EXTERNAL_SSE2(cpu_flags)) {
829  switch (c->dstFormat) {
830  YUV2ANYX_GBRAP_CASES(sse2)
831  default:
832  break;
833  }
834  }
835 
836  if (EXTERNAL_SSE4(cpu_flags)) {
837  switch (c->dstFormat) {
838  YUV2ANYX_GBRAP_CASES(sse4)
839  default:
840  break;
841  }
842  }
843 
845  switch (c->dstFormat) {
846  YUV2ANYX_GBRAP_CASES(avx2)
847  default:
848  break;
849  }
850  }
851  }
852 
853 #endif
854 
856 }
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:418
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:501
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem_internal.h:86
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:60
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1059
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:430
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:411
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:512
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:410
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt)
Definition: swscale.c:454
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem_internal.h:87
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:304
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:307
rgb
Definition: rpzaenc.c:60
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:496
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:494
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:59
avassert.h
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsContext *c)
Definition: swscale.c:475
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:498
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:499
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:61
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1060
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:227
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:284
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:414
SwsContext::yuv2rgb_y_offset
int yuv2rgb_y_offset
Definition: swscale_internal.h:467
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:266
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:497
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:487
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:1054
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:520
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:493
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ...
Definition: undefined.txt:32
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:415
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:727
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
cpu.h
isAnyRGB
static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:835
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
SWS_FULL_CHR_H_INT
#define SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:97
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:508
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:471
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:419
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:114
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:495
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:466
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as AV_PIX_FMT_NV12, but U and V bytes are swapped
Definition: pixfmt.h:97
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as AV_PIX_FMT_NV24, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:509
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:182
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:88
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:513
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt)
Definition: swscale.c:466
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1057
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:281
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:195
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:299
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:253
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:323
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:62
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:270
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33