swscale.c
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
24 #include "libswscale/swscale_internal.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
46 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
49 
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
51 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
53 
54 
55 // MMXEXT versions
56 #if HAVE_MMXEXT_INLINE
57 #undef RENAME
58 #undef COMPILE_TEMPLATE_MMXEXT
59 #define COMPILE_TEMPLATE_MMXEXT 1
60 #define RENAME(a) a ## _mmxext
61 #include "swscale_template.c"
62 #endif
63 
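/* Set up the per-line dither constants and pack the vertical filter
 * coefficients together with the source-line pointers into the
 * lum/chr/alp MMX filter tables used by the inline-assembly vertical
 * scaler. */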
64 void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
65 {
66  const int dstH= c->dstH;
67  const int flags= c->flags;
68 
69  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
70  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
71  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
72 
73  int hasAlpha = c->needAlpha;
74  int32_t *vLumFilterPos= c->vLumFilterPos;
75  int32_t *vChrFilterPos= c->vChrFilterPos;
76  int16_t *vLumFilter= c->vLumFilter;
77  int16_t *vChrFilter= c->vChrFilter;
78  int32_t *lumMmxFilter= c->lumMmxFilter;
79  int32_t *chrMmxFilter= c->chrMmxFilter;
80  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
81  const int vLumFilterSize= c->vLumFilterSize;
82  const int vChrFilterSize= c->vChrFilterSize;
83  const int chrDstY= dstY>>c->chrDstVSubSample;
84  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
85  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
86 
87  c->blueDither= ff_dither8[dstY&1];
88  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
89  c->greenDither= ff_dither8[dstY&1];
90  else
91  c->greenDither= ff_dither4[dstY&1];
92  c->redDither= ff_dither8[(dstY+1)&1];
93  if (dstY < dstH - 2) {
94  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
95  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
96  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
97 
98  int i;
99  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
100  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
101 
102  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
103  for (i = 0; i < neg; i++)
104  tmpY[i] = lumSrcPtr[neg];
105  for ( ; i < end; i++)
106  tmpY[i] = lumSrcPtr[i];
107  for ( ; i < vLumFilterSize; i++)
108  tmpY[i] = tmpY[i-1];
109  lumSrcPtr = tmpY;
110 
111  if (alpSrcPtr) {
112  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
113  for (i = 0; i < neg; i++)
114  tmpA[i] = alpSrcPtr[neg];
115  for ( ; i < end; i++)
116  tmpA[i] = alpSrcPtr[i];
117  for ( ; i < vLumFilterSize; i++)
118  tmpA[i] = tmpA[i - 1];
119  alpSrcPtr = tmpA;
120  }
121  }
122  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
123  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
124  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
125  for (i = 0; i < neg; i++) {
126  tmpU[i] = chrUSrcPtr[neg];
127  }
128  for ( ; i < end; i++) {
129  tmpU[i] = chrUSrcPtr[i];
130  }
131  for ( ; i < vChrFilterSize; i++) {
132  tmpU[i] = tmpU[i - 1];
133  }
134  chrUSrcPtr = tmpU;
135  }
136 
137  if (flags & SWS_ACCURATE_RND) {
138  int s= APCK_SIZE / 8;
139  for (i=0; i<vLumFilterSize; i+=2) {
140  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
141  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
142  lumMmxFilter[s*i+APCK_COEF/4 ]=
143  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
144  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
145  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
146  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
147  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
148  alpMmxFilter[s*i+APCK_COEF/4 ]=
149  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
150  }
151  }
152  for (i=0; i<vChrFilterSize; i+=2) {
153  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
154  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
155  chrMmxFilter[s*i+APCK_COEF/4 ]=
156  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
157  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
158  }
159  } else {
160  for (i=0; i<vLumFilterSize; i++) {
161  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
162  lumMmxFilter[4*i+2]=
163  lumMmxFilter[4*i+3]=
164  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
165  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
166  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
167  alpMmxFilter[4*i+2]=
168  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
169  }
170  }
171  for (i=0; i<vChrFilterSize; i++) {
172  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
173  chrMmxFilter[4*i+2]=
174  chrMmxFilter[4*i+3]=
175  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
176  }
177  }
178  }
179 }
180 #endif /* HAVE_INLINE_ASM */
181 
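/* C entry points for the external assembly yuv2yuvX vertical scalers.
 * The MMXEXT wrapper forwards directly; the wider SSE3/AVX2 wrappers fall
 * back to the MMXEXT path for destinations that are not 16-byte aligned
 * and use it to finish the remainder that does not fill a full vector. */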
182 #define YUV2YUVX_FUNC_MMX(opt, step) \
183 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
184  uint8_t *dest, int dstW, \
185  const uint8_t *dither, int offset); \
186 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
187  const int16_t **src, uint8_t *dest, int dstW, \
188  const uint8_t *dither, int offset) \
189 { \
190  if(dstW > 0) \
191  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
192  return; \
193 }
194 
195 #define YUV2YUVX_FUNC(opt, step) \
196 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
197  uint8_t *dest, int dstW, \
198  const uint8_t *dither, int offset); \
199 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
200  const int16_t **src, uint8_t *dest, int dstW, \
201  const uint8_t *dither, int offset) \
202 { \
203  int remainder = (dstW % step); \
204  int pixelsProcessed = dstW - remainder; \
205  if(((uintptr_t)dest) & 15){ \
206  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
207  return; \
208  } \
209  if(pixelsProcessed > 0) \
210  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
211  if(remainder > 0){ \
212  ff_yuv2yuvX_mmxext(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
213  } \
214  return; \
215 }
216 
217 #if HAVE_MMXEXT_EXTERNAL
218 YUV2YUVX_FUNC_MMX(mmxext, 16)
219 #endif
220 #if HAVE_SSE3_EXTERNAL
221 YUV2YUVX_FUNC(sse3, 32)
222 #endif
223 #if HAVE_AVX2_EXTERNAL
224 YUV2YUVX_FUNC(avx2, 64)
225 #endif
226 
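/* Prototypes for the external assembly horizontal scalers; the SCALE_FUNCS*
 * macros expand one declaration per supported input depth (8-16 bits),
 * output depth (15 or 19 bits) and filter-size variant. */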
227 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
228 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
229  SwsInternal *c, int16_t *data, \
230  int dstW, const uint8_t *src, \
231  const int16_t *filter, \
232  const int32_t *filterPos, int filterSize)
233 
234 #define SCALE_FUNCS(filter_n, opt) \
235  SCALE_FUNC(filter_n, 8, 15, opt); \
236  SCALE_FUNC(filter_n, 9, 15, opt); \
237  SCALE_FUNC(filter_n, 10, 15, opt); \
238  SCALE_FUNC(filter_n, 12, 15, opt); \
239  SCALE_FUNC(filter_n, 14, 15, opt); \
240  SCALE_FUNC(filter_n, 16, 15, opt); \
241  SCALE_FUNC(filter_n, 8, 19, opt); \
242  SCALE_FUNC(filter_n, 9, 19, opt); \
243  SCALE_FUNC(filter_n, 10, 19, opt); \
244  SCALE_FUNC(filter_n, 12, 19, opt); \
245  SCALE_FUNC(filter_n, 14, 19, opt); \
246  SCALE_FUNC(filter_n, 16, 19, opt)
247 
248 #define SCALE_FUNCS_MMX(opt) \
249  SCALE_FUNCS(4, opt); \
250  SCALE_FUNCS(8, opt); \
251  SCALE_FUNCS(X, opt)
252 
253 #define SCALE_FUNCS_SSE(opt) \
254  SCALE_FUNCS(4, opt); \
255  SCALE_FUNCS(8, opt); \
256  SCALE_FUNCS(X4, opt); \
257  SCALE_FUNCS(X8, opt)
258 
259 SCALE_FUNCS_SSE(sse2);
260 SCALE_FUNCS_SSE(ssse3);
261 SCALE_FUNCS_SSE(sse4);
262 
263 SCALE_FUNC(4, 8, 15, avx2);
264 SCALE_FUNC(X4, 8, 15, avx2);
265 
266 #define VSCALEX_FUNC(size, opt) \
267 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
268  const int16_t **src, uint8_t *dest, int dstW, \
269  const uint8_t *dither, int offset)
270 #define VSCALEX_FUNCS(opt) \
271  VSCALEX_FUNC(8, opt); \
272  VSCALEX_FUNC(9, opt); \
273  VSCALEX_FUNC(10, opt)
274 
275 VSCALEX_FUNC(8, mmxext);
276 VSCALEX_FUNCS(sse2);
277 VSCALEX_FUNCS(sse4);
278 VSCALEX_FUNC(16, sse4);
279 VSCALEX_FUNCS(avx);
280 
281 #define VSCALE_FUNC(size, opt) \
282 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
283  const uint8_t *dither, int offset)
284 #define VSCALE_FUNCS(opt1, opt2) \
285  VSCALE_FUNC(8, opt1); \
286  VSCALE_FUNC(9, opt2); \
287  VSCALE_FUNC(10, opt2); \
288  VSCALE_FUNC(16, opt1)
289 
290 VSCALE_FUNCS(sse2, sse2);
291 VSCALE_FUNC(16, sse4);
292 VSCALE_FUNCS(avx, avx);
293 
294 #define INPUT_Y_FUNC(fmt, opt) \
295 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
296  const uint8_t *unused1, const uint8_t *unused2, \
297  int w, uint32_t *unused, void *opq)
298 #define INPUT_UV_FUNC(fmt, opt) \
299 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
300  const uint8_t *unused0, \
301  const uint8_t *src1, \
302  const uint8_t *src2, \
303  int w, uint32_t *unused, void *opq)
304 #define INPUT_FUNC(fmt, opt) \
305  INPUT_Y_FUNC(fmt, opt); \
306  INPUT_UV_FUNC(fmt, opt)
307 #define INPUT_FUNCS(opt) \
308  INPUT_FUNC(uyvy, opt); \
309  INPUT_FUNC(yuyv, opt); \
310  INPUT_UV_FUNC(nv12, opt); \
311  INPUT_UV_FUNC(nv21, opt); \
312  INPUT_FUNC(rgba, opt); \
313  INPUT_FUNC(bgra, opt); \
314  INPUT_FUNC(argb, opt); \
315  INPUT_FUNC(abgr, opt); \
316  INPUT_FUNC(rgb24, opt); \
317  INPUT_FUNC(bgr24, opt)
318 
319 INPUT_FUNCS(sse2);
320 INPUT_FUNCS(ssse3);
321 INPUT_FUNCS(avx);
322 INPUT_FUNC(rgba, avx2);
323 INPUT_FUNC(bgra, avx2);
324 INPUT_FUNC(argb, avx2);
325 INPUT_FUNC(abgr, avx2);
326 INPUT_FUNC(rgb24, avx2);
327 INPUT_FUNC(bgr24, avx2);
328 
329 #if ARCH_X86_64
330 #define YUV2NV_DECL(fmt, opt) \
331 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
332  const int16_t *filter, int filterSize, \
333  const int16_t **u, const int16_t **v, \
334  uint8_t *dst, int dstWidth)
335 
336 YUV2NV_DECL(nv12, avx2);
337 YUV2NV_DECL(nv21, avx2);
338 
339 #define YUV2GBRP_FN_DECL(fmt, opt) \
340 void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
341  const int16_t **lumSrcx, int lumFilterSize, \
342  const int16_t *chrFilter, const int16_t **chrUSrcx, \
343  const int16_t **chrVSrcx, int chrFilterSize, \
344  const int16_t **alpSrcx, uint8_t **dest, \
345  int dstW, int y)
346 
347 #define YUV2GBRP_DECL(opt) \
348 YUV2GBRP_FN_DECL(gbrp, opt); \
349 YUV2GBRP_FN_DECL(gbrap, opt); \
350 YUV2GBRP_FN_DECL(gbrp9le, opt); \
351 YUV2GBRP_FN_DECL(gbrp10le, opt); \
352 YUV2GBRP_FN_DECL(gbrap10le, opt); \
353 YUV2GBRP_FN_DECL(gbrp12le, opt); \
354 YUV2GBRP_FN_DECL(gbrap12le, opt); \
355 YUV2GBRP_FN_DECL(gbrp14le, opt); \
356 YUV2GBRP_FN_DECL(gbrp16le, opt); \
357 YUV2GBRP_FN_DECL(gbrap16le, opt); \
358 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
359 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
360 YUV2GBRP_FN_DECL(gbrp9be, opt); \
361 YUV2GBRP_FN_DECL(gbrp10be, opt); \
362 YUV2GBRP_FN_DECL(gbrap10be, opt); \
363 YUV2GBRP_FN_DECL(gbrp12be, opt); \
364 YUV2GBRP_FN_DECL(gbrap12be, opt); \
365 YUV2GBRP_FN_DECL(gbrp14be, opt); \
366 YUV2GBRP_FN_DECL(gbrp16be, opt); \
367 YUV2GBRP_FN_DECL(gbrap16be, opt); \
368 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
369 YUV2GBRP_FN_DECL(gbrapf32be, opt)
370 
371 YUV2GBRP_DECL(sse2);
372 YUV2GBRP_DECL(sse4);
373 YUV2GBRP_DECL(avx2);
374 
375 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
376 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
377  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
378  void *opq)
379 
380 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
381 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
382  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
383  void *opq)
384 
385 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
386 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
387  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
388  void *opq)
389 
390 
391 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
392 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
393 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
394 
395 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
396 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
397 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
398 
399 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
400 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
401 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
402 
403 #define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
404 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
405 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
406 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
407 
408 #define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
409 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
410 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
411 
412 #define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
413 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
414 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
415 
416 #define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
417 INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
418 INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
419 INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
420 INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
421 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
422 
423 #define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
424 INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
425 INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
426 INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
427 INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
428 INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
429 INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
430 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
431 
432 #define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
433 INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
434 INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
435 INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
436 INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
437 INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
438 INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
439 INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
440 
441 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
442 INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
443 INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
444 
445 INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
446 INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
447 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
448 
449 INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
450 INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
451 INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
452 #endif
453 
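/* Hook up the assembly limited-range <-> full-range converters; they are
 * only used for the 15-bit intermediate path (dstBpc <= 14). */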
454 #define RANGE_CONVERT_FUNCS(opt) do { \
455  if (c->dstBpc <= 14) { \
456  if (c->srcRange) { \
457  c->lumConvertRange = ff_lumRangeFromJpeg_ ##opt; \
458  c->chrConvertRange = ff_chrRangeFromJpeg_ ##opt; \
459  } else { \
460  c->lumConvertRange = ff_lumRangeToJpeg_ ##opt; \
461  c->chrConvertRange = ff_chrRangeToJpeg_ ##opt; \
462  } \
463  } \
464 } while (0)
465 
466 #define RANGE_CONVERT_FUNCS_DECL(opt) \
467 void ff_lumRangeFromJpeg_ ##opt(int16_t *dst, int width); \
468 void ff_chrRangeFromJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
469 void ff_lumRangeToJpeg_ ##opt(int16_t *dst, int width); \
470 void ff_chrRangeToJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
471 
472 RANGE_CONVERT_FUNCS_DECL(sse2);
473 RANGE_CONVERT_FUNCS_DECL(avx2);
474 
475 av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
476 {
477  int cpu_flags = av_get_cpu_flags();
478  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
479  RANGE_CONVERT_FUNCS(avx2);
480  } else if (EXTERNAL_SSE2(cpu_flags)) {
481  RANGE_CONVERT_FUNCS(sse2);
482  }
483 }
484 
485 av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
486 {
487  int cpu_flags = av_get_cpu_flags();
488 
489 #if HAVE_MMXEXT_INLINE
490  if (INLINE_MMXEXT(cpu_flags))
491  sws_init_swscale_mmxext(c);
492 #endif
493  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
494 #if HAVE_MMXEXT_EXTERNAL
495  if (EXTERNAL_MMXEXT(cpu_flags))
496  c->yuv2planeX = yuv2yuvX_mmxext;
497 #endif
498 #if HAVE_SSE3_EXTERNAL
499  if (EXTERNAL_SSE3(cpu_flags))
500  c->yuv2planeX = yuv2yuvX_sse3;
501 #endif
502 #if HAVE_AVX2_EXTERNAL
503  if (EXTERNAL_AVX2_FAST(cpu_flags))
504  c->yuv2planeX = yuv2yuvX_avx2;
505 #endif
506  }
507 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
508  // The better yuv2planeX_8 functions need aligned stack on x86-32,
509  // so we use MMXEXT in this case if they are not available.
510  if (EXTERNAL_MMXEXT(cpu_flags)) {
511  if (c->dstBpc == 8 && !c->use_mmx_vfilter)
512  c->yuv2planeX = ff_yuv2planeX_8_mmxext;
513  }
514 #endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
515 
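/* Pick the horizontal scaler whose input/output depths match srcBpc and
 * dstBpc: the *to15 variants feed the 15-bit intermediate, the *to19
 * variants the high bit depth intermediate. */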
516 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
517  if (c->srcBpc == 8) { \
518  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
519  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
520  } else if (c->srcBpc == 9) { \
521  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
522  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
523  } else if (c->srcBpc == 10) { \
524  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
525  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
526  } else if (c->srcBpc == 12) { \
527  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
528  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
529  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
530  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
531  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
532  } else { /* c->srcBpc == 16 */ \
533  av_assert0(c->srcBpc == 16);\
534  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
535  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
536  } \
537 } while (0)
538 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
539 switch(c->dstBpc){ \
540  case 16: do_16_case; break; \
541  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
542  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
543  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
544  }
545 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
546  switch(c->dstBpc){ \
547  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
548  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
549  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
550  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
551  default: av_assert0(c->dstBpc>8); \
552  }
553 #define case_rgb(x, X, opt) \
554  case AV_PIX_FMT_ ## X: \
555  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
556  if (!c->chrSrcHSubSample) \
557  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
558  break
559 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
560  switch (filtersize) { \
561  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
562  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
563  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
564  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
565  break; \
566  }
567  if (EXTERNAL_SSE2(cpu_flags)) {
568  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
569  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
570  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
571  HAVE_ALIGNED_STACK || ARCH_X86_64);
572  if (!(c->flags & SWS_ACCURATE_RND))
573  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
574 
575  switch (c->srcFormat) {
576  case AV_PIX_FMT_YA8:
577  c->lumToYV12 = ff_yuyvToY_sse2;
578  if (c->needAlpha)
579  c->alpToYV12 = ff_uyvyToY_sse2;
580  break;
581  case AV_PIX_FMT_YUYV422:
582  c->lumToYV12 = ff_yuyvToY_sse2;
583  c->chrToYV12 = ff_yuyvToUV_sse2;
584  break;
585  case AV_PIX_FMT_UYVY422:
586  c->lumToYV12 = ff_uyvyToY_sse2;
587  c->chrToYV12 = ff_uyvyToUV_sse2;
588  break;
589  case AV_PIX_FMT_NV12:
590  c->chrToYV12 = ff_nv12ToUV_sse2;
591  break;
592  case AV_PIX_FMT_NV21:
593  c->chrToYV12 = ff_nv21ToUV_sse2;
594  break;
595  case_rgb(rgb24, RGB24, sse2);
596  case_rgb(bgr24, BGR24, sse2);
597  case_rgb(bgra, BGRA, sse2);
598  case_rgb(rgba, RGBA, sse2);
599  case_rgb(abgr, ABGR, sse2);
600  case_rgb(argb, ARGB, sse2);
601  default:
602  break;
603  }
604  }
605  if (EXTERNAL_SSSE3(cpu_flags)) {
606  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
607  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
608  switch (c->srcFormat) {
609  case_rgb(rgb24, RGB24, ssse3);
610  case_rgb(bgr24, BGR24, ssse3);
611  default:
612  break;
613  }
614  }
615  if (EXTERNAL_SSE4(cpu_flags)) {
616  /* Xto15 don't need special sse4 functions */
617  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
618  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
619  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
620  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
621  HAVE_ALIGNED_STACK || ARCH_X86_64);
622  if (c->dstBpc == 16 && !isBE(c->dstFormat) && !(c->flags & SWS_ACCURATE_RND))
623  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
624  }
625 
626  if (EXTERNAL_AVX(cpu_flags)) {
627  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
628  HAVE_ALIGNED_STACK || ARCH_X86_64);
629  if (!(c->flags & SWS_ACCURATE_RND))
630  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
631 
632  switch (c->srcFormat) {
633  case AV_PIX_FMT_YUYV422:
634  c->chrToYV12 = ff_yuyvToUV_avx;
635  break;
636  case AV_PIX_FMT_UYVY422:
637  c->chrToYV12 = ff_uyvyToUV_avx;
638  break;
639  case AV_PIX_FMT_NV12:
640  c->chrToYV12 = ff_nv12ToUV_avx;
641  break;
642  case AV_PIX_FMT_NV21:
643  c->chrToYV12 = ff_nv21ToUV_avx;
644  break;
645  case_rgb(rgb24, RGB24, avx);
646  case_rgb(bgr24, BGR24, avx);
647  case_rgb(bgra, BGRA, avx);
648  case_rgb(rgba, RGBA, avx);
649  case_rgb(abgr, ABGR, avx);
650  case_rgb(argb, ARGB, avx);
651  default:
652  break;
653  }
654  }
655 
656 #if ARCH_X86_64
657 #define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
658  switch (filtersize) { \
659  case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
660  default: hscalefn = ff_hscale8to15_X4_avx2; break; \
661  break; \
662  }
663 
664  if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
665  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
666  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
667  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
668  }
669  }
670 
671  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
672  if (ARCH_X86_64)
673  switch (c->srcFormat) {
674  case_rgb(rgb24, RGB24, avx2);
675  case_rgb(bgr24, BGR24, avx2);
676  case_rgb(bgra, BGRA, avx2);
677  case_rgb(rgba, RGBA, avx2);
678  case_rgb(abgr, ABGR, avx2);
679  case_rgb(argb, ARGB, avx2);
680  }
681  if (!(c->flags & SWS_ACCURATE_RND)) // FIXME
682  switch (c->dstFormat) {
683  case AV_PIX_FMT_NV12:
684  case AV_PIX_FMT_NV24:
685  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
686  break;
687  case AV_PIX_FMT_NV21:
688  case AV_PIX_FMT_NV42:
689  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
690  break;
691  default:
692  break;
693  }
694  }
695 
696 
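/* Case helpers that select the planar GBR(A) input readers (Y, UV and
 * alpha) for each bit depth and endianness. */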
697 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
698  case fmt: \
699  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
700 
701 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
702  case rgba_fmt: \
703  case rgb_fmt: \
704  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
705  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
706  break;
707 
708 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
709  case fmt: \
710  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
711  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
712  break;
713 
714 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
715  case fmt: \
716  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
717  break;
718 
719 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
720  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
721  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
722  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
723  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
724 
725 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
726  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
727  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
728  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
729  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
730 
731 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
732  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
733  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
734 
735 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
736  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
737  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
738 
739 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
740  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
741  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
742 
743 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
744  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
745  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
746  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
747  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
748  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
749  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
750  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
751  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
752 
753 
754  if (EXTERNAL_SSE2(cpu_flags)) {
755  switch (c->srcFormat) {
756  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
757  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
758  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
759  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
760  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
761  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
762  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
763  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
764  default:
765  break;
766  }
767  }
768 
769  if (EXTERNAL_SSE4(cpu_flags)) {
770  switch (c->srcFormat) {
771  case AV_PIX_FMT_GBRAP:
772  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
773  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
774  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
775  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
776  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
777  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
778  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
779  default:
780  break;
781  }
782  }
783 
784  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
785  switch (c->srcFormat) {
786  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
787  default:
788  break;
789  }
790  }
791 
792  if(c->flags & SWS_FULL_CHR_H_INT) {
793 
794  /* yuv2gbrp uses the SwsInternal for yuv coefficients
795  if struct offsets change the asm needs to be updated too */
796  av_assert0(offsetof(SwsInternal, yuv2rgb_y_offset) == 40292);
797 
798 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
799  case fmt: \
800  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
801  break;
802 
803 #define YUV2ANYX_GBRAP_CASES(opt) \
804  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
805  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
806  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
807  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
808  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
809  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
810  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
811  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
812  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
813  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
814  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
815  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
816  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
817  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
818  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
819  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
820  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
821  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
822  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
823  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
824  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
825  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
826 
827  if (EXTERNAL_SSE2(cpu_flags)) {
828  switch (c->dstFormat) {
829  YUV2ANYX_GBRAP_CASES(sse2)
830  default:
831  break;
832  }
833  }
834 
835  if (EXTERNAL_SSE4(cpu_flags)) {
836  switch (c->dstFormat) {
837  YUV2ANYX_GBRAP_CASES(sse4)
838  default:
839  break;
840  }
841  }
842 
843  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
844  switch (c->dstFormat) {
845  YUV2ANYX_GBRAP_CASES(avx2)
846  default:
847  break;
848  }
849  }
850  }
851 
852 #endif
853 }