FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/*
 * Two 64-bit dither constants; each repeats one 16-bit pattern four times
 * (0x0103 and 0x0200 respectively).
 * In this file the entry is selected by output-row parity (dstY & 1) and
 * stored in c->greenDither when the destination format is neither
 * AV_PIX_FMT_RGB555 nor AV_PIX_FMT_BGR555.
 */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/*
 * Two 64-bit dither constants; each repeats one 16-bit pattern four times
 * (0x0602 and 0x0004 respectively).
 * In this file the entry is selected by output-row parity: c->blueDither uses
 * index (dstY & 1), c->redDither uses the opposite parity ((dstY + 1) & 1),
 * and c->greenDither uses index (dstY & 1) only for AV_PIX_FMT_RGB555 /
 * AV_PIX_FMT_BGR555 destinations.
 */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
/*
 * Per-byte masks replicated across 64 bits: 0xF8 keeps the top five bits of
 * every byte, 0xFC keeps the top six bits.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * consumed by the inline-asm code in swscale_template.c included below -
 * confirm there.
 */
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
/*
 * Three complementary byte-select masks over a 64-bit word. Counting bytes
 * from the least significant one, M24A selects bytes 0, 3, 6; M24B selects
 * bytes 1, 4, 7; M24C selects bytes 2, 5. Together they cover all eight bytes
 * exactly once.
 * NOTE(review): the "24" suggests use by the 24-bit packed output code in
 * swscale_template.c; not verifiable from this file - confirm there.
 */
46 DECLARE_ASM_CONST(8, uint64_t, M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_CONST(8, uint64_t, M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_CONST(8, uint64_t, M24C) = 0x0000FF0000FF0000LL;
49 
50 // MMXEXT versions
51 #if HAVE_MMXEXT_INLINE
52 #undef RENAME
53 #undef COMPILE_TEMPLATE_MMXEXT
54 #define COMPILE_TEMPLATE_MMXEXT 1
55 #define RENAME(a) a ## _mmxext
56 #include "swscale_template.c"
57 #endif
58 #endif /* HAVE_INLINE_ASM */
59 
61 {
62  const int dstH= c->opts.dst_h;
63  const int flags= c->opts.flags;
64 
65  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
66  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
67  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
68 
69  int hasAlpha = c->needAlpha;
70  int32_t *vLumFilterPos= c->vLumFilterPos;
71  int32_t *vChrFilterPos= c->vChrFilterPos;
72  int16_t *vLumFilter= c->vLumFilter;
73  int16_t *vChrFilter= c->vChrFilter;
74  int32_t *lumMmxFilter= c->lumMmxFilter;
75  int32_t *chrMmxFilter= c->chrMmxFilter;
76  av_unused int32_t *alpMmxFilter= c->alpMmxFilter;
77  const int vLumFilterSize= c->vLumFilterSize;
78  const int vChrFilterSize= c->vChrFilterSize;
79  const int chrDstY= dstY>>c->chrDstVSubSample;
80  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
81  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
82 
83  c->blueDither= ff_dither8[dstY&1];
84  if (c->opts.dst_format == AV_PIX_FMT_RGB555 || c->opts.dst_format == AV_PIX_FMT_BGR555)
85  c->greenDither= ff_dither8[dstY&1];
86  else
87  c->greenDither= ff_dither4[dstY&1];
88  c->redDither= ff_dither8[(dstY+1)&1];
89  if (dstY < dstH - 2) {
90  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
91  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
92  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
93 
94  int i;
95  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->opts.src_h) {
96  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
97 
98  int neg = -firstLumSrcY, i, end = FFMIN(c->opts.src_h - firstLumSrcY, vLumFilterSize);
99  for (i = 0; i < neg; i++)
100  tmpY[i] = lumSrcPtr[neg];
101  for ( ; i < end; i++)
102  tmpY[i] = lumSrcPtr[i];
103  for ( ; i < vLumFilterSize; i++)
104  tmpY[i] = tmpY[i-1];
105  lumSrcPtr = tmpY;
106 
107  if (alpSrcPtr) {
108  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
109  for (i = 0; i < neg; i++)
110  tmpA[i] = alpSrcPtr[neg];
111  for ( ; i < end; i++)
112  tmpA[i] = alpSrcPtr[i];
113  for ( ; i < vLumFilterSize; i++)
114  tmpA[i] = tmpA[i - 1];
115  alpSrcPtr = tmpA;
116  }
117  }
118  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
119  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
120  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
121  for (i = 0; i < neg; i++) {
122  tmpU[i] = chrUSrcPtr[neg];
123  }
124  for ( ; i < end; i++) {
125  tmpU[i] = chrUSrcPtr[i];
126  }
127  for ( ; i < vChrFilterSize; i++) {
128  tmpU[i] = tmpU[i - 1];
129  }
130  chrUSrcPtr = tmpU;
131  }
132 
133  if (flags & SWS_ACCURATE_RND) {
134  int s= APCK_SIZE / 8;
135  for (i=0; i<vLumFilterSize; i+=2) {
136  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
137  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
138  lumMmxFilter[s*i+APCK_COEF/4 ]=
139  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
140  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
141  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
142  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
143  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
144  alpMmxFilter[s*i+APCK_COEF/4 ]=
145  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
146  }
147  }
148  for (i=0; i<vChrFilterSize; i+=2) {
149  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
150  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
151  chrMmxFilter[s*i+APCK_COEF/4 ]=
152  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
153  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
154  }
155  } else {
156  for (i=0; i<vLumFilterSize; i++) {
157  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
158  lumMmxFilter[4*i+2]=
159  lumMmxFilter[4*i+3]=
160  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
161  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
162  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
163  alpMmxFilter[4*i+2]=
164  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
165  }
166  }
167  for (i=0; i<vChrFilterSize; i++) {
168  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
169  chrMmxFilter[4*i+2]=
170  chrMmxFilter[4*i+3]=
171  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
172  }
173  }
174  }
175 }
176 
/*
 * YUV2YUVX_FUNC(opt, step)
 *
 * Declares the external routine ff_yuv2yuvX_<opt> and defines the static
 * wrapper yuv2yuvX_<opt> around it.
 *
 * The wrapper works as follows:
 *   - if dest is not 16-byte aligned, the whole row is handed to
 *     yuv2yuvX_sse2() and nothing else is done;
 *   - otherwise ff_yuv2yuvX_<opt> handles the leading part of the row whose
 *     length is a multiple of `step`;
 *   - the remaining (dstW % step) pixels are finished by ff_yuv2yuvX_sse2(),
 *     called with srcOffset equal to the number of pixels already handled.
 *
 * `src` is only forwarded to the unaligned fallback. The external routines
 * receive filterSize - 1, dest - offset and a width that includes `offset`.
 *
 * Must be expanded after both yuv2yuvX_sse2() and ff_yuv2yuvX_sse2() have
 * been declared, since the generated body calls them.
 */
#define YUV2YUVX_FUNC(opt, step)                                               \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset,  \
                        uint8_t *dest, int dstW,                               \
                        const uint8_t *dither, int offset);                    \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize,             \
                            const int16_t **src, uint8_t *dest, int dstW,      \
                            const uint8_t *dither, int offset)                 \
{                                                                              \
    int tail, bulk;                                                            \
                                                                               \
    if ((uintptr_t)dest & 15) {                                                \
        yuv2yuvX_sse2(filter, filterSize, src, dest, dstW, dither, offset);    \
        return;                                                                \
    }                                                                          \
    tail = dstW % step;                                                        \
    bulk = dstW - tail;                                                        \
    if (bulk > 0)                                                              \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset,           \
                           bulk + offset, dither, offset);                     \
    if (tail > 0)                                                              \
        ff_yuv2yuvX_sse2(filter, filterSize - 1, bulk, dest - offset,          \
                         bulk + tail + offset, dither, offset);                \
}
198 
199 #if HAVE_SSE2_EXTERNAL
/* External SSE2 routine; only its prototype is given here. */
void ff_yuv2yuvX_sse2(const int16_t *filter, int filterSize, int srcOffset,
                      uint8_t *dest, int dstW,
                      const uint8_t *dither, int offset);

/*
 * Wrapper around ff_yuv2yuvX_sse2() for a full output row.
 *
 * Does nothing when dstW is not positive. Otherwise the external routine is
 * called with filterSize - 1, a source offset of 0, the destination pointer
 * moved back by `offset`, and a width of dstW + offset.
 * `src` is accepted for signature compatibility and is not read here.
 */
static void yuv2yuvX_sse2(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset)
{
    if (dstW <= 0)
        return;

    ff_yuv2yuvX_sse2(filter, filterSize - 1, 0, dest - offset,
                     dstW + offset, dither, offset);
}
211 #if HAVE_SSE3_EXTERNAL
212 YUV2YUVX_FUNC(sse3, 32)
213 #endif
214 #if HAVE_AVX2_EXTERNAL
215 YUV2YUVX_FUNC(avx2, 64)
216 #endif
217 #endif
218 
/*
 * SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
 * Expands to the prototype of one external routine named
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. No trailing semicolon is
 * emitted, so the user supplies it. Routines declared this way are later
 * assigned to c->hyScale / c->hcScale in this file.
 */
219 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
220 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
221  SwsInternal *c, int16_t *data, \
222  int dstW, const uint8_t *src, \
223  const int16_t *filter, \
224  const int32_t *filterPos, int filterSize)
225 
/*
 * SCALE_FUNCS(filter_n, opt)
 * Declares, for one filter-size tag and one instruction-set suffix, all twelve
 * SCALE_FUNC prototypes: source depths 8, 9, 10, 12, 14, 16 combined with the
 * two destination tags 15 and 19.
 */
226 #define SCALE_FUNCS(filter_n, opt) \
227  SCALE_FUNC(filter_n, 8, 15, opt); \
228  SCALE_FUNC(filter_n, 9, 15, opt); \
229  SCALE_FUNC(filter_n, 10, 15, opt); \
230  SCALE_FUNC(filter_n, 12, 15, opt); \
231  SCALE_FUNC(filter_n, 14, 15, opt); \
232  SCALE_FUNC(filter_n, 16, 15, opt); \
233  SCALE_FUNC(filter_n, 8, 19, opt); \
234  SCALE_FUNC(filter_n, 9, 19, opt); \
235  SCALE_FUNC(filter_n, 10, 19, opt); \
236  SCALE_FUNC(filter_n, 12, 19, opt); \
237  SCALE_FUNC(filter_n, 14, 19, opt); \
238  SCALE_FUNC(filter_n, 16, 19, opt)
239 
/*
 * Filter-size tags 4, 8 and X for one suffix.
 * NOTE(review): not instantiated anywhere in the visible part of this file.
 */
240 #define SCALE_FUNCS_MMX(opt) \
241  SCALE_FUNCS(4, opt); \
242  SCALE_FUNCS(8, opt); \
243  SCALE_FUNCS(X, opt)
244 
/* Filter-size tags 4, 8, X4 and X8 for one suffix. */
245 #define SCALE_FUNCS_SSE(opt) \
246  SCALE_FUNCS(4, opt); \
247  SCALE_FUNCS(8, opt); \
248  SCALE_FUNCS(X4, opt); \
249  SCALE_FUNCS(X8, opt)
250 
/* Full prototype sets for the sse2, ssse3 and sse4 suffixes. */
251 SCALE_FUNCS_SSE(sse2);
252 SCALE_FUNCS_SSE(ssse3);
253 SCALE_FUNCS_SSE(sse4);
254 
/* For avx2 only the 8-to-15 routines with tags 4 and X4 are declared. */
255 SCALE_FUNC(4, 8, 15, avx2);
256 SCALE_FUNC(X4, 8, 15, avx2);
257 
/*
 * VSCALEX_FUNC(size, opt)
 * Expands to the prototype of the external routine
 * ff_yuv2planeX_<size>_<opt> (no trailing semicolon). Routines declared this
 * way are later assigned to c->yuv2planeX in this file.
 */
258 #define VSCALEX_FUNC(size, opt) \
259 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
260  const int16_t **src, uint8_t *dest, int dstW, \
261  const uint8_t *dither, int offset)
/* Declares the size 8, 9 and 10 variants for one suffix. */
262 #define VSCALEX_FUNCS(opt) \
263  VSCALEX_FUNC(8, opt); \
264  VSCALEX_FUNC(9, opt); \
265  VSCALEX_FUNC(10, opt)
266 
/* The size-16 variant is declared for sse4 only. */
267 VSCALEX_FUNCS(sse2);
268 VSCALEX_FUNCS(sse4);
269 VSCALEX_FUNC(16, sse4);
270 VSCALEX_FUNCS(avx);
271 
/*
 * VSCALE_FUNC(size, opt)
 * Expands to the prototype of the external routine
 * ff_yuv2plane1_<size>_<opt> (no trailing semicolon). Routines declared this
 * way are later assigned to c->yuv2plane1 in this file.
 */
272 #define VSCALE_FUNC(size, opt) \
273 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
274  const uint8_t *dither, int offset)
/*
 * Declares sizes 8 and 16 with suffix opt1, and sizes 9 and 10 with suffix
 * opt2. Both visible uses pass the same suffix twice.
 */
275 #define VSCALE_FUNCS(opt1, opt2) \
276  VSCALE_FUNC(8, opt1); \
277  VSCALE_FUNC(9, opt2); \
278  VSCALE_FUNC(10, opt2); \
279  VSCALE_FUNC(16, opt1)
280 
/* sse4 only adds a size-16 variant. */
281 VSCALE_FUNCS(sse2, sse2);
282 VSCALE_FUNC(16, sse4);
283 VSCALE_FUNCS(avx, avx);
284 
/*
 * INPUT_Y_FUNC(fmt, opt)
 * Expands to the prototype of the external routine ff_<fmt>ToY_<opt>
 * (no trailing semicolon). Such routines are later assigned to c->lumToYV12
 * (and, for YA8 input, c->alpToYV12) in this file.
 */
285 #define INPUT_Y_FUNC(fmt, opt) \
286 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
287  const uint8_t *unused1, const uint8_t *unused2, \
288  int w, uint32_t *unused, void *opq)
/*
 * INPUT_UV_FUNC(fmt, opt)
 * Expands to the prototype of the external routine ff_<fmt>ToUV_<opt>
 * (no trailing semicolon). Such routines are later assigned to c->chrToYV12
 * in this file.
 */
289 #define INPUT_UV_FUNC(fmt, opt) \
290 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
291  const uint8_t *unused0, \
292  const uint8_t *src1, \
293  const uint8_t *src2, \
294  int w, uint32_t *unused, void *opq)
/* Declares both the ToY and the ToUV routine for one format and suffix. */
295 #define INPUT_FUNC(fmt, opt) \
296  INPUT_Y_FUNC(fmt, opt); \
297  INPUT_UV_FUNC(fmt, opt)
/*
 * Declares the input routines for one suffix. nv12 and nv21 get a ToUV
 * routine only; every other listed format gets both ToY and ToUV.
 */
298 #define INPUT_FUNCS(opt) \
299  INPUT_FUNC(uyvy, opt); \
300  INPUT_FUNC(yuyv, opt); \
301  INPUT_UV_FUNC(nv12, opt); \
302  INPUT_UV_FUNC(nv21, opt); \
303  INPUT_FUNC(rgba, opt); \
304  INPUT_FUNC(bgra, opt); \
305  INPUT_FUNC(argb, opt); \
306  INPUT_FUNC(abgr, opt); \
307  INPUT_FUNC(rgb24, opt); \
308  INPUT_FUNC(bgr24, opt)
309 
/* avx2 is declared for the six RGB layouts only, not via INPUT_FUNCS. */
310 INPUT_FUNCS(sse2);
311 INPUT_FUNCS(ssse3);
312 INPUT_FUNCS(avx);
313 INPUT_FUNC(rgba, avx2);
314 INPUT_FUNC(bgra, avx2);
315 INPUT_FUNC(argb, avx2);
316 INPUT_FUNC(abgr, avx2);
317 INPUT_FUNC(rgb24, avx2);
318 INPUT_FUNC(bgr24, avx2);
319 
320 #if ARCH_X86_64
/*
 * YUV2NV_DECL(fmt, opt)
 * Expands to the prototype of the external routine ff_yuv2<fmt>cX_<opt>
 * (no trailing semicolon). The two avx2 routines declared below are later
 * assigned to c->yuv2nv12cX in this file. Only compiled when ARCH_X86_64.
 */
321 #define YUV2NV_DECL(fmt, opt) \
322 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
323  const int16_t *filter, int filterSize, \
324  const int16_t **u, const int16_t **v, \
325  uint8_t *dst, int dstWidth)
326 
327 YUV2NV_DECL(nv12, avx2);
328 YUV2NV_DECL(nv21, avx2);
329 
/*
 * YUV2GBRP_FN_DECL(fmt, opt)
 * Expands to the prototype of the external routine
 * ff_yuv2<fmt>_full_X_<opt> (no trailing semicolon). Routines declared this
 * way are later assigned to c->yuv2anyX in this file, inside the
 * SWS_FULL_CHR_H_INT branch.
 */
330 #define YUV2GBRP_FN_DECL(fmt, opt) \
331 void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
332  const int16_t **lumSrcx, int lumFilterSize, \
333  const int16_t *chrFilter, const int16_t **chrUSrcx, \
334  const int16_t **chrVSrcx, int chrFilterSize, \
335  const int16_t **alpSrcx, uint8_t **dest, \
336  int dstW, int y)
337 
/*
 * Declares the full set of 22 planar GBR(A) output routines for one suffix:
 * the 8-bit gbrp/gbrap pair plus little- and big-endian variants of the
 * 9/10/12/14/16-bit and 32-bit float formats listed below.
 */
338 #define YUV2GBRP_DECL(opt) \
339 YUV2GBRP_FN_DECL(gbrp, opt); \
340 YUV2GBRP_FN_DECL(gbrap, opt); \
341 YUV2GBRP_FN_DECL(gbrp9le, opt); \
342 YUV2GBRP_FN_DECL(gbrp10le, opt); \
343 YUV2GBRP_FN_DECL(gbrap10le, opt); \
344 YUV2GBRP_FN_DECL(gbrp12le, opt); \
345 YUV2GBRP_FN_DECL(gbrap12le, opt); \
346 YUV2GBRP_FN_DECL(gbrp14le, opt); \
347 YUV2GBRP_FN_DECL(gbrp16le, opt); \
348 YUV2GBRP_FN_DECL(gbrap16le, opt); \
349 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
350 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
351 YUV2GBRP_FN_DECL(gbrp9be, opt); \
352 YUV2GBRP_FN_DECL(gbrp10be, opt); \
353 YUV2GBRP_FN_DECL(gbrap10be, opt); \
354 YUV2GBRP_FN_DECL(gbrp12be, opt); \
355 YUV2GBRP_FN_DECL(gbrap12be, opt); \
356 YUV2GBRP_FN_DECL(gbrp14be, opt); \
357 YUV2GBRP_FN_DECL(gbrp16be, opt); \
358 YUV2GBRP_FN_DECL(gbrap16be, opt); \
359 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
360 YUV2GBRP_FN_DECL(gbrapf32be, opt)
361 
362 YUV2GBRP_DECL(sse2);
363 YUV2GBRP_DECL(sse4);
364 YUV2GBRP_DECL(avx2);
365 
/*
 * Prototype generators for the external planar-RGB input routines
 * (none emits a trailing semicolon):
 *   INPUT_PLANAR_RGB_Y_FN_DECL  -> ff_planar_<fmt>_to_y_<opt>
 *   INPUT_PLANAR_RGB_UV_FN_DECL -> ff_planar_<fmt>_to_uv_<opt>
 *   INPUT_PLANAR_RGB_A_FN_DECL  -> ff_planar_<fmt>_to_a_<opt>
 * Routines declared this way are later assigned to c->readLumPlanar,
 * c->readChrPlanar and c->readAlpPlanar respectively in this file.
 */
366 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
367 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
368  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
369  void *opq)
370 
371 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
372 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
373  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
374  void *opq)
375 
376 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
377 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
378  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
379  void *opq)
380 
381 
/*
 * The *_RGBXX_* helpers below take a format stem and declare both its
 * little-endian (<fmt>le) and big-endian (<fmt>be) routine.
 */
382 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
383 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
384 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
385 
386 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
387 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
388 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
389 
390 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
391 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
392 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
393 
/*
 * Combinations of the Y / UV / A pairs for one format stem.
 * NOTE(review): the YUVA, YUV and UVA combiners are not used in the visible
 * part of this file.
 */
394 #define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
395 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
396 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
397 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
398 
399 #define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
400 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
401 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
402 
403 #define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
404 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
405 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
406 
/* All alpha readers for one suffix: rgb, rgb10, rgb12, rgb16, rgbf32. */
407 #define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
408 INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
409 INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
410 INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
411 INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
412 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
413 
/* All luma readers for one suffix: rgb, rgb9/10/12/14/16, rgbf32. */
414 #define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
415 INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
416 INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
417 INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
418 INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
419 INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
420 INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
421 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
422 
/* All chroma readers for one suffix: rgb, rgb9/10/12/14/16, rgbf32. */
423 #define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
424 INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
425 INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
426 INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
427 INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
428 INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
429 INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
430 INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
431 
/* sse2: luma readers for rgbf32 only; all chroma and all alpha readers. */
432 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
433 INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
434 INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
435 
/* sse4: all luma and chroma readers; alpha readers for rgbf32 only. */
436 INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
437 INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
438 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
439 
/* avx2: the complete luma, chroma and alpha sets. */
440 INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
441 INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
442 INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
443 #endif
444 
/*
 * RANGE_CONVERT_FUNCS(opt, bpc)
 * Statement macro (wrapped in do { } while (0)) that sets both
 * c->lumConvertRange and c->chrConvertRange for suffix <opt> and depth tag
 * <bpc>: the ...RangeFromJpeg... pair when c->opts.src_range is non-zero,
 * the ...RangeToJpeg... pair otherwise.
 * Requires a variable named `c` in scope at the point of use, and the four
 * referenced ff_* routines to be declared beforehand.
 */
445 #define RANGE_CONVERT_FUNCS(opt, bpc) do { \
446  if (c->opts.src_range) { \
447  c->lumConvertRange = ff_lumRangeFromJpeg##bpc##_##opt; \
448  c->chrConvertRange = ff_chrRangeFromJpeg##bpc##_##opt; \
449  } else { \
450  c->lumConvertRange = ff_lumRangeToJpeg##bpc##_##opt; \
451  c->chrConvertRange = ff_chrRangeToJpeg##bpc##_##opt; \
452  } \
453 } while (0)
454 
/*
 * RANGE_CONVERT_FUNCS_DECL(opt, bpc)
 * Declares the four external range-conversion routines that
 * RANGE_CONVERT_FUNCS(opt, bpc) refers to:
 *   ff_lumRangeFromJpeg<bpc>_<opt>, ff_chrRangeFromJpeg<bpc>_<opt>,
 *   ff_lumRangeToJpeg<bpc>_<opt>,   ff_chrRangeToJpeg<bpc>_<opt>.
 * Unlike the other declaration macros in this file, each prototype carries
 * its own semicolon, so the macro is used without a trailing ';'.
 * The last prototype line ends with a line continuation, so the line that
 * follows it is spliced into the macro; it must stay empty.
 */
455 #define RANGE_CONVERT_FUNCS_DECL(opt, bpc) \
456 void ff_lumRangeFromJpeg##bpc##_##opt(int16_t *dst, int width, \
457  uint32_t coeff, int64_t offset); \
458 void ff_chrRangeFromJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
459  uint32_t coeff, int64_t offset); \
460 void ff_lumRangeToJpeg##bpc##_##opt(int16_t *dst, int width, \
461  uint32_t coeff, int64_t offset); \
462 void ff_chrRangeToJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
463  uint32_t coeff, int64_t offset); \
464 
466 RANGE_CONVERT_FUNCS_DECL(sse4, 16)
468 RANGE_CONVERT_FUNCS_DECL(avx2, 16)
469 
471 {
472  int cpu_flags = av_get_cpu_flags();
474  if (c->dstBpc <= 14) {
475  RANGE_CONVERT_FUNCS(avx2, 8);
476  } else {
477  RANGE_CONVERT_FUNCS(avx2, 16);
478  }
479  } else if (EXTERNAL_SSE2(cpu_flags) && c->dstBpc <= 14) {
480  RANGE_CONVERT_FUNCS(sse2, 8);
481  } else if (EXTERNAL_SSE4(cpu_flags) && c->dstBpc > 14) {
482  RANGE_CONVERT_FUNCS(sse4, 16);
483  }
484 }
485 
487 {
488  int cpu_flags = av_get_cpu_flags();
489  enum AVPixelFormat dst_format = c->opts.dst_format;
490 
491  c->use_mmx_vfilter = 0;
492 
493  if (X86_MMXEXT(cpu_flags)) {
494  if (!is16BPS(dst_format) && !isNBPS(dst_format) && !isSemiPlanarYUV(dst_format)
495  && dst_format != AV_PIX_FMT_GRAYF32BE && dst_format != AV_PIX_FMT_GRAYF32LE
496  && !(c->opts.flags & SWS_BITEXACT)) {
497  if (c->opts.flags & SWS_ACCURATE_RND) {
498 #if HAVE_MMXEXT_INLINE
499  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
500  switch (c->opts.dst_format) {
501  case AV_PIX_FMT_RGB32: c->yuv2packedX = yuv2rgb32_X_ar_mmxext; break;
502 #if HAVE_6REGS
503  case AV_PIX_FMT_BGR24: c->yuv2packedX = yuv2bgr24_X_ar_mmxext; break;
504 #endif
505  case AV_PIX_FMT_RGB555: c->yuv2packedX = yuv2rgb555_X_ar_mmxext; break;
506  case AV_PIX_FMT_RGB565: c->yuv2packedX = yuv2rgb565_X_ar_mmxext; break;
507  case AV_PIX_FMT_YUYV422: c->yuv2packedX = yuv2yuyv422_X_ar_mmxext; break;
508  default: break;
509  }
510  }
511 #endif
512  } else {
513 #if HAVE_SSE2_EXTERNAL
514  if (EXTERNAL_SSE2(cpu_flags)) {
515  c->use_mmx_vfilter = 1;
516  c->yuv2planeX = yuv2yuvX_sse2;
517 #if HAVE_SSE3_EXTERNAL
519  c->yuv2planeX = yuv2yuvX_sse3;
520 #endif
521 #if HAVE_AVX2_EXTERNAL
523  c->yuv2planeX = yuv2yuvX_avx2;
524 #endif
525  }
526 #endif /* HAVE_SSE2_EXTERNAL */
527 #if HAVE_MMXEXT_INLINE
528  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
529  switch (c->opts.dst_format) {
530  case AV_PIX_FMT_RGB32: c->yuv2packedX = yuv2rgb32_X_mmxext; break;
531  case AV_PIX_FMT_BGR32: c->yuv2packedX = yuv2bgr32_X_mmxext; break;
532 #if HAVE_6REGS
533  case AV_PIX_FMT_BGR24: c->yuv2packedX = yuv2bgr24_X_mmxext; break;
534 #endif
535  case AV_PIX_FMT_RGB555: c->yuv2packedX = yuv2rgb555_X_mmxext; break;
536  case AV_PIX_FMT_RGB565: c->yuv2packedX = yuv2rgb565_X_mmxext; break;
537  case AV_PIX_FMT_YUYV422: c->yuv2packedX = yuv2yuyv422_X_mmxext; break;
538  default: break;
539  }
540  }
541 #endif
542  }
543 #if HAVE_MMXEXT_INLINE
544  if (!(c->opts.flags & SWS_FULL_CHR_H_INT)) {
545  switch (c->opts.dst_format) {
546  case AV_PIX_FMT_RGB32:
547  c->yuv2packed1 = yuv2rgb32_1_mmxext;
548  c->yuv2packed2 = yuv2rgb32_2_mmxext;
549  break;
550  case AV_PIX_FMT_BGR24:
551  c->yuv2packed1 = yuv2bgr24_1_mmxext;
552  c->yuv2packed2 = yuv2bgr24_2_mmxext;
553  break;
554  case AV_PIX_FMT_RGB555:
555  c->yuv2packed1 = yuv2rgb555_1_mmxext;
556  c->yuv2packed2 = yuv2rgb555_2_mmxext;
557  break;
558  case AV_PIX_FMT_RGB565:
559  c->yuv2packed1 = yuv2rgb565_1_mmxext;
560  c->yuv2packed2 = yuv2rgb565_2_mmxext;
561  break;
562  case AV_PIX_FMT_YUYV422:
563  c->yuv2packed1 = yuv2yuyv422_1_mmxext;
564  c->yuv2packed2 = yuv2yuyv422_2_mmxext;
565  break;
566  default:
567  break;
568  }
569  }
570 #endif
571  }
572 #if HAVE_MMXEXT_INLINE
573  if (c->srcBpc == 8 && c->dstBpc <= 14) {
574  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
575  if (c->opts.flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
576  c->hyscale_fast = ff_hyscale_fast_mmxext;
577  c->hcscale_fast = ff_hcscale_fast_mmxext;
578  } else {
579  c->hyscale_fast = NULL;
580  c->hcscale_fast = NULL;
581  }
582  }
583 #endif
584  }
585 
586 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
587  if (c->srcBpc == 8) { \
588  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
589  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
590  } else if (c->srcBpc == 9) { \
591  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
592  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
593  } else if (c->srcBpc == 10) { \
594  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
595  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
596  } else if (c->srcBpc == 12) { \
597  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
598  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
599  } else if (c->srcBpc == 14 || ((c->opts.src_format==AV_PIX_FMT_PAL8||isAnyRGB(c->opts.src_format)) && av_pix_fmt_desc_get(c->opts.src_format)->comp[0].depth<16)) { \
600  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
601  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
602  } else { /* c->srcBpc == 16 */ \
603  av_assert0(c->srcBpc == 16);\
604  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
605  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
606  } \
607 } while (0)
608 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case) \
609 switch(c->dstBpc){ \
610  case 16: do_16_case; break; \
611  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format) && !isDataInHighBits(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
612  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
613  case 8: if (!c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
614  }
615 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
616  switch(c->dstBpc){ \
617  case 16: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
618  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format) && !isDataInHighBits(c->opts.dst_format)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
619  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
620  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
621  default: av_assert0(c->dstBpc>8); \
622  }
623 #define case_rgb(x, X, opt) \
624  case AV_PIX_FMT_ ## X: \
625  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
626  if (!c->chrSrcHSubSample) \
627  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
628  break
629 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
630  switch (filtersize) { \
631  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
632  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
633  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
634  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
635  break; \
636  }
637  if (EXTERNAL_SSE2(cpu_flags)) {
638  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
639  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
640  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, );
641  if (!(c->opts.flags & SWS_ACCURATE_RND))
642  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
643 
644  switch (c->opts.src_format) {
645  case AV_PIX_FMT_YA8:
646  c->lumToYV12 = ff_yuyvToY_sse2;
647  if (c->needAlpha)
648  c->alpToYV12 = ff_uyvyToY_sse2;
649  break;
650  case AV_PIX_FMT_YUYV422:
651  c->lumToYV12 = ff_yuyvToY_sse2;
652  c->chrToYV12 = ff_yuyvToUV_sse2;
653  break;
654  case AV_PIX_FMT_UYVY422:
655  c->lumToYV12 = ff_uyvyToY_sse2;
656  c->chrToYV12 = ff_uyvyToUV_sse2;
657  break;
658  case AV_PIX_FMT_NV12:
659  c->chrToYV12 = ff_nv12ToUV_sse2;
660  break;
661  case AV_PIX_FMT_NV21:
662  c->chrToYV12 = ff_nv21ToUV_sse2;
663  break;
664  case_rgb(rgb24, RGB24, sse2);
665  case_rgb(bgr24, BGR24, sse2);
666  case_rgb(bgra, BGRA, sse2);
667  case_rgb(rgba, RGBA, sse2);
668  case_rgb(abgr, ABGR, sse2);
669  case_rgb(argb, ARGB, sse2);
670  default:
671  break;
672  }
673  }
674  if (EXTERNAL_SSSE3(cpu_flags)) {
675  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
676  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
677  switch (c->opts.src_format) {
678  case_rgb(rgb24, RGB24, ssse3);
679  case_rgb(bgr24, BGR24, ssse3);
680  default:
681  break;
682  }
683  }
684  if (EXTERNAL_SSE4(cpu_flags)) {
685  /* Xto15 don't need special sse4 functions */
686  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
687  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
688  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
689  if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
690  if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
691  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
692  }
693 
694  if (EXTERNAL_AVX(cpu_flags)) {
695  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, );
696  if (!(c->opts.flags & SWS_ACCURATE_RND))
697  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
698 
699  switch (c->opts.src_format) {
700  case AV_PIX_FMT_YUYV422:
701  c->chrToYV12 = ff_yuyvToUV_avx;
702  break;
703  case AV_PIX_FMT_UYVY422:
704  c->chrToYV12 = ff_uyvyToUV_avx;
705  break;
706  case AV_PIX_FMT_NV12:
707  c->chrToYV12 = ff_nv12ToUV_avx;
708  break;
709  case AV_PIX_FMT_NV21:
710  c->chrToYV12 = ff_nv21ToUV_avx;
711  break;
712  case_rgb(rgb24, RGB24, avx);
713  case_rgb(bgr24, BGR24, avx);
714  case_rgb(bgra, BGRA, avx);
715  case_rgb(rgba, RGBA, avx);
716  case_rgb(abgr, ABGR, avx);
717  case_rgb(argb, ARGB, avx);
718  default:
719  break;
720  }
721  }
722 
723 #if ARCH_X86_64
724 #define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
725  switch (filtersize) { \
726  case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
727  default: hscalefn = ff_hscale8to15_X4_avx2; break; \
728  break; \
729  }
730 
732  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
733  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
734  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
735  }
736  }
737 
739  if (ARCH_X86_64)
740  switch (c->opts.src_format) {
741  case_rgb(rgb24, RGB24, avx2);
742  case_rgb(bgr24, BGR24, avx2);
743  case_rgb(bgra, BGRA, avx2);
744  case_rgb(rgba, RGBA, avx2);
745  case_rgb(abgr, ABGR, avx2);
746  case_rgb(argb, ARGB, avx2);
747  }
748  if (!(c->opts.flags & SWS_ACCURATE_RND)) // FIXME
749  switch (c->opts.dst_format) {
750  case AV_PIX_FMT_NV12:
751  case AV_PIX_FMT_NV24:
752  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
753  break;
754  case AV_PIX_FMT_NV21:
755  case AV_PIX_FMT_NV42:
756  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
757  break;
758  default:
759  break;
760  }
761  }
762 
763 
764 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
765  case fmt: \
766  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
767 
768 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
769  case rgba_fmt: \
770  case rgb_fmt: \
771  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
772  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
773  break;
774 
775 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
776  case fmt: \
777  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
778  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
779  break;
780 
781 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
782  case fmt: \
783  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
784  break;
785 
786 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
787  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
788  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
789  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
790  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
791 
792 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
793  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
794  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
795  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
796  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
797 
798 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
799  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
800  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
801 
802 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
803  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
804  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
805 
806 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
807  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
808  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
809 
810 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
811  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
812  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
813  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
814  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
815  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
816  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
817  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
818  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
819 
820 
821  if (EXTERNAL_SSE2(cpu_flags)) {
822  switch (c->opts.src_format) {
823  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
824  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
825  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
826  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
827  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
828  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
829  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
830  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
831  default:
832  break;
833  }
834  }
835 
836  if (EXTERNAL_SSE4(cpu_flags)) {
837  switch (c->opts.src_format) {
838  case AV_PIX_FMT_GBRAP:
839  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
840  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
841  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
842  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
843  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
844  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
845  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
846  default:
847  break;
848  }
849  }
850 
852  switch (c->opts.src_format) {
853  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
854  default:
855  break;
856  }
857  }
858 
859  if(c->opts.flags & SWS_FULL_CHR_H_INT) {
860 
861 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
862  case fmt: \
863  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
864  break;
865 
866 #define YUV2ANYX_GBRAP_CASES(opt) \
867  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
868  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
869  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
870  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
871  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
872  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
873  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
874  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
875  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
876  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
877  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
878  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
879  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
880  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
881  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
882  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
883  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
884  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
885  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
886  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
887  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
888  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
889 
890  if (EXTERNAL_SSE2(cpu_flags)) {
891  switch (c->opts.dst_format) {
892  YUV2ANYX_GBRAP_CASES(sse2)
893  default:
894  break;
895  }
896  }
897 
898  if (EXTERNAL_SSE4(cpu_flags)) {
899  switch (c->opts.dst_format) {
900  YUV2ANYX_GBRAP_CASES(sse4)
901  default:
902  break;
903  }
904  }
905 
907  switch (c->opts.dst_format) {
908  YUV2ANYX_GBRAP_CASES(avx2)
909  default:
910  break;
911  }
912  }
913  }
914 
915 #endif
916 }
ff_hyscale_fast_mmxext
void ff_hyscale_fast_mmxext(SwsInternal *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
Definition: hscale_fast_bilinear_simd.c:192
flags
const SwsFlags flags[]
Definition: swscale.c:72
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:565
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:70
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1110
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:513
av_unused
#define av_unused
Definition: attributes.h:164
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:73
pixdesc.h
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:157
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
AV_PIX_FMT_GRAYF32LE
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:364
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:295
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:298
SWS_FAST_BILINEAR
@ SWS_FAST_BILINEAR
Scaler selection options.
Definition: swscale.h:176
is16BPS
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:745
rgb
Definition: rpzaenc.c:60
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt, bpc)
Definition: swscale.c:455
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:560
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt, bpc)
Definition: swscale.c:445
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:558
isNBPS
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:759
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:62
avassert.h
av_cold
#define av_cold
Definition: attributes.h:119
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:562
intreadwrite.h
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:563
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:71
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1111
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:219
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:275
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case)
if
if(ret)
Definition: filter_design.txt:179
isSemiPlanarYUV
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:791
ff_hcscale_fast_mmxext
void ff_hcscale_fast_mmxext(SwsInternal *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
Definition: hscale_fast_bilinear_simd.c:282
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:258
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
Definition: swscale.c:470
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:561
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:56
SwsPlane
Slice plane.
Definition: swscale_internal.h:1105
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:557
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that some (invalid) inputs can trigger overflows (undefined behavior). In these cases, often the output of the computation does not matter (as it is from invalid input).
Definition: undefined.txt:32
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:766
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:578
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:532
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:511
offset
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this field, just let it be (in first position) for now.
Definition: writing_filters.txt:86
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:53
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
Definition: swscale.c:486
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
Definition: swscale.c:60
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:559
DECLARE_ASM_CONST
DECLARE_ASM_CONST(16, double, pd_1)[2]
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:527
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as AV_PIX_FMT_NV12, but U and V bytes are swapped
Definition: pixfmt.h:97
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as AV_PIX_FMT_NV24, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_RGB565
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:526
SwsInternal
Definition: swscale_internal.h:335
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:64
AV_PIX_FMT_GRAYF32BE
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:363
SWS_FULL_CHR_H_INT
@ SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:133
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:62
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:579
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1108
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:272
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:177
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:156
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:59
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:245
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:72
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:262
src
#define src
Definition: vp8dsp.c:248
swscale.h
X86_MMXEXT
#define X86_MMXEXT(flags)
Definition: cpu.h:26
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33