FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"
32 
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
43 #define DITHER1XBPP
44 
45 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
46 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
47 
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
49 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
51 
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
53 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
54 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
55 
56 
/*
 * swscale_template.c is included once per inline-asm flavour.  RENAME()
 * appends the flavour suffix to every identifier the template wraps with it,
 * and COMPILE_TEMPLATE_MMXEXT tells the template which flavour is being built.
 */

/* MMX flavour: identifiers end in _mmx */
#if HAVE_MMX_INLINE
#undef RENAME
#define RENAME(a) a ## _mmx
#define COMPILE_TEMPLATE_MMXEXT 0
#include "swscale_template.c"
#endif

/* MMXEXT flavour: identifiers end in _mmxext */
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define RENAME(a) a ## _mmxext
#define COMPILE_TEMPLATE_MMXEXT 1
#include "swscale_template.c"
#endif
73 
75 {
76  const int dstH= c->dstH;
77  const int flags= c->flags;
78 
79  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
80  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
81  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
82 
83  int hasAlpha = c->needAlpha;
84  int32_t *vLumFilterPos= c->vLumFilterPos;
85  int32_t *vChrFilterPos= c->vChrFilterPos;
86  int16_t *vLumFilter= c->vLumFilter;
87  int16_t *vChrFilter= c->vChrFilter;
88  int32_t *lumMmxFilter= c->lumMmxFilter;
89  int32_t *chrMmxFilter= c->chrMmxFilter;
90  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
91  const int vLumFilterSize= c->vLumFilterSize;
92  const int vChrFilterSize= c->vChrFilterSize;
93  const int chrDstY= dstY>>c->chrDstVSubSample;
94  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
95  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
96 
97  c->blueDither= ff_dither8[dstY&1];
98  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
99  c->greenDither= ff_dither8[dstY&1];
100  else
101  c->greenDither= ff_dither4[dstY&1];
102  c->redDither= ff_dither8[(dstY+1)&1];
103  if (dstY < dstH - 2) {
104  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
105  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
106  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
107 
108  int i;
109  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
110  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
111 
112  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
113  for (i = 0; i < neg; i++)
114  tmpY[i] = lumSrcPtr[neg];
115  for ( ; i < end; i++)
116  tmpY[i] = lumSrcPtr[i];
117  for ( ; i < vLumFilterSize; i++)
118  tmpY[i] = tmpY[i-1];
119  lumSrcPtr = tmpY;
120 
121  if (alpSrcPtr) {
122  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
123  for (i = 0; i < neg; i++)
124  tmpA[i] = alpSrcPtr[neg];
125  for ( ; i < end; i++)
126  tmpA[i] = alpSrcPtr[i];
127  for ( ; i < vLumFilterSize; i++)
128  tmpA[i] = tmpA[i - 1];
129  alpSrcPtr = tmpA;
130  }
131  }
132  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
133  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
134  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
135  for (i = 0; i < neg; i++) {
136  tmpU[i] = chrUSrcPtr[neg];
137  }
138  for ( ; i < end; i++) {
139  tmpU[i] = chrUSrcPtr[i];
140  }
141  for ( ; i < vChrFilterSize; i++) {
142  tmpU[i] = tmpU[i - 1];
143  }
144  chrUSrcPtr = tmpU;
145  }
146 
147  if (flags & SWS_ACCURATE_RND) {
148  int s= APCK_SIZE / 8;
149  for (i=0; i<vLumFilterSize; i+=2) {
150  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
151  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
154  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
155  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
156  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
157  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
160  }
161  }
162  for (i=0; i<vChrFilterSize; i+=2) {
163  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
164  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
167  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
168  }
169  } else {
170  for (i=0; i<vLumFilterSize; i++) {
171  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
172  lumMmxFilter[4*i+2]=
173  lumMmxFilter[4*i+3]=
174  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
175  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
176  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
177  alpMmxFilter[4*i+2]=
178  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
179  }
180  }
181  for (i=0; i<vChrFilterSize; i++) {
182  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
183  chrMmxFilter[4*i+2]=
184  chrMmxFilter[4*i+3]=
185  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
186  }
187  }
188  }
189 }
190 #endif /* HAVE_INLINE_ASM */
191 
/*
 * Declare the external ff_yuv2yuvX_<opt>() routine and define a static
 * yuv2yuvX_<opt>() wrapper around it.
 *
 * The wrapper does nothing for dstW <= 0.  Otherwise it forwards to the
 * external routine with filterSize - 1, a source offset of 0, the
 * destination pointer moved back by `offset` and the width grown by
 * `offset`.  `src` and `step` are not used by this variant.
 */
#define YUV2YUVX_FUNC_MMX(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    if (dstW <= 0) \
        return; \
    ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, \
                       dstW + offset, dither, offset); \
}
204 
/*
 * Declare the external ff_yuv2yuvX_<opt>() routine and define a static
 * yuv2yuvX_<opt>() wrapper for implementations that handle `step` pixels
 * per iteration.
 *
 *  - If dest is not 16-byte aligned, the whole line is handed to
 *    yuv2yuvX_mmx() (so YUV2YUVX_FUNC_MMX(mmx, ...) must be instantiated
 *    before this macro is used).
 *  - Otherwise the largest multiple of `step` pixels goes to the <opt>
 *    routine and any remaining pixels go to ff_yuv2yuvX_mmx(), starting at
 *    source offset pixelsProcessed.
 *
 * `step` is parenthesized so that an expression argument cannot change the
 * meaning of the modulo.
 */
#define YUV2YUVX_FUNC(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    int remainder = (dstW % (step)); \
    int pixelsProcessed = dstW - remainder; \
    if(((uintptr_t)dest) & 15){ \
        yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset); \
        return; \
    } \
    if(pixelsProcessed > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
    if(remainder > 0){ \
        ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
    } \
    return; \
}

/* Instantiate one wrapper per external implementation that was built. */
#if HAVE_MMX_EXTERNAL
YUV2YUVX_FUNC_MMX(mmx, 16)
#endif
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif
239 
240 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
241 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
242  SwsContext *c, int16_t *data, \
243  int dstW, const uint8_t *src, \
244  const int16_t *filter, \
245  const int32_t *filterPos, int filterSize)
246 
247 #define SCALE_FUNCS(filter_n, opt) \
248  SCALE_FUNC(filter_n, 8, 15, opt); \
249  SCALE_FUNC(filter_n, 9, 15, opt); \
250  SCALE_FUNC(filter_n, 10, 15, opt); \
251  SCALE_FUNC(filter_n, 12, 15, opt); \
252  SCALE_FUNC(filter_n, 14, 15, opt); \
253  SCALE_FUNC(filter_n, 16, 15, opt); \
254  SCALE_FUNC(filter_n, 8, 19, opt); \
255  SCALE_FUNC(filter_n, 9, 19, opt); \
256  SCALE_FUNC(filter_n, 10, 19, opt); \
257  SCALE_FUNC(filter_n, 12, 19, opt); \
258  SCALE_FUNC(filter_n, 14, 19, opt); \
259  SCALE_FUNC(filter_n, 16, 19, opt)
260 
261 #define SCALE_FUNCS_MMX(opt) \
262  SCALE_FUNCS(4, opt); \
263  SCALE_FUNCS(8, opt); \
264  SCALE_FUNCS(X, opt)
265 
266 #define SCALE_FUNCS_SSE(opt) \
267  SCALE_FUNCS(4, opt); \
268  SCALE_FUNCS(8, opt); \
269  SCALE_FUNCS(X4, opt); \
270  SCALE_FUNCS(X8, opt)
271 
272 #if ARCH_X86_32
273 SCALE_FUNCS_MMX(mmx);
274 #endif
275 SCALE_FUNCS_SSE(sse2);
276 SCALE_FUNCS_SSE(ssse3);
277 SCALE_FUNCS_SSE(sse4);
278 
/* Prototype of one external multi-tap vertical scaler, ff_yuv2planeX_<bits>_<isa>(). */
#define VSCALEX_FUNC(bits, isa) \
void ff_yuv2planeX_ ## bits ## _ ## isa(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)

/* The 8-, 9- and 10-bit variants for one instruction set. */
#define VSCALEX_FUNCS(isa) \
    VSCALEX_FUNC(8,  isa); \
    VSCALEX_FUNC(9,  isa); \
    VSCALEX_FUNC(10, isa)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);   /* the 16-bit variant is declared for SSE4 only */
VSCALEX_FUNCS(avx);
295 
/* Prototype of one external single-line vertical output routine, ff_yuv2plane1_<bits>_<isa>(). */
#define VSCALE_FUNC(bits, isa) \
void ff_yuv2plane1_ ## bits ## _ ## isa(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)

/* The 8- and 16-bit variants use isa_a; the 9- and 10-bit variants use isa_b. */
#define VSCALE_FUNCS(isa_a, isa_b) \
    VSCALE_FUNC(8,  isa_a); \
    VSCALE_FUNC(9,  isa_b); \
    VSCALE_FUNC(10, isa_b); \
    VSCALE_FUNC(16, isa_a)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
311 
/* Prototype of an external input converter producing luma, ff_<layout>ToY_<isa>(). */
#define INPUT_Y_FUNC(layout, isa) \
void ff_ ## layout ## ToY_ ## isa(uint8_t *dst, const uint8_t *src, \
                                  const uint8_t *unused1, const uint8_t *unused2, \
                                  int w, uint32_t *unused)

/* Prototype of an external input converter producing chroma, ff_<layout>ToUV_<isa>(). */
#define INPUT_UV_FUNC(layout, isa) \
void ff_ ## layout ## ToUV_ ## isa(uint8_t *dstU, uint8_t *dstV, \
                                   const uint8_t *unused0, \
                                   const uint8_t *src1, \
                                   const uint8_t *src2, \
                                   int w, uint32_t *unused)

/* Both the luma and the chroma converter for one layout. */
#define INPUT_FUNC(layout, isa) \
    INPUT_Y_FUNC(layout, isa); \
    INPUT_UV_FUNC(layout, isa)

/* Every converter declared for one instruction set; nv12/nv21 are chroma-only. */
#define INPUT_FUNCS(isa) \
    INPUT_FUNC(uyvy, isa); \
    INPUT_FUNC(yuyv, isa); \
    INPUT_UV_FUNC(nv12, isa); \
    INPUT_UV_FUNC(nv21, isa); \
    INPUT_FUNC(rgba, isa); \
    INPUT_FUNC(bgra, isa); \
    INPUT_FUNC(argb, isa); \
    INPUT_FUNC(abgr, isa); \
    INPUT_FUNC(rgb24, isa); \
    INPUT_FUNC(bgr24, isa)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
343 
#if ARCH_X86_64
/* Prototype of an external interleaved-chroma writer, ff_yuv2<layout>cX_<isa>(). */
#define YUV2NV_DECL(layout, isa) \
void ff_yuv2 ## layout ## cX_ ## isa(enum AVPixelFormat format, const uint8_t *dither, \
                                     const int16_t *filter, int filterSize, \
                                     const int16_t **u, const int16_t **v, \
                                     uint8_t *dst, int dstWidth)

/* Only AVX2 versions are declared, and only for x86-64 builds. */
YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
#endif
354 
356 {
357  int cpu_flags = av_get_cpu_flags();
358 
359 #if HAVE_MMX_INLINE
360  if (INLINE_MMX(cpu_flags))
361  sws_init_swscale_mmx(c);
362 #endif
363 #if HAVE_MMXEXT_INLINE
365  sws_init_swscale_mmxext(c);
366 #endif
367  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
368 #if HAVE_MMX_EXTERNAL
369  if (EXTERNAL_MMX(cpu_flags))
370  c->yuv2planeX = yuv2yuvX_mmx;
371 #endif
372 #if HAVE_MMXEXT_EXTERNAL
374  c->yuv2planeX = yuv2yuvX_mmxext;
375 #endif
376 #if HAVE_SSE3_EXTERNAL
378  c->yuv2planeX = yuv2yuvX_sse3;
379 #endif
380 #if HAVE_AVX2_EXTERNAL
382  c->yuv2planeX = yuv2yuvX_avx2;
383 #endif
384  }
385 
386 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
387  if (c->srcBpc == 8) { \
388  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
389  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
390  } else if (c->srcBpc == 9) { \
391  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
392  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
393  } else if (c->srcBpc == 10) { \
394  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
395  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
396  } else if (c->srcBpc == 12) { \
397  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
398  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
399  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
400  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
401  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
402  } else { /* c->srcBpc == 16 */ \
403  av_assert0(c->srcBpc == 16);\
404  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
405  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
406  } \
407 } while (0)
408 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
409  switch (filtersize) { \
410  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
411  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
412  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
413  }
414 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
415 switch(c->dstBpc){ \
416  case 16: do_16_case; break; \
417  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
418  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
419  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
420  }
421 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
422  switch(c->dstBpc){ \
423  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
424  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
425  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
426  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
427  default: av_assert0(c->dstBpc>8); \
428  }
429 #define case_rgb(x, X, opt) \
430  case AV_PIX_FMT_ ## X: \
431  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
432  if (!c->chrSrcHSubSample) \
433  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
434  break
435 #if ARCH_X86_32
436  if (EXTERNAL_MMX(cpu_flags)) {
437  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
438  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
439  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
440 
441  switch (c->srcFormat) {
442  case AV_PIX_FMT_YA8:
443  c->lumToYV12 = ff_yuyvToY_mmx;
444  if (c->needAlpha)
445  c->alpToYV12 = ff_uyvyToY_mmx;
446  break;
447  case AV_PIX_FMT_YUYV422:
448  c->lumToYV12 = ff_yuyvToY_mmx;
449  c->chrToYV12 = ff_yuyvToUV_mmx;
450  break;
451  case AV_PIX_FMT_UYVY422:
452  c->lumToYV12 = ff_uyvyToY_mmx;
453  c->chrToYV12 = ff_uyvyToUV_mmx;
454  break;
455  case AV_PIX_FMT_NV12:
456  c->chrToYV12 = ff_nv12ToUV_mmx;
457  break;
458  case AV_PIX_FMT_NV21:
459  c->chrToYV12 = ff_nv21ToUV_mmx;
460  break;
461  case_rgb(rgb24, RGB24, mmx);
462  case_rgb(bgr24, BGR24, mmx);
463  case_rgb(bgra, BGRA, mmx);
464  case_rgb(rgba, RGBA, mmx);
465  case_rgb(abgr, ABGR, mmx);
466  case_rgb(argb, ARGB, mmx);
467  default:
468  break;
469  }
470  }
471  if (EXTERNAL_MMXEXT(cpu_flags)) {
472  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
473  }
474 #endif /* ARCH_X86_32 */
475 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
476  switch (filtersize) { \
477  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
478  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
479  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
480  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
481  break; \
482  }
483  if (EXTERNAL_SSE2(cpu_flags)) {
484  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
485  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
486  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
487  HAVE_ALIGNED_STACK || ARCH_X86_64);
488  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
489 
490  switch (c->srcFormat) {
491  case AV_PIX_FMT_YA8:
492  c->lumToYV12 = ff_yuyvToY_sse2;
493  if (c->needAlpha)
494  c->alpToYV12 = ff_uyvyToY_sse2;
495  break;
496  case AV_PIX_FMT_YUYV422:
497  c->lumToYV12 = ff_yuyvToY_sse2;
498  c->chrToYV12 = ff_yuyvToUV_sse2;
499  break;
500  case AV_PIX_FMT_UYVY422:
501  c->lumToYV12 = ff_uyvyToY_sse2;
502  c->chrToYV12 = ff_uyvyToUV_sse2;
503  break;
504  case AV_PIX_FMT_NV12:
505  c->chrToYV12 = ff_nv12ToUV_sse2;
506  break;
507  case AV_PIX_FMT_NV21:
508  c->chrToYV12 = ff_nv21ToUV_sse2;
509  break;
510  case_rgb(rgb24, RGB24, sse2);
511  case_rgb(bgr24, BGR24, sse2);
512  case_rgb(bgra, BGRA, sse2);
513  case_rgb(rgba, RGBA, sse2);
514  case_rgb(abgr, ABGR, sse2);
515  case_rgb(argb, ARGB, sse2);
516  default:
517  break;
518  }
519  }
520  if (EXTERNAL_SSSE3(cpu_flags)) {
521  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
522  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
523  switch (c->srcFormat) {
524  case_rgb(rgb24, RGB24, ssse3);
525  case_rgb(bgr24, BGR24, ssse3);
526  default:
527  break;
528  }
529  }
530  if (EXTERNAL_SSE4(cpu_flags)) {
531  /* Xto15 don't need special sse4 functions */
532  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
533  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
534  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
535  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
536  HAVE_ALIGNED_STACK || ARCH_X86_64);
537  if (c->dstBpc == 16 && !isBE(c->dstFormat))
538  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
539  }
540 
541  if (EXTERNAL_AVX(cpu_flags)) {
542  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
543  HAVE_ALIGNED_STACK || ARCH_X86_64);
544  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
545 
546  switch (c->srcFormat) {
547  case AV_PIX_FMT_YUYV422:
548  c->chrToYV12 = ff_yuyvToUV_avx;
549  break;
550  case AV_PIX_FMT_UYVY422:
551  c->chrToYV12 = ff_uyvyToUV_avx;
552  break;
553  case AV_PIX_FMT_NV12:
554  c->chrToYV12 = ff_nv12ToUV_avx;
555  break;
556  case AV_PIX_FMT_NV21:
557  c->chrToYV12 = ff_nv21ToUV_avx;
558  break;
559  case_rgb(rgb24, RGB24, avx);
560  case_rgb(bgr24, BGR24, avx);
561  case_rgb(bgra, BGRA, avx);
562  case_rgb(rgba, RGBA, avx);
563  case_rgb(abgr, ABGR, avx);
564  case_rgb(argb, ARGB, avx);
565  default:
566  break;
567  }
568  }
569 
570 #if ARCH_X86_64
572  switch (c->dstFormat) {
573  case AV_PIX_FMT_NV12:
574  case AV_PIX_FMT_NV24:
575  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
576  break;
577  case AV_PIX_FMT_NV21:
578  case AV_PIX_FMT_NV42:
579  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
580  break;
581  default:
582  break;
583  }
584  }
585 #endif
586 }
INLINE_MMX
#define INLINE_MMX(flags)
Definition: cpu.h:86
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:385
ASSIGN_MMX_SCALE_FUNC
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:57
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:977
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:143
mem_internal.h
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem.h:119
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:397
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:378
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:479
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:377
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem.h:118
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:325
U
#define U(x)
Definition: vp56_arith.h:37
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:257
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:58
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:978
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:299
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:381
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
int32_t
int32_t
Definition: audio_convert.c:194
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:279
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:355
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:972
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:487
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:382
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:667
cpu.h
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:392
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:386
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:83
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:117
i
int i
Definition: input.c:407
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:348
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:387
swscale_internal.h
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:349
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:192
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:480
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:975
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:296
case_rgb
#define case_rgb(x, X, opt)
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:39
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:205
SCALE_FUNCS_MMX
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:261
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:283
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:266
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:296
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:59
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:283
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33