FFmpeg: swscale.c
/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"
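/* 4- and 8-entry ordered-dither rows, 8-byte aligned so the MMX code can load
 * them directly; they provide the per-line red/green/blue dither values used
 * for dithered output to low-depth packed RGB formats. */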
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};
#if HAVE_INLINE_ASM

#define DITHER1XBPP
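/* Constants for the inline-asm paths: bF8/bFC mask each byte down to its top
 * 5/6 bits for dithered RGB555/RGB565 packing, the ff_M24* masks select byte
 * lanes when writing packed 24-bit RGB, and the bgr2Y/UV offsets together
 * with ff_w1111 are offset/rounding constants referenced by the RGB to YUV
 * assembly. */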
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
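/* The scaler kernels live in swscale_template.c; including it twice with a
 * different RENAME() suffix and COMPILE_TEMPLATE_MMXEXT setting produces the
 * plain MMX and the MMXEXT variants of the same code. */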
//MMX versions
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif

// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
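/* Rebuilds the per-output-line state consumed by the inline-asm vertical
 * scaler: the dither words for the current line and the interleaved
 * pointer/coefficient tables (lumMmxFilter, chrMmxFilter, alpMmxFilter),
 * clamping source lines that fall outside the current slice. */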
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
{
    const int dstH= c->dstH;
    const int flags= c->flags;

    SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
    SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
    SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];

    int hasAlpha = c->needAlpha;
    int32_t *vLumFilterPos= c->vLumFilterPos;
    int32_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int chrDstY= dstY>>c->chrDstVSubSample;
    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input

    c->blueDither= ff_dither8[dstY&1];
    if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
        c->greenDither= ff_dither8[dstY&1];
    else
        c->greenDither= ff_dither4[dstY&1];
    c->redDither= ff_dither8[(dstY+1)&1];
    if (dstY < dstH - 2) {
        const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
        const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;

        int i;
        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
            const int16_t **tmpY = (const int16_t **) lumPlane->tmp;

            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
            for (i = 0; i < neg; i++)
                tmpY[i] = lumSrcPtr[neg];
            for ( ; i < end; i++)
                tmpY[i] = lumSrcPtr[i];
            for ( ; i < vLumFilterSize; i++)
                tmpY[i] = tmpY[i-1];
            lumSrcPtr = tmpY;

            if (alpSrcPtr) {
                const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
                for (i = 0; i < neg; i++)
                    tmpA[i] = alpSrcPtr[neg];
                for ( ; i < end; i++)
                    tmpA[i] = alpSrcPtr[i];
                for ( ; i < vLumFilterSize; i++)
                    tmpA[i] = tmpA[i - 1];
                alpSrcPtr = tmpA;
            }
        }
        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
            const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
            for (i = 0; i < neg; i++) {
                tmpU[i] = chrUSrcPtr[neg];
            }
            for ( ; i < end; i++) {
                tmpU[i] = chrUSrcPtr[i];
            }
            for ( ; i < vChrFilterSize; i++) {
                tmpU[i] = tmpU[i - 1];
            }
            chrUSrcPtr = tmpU;
        }

        if (flags & SWS_ACCURATE_RND) {
            int s= APCK_SIZE / 8;
            for (i=0; i<vLumFilterSize; i+=2) {
                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i];
                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
                lumMmxFilter[s*i+APCK_COEF/4  ]=
                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i]
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i];
                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                    alpMmxFilter[s*i+APCK_COEF/4  ]=
                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4];
                }
            }
            for (i=0; i<vChrFilterSize; i+=2) {
                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i];
                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                chrMmxFilter[s*i+APCK_COEF/4  ]=
                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i]
                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
            }
        } else {
            for (i=0; i<vLumFilterSize; i++) {
                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                lumMmxFilter[4*i+2]=
                lumMmxFilter[4*i+3]=
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                    alpMmxFilter[4*i+2]=
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                }
            }
            for (i=0; i<vChrFilterSize; i++) {
                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                chrMmxFilter[4*i+2]=
                chrMmxFilter[4*i+3]=
                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
            }
        }
    }
}
#endif /* HAVE_INLINE_ASM */
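/* Wrappers around the external ff_yuv2yuvX_<opt> assembly: the MMX/MMXEXT
 * variant forwards directly, while the wide (SSE3/AVX2) variant falls back to
 * the MMX path for unaligned destinations and lets the MMX kernel handle the
 * pixels left over after the wide vector loop. */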
#define YUV2YUVX_FUNC_MMX(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    if(dstW > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
    return; \
}

#define YUV2YUVX_FUNC(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    int remainder = (dstW % step); \
    int pixelsProcessed = dstW - remainder; \
    if(((uintptr_t)dest) & 15){ \
        yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset); \
        return; \
    } \
    if(pixelsProcessed > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
    if(remainder > 0){ \
        ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
    } \
    return; \
}

#if HAVE_MMX_EXTERNAL
YUV2YUVX_FUNC_MMX(mmx, 16)
#endif
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif
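/* Prototypes for the external-asm horizontal scalers; one function is
 * generated per (filter size, input depth, output depth, instruction set)
 * combination, and ff_sws_init_swscale_x86() picks the matching one below. */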
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
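/* Prototypes for the external-asm vertical scalers: yuv2planeX applies a
 * multi-tap filter across source lines, yuv2plane1 outputs a single line,
 * each specialised per output bit depth and instruction set. */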
#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);

#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
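/* Prototypes for the packed-input converters that unpack YUYV/UYVY/NV12/NV21
 * and the common RGB/BGR layouts into the planar form the scaler works on. */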
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
                                const uint8_t *unused1, const uint8_t *unused2, \
                                int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
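/* 64-bit only: AVX2 kernels that write the interleaved NV12/NV21 chroma
 * plane directly from the vertical filter. */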
#if ARCH_X86_64
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
                                  const int16_t *filter, int filterSize, \
                                  const int16_t **u, const int16_t **v, \
                                  uint8_t *dst, int dstWidth)

YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
#endif
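/* Runtime dispatch: inspect the CPU flags once and install the fastest
 * available horizontal/vertical scaler and input converter function pointers
 * into the SwsContext. */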
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
    int cpu_flags = av_get_cpu_flags();
#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
#endif
    if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmx;
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmxext;
#endif
#if HAVE_SSE3_EXTERNAL
        if (EXTERNAL_SSE3(cpu_flags))
            c->yuv2planeX = yuv2yuvX_sse3;
#endif
#if HAVE_AVX2_EXTERNAL
        if (EXTERNAL_AVX2_FAST(cpu_flags))
            c->yuv2planeX = yuv2yuvX_avx2;
#endif
    }

#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
    if (c->srcBpc == 8) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 9) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 10) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 12) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
    } else { /* c->srcBpc == 16 */ \
        av_assert0(c->srcBpc == 16);\
        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
    } \
} while (0)
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
    case 16: do_16_case; break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
    case 8:  if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
    }
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
    switch(c->dstBpc){ \
    case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
    case 8:  vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
    default: av_assert0(c->dstBpc>8); \
    }
#define case_rgb(x, X, opt) \
        case AV_PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_mmx;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_mmx;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_mmx;
            c->chrToYV12 = ff_yuyvToUV_mmx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_mmx;
            c->chrToYV12 = ff_uyvyToUV_mmx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_mmx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_mmx;
            break;
        case_rgb(rgb24, RGB24, mmx);
        case_rgb(bgr24, BGR24, mmx);
        case_rgb(bgra,  BGRA,  mmx);
        case_rgb(rgba,  RGBA,  mmx);
        case_rgb(abgr,  ABGR,  mmx);
        case_rgb(argb,  ARGB,  mmx);
        default:
            break;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
    }
#endif /* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break; \
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_sse2;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_sse2;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_sse2;
            c->chrToYV12 = ff_yuyvToUV_sse2;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_sse2;
            c->chrToYV12 = ff_uyvyToUV_sse2;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_sse2;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_sse2;
            break;
        case_rgb(rgb24, RGB24, sse2);
        case_rgb(bgr24, BGR24, sse2);
        case_rgb(bgra,  BGRA,  sse2);
        case_rgb(rgba,  RGBA,  sse2);
        case_rgb(abgr,  ABGR,  sse2);
        case_rgb(argb,  ARGB,  sse2);
        default:
            break;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
        switch (c->srcFormat) {
        case_rgb(rgb24, RGB24, ssse3);
        case_rgb(bgr24, BGR24, ssse3);
        default:
            break;
        }
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        if (c->dstBpc == 16 && !isBE(c->dstFormat))
            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YUYV422:
            c->chrToYV12 = ff_yuyvToUV_avx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->chrToYV12 = ff_uyvyToUV_avx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_avx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_avx;
            break;
        case_rgb(rgb24, RGB24, avx);
        case_rgb(bgr24, BGR24, avx);
        case_rgb(bgra,  BGRA,  avx);
        case_rgb(rgba,  RGBA,  avx);
        case_rgb(abgr,  ABGR,  avx);
        case_rgb(argb,  ARGB,  avx);
        default:
            break;
        }
    }

#if ARCH_X86_64
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_NV24:
            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
            break;
        case AV_PIX_FMT_NV21:
        case AV_PIX_FMT_NV42:
            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
            break;
        default:
            break;
        }
    }
#endif
}