FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/* Two-entry, 8-byte-aligned dither table. In this file the entry is picked
 * with (dstY & 1) and stored in c->greenDither for every destination format
 * other than RGB555/BGR555. */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/* Two-entry, 8-byte-aligned dither table. In this file it feeds
 * c->blueDither (row dstY & 1), c->redDither (row (dstY + 1) & 1) and, for
 * RGB555/BGR555 destinations only, c->greenDither (row dstY & 1). */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
43 #define DITHER1XBPP
44 
/* Byte-replicated masks: 0xF8 and 0xFC in each of the 8 bytes.
 * NOTE(review): not referenced by name in this file; presumably consumed by
 * the inline asm pulled in from swscale_template.c -- confirm. */
45 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
46 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
47 
/* Three complementary byte masks with a period of 3 bytes:
 * A selects bytes 0, 3, 6; B selects bytes 1, 4, 7; C selects bytes 2, 5.
 * NOTE(review): the "24" suggests 24-bit packed pixels -- confirm with users. */
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
49 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
51 
/* Replicated constants: 0x10 (16) in every byte, 0x80 (128) in every byte,
 * and 1 in every 16-bit word.
 * NOTE(review): names suggest luma/chroma offsets for BGR->YUV conversion;
 * the consumers are not visible here -- confirm. */
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
53 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
54 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
55 
56 
57 //MMX versions
58 #if HAVE_MMX_INLINE
59 #undef RENAME
60 #define COMPILE_TEMPLATE_MMXEXT 0
61 #define RENAME(a) a ## _mmx
62 #include "swscale_template.c"
63 #endif
64 
65 // MMXEXT versions
66 #if HAVE_MMXEXT_INLINE
67 #undef RENAME
68 #undef COMPILE_TEMPLATE_MMXEXT
69 #define COMPILE_TEMPLATE_MMXEXT 1
70 #define RENAME(a) a ## _mmxext
71 #include "swscale_template.c"
72 #endif
73 
75 {
76  const int dstH= c->dstH;
77  const int flags= c->flags;
78 
79  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
80  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
81  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
82 
83  int hasAlpha = c->needAlpha;
84  int32_t *vLumFilterPos= c->vLumFilterPos;
85  int32_t *vChrFilterPos= c->vChrFilterPos;
86  int16_t *vLumFilter= c->vLumFilter;
87  int16_t *vChrFilter= c->vChrFilter;
88  int32_t *lumMmxFilter= c->lumMmxFilter;
89  int32_t *chrMmxFilter= c->chrMmxFilter;
90  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
91  const int vLumFilterSize= c->vLumFilterSize;
92  const int vChrFilterSize= c->vChrFilterSize;
93  const int chrDstY= dstY>>c->chrDstVSubSample;
94  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
95  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
96 
97  c->blueDither= ff_dither8[dstY&1];
98  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
99  c->greenDither= ff_dither8[dstY&1];
100  else
101  c->greenDither= ff_dither4[dstY&1];
102  c->redDither= ff_dither8[(dstY+1)&1];
103  if (dstY < dstH - 2) {
104  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
105  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
106  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
107 
108  int i;
109  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
110  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
111 
112  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
113  for (i = 0; i < neg; i++)
114  tmpY[i] = lumSrcPtr[neg];
115  for ( ; i < end; i++)
116  tmpY[i] = lumSrcPtr[i];
117  for ( ; i < vLumFilterSize; i++)
118  tmpY[i] = tmpY[i-1];
119  lumSrcPtr = tmpY;
120 
121  if (alpSrcPtr) {
122  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
123  for (i = 0; i < neg; i++)
124  tmpA[i] = alpSrcPtr[neg];
125  for ( ; i < end; i++)
126  tmpA[i] = alpSrcPtr[i];
127  for ( ; i < vLumFilterSize; i++)
128  tmpA[i] = tmpA[i - 1];
129  alpSrcPtr = tmpA;
130  }
131  }
132  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
133  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
134  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
135  for (i = 0; i < neg; i++) {
136  tmpU[i] = chrUSrcPtr[neg];
137  }
138  for ( ; i < end; i++) {
139  tmpU[i] = chrUSrcPtr[i];
140  }
141  for ( ; i < vChrFilterSize; i++) {
142  tmpU[i] = tmpU[i - 1];
143  }
144  chrUSrcPtr = tmpU;
145  }
146 
147  if (flags & SWS_ACCURATE_RND) {
148  int s= APCK_SIZE / 8;
149  for (i=0; i<vLumFilterSize; i+=2) {
150  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
151  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
154  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
155  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
156  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
157  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
160  }
161  }
162  for (i=0; i<vChrFilterSize; i+=2) {
163  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
164  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
167  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
168  }
169  } else {
170  for (i=0; i<vLumFilterSize; i++) {
171  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
172  lumMmxFilter[4*i+2]=
173  lumMmxFilter[4*i+3]=
174  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
175  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
176  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
177  alpMmxFilter[4*i+2]=
178  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
179  }
180  }
181  for (i=0; i<vChrFilterSize; i++) {
182  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
183  chrMmxFilter[4*i+2]=
184  chrMmxFilter[4*i+3]=
185  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
186  }
187  }
188  }
189 }
190 #endif /* HAVE_INLINE_ASM */
191 
/**
 * Declare the external kernel ff_yuv2yuvX_<opt>() and define the static
 * wrapper yuv2yuvX_<opt>() around it.
 *
 * The wrapper does nothing for dstW <= 0; otherwise it makes one kernel call
 * covering the whole row, passing filterSize - 1, a source offset of 0,
 * dest - offset and dstW + offset. Neither `src` nor the macro's `step`
 * argument is used by this variant.
 */
#define YUV2YUVX_FUNC_MMX(opt, step)                                          \
void ff_yuv2yuvX_ ## opt(const int16_t *filter, int filterSize,               \
                         int srcOffset, uint8_t *dest, int dstW,              \
                         const uint8_t *dither, int offset);                  \
static void yuv2yuvX_ ## opt(const int16_t *filter, int filterSize,           \
                             const int16_t **src, uint8_t *dest, int dstW,    \
                             const uint8_t *dither, int offset)               \
{                                                                             \
    if (dstW <= 0)                                                            \
        return;                                                               \
    ff_yuv2yuvX_ ## opt(filter, filterSize - 1, 0,                            \
                        dest - offset, dstW + offset, dither, offset);        \
}
204 
/**
 * Declare the external kernel ff_yuv2yuvX_<opt>() and define the static
 * wrapper yuv2yuvX_<opt>() around it. The kernel is only ever invoked on a
 * multiple of `step` pixels.
 *
 * - If dest is not 16-byte aligned, the whole row is delegated to
 *   yuv2yuvX_mmx() and nothing else is done.
 * - Otherwise the kernel processes the largest multiple of `step` pixels
 *   (pixelsProcessed), and ff_yuv2yuvX_mmx() finishes the remaining
 *   dstW % step pixels, starting at source offset pixelsProcessed.
 *
 * Requires yuv2yuvX_mmx() and ff_yuv2yuvX_mmx() to be declared already,
 * i.e. YUV2YUVX_FUNC_MMX(mmx, ...) must have been instantiated first.
 *
 * `step` is parenthesized so that an expression argument (for example
 * `16 + 16`) cannot re-associate with the `%` operator.
 */
#define YUV2YUVX_FUNC(opt, step)                                              \
void ff_yuv2yuvX_ ## opt(const int16_t *filter, int filterSize,               \
                         int srcOffset, uint8_t *dest, int dstW,              \
                         const uint8_t *dither, int offset);                  \
static void yuv2yuvX_ ## opt(const int16_t *filter, int filterSize,           \
                             const int16_t **src, uint8_t *dest, int dstW,    \
                             const uint8_t *dither, int offset)               \
{                                                                             \
    int remainder       = dstW % (step);                                      \
    int pixelsProcessed = dstW - remainder;                                   \
                                                                              \
    if ((uintptr_t)dest & 15) {                                               \
        yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset);    \
        return;                                                               \
    }                                                                         \
    if (pixelsProcessed > 0)                                                  \
        ff_yuv2yuvX_ ## opt(filter, filterSize - 1, 0, dest - offset,         \
                            pixelsProcessed + offset, dither, offset);        \
    if (remainder > 0)                                                        \
        ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed,              \
                        dest - offset,                                        \
                        pixelsProcessed + remainder + offset,                 \
                        dither, offset);                                      \
}
226 
/* Instantiate the yuv2yuvX_<opt>() wrappers for every externally assembled
 * variant the build provides. The MMX/MMXEXT wrappers come from
 * YUV2YUVX_FUNC_MMX, which ignores its step argument; SSE3 and AVX2 use
 * YUV2YUVX_FUNC with a step of 32 and 64 pixels respectively. */
227 #if HAVE_MMX_EXTERNAL
228 YUV2YUVX_FUNC_MMX(mmx, 16)
229 #endif
230 #if HAVE_MMXEXT_EXTERNAL
231 YUV2YUVX_FUNC_MMX(mmxext, 16)
232 #endif
233 #if HAVE_SSE3_EXTERNAL
234 YUV2YUVX_FUNC(sse3, 32)
235 #endif
236 #if HAVE_AVX2_EXTERNAL
237 YUV2YUVX_FUNC(avx2, 64)
238 #endif
239 
/* Expands to the prototype (no body, no trailing semicolon) of one
 * horizontal scaler named
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>().
 * The functions are only declared in this file; their definitions live
 * elsewhere (presumably in external assembly -- confirm). */
240 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
241 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
242  SwsContext *c, int16_t *data, \
243  int dstW, const uint8_t *src, \
244  const int16_t *filter, \
245  const int32_t *filterPos, int filterSize)
246 
/* Declares the twelve scalers for one filter variant and instruction set:
 * source depths 8/9/10/12/14/16, each with a "15" and a "19" target. */
247 #define SCALE_FUNCS(filter_n, opt) \
248  SCALE_FUNC(filter_n, 8, 15, opt); \
249  SCALE_FUNC(filter_n, 9, 15, opt); \
250  SCALE_FUNC(filter_n, 10, 15, opt); \
251  SCALE_FUNC(filter_n, 12, 15, opt); \
252  SCALE_FUNC(filter_n, 14, 15, opt); \
253  SCALE_FUNC(filter_n, 16, 15, opt); \
254  SCALE_FUNC(filter_n, 8, 19, opt); \
255  SCALE_FUNC(filter_n, 9, 19, opt); \
256  SCALE_FUNC(filter_n, 10, 19, opt); \
257  SCALE_FUNC(filter_n, 12, 19, opt); \
258  SCALE_FUNC(filter_n, 14, 19, opt); \
259  SCALE_FUNC(filter_n, 16, 19, opt)
260 
/* MMX builds provide filter variants 4, 8 and X. */
261 #define SCALE_FUNCS_MMX(opt) \
262  SCALE_FUNCS(4, opt); \
263  SCALE_FUNCS(8, opt); \
264  SCALE_FUNCS(X, opt)
265 
/* SSE-family builds provide filter variants 4, 8, X4 and X8. */
266 #define SCALE_FUNCS_SSE(opt) \
267  SCALE_FUNCS(4, opt); \
268  SCALE_FUNCS(8, opt); \
269  SCALE_FUNCS(X4, opt); \
270  SCALE_FUNCS(X8, opt)
271 
/* MMX scalers are declared on 32-bit x86 only; SSE2/SSSE3/SSE4 always. */
272 #if ARCH_X86_32
273 SCALE_FUNCS_MMX(mmx);
274 #endif
275 SCALE_FUNCS_SSE(sse2);
276 SCALE_FUNCS_SSE(ssse3);
277 SCALE_FUNCS_SSE(sse4);
278 
/* AVX2 only has the 8 -> 15 scalers, in the 4 and X4 filter variants. */
279 SCALE_FUNC(4, 8, 15, avx2);
280 SCALE_FUNC(X4, 8, 15, avx2);
281 
/* Expands to the prototype (no body, no trailing semicolon) of the
 * multi-tap vertical scaler ff_yuv2planeX_<size>_<opt>(). Only declared
 * here; defined elsewhere. */
282 #define VSCALEX_FUNC(size, opt) \
283 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
284  const int16_t **src, uint8_t *dest, int dstW, \
285  const uint8_t *dither, int offset)
/* Declares the size 8, 9 and 10 variants for one instruction set. */
286 #define VSCALEX_FUNCS(opt) \
287  VSCALEX_FUNC(8, opt); \
288  VSCALEX_FUNC(9, opt); \
289  VSCALEX_FUNC(10, opt)
290 
/* MMXEXT variants exist on 32-bit x86 only; the size-16 variant is declared
 * for SSE4 only. */
291 #if ARCH_X86_32
292 VSCALEX_FUNCS(mmxext);
293 #endif
294 VSCALEX_FUNCS(sse2);
295 VSCALEX_FUNCS(sse4);
296 VSCALEX_FUNC(16, sse4);
297 VSCALEX_FUNCS(avx);
298 
/* Expands to the prototype (no body, no trailing semicolon) of the
 * single-input-line vertical output function ff_yuv2plane1_<size>_<opt>().
 * Only declared here; defined elsewhere. */
299 #define VSCALE_FUNC(size, opt) \
300 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
301  const uint8_t *dither, int offset)
/* Declares sizes 8 and 16 for instruction set opt1 and sizes 9 and 10 for
 * opt2, so the two groups may come from different instruction sets. */
302 #define VSCALE_FUNCS(opt1, opt2) \
303  VSCALE_FUNC(8, opt1); \
304  VSCALE_FUNC(9, opt2); \
305  VSCALE_FUNC(10, opt2); \
306  VSCALE_FUNC(16, opt1)
307 
/* On 32-bit x86 the 8/16 variants are MMX and the 9/10 variants MMXEXT. */
308 #if ARCH_X86_32
309 VSCALE_FUNCS(mmx, mmxext);
310 #endif
311 VSCALE_FUNCS(sse2, sse2);
312 VSCALE_FUNC(16, sse4);
313 VSCALE_FUNCS(avx, avx);
314 
/* Prototype (no body, no trailing semicolon) of the luma input converter
 * ff_<fmt>ToY_<opt>(); the unused1/unused2/unused parameters are named as
 * placeholders in the signature. */
315 #define INPUT_Y_FUNC(fmt, opt) \
316 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
317  const uint8_t *unused1, const uint8_t *unused2, \
318  int w, uint32_t *unused)
/* Prototype of the chroma input converter ff_<fmt>ToUV_<opt>(), which
 * writes two destination planes (dstU, dstV). */
319 #define INPUT_UV_FUNC(fmt, opt) \
320 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
321  const uint8_t *unused0, \
322  const uint8_t *src1, \
323  const uint8_t *src2, \
324  int w, uint32_t *unused)
/* Declares both the Y and the UV converter for one format. */
325 #define INPUT_FUNC(fmt, opt) \
326  INPUT_Y_FUNC(fmt, opt); \
327  INPUT_UV_FUNC(fmt, opt)
/* Declares every input converter for one instruction set. nv12/nv21 get
 * only a UV converter; all other listed formats get both Y and UV. */
328 #define INPUT_FUNCS(opt) \
329  INPUT_FUNC(uyvy, opt); \
330  INPUT_FUNC(yuyv, opt); \
331  INPUT_UV_FUNC(nv12, opt); \
332  INPUT_UV_FUNC(nv21, opt); \
333  INPUT_FUNC(rgba, opt); \
334  INPUT_FUNC(bgra, opt); \
335  INPUT_FUNC(argb, opt); \
336  INPUT_FUNC(abgr, opt); \
337  INPUT_FUNC(rgb24, opt); \
338  INPUT_FUNC(bgr24, opt)
339 
/* MMX converters are declared on 32-bit x86 only. */
340 #if ARCH_X86_32
341 INPUT_FUNCS(mmx);
342 #endif
343 INPUT_FUNCS(sse2);
344 INPUT_FUNCS(ssse3);
345 INPUT_FUNCS(avx);
346 
/* x86-64 only: prototypes of the AVX2 interleaved-chroma output functions
 * ff_yuv2nv12cX_avx2() and ff_yuv2nv21cX_avx2(). Each takes separate u and v
 * input line arrays and a single dst pointer. Only declared here; defined
 * elsewhere. */
347 #if ARCH_X86_64
348 #define YUV2NV_DECL(fmt, opt) \
349 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
350  const int16_t *filter, int filterSize, \
351  const int16_t **u, const int16_t **v, \
352  uint8_t *dst, int dstWidth)
353 
354 YUV2NV_DECL(nv12, avx2);
355 YUV2NV_DECL(nv21, avx2);
356 #endif
357 
359 {
360  int cpu_flags = av_get_cpu_flags();
361 
362 #if HAVE_MMX_INLINE
363  if (INLINE_MMX(cpu_flags))
364  sws_init_swscale_mmx(c);
365 #endif
366 #if HAVE_MMXEXT_INLINE
368  sws_init_swscale_mmxext(c);
369 #endif
370  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
371 #if HAVE_MMX_EXTERNAL
372  if (EXTERNAL_MMX(cpu_flags))
373  c->yuv2planeX = yuv2yuvX_mmx;
374 #endif
375 #if HAVE_MMXEXT_EXTERNAL
377  c->yuv2planeX = yuv2yuvX_mmxext;
378 #endif
379 #if HAVE_SSE3_EXTERNAL
381  c->yuv2planeX = yuv2yuvX_sse3;
382 #endif
383 #if HAVE_AVX2_EXTERNAL
385  c->yuv2planeX = yuv2yuvX_avx2;
386 #endif
387  }
388 
/* Assign to hscalefn the horizontal scaler matching the context `c` that is
 * in scope at the expansion site.
 *
 * The source depth is taken from c->srcBpc (8, 9, 10, 12, 14, otherwise
 * asserted to be 16). The 14-bit branch is also taken for PAL8 or any-RGB
 * sources whose first component is less than 16 bits deep.
 * For each depth, c->dstBpc <= 14 selects the "to15" function built for
 * instruction set opt2; otherwise the "to19" function built for opt1.
 * filtersize is pasted into the function name, so it must be a token such
 * as 4, 8, X, X4 or X8 rather than a runtime value. */
389 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
390  if (c->srcBpc == 8) { \
391  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
392  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
393  } else if (c->srcBpc == 9) { \
394  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
395  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
396  } else if (c->srcBpc == 10) { \
397  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
398  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
399  } else if (c->srcBpc == 12) { \
400  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
401  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
402  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
403  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
404  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
405  } else { /* c->srcBpc == 16 */ \
406  av_assert0(c->srcBpc == 16);\
407  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
408  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
409  } \
410 } while (0)
/* Pick the MMX horizontal scaler for a runtime filter size: the dedicated
 * 4- and 8-tap variants when the size matches exactly, the generic "X"
 * variant otherwise. opt1/opt2 are forwarded to ASSIGN_SCALE_FUNC2.
 *
 * Wrapped in do { } while (0), like ASSIGN_SCALE_FUNC2, so that
 * `ASSIGN_MMX_SCALE_FUNC(...);` is exactly one statement and stays valid as
 * the body of an if/else. */
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) do {        \
    switch (filtersize) {                                                   \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break;            \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break;            \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break;            \
    }                                                                       \
} while (0)
/* Pick the multi-tap vertical scaler for c->dstBpc (`c` must be in scope at
 * the expansion site):
 *   16: run the caller-supplied statement do_16_case (may be empty);
 *   10: only for destinations that are neither big-endian nor semi-planar;
 *    9: only for non-big-endian destinations;
 *    8: only when condition_8bit holds and c->use_mmx_vfilter is not set.
 * Any other depth, or a failed check, leaves vscalefn untouched.
 *
 * Wrapped in do { } while (0), like ASSIGN_SCALE_FUNC2, so that the
 * expansion plus its trailing semicolon is exactly one statement. */
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) do { \
    switch (c->dstBpc) {                                                    \
    case 16:                                                                \
        do_16_case;                                                         \
        break;                                                              \
    case 10:                                                                \
        if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat))          \
            vscalefn = ff_yuv2planeX_10_ ## opt;                            \
        break;                                                              \
    case 9:                                                                 \
        if (!isBE(c->dstFormat))                                            \
            vscalefn = ff_yuv2planeX_9_ ## opt;                             \
        break;                                                              \
    case 8:                                                                 \
        if ((condition_8bit) && !c->use_mmx_vfilter)                        \
            vscalefn = ff_yuv2planeX_8_ ## opt;                             \
        break;                                                              \
    }                                                                       \
} while (0)
/* Pick the single-line vertical output function for c->dstBpc (`c` must be
 * in scope at the expansion site):
 *   16: opt1 variant, non-big-endian destinations only;
 *   10: opt2 variant, only if the destination is neither big-endian nor
 *       semi-planar and opt2chk is true;
 *    9: opt2 variant, only if the destination is not big-endian and opt2chk
 *       is true;
 *    8: opt1 variant, unconditionally;
 *   anything else: asserts that the depth is above 8 and changes nothing.
 *
 * opt2chk is parenthesized (as condition_8bit is in ASSIGN_VSCALEX_FUNC) so
 * that a compound argument such as `a || b` cannot bypass the format checks
 * through operator precedence. The whole body is wrapped in
 * do { } while (0) so that it expands to exactly one statement. */
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) do {              \
    switch (c->dstBpc) {                                                    \
    case 16:                                                                \
        if (!isBE(c->dstFormat))                                            \
            vscalefn = ff_yuv2plane1_16_ ## opt1;                           \
        break;                                                              \
    case 10:                                                                \
        if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat) &&        \
            (opt2chk))                                                      \
            vscalefn = ff_yuv2plane1_10_ ## opt2;                           \
        break;                                                              \
    case 9:                                                                 \
        if (!isBE(c->dstFormat) && (opt2chk))                               \
            vscalefn = ff_yuv2plane1_9_ ## opt2;                            \
        break;                                                              \
    case 8:                                                                 \
        vscalefn = ff_yuv2plane1_8_ ## opt1;                                \
        break;                                                              \
    default:                                                                \
        av_assert0(c->dstBpc>8);                                            \
    }                                                                       \
} while (0)
/* Expands to a complete `case AV_PIX_FMT_<X>:` arm for use inside a switch
 * on c->srcFormat: sets c->lumToYV12 to ff_<x>ToY_<opt> and, only when
 * c->chrSrcHSubSample is zero, c->chrToYV12 to ff_<x>ToUV_<opt>. The arm
 * ends in `break` without a semicolon; the caller supplies it. */
432 #define case_rgb(x, X, opt) \
433  case AV_PIX_FMT_ ## X: \
434  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
435  if (!c->chrSrcHSubSample) \
436  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
437  break
438 #if ARCH_X86_32
439  if (EXTERNAL_MMX(cpu_flags)) {
440  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
441  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
442  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
443 
444  switch (c->srcFormat) {
445  case AV_PIX_FMT_YA8:
446  c->lumToYV12 = ff_yuyvToY_mmx;
447  if (c->needAlpha)
448  c->alpToYV12 = ff_uyvyToY_mmx;
449  break;
450  case AV_PIX_FMT_YUYV422:
451  c->lumToYV12 = ff_yuyvToY_mmx;
452  c->chrToYV12 = ff_yuyvToUV_mmx;
453  break;
454  case AV_PIX_FMT_UYVY422:
455  c->lumToYV12 = ff_uyvyToY_mmx;
456  c->chrToYV12 = ff_uyvyToUV_mmx;
457  break;
458  case AV_PIX_FMT_NV12:
459  c->chrToYV12 = ff_nv12ToUV_mmx;
460  break;
461  case AV_PIX_FMT_NV21:
462  c->chrToYV12 = ff_nv21ToUV_mmx;
463  break;
464  case_rgb(rgb24, RGB24, mmx);
465  case_rgb(bgr24, BGR24, mmx);
466  case_rgb(bgra, BGRA, mmx);
467  case_rgb(rgba, RGBA, mmx);
468  case_rgb(abgr, ABGR, mmx);
469  case_rgb(argb, ARGB, mmx);
470  default:
471  break;
472  }
473  }
474  if (EXTERNAL_MMXEXT(cpu_flags)) {
475  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
476  }
477 #endif /* ARCH_X86_32 */
/* Pick the SSE-family horizontal scaler for a runtime filter size: the
 * dedicated 4- and 8-tap variants when the size matches exactly; otherwise
 * the generic "X4" variant when bit 2 of the size is set, and "X8" when it
 * is clear. opt1/opt2 are forwarded to ASSIGN_SCALE_FUNC2.
 *
 * filtersize is parenthesized in the bit test so an expression argument is
 * masked as a whole, and the body is wrapped in do { } while (0) so that it
 * expands to exactly one statement. */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) do {        \
    switch (filtersize) {                                                   \
    case 4:                                                                 \
        ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2);                        \
        break;                                                              \
    case 8:                                                                 \
        ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2);                        \
        break;                                                              \
    default:                                                                \
        if ((filtersize) & 4) {                                             \
            ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2);                   \
        } else {                                                            \
            ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2);                   \
        }                                                                   \
        break;                                                              \
    }                                                                       \
} while (0)
486  if (EXTERNAL_SSE2(cpu_flags)) {
487  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
488  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
489  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
490  HAVE_ALIGNED_STACK || ARCH_X86_64);
491  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
492 
493  switch (c->srcFormat) {
494  case AV_PIX_FMT_YA8:
495  c->lumToYV12 = ff_yuyvToY_sse2;
496  if (c->needAlpha)
497  c->alpToYV12 = ff_uyvyToY_sse2;
498  break;
499  case AV_PIX_FMT_YUYV422:
500  c->lumToYV12 = ff_yuyvToY_sse2;
501  c->chrToYV12 = ff_yuyvToUV_sse2;
502  break;
503  case AV_PIX_FMT_UYVY422:
504  c->lumToYV12 = ff_uyvyToY_sse2;
505  c->chrToYV12 = ff_uyvyToUV_sse2;
506  break;
507  case AV_PIX_FMT_NV12:
508  c->chrToYV12 = ff_nv12ToUV_sse2;
509  break;
510  case AV_PIX_FMT_NV21:
511  c->chrToYV12 = ff_nv21ToUV_sse2;
512  break;
513  case_rgb(rgb24, RGB24, sse2);
514  case_rgb(bgr24, BGR24, sse2);
515  case_rgb(bgra, BGRA, sse2);
516  case_rgb(rgba, RGBA, sse2);
517  case_rgb(abgr, ABGR, sse2);
518  case_rgb(argb, ARGB, sse2);
519  default:
520  break;
521  }
522  }
523  if (EXTERNAL_SSSE3(cpu_flags)) {
524  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
525  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
526  switch (c->srcFormat) {
527  case_rgb(rgb24, RGB24, ssse3);
528  case_rgb(bgr24, BGR24, ssse3);
529  default:
530  break;
531  }
532  }
533  if (EXTERNAL_SSE4(cpu_flags)) {
534  /* Xto15 don't need special sse4 functions */
535  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
536  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
537  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
538  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
539  HAVE_ALIGNED_STACK || ARCH_X86_64);
540  if (c->dstBpc == 16 && !isBE(c->dstFormat))
541  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
542  }
543 
544  if (EXTERNAL_AVX(cpu_flags)) {
545  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
546  HAVE_ALIGNED_STACK || ARCH_X86_64);
547  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
548 
549  switch (c->srcFormat) {
550  case AV_PIX_FMT_YUYV422:
551  c->chrToYV12 = ff_yuyvToUV_avx;
552  break;
553  case AV_PIX_FMT_UYVY422:
554  c->chrToYV12 = ff_uyvyToUV_avx;
555  break;
556  case AV_PIX_FMT_NV12:
557  c->chrToYV12 = ff_nv12ToUV_avx;
558  break;
559  case AV_PIX_FMT_NV21:
560  c->chrToYV12 = ff_nv21ToUV_avx;
561  break;
562  case_rgb(rgb24, RGB24, avx);
563  case_rgb(bgr24, BGR24, avx);
564  case_rgb(bgra, BGRA, avx);
565  case_rgb(rgba, RGBA, avx);
566  case_rgb(abgr, ABGR, avx);
567  case_rgb(argb, ARGB, avx);
568  default:
569  break;
570  }
571  }
572 
573 #if ARCH_X86_64
/* Pick the AVX2 8 -> 15 horizontal scaler for a runtime filter size: the
 * dedicated 4-tap variant when the size is exactly 4, the generic "X4"
 * variant otherwise.
 *
 * The unreachable second `break` after the default arm is removed, and the
 * body is wrapped in do { } while (0) so that it expands to exactly one
 * statement. */
#define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) do {                   \
    switch (filtersize) {                                                   \
    case 4:  hscalefn = ff_hscale8to15_4_avx2;  break;                      \
    default: hscalefn = ff_hscale8to15_X4_avx2; break;                      \
    }                                                                       \
} while (0)
580 
582  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
583  if (c->chrDstW % 16 == 0)
584  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
585  if (c->dstW % 16 == 0)
586  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
587  }
588  }
589 
591  switch (c->dstFormat) {
592  case AV_PIX_FMT_NV12:
593  case AV_PIX_FMT_NV24:
594  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
595  break;
596  case AV_PIX_FMT_NV21:
597  case AV_PIX_FMT_NV42:
598  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
599  break;
600  default:
601  break;
602  }
603  }
604 #endif
605 }
INLINE_MMX
#define INLINE_MMX(flags)
Definition: cpu.h:86
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:419
ASSIGN_MMX_SCALE_FUNC
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:61
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1061
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:133
mem_internal.h
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem.h:118
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:431
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:412
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:513
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:411
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:98
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem.h:117
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:328
U
#define U(x)
Definition: vp56_arith.h:37
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:57
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:257
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:62
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1062
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:240
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:302
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:415
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:282
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:358
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:1056
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:521
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
Definition: undefined.txt:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:416
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:717
cpu.h
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:397
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:420
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:84
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:338
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:392
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:339
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:192
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:30
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:514
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1059
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:299
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:40
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:205
SCALE_FUNCS_MMX
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:261
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:300
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:266
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:324
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:63
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:286
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33