FFmpeg
swscale.c
/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"

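/* 4- and 8-level ordered-dither rows; ff_updateMMXDitherTables() below loads
 * them into c->blueDither / c->greenDither / c->redDither for each output
 * line when dithering down to low-depth RGB output (e.g. RGB555/RGB565). */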
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};

#if HAVE_INLINE_ASM

#define DITHER1XBPP

DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;

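/* swscale_template.c is included twice below: RENAME() appends _mmx or
 * _mmxext to every function it defines, and COMPILE_TEMPLATE_MMXEXT selects
 * which instruction set the inline-asm bodies are generated for. */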
//MMX versions
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif

// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
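
/* Refresh the per-line state consumed by the inline-asm vertical scaler:
 * the dither constants for this output line and the packed lumMmxFilter /
 * chrMmxFilter / alpMmxFilter argument blocks (source-line pointers plus
 * vertical filter coefficients) for output line dstY. */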
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
{
    const int dstH= c->dstH;
    const int flags= c->flags;

    SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
    SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
    SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];

    int hasAlpha = c->needAlpha;
    int32_t *vLumFilterPos= c->vLumFilterPos;
    int32_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int chrDstY= dstY>>c->chrDstVSubSample;
    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input

    c->blueDither= ff_dither8[dstY&1];
    if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
        c->greenDither= ff_dither8[dstY&1];
    else
        c->greenDither= ff_dither4[dstY&1];
    c->redDither= ff_dither8[(dstY+1)&1];
    if (dstY < dstH - 2) {
        const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
        const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;

        int i;
        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
            const int16_t **tmpY = (const int16_t **) lumPlane->tmp;

            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
            for (i = 0; i < neg; i++)
                tmpY[i] = lumSrcPtr[neg];
            for ( ; i < end; i++)
                tmpY[i] = lumSrcPtr[i];
            for ( ; i < vLumFilterSize; i++)
                tmpY[i] = tmpY[i-1];
            lumSrcPtr = tmpY;

            if (alpSrcPtr) {
                const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
                for (i = 0; i < neg; i++)
                    tmpA[i] = alpSrcPtr[neg];
                for ( ; i < end; i++)
                    tmpA[i] = alpSrcPtr[i];
                for ( ; i < vLumFilterSize; i++)
                    tmpA[i] = tmpA[i - 1];
                alpSrcPtr = tmpA;
            }
        }
        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
            const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
            for (i = 0; i < neg; i++) {
                tmpU[i] = chrUSrcPtr[neg];
            }
            for ( ; i < end; i++) {
                tmpU[i] = chrUSrcPtr[i];
            }
            for ( ; i < vChrFilterSize; i++) {
                tmpU[i] = tmpU[i - 1];
            }
            chrUSrcPtr = tmpU;
        }

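        /* Pack the vertical filter arguments the inline-asm kernels expect:
         * in SWS_ACCURATE_RND mode, APCK_SIZE-byte blocks holding two source
         * line pointers and their two coefficients; otherwise four dwords per
         * tap (source pointer plus the coefficient replicated into both
         * 16-bit halves of the coefficient words). */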
        if (flags & SWS_ACCURATE_RND) {
            int s= APCK_SIZE / 8;
            for (i=0; i<vLumFilterSize; i+=2) {
                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i];
                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
                lumMmxFilter[s*i+APCK_COEF/4  ]=
                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i]
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i];
                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                    alpMmxFilter[s*i+APCK_COEF/4  ]=
                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4];
                }
            }
            for (i=0; i<vChrFilterSize; i+=2) {
                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i];
                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                chrMmxFilter[s*i+APCK_COEF/4  ]=
                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i]
                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
            }
        } else {
            for (i=0; i<vLumFilterSize; i++) {
                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                lumMmxFilter[4*i+2]=
                lumMmxFilter[4*i+3]=
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                    alpMmxFilter[4*i+2]=
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                }
            }
            for (i=0; i<vChrFilterSize; i++) {
                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                chrMmxFilter[4*i+2]=
                chrMmxFilter[4*i+3]=
                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
            }
        }
    }
}
#endif /* HAVE_INLINE_ASM */

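/* Thin C wrappers around the external-assembly yuv2yuvX kernels so they can
 * be stored in c->yuv2planeX. The plain MMX/MMXEXT wrapper forwards the whole
 * line; the wider SSE3/AVX2 variants fall back to the MMX version for
 * unaligned destinations and use it again for the tail left over once the
 * line has been processed in blocks of `step` pixels. */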
#define YUV2YUVX_FUNC_MMX(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
    return; \
}

#define YUV2YUVX_FUNC(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW, \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    int remainder = (dstW % step); \
    int pixelsProcessed = dstW - remainder; \
    if(((uintptr_t)dest) & 15){ \
        yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset); \
        return; \
    } \
    ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
    if(remainder > 0){ \
        ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
    } \
    return; \
}

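/* For illustration only (not part of the build): YUV2YUVX_FUNC(sse3, 32)
 * expands to roughly the following wrapper, with the hand-written
 * ff_yuv2yuvX_sse3 doing the real work on the aligned, 32-pixel-multiple
 * part of the line:
 *
 *     static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 *                               const int16_t **src, uint8_t *dest, int dstW,
 *                               const uint8_t *dither, int offset)
 *     {
 *         int remainder       = dstW % 32;
 *         int pixelsProcessed = dstW - remainder;
 *         if ((uintptr_t)dest & 15) {            // unaligned: take the MMX path
 *             yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset);
 *             return;
 *         }
 *         ff_yuv2yuvX_sse3(filter, filterSize - 1, 0, dest - offset,
 *                          pixelsProcessed + offset, dither, offset);
 *         if (remainder > 0)                     // tail: finish with the MMX kernel
 *             ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset,
 *                             pixelsProcessed + remainder + offset, dither, offset);
 *     }
 */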
#if HAVE_MMX_EXTERNAL
YUV2YUVX_FUNC_MMX(mmx, 16)
#endif
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif

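/* Prototypes for the external-assembly horizontal scalers, instantiated for
 * every supported combination of input depth (8..16 bit), intermediate
 * output depth (15- or 19-bit) and filter size (4, 8, or generic X). */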
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);

#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);

#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);

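/* Prototypes for the assembly input converters that unpack YUYV/UYVY,
 * NV12/NV21 and 24/32-bit RGB into the planar form the horizontal scaler
 * consumes (one ToY and one ToUV entry point per format and instruction set). */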
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
                               const uint8_t *unused1, const uint8_t *unused2, \
                               int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);

#if ARCH_X86_64
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
                                  const int16_t *filter, int filterSize, \
                                  const int16_t **u, const int16_t **v, \
                                  uint8_t *dst, int dstWidth)

YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
#endif

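/* Runtime dispatch: probe the CPU once and overwrite the generic function
 * pointers in the SwsContext with the fastest x86 implementation that is
 * both compiled in and supported by the host. */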
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
#endif
    if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmx;
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmxext;
#endif
#if HAVE_SSE3_EXTERNAL
        if (EXTERNAL_SSE3(cpu_flags))
            c->yuv2planeX = yuv2yuvX_sse3;
#endif
#if HAVE_AVX2_EXTERNAL
        if (EXTERNAL_AVX2_FAST(cpu_flags))
            c->yuv2planeX = yuv2yuvX_avx2;
#endif
    }

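/* Select a horizontal scale function by source depth (8..16 bit), destination
 * depth (<= 14 bit uses the 15-bit intermediate, otherwise the 19-bit one)
 * and, via the wrapper macros further below, by filter size. */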
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
    if (c->srcBpc == 8) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 9) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 10) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 12) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
    } else { /* c->srcBpc == 16 */ \
        av_assert0(c->srcBpc == 16);\
        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
    } \
} while (0)
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
    case 16: do_16_case; break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
    case 8:  if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
    }
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
    switch(c->dstBpc){ \
    case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
    case 8:  vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
    default: av_assert0(c->dstBpc>8); \
    }
#define case_rgb(x, X, opt) \
        case AV_PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_mmx;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_mmx;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_mmx;
            c->chrToYV12 = ff_yuyvToUV_mmx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_mmx;
            c->chrToYV12 = ff_uyvyToUV_mmx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_mmx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_mmx;
            break;
        case_rgb(rgb24, RGB24, mmx);
        case_rgb(bgr24, BGR24, mmx);
        case_rgb(bgra,  BGRA,  mmx);
        case_rgb(rgba,  RGBA,  mmx);
        case_rgb(abgr,  ABGR,  mmx);
        case_rgb(argb,  ARGB,  mmx);
        default:
            break;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
    }
#endif /* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break; \
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_sse2;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_sse2;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_sse2;
            c->chrToYV12 = ff_yuyvToUV_sse2;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_sse2;
            c->chrToYV12 = ff_uyvyToUV_sse2;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_sse2;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_sse2;
            break;
        case_rgb(rgb24, RGB24, sse2);
        case_rgb(bgr24, BGR24, sse2);
        case_rgb(bgra,  BGRA,  sse2);
        case_rgb(rgba,  RGBA,  sse2);
        case_rgb(abgr,  ABGR,  sse2);
        case_rgb(argb,  ARGB,  sse2);
        default:
            break;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
        switch (c->srcFormat) {
        case_rgb(rgb24, RGB24, ssse3);
        case_rgb(bgr24, BGR24, ssse3);
        default:
            break;
        }
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        if (c->dstBpc == 16 && !isBE(c->dstFormat))
            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YUYV422:
            c->chrToYV12 = ff_yuyvToUV_avx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->chrToYV12 = ff_uyvyToUV_avx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_avx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_avx;
            break;
        case_rgb(rgb24, RGB24, avx);
        case_rgb(bgr24, BGR24, avx);
        case_rgb(bgra,  BGRA,  avx);
        case_rgb(rgba,  RGBA,  avx);
        case_rgb(abgr,  ABGR,  avx);
        case_rgb(argb,  ARGB,  avx);
        default:
            break;
        }
    }

#if ARCH_X86_64
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_NV24:
            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
            break;
        case AV_PIX_FMT_NV21:
        case AV_PIX_FMT_NV42:
            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
            break;
        default:
            break;
        }
    }
#endif
}