FFmpeg
swscale.c
/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"

const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};
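/* Two-line ordered-dither tables: ff_updateMMXDitherTables() below picks one
 * 8-byte row per output line (dstY & 1) for the red/green/blue channels, so
 * the rounding bias alternates between adjacent lines when the inline-asm
 * packers reduce to 15/16 bpp RGB (see DITHER1XBPP). */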

#if HAVE_INLINE_ASM

#define DITHER1XBPP

DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;

DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
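/* Constants referenced from the inline-asm code paths: bF8/bFC keep the top
 * 5/6 bits of each byte, the *15Mask/*16Mask values isolate one channel of
 * RGB555/RGB565 words, ff_M24A/B/C select every third byte when packing
 * 24-bit RGB, and ff_bgr2YOffset/ff_bgr2UVOffset are the +16/+128 biases
 * used when converting RGB to limited-range Y/UV. */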


//MMX versions
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif

// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
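
/* swscale_template.c is compiled twice through the RENAME() macro: once with
 * COMPILE_TEMPLATE_MMXEXT set to 0 (names suffixed _mmx) and once with it set
 * to 1 (names suffixed _mmxext), so a single template source provides both
 * inline-asm variants. */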

/* Rebuild the per-line vertical filter tables (lumMmxFilter, chrMmxFilter,
 * alpMmxFilter) and the dither words consumed by the inline-asm code for
 * output line dstY, replicating edge lines where the filter window runs past
 * the source slice. */
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
{
    const int dstH= c->dstH;
    const int flags= c->flags;

    SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
    SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
    SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];

    int hasAlpha = c->needAlpha;
    int32_t *vLumFilterPos= c->vLumFilterPos;
    int32_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int chrDstY= dstY>>c->chrDstVSubSample;
    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input

    c->blueDither= ff_dither8[dstY&1];
    if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
        c->greenDither= ff_dither8[dstY&1];
    else
        c->greenDither= ff_dither4[dstY&1];
    c->redDither= ff_dither8[(dstY+1)&1];
    if (dstY < dstH - 2) {
        const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
        const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;

        int i;
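        /* If the vertical filter window reaches above or below the current
         * source slice, repoint the out-of-range taps at the nearest valid
         * line (edge replication) via the plane's tmp pointer array. */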
        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
            const int16_t **tmpY = (const int16_t **) lumPlane->tmp;

            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
            for (i = 0; i < neg; i++)
                tmpY[i] = lumSrcPtr[neg];
            for ( ; i < end; i++)
                tmpY[i] = lumSrcPtr[i];
            for ( ; i < vLumFilterSize; i++)
                tmpY[i] = tmpY[i-1];
            lumSrcPtr = tmpY;

            if (alpSrcPtr) {
                const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
                for (i = 0; i < neg; i++)
                    tmpA[i] = alpSrcPtr[neg];
                for ( ; i < end; i++)
                    tmpA[i] = alpSrcPtr[i];
                for ( ; i < vLumFilterSize; i++)
                    tmpA[i] = tmpA[i - 1];
                alpSrcPtr = tmpA;
            }
        }
        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
            const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
            for (i = 0; i < neg; i++) {
                tmpU[i] = chrUSrcPtr[neg];
            }
            for ( ; i < end; i++) {
                tmpU[i] = chrUSrcPtr[i];
            }
            for ( ; i < vChrFilterSize; i++) {
                tmpU[i] = tmpU[i - 1];
            }
            chrUSrcPtr = tmpU;
        }

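        /* Pack pointers and coefficients in the layout the inline-asm vertical
         * scalers expect.  With SWS_ACCURATE_RND two adjacent taps share one
         * APCK_SIZE-byte entry: both line pointers plus their two coefficients
         * packed into a single 32-bit word (stored twice), i.e. roughly
         * { src[i], src[i+1], coef_i | coef_i1 << 16, ... }.  Otherwise each
         * tap gets a 4-int entry with its coefficient replicated into both
         * 16-bit halves (* 0x10001). */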
        if (flags & SWS_ACCURATE_RND) {
            int s= APCK_SIZE / 8;
            for (i=0; i<vLumFilterSize; i+=2) {
                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
                lumMmxFilter[s*i+APCK_COEF/4  ]=
                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                    alpMmxFilter[s*i+APCK_COEF/4  ]=
                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
                }
            }
            for (i=0; i<vChrFilterSize; i+=2) {
                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                chrMmxFilter[s*i+APCK_COEF/4  ]=
                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
            }
        } else {
            for (i=0; i<vLumFilterSize; i++) {
                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                lumMmxFilter[4*i+2]=
                lumMmxFilter[4*i+3]=
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                    alpMmxFilter[4*i+2]=
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                }
            }
            for (i=0; i<vChrFilterSize; i++) {
                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                chrMmxFilter[4*i+2]=
                chrMmxFilter[4*i+3]=
                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
            }
        }
    }
}

#if HAVE_MMXEXT
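/* SSE3 vertical scaler for 8-bit output; installed as c->yuv2planeX when
 * c->use_mmx_vfilter is set and SWS_ACCURATE_RND is not requested (see
 * ff_sws_init_swscale_x86() below).  Falls back to yuv2yuvX_mmxext when the
 * destination pointer is not 16-byte aligned. */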
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset)
{
    if(((uintptr_t)dest) & 15){
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
        return;
    }
    filterSize--;
#define MAIN_FUNCTION \
        "pxor %%xmm0, %%xmm0 \n\t" \
        "punpcklbw %%xmm0, %%xmm3 \n\t" \
        "movd %4, %%xmm1 \n\t" \
        "punpcklwd %%xmm1, %%xmm1 \n\t" \
        "punpckldq %%xmm1, %%xmm1 \n\t" \
        "punpcklqdq %%xmm1, %%xmm1 \n\t" \
        "psllw $3, %%xmm1 \n\t" \
        "paddw %%xmm1, %%xmm3 \n\t" \
        "psraw $4, %%xmm3 \n\t" \
        "movdqa %%xmm3, %%xmm4 \n\t" \
        "movdqa %%xmm3, %%xmm7 \n\t" \
        "movl %3, %%ecx \n\t" \
        "mov %0, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        ".p2align 4 \n\t" /* FIXME Unroll? */\
        "1: \n\t"\
        "movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
        "movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
        "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
        "add $16, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
        "pmulhw %%xmm0, %%xmm2 \n\t"\
        "pmulhw %%xmm0, %%xmm5 \n\t"\
        "paddw %%xmm2, %%xmm3 \n\t"\
        "paddw %%xmm5, %%xmm4 \n\t"\
        " jnz 1b \n\t"\
        "psraw $3, %%xmm3 \n\t"\
        "psraw $3, %%xmm4 \n\t"\
        "packuswb %%xmm4, %%xmm3 \n\t"\
        "movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
        "add $16, %%"FF_REG_c" \n\t"\
        "cmp %2, %%"FF_REG_c" \n\t"\
        "movdqa %%xmm7, %%xmm3 \n\t" \
        "movdqa %%xmm7, %%xmm4 \n\t" \
        "mov %0, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "jb 1b \n\t"
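
    /* The accumulators xmm3/xmm4 start from a rounding bias built from the
     * dither bytes and filterSize; each source line is then accumulated with
     * pmulhw against its coefficient, and 16 output pixels per iteration are
     * packed (packuswb) and streamed with movntdq.  When writing at a
     * non-zero offset the 8-byte dither row is first rotated by three bytes
     * (psrlq/psllq/por below). */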

    if (offset) {
        __asm__ volatile(
            "movq %5, %%xmm3 \n\t"
            "movdqa %%xmm3, %%xmm4 \n\t"
            "psrlq $24, %%xmm3 \n\t"
            "psllq $40, %%xmm4 \n\t"
            "por %%xmm4, %%xmm3 \n\t"
            MAIN_FUNCTION
              :: "g" (filter),
                 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
                 "m"(filterSize), "m"(((uint64_t *) dither)[0])
              : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
                "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
        );
    } else {
        __asm__ volatile(
            "movq %5, %%xmm3 \n\t"
            MAIN_FUNCTION
              :: "g" (filter),
                 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
                 "m"(filterSize), "m"(((uint64_t *) dither)[0])
              : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
                "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
        );
    }
}
#endif

#endif /* HAVE_INLINE_ASM */
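
/*
 * Everything below declares prototypes for the standalone x86 assembly
 * implementations (horizontal scalers, vertical scalers/output packers and
 * packed-input converters) and then, in ff_sws_init_swscale_x86(), installs
 * them into the SwsContext function pointers according to
 * av_get_cpu_flags().
 */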

#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);

#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);

#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);

#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
                                const uint8_t *unused1, const uint8_t *unused2, \
                                int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);

#if ARCH_X86_64
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
                                  const int16_t *filter, int filterSize, \
                                  const int16_t **u, const int16_t **v, \
                                  uint8_t *dst, int dstWidth)

YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
#endif

av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
    if (cpu_flags & AV_CPU_FLAG_SSE3){
        if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
            c->yuv2planeX = yuv2yuvX_sse3;
    }
#endif

#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
    if (c->srcBpc == 8) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 9) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 10) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 12) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
    } else { /* c->srcBpc == 16 */ \
        av_assert0(c->srcBpc == 16);\
        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
    } \
} while (0)
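
/* ASSIGN_SCALE_FUNC2() picks the horizontal scaler by source bit depth and by
 * whether the intermediate format is 15-bit or 19-bit (dstBpc <= 14 versus
 * deeper); the wrappers below additionally dispatch on the filter size. */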
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
    case 16:                          do_16_case;                          break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
    case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_  ## opt; break; \
    }
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
    switch(c->dstBpc){ \
    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2;  break; \
    case 8:                                      vscalefn = ff_yuv2plane1_8_  ## opt1;  break; \
    default: av_assert0(c->dstBpc>8); \
    }
#define case_rgb(x, X, opt) \
        case AV_PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
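
/* case_rgb() maps one packed RGB source format to its luma converter and, when
 * chroma is not horizontally subsampled, to its chroma converter as well. */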
#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_mmx;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_mmx;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_mmx;
            c->chrToYV12 = ff_yuyvToUV_mmx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_mmx;
            c->chrToYV12 = ff_uyvyToUV_mmx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_mmx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_mmx;
            break;
        case_rgb(rgb24, RGB24, mmx);
        case_rgb(bgr24, BGR24, mmx);
        case_rgb(bgra, BGRA, mmx);
        case_rgb(rgba, RGBA, mmx);
        case_rgb(abgr, ABGR, mmx);
        case_rgb(argb, ARGB, mmx);
        default:
            break;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
    }
#endif /* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break; \
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_sse2;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_sse2;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_sse2;
            c->chrToYV12 = ff_yuyvToUV_sse2;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_sse2;
            c->chrToYV12 = ff_uyvyToUV_sse2;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_sse2;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_sse2;
            break;
        case_rgb(rgb24, RGB24, sse2);
        case_rgb(bgr24, BGR24, sse2);
        case_rgb(bgra, BGRA, sse2);
        case_rgb(rgba, RGBA, sse2);
        case_rgb(abgr, ABGR, sse2);
        case_rgb(argb, ARGB, sse2);
        default:
            break;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
        switch (c->srcFormat) {
        case_rgb(rgb24, RGB24, ssse3);
        case_rgb(bgr24, BGR24, ssse3);
        default:
            break;
        }
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        if (c->dstBpc == 16 && !isBE(c->dstFormat))
            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YUYV422:
            c->chrToYV12 = ff_yuyvToUV_avx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->chrToYV12 = ff_uyvyToUV_avx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_avx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_avx;
            break;
        case_rgb(rgb24, RGB24, avx);
        case_rgb(bgr24, BGR24, avx);
        case_rgb(bgra, BGRA, avx);
        case_rgb(rgba, RGBA, avx);
        case_rgb(abgr, ABGR, avx);
        case_rgb(argb, ARGB, avx);
        default:
            break;
        }
    }

#if ARCH_X86_64
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_NV24:
            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
            break;
        case AV_PIX_FMT_NV21:
        case AV_PIX_FMT_NV42:
            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
            break;
        default:
            break;
        }
    }
#endif
}
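
/*
 * This dispatcher is expected to be invoked once from the generic swscale
 * initialisation code after the scaling filters have been set up; library
 * code then reaches the functions above only through the c->hyScale/hcScale,
 * c->yuv2plane1/yuv2planeX, c->lumToYV12/chrToYV12/alpToYV12 and
 * c->yuv2nv12cX pointers.  A user triggers these paths indirectly, e.g.
 * (sketch, not part of this file):
 *
 *     struct SwsContext *sws = sws_getContext(w, h, AV_PIX_FMT_YUYV422,
 *                                             w, h, AV_PIX_FMT_YUV420P,
 *                                             SWS_BILINEAR, NULL, NULL, NULL);
 *     sws_scale(sws, src_data, src_linesize, 0, h, dst_data, dst_linesize);
 *     sws_freeContext(sws);
 */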