FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/pixdesc.h"
31 
/*
 * Two-entry dither tables, declared with 8-byte alignment. Each 64-bit entry
 * packs eight byte values. The dither-table update code later in this file
 * selects an entry by destination-line parity (dstY & 1) and stores it in
 * c->blueDither, c->greenDither and c->redDither.
 */
32 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
33  0x0103010301030103LL,
34  0x0200020002000200LL,};
35 
/* Same layout as ff_dither4, with byte values up to 6 instead of up to 3. */
36 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
37  0x0602060206020602LL,
38  0x0004000400040004LL,};
39 
40 #if HAVE_INLINE_ASM
41 
/*
 * Constants for the inline-assembly code paths. None of them is referenced by
 * the code visible in this file; presumably they are consumed by the included
 * swscale_template.c -- TODO confirm.
 */
42 #define DITHER1XBPP
43 
/* Byte-wise masks (0xF8 / 0xFC in every byte) and word-wise constants
 * (0x0010 / 0x0002 in every 16-bit word). */
44 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
45 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
46 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
47 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
48 
/* Per-16-bit-word field masks. The "16" set selects bits 0-4, 5-10 and 11-15;
 * the "15" set selects bits 0-4, 5-9 and 10-14. The b/g/r prefixes presumably
 * name the colour channel held in each field. */
49 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
50 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
51 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
52 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
53 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
54 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
55 
/* Masks that each select every third byte of a quadword:
 * A -> bytes 0, 3, 6; B -> bytes 1, 4, 7; C -> bytes 2, 5. */
56 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
57 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
58 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
59 
/* 0x10 and 0x80 replicated into every byte, and 0x0001 replicated into every
 * 16-bit word. */
60 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
61 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
62 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
63 
64 
/*
 * swscale_template.c is included once per inline-asm instruction set. Before
 * each inclusion RENAME() is redefined to append a different suffix and
 * COMPILE_TEMPLATE_MMXEXT is set to 0 or 1. The template is presumably where
 * suffixed names used later in this file (sws_init_swscale_mmx,
 * sws_init_swscale_mmxext, yuv2yuvX_mmxext) come from -- the template itself
 * is not visible here.
 */
65 //MMX versions
66 #if HAVE_MMX_INLINE
67 #undef RENAME
68 #define COMPILE_TEMPLATE_MMXEXT 0
69 #define RENAME(a) a ## _mmx
70 #include "swscale_template.c"
71 #endif
72 
73 // MMXEXT versions
74 #if HAVE_MMXEXT_INLINE
75 #undef RENAME
76 #undef COMPILE_TEMPLATE_MMXEXT
77 #define COMPILE_TEMPLATE_MMXEXT 1
78 #define RENAME(a) a ## _mmxext
79 #include "swscale_template.c"
80 #endif
81 
/*
 * Per-output-line setup for the MMX vertical filter: selects the dither
 * patterns for destination line dstY and, for every line except the last two
 * (dstY < dstH - 2), fills c's lumMmxFilter / chrMmxFilter / alpMmxFilter
 * tables with source-line pointers and vertical filter coefficients.
 *
 * NOTE(review): the function signature is missing from this text; the body
 * uses a context pointer `c` and an int `dstY`. The cross-reference list at
 * the end of the page suggests
 * `void ff_updateMMXDitherTables(SwsContext *c, int dstY)` -- TODO confirm.
 */
83 {
84  const int dstH= c->dstH;
85  const int flags= c->flags;
86 
/* Line-pointer planes of the second-to-last slice: index 0 is used for luma,
 * 1 for chroma and 3 for alpha. */
87  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
88  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
89  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
90 
91  int hasAlpha = c->needAlpha;
/* NOTE(review): vLumFilterPos, vChrFilterPos, lumMmxFilter, chrMmxFilter and
 * alpMmxFilter are used below but not declared in the visible text; their
 * declarations appear to have been dropped (embedded line numbers jump
 * 91 -> 94 and 95 -> 99). */
94  int16_t *vLumFilter= c->vLumFilter;
95  int16_t *vChrFilter= c->vChrFilter;
99  const int vLumFilterSize= c->vLumFilterSize;
100  const int vChrFilterSize= c->vChrFilterSize;
101  const int chrDstY= dstY>>c->chrDstVSubSample;
102  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
103  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
104 
/* Dither patterns alternate with the parity of dstY; red uses the opposite
 * parity from blue. */
105  c->blueDither= ff_dither8[dstY&1];
/* NOTE(review): the `if` that pairs with the `else` below is missing from
 * this text (embedded line 106). As shown, green uses ff_dither8 in one case
 * and ff_dither4 in the other. */
107  c->greenDither= ff_dither8[dstY&1];
108  else
109  c->greenDither= ff_dither4[dstY&1];
110  c->redDither= ff_dither8[(dstY+1)&1];
111  if (dstY < dstH - 2) {
/* Point at the first input line of each filter window inside the plane's
 * line array (the array is indexed relative to plane->sliceY). */
112  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
113  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
114  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
115 
116  int i;
/* Luma window reaches above line 0 or below the last source line: build a
 * clamped copy of the pointer list in plane->tmp. Entries before source
 * line 0 reuse the pointer at index neg (source line 0), entries past `end`
 * repeat the previous entry. */
117  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
118  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
119 
/* This `i` shadows the outer `i` declared above. */
120  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
121  for (i = 0; i < neg; i++)
122  tmpY[i] = lumSrcPtr[neg];
123  for ( ; i < end; i++)
124  tmpY[i] = lumSrcPtr[i];
125  for ( ; i < vLumFilterSize; i++)
126  tmpY[i] = tmpY[i-1];
127  lumSrcPtr = tmpY;
128 
/* Alpha uses the luma window, so it is clamped with the same neg/end. */
129  if (alpSrcPtr) {
130  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
131  for (i = 0; i < neg; i++)
132  tmpA[i] = alpSrcPtr[neg];
133  for ( ; i < end; i++)
134  tmpA[i] = alpSrcPtr[i];
135  for ( ; i < vLumFilterSize; i++)
136  tmpA[i] = tmpA[i - 1];
137  alpSrcPtr = tmpA;
138  }
139  }
/* Same clamping for the chroma window, against c->chrSrcH. */
140  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
141  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
142  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
143  for (i = 0; i < neg; i++) {
144  tmpU[i] = chrUSrcPtr[neg];
145  }
146  for ( ; i < end; i++) {
147  tmpU[i] = chrUSrcPtr[i];
148  }
149  for ( ; i < vChrFilterSize; i++) {
150  tmpU[i] = tmpU[i - 1];
151  }
152  chrUSrcPtr = tmpU;
153  }
154 
/* SWS_ACCURATE_RND layout: taps are stored in pairs. Each record holds the
 * pointers for lines i and i+1 (the second equals the first when the filter
 * has a single tap) and, written to two consecutive int32 slots, the value
 * coef[i] + coef[i+1] * 65536. Record stride is s = APCK_SIZE / 8 int32
 * slots per tap index. */
155  if (flags & SWS_ACCURATE_RND) {
156  int s= APCK_SIZE / 8;
157  for (i=0; i<vLumFilterSize; i+=2) {
158  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
159  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
160  lumMmxFilter[s*i+APCK_COEF/4 ]=
161  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
162  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
/* Alpha reuses the packed luma coefficient just written. */
163  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
164  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
165  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
166  alpMmxFilter[s*i+APCK_COEF/4 ]=
167  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
168  }
169  }
170  for (i=0; i<vChrFilterSize; i+=2) {
171  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
172  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
173  chrMmxFilter[s*i+APCK_COEF/4 ]=
174  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
175  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
176  }
177  } else {
/* Default layout: one record of four int32 slots per tap. The line pointer
 * is written starting at slot 0; slots 2 and 3 both hold the 16-bit
 * coefficient replicated into the low and high halves (multiplication by
 * 0x10001). */
178  for (i=0; i<vLumFilterSize; i++) {
179  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
180  lumMmxFilter[4*i+2]=
181  lumMmxFilter[4*i+3]=
182  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
183  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
184  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
185  alpMmxFilter[4*i+2]=
186  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
187  }
188  }
189  for (i=0; i<vChrFilterSize; i++) {
190  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
191  chrMmxFilter[4*i+2]=
192  chrMmxFilter[4*i+3]=
193  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
194  }
195  }
196  }
197 }
198 
199 #if HAVE_MMXEXT
/**
 * Vertical filter producing one line of 8-bit output, written with XMM
 * inline assembly.
 *
 * @param filter     Read by the asm as a list of 16-byte records: a source
 *                   line pointer at offset 0 and the coefficient at offset 8.
 *                   The list ends at the first record whose pointer is NULL.
 * @param filterSize Number of taps; only used to derive the rounding bias
 *                   (and passed through to the fallback).
 * @param src        Only forwarded to the MMXEXT fallback; the asm below
 *                   takes its source pointers from `filter`.
 * @param dest       Output line. If it is not 16-byte aligned the call is
 *                   forwarded unchanged to yuv2yuvX_mmxext().
 * @param dstW       Number of output pixels.
 * @param dither     The first 8 bytes are read as the dither pattern.
 * @param offset     Starting pixel index. When non-zero, the 8 dither bytes
 *                   are rotated right by 3 bytes before use.
 *
 * The asm produces 16 pixels per outer iteration and stores them with
 * movntdq; source lines are read with movdqa.
 */
200 static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
201  const int16_t **src, uint8_t *dest, int dstW,
202  const uint8_t *dither, int offset)
203 {
/* The 16-byte stores below need an aligned destination; otherwise use the
 * MMXEXT version. */
204  if(((uintptr_t)dest) & 15){
205  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
206  return;
207  }
/* From here on filterSize holds (taps - 1); it is asm operand %4 and only
 * feeds the bias computation. */
208  filterSize--;
/*
 * Shared asm body. Operands: %0 filter, %1 dest-offset, %2 dstW+offset,
 * %3 offset, %4 filterSize, %5 first 8 dither bytes (expected in xmm3 on
 * entry).
 *  - Prologue: widen the dither bytes to words, add (%4 << 3) to each word,
 *    shift right by 4 and keep the result in xmm7 as the start value of both
 *    accumulators (xmm3, xmm4). The pixel counter ecx starts at %3.
 *  - Inner loop "1:": for each filter record, multiply 16 source samples by
 *    the coefficient (pmulhw keeps the high 16 bits) and accumulate; the
 *    loop repeats while the next record's pointer is non-NULL.
 *  - Then shift the sums right by 3, pack to unsigned bytes with saturation,
 *    store 16 bytes at %1 + counter, advance the counter by 16, reset the
 *    accumulators and the record pointer, and continue while counter < %2.
 */
209 #define MAIN_FUNCTION \
210  "pxor %%xmm0, %%xmm0 \n\t" \
211  "punpcklbw %%xmm0, %%xmm3 \n\t" \
212  "movd %4, %%xmm1 \n\t" \
213  "punpcklwd %%xmm1, %%xmm1 \n\t" \
214  "punpckldq %%xmm1, %%xmm1 \n\t" \
215  "punpcklqdq %%xmm1, %%xmm1 \n\t" \
216  "psllw $3, %%xmm1 \n\t" \
217  "paddw %%xmm1, %%xmm3 \n\t" \
218  "psraw $4, %%xmm3 \n\t" \
219  "movdqa %%xmm3, %%xmm4 \n\t" \
220  "movdqa %%xmm3, %%xmm7 \n\t" \
221  "movl %3, %%ecx \n\t" \
222  "mov %0, %%"FF_REG_d" \n\t"\
223  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
224  ".p2align 4 \n\t" /* FIXME Unroll? */\
225  "1: \n\t"\
226  "movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
227  "movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
228  "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
229  "add $16, %%"FF_REG_d" \n\t"\
230  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
231  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
232  "pmulhw %%xmm0, %%xmm2 \n\t"\
233  "pmulhw %%xmm0, %%xmm5 \n\t"\
234  "paddw %%xmm2, %%xmm3 \n\t"\
235  "paddw %%xmm5, %%xmm4 \n\t"\
236  " jnz 1b \n\t"\
237  "psraw $3, %%xmm3 \n\t"\
238  "psraw $3, %%xmm4 \n\t"\
239  "packuswb %%xmm4, %%xmm3 \n\t"\
240  "movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
241  "add $16, %%"FF_REG_c" \n\t"\
242  "cmp %2, %%"FF_REG_c" \n\t"\
243  "movdqa %%xmm7, %%xmm3 \n\t" \
244  "movdqa %%xmm7, %%xmm4 \n\t" \
245  "mov %0, %%"FF_REG_d" \n\t"\
246  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
247  "jb 1b \n\t"
248 
/* Both branches load the 8 dither bytes into xmm3 and run MAIN_FUNCTION with
 * identical operands; the offset case first rotates the 64-bit pattern right
 * by 24 bits (psrlq 24 combined with psllq 40). */
249  if (offset) {
250  __asm__ volatile(
251  "movq %5, %%xmm3 \n\t"
252  "movdqa %%xmm3, %%xmm4 \n\t"
253  "psrlq $24, %%xmm3 \n\t"
254  "psllq $40, %%xmm4 \n\t"
255  "por %%xmm4, %%xmm3 \n\t"
256  MAIN_FUNCTION
257  :: "g" (filter),
258  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
259  "m"(filterSize), "m"(((uint64_t *) dither)[0])
260  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
261  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
262  );
263  } else {
264  __asm__ volatile(
265  "movq %5, %%xmm3 \n\t"
266  MAIN_FUNCTION
267  :: "g" (filter),
268  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
269  "m"(filterSize), "m"(((uint64_t *) dither)[0])
270  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
271  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
272  );
273  }
274 }
275 #endif
276 
277 #endif /* HAVE_INLINE_ASM */
278 
/*
 * Prototype generators for the horizontal scalers. SCALE_FUNC expands to a
 * declaration only (no body) of
 *   ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>()
 * The definitions are not in this file; presumably they live in external
 * assembly -- TODO confirm.
 */
279 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
280 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
281  SwsContext *c, int16_t *data, \
282  int dstW, const uint8_t *src, \
283  const int16_t *filter, \
284  const int32_t *filterPos, int filterSize)
285 
/* Declares all twelve variants for one filter size: source depths
 * 8/9/10/12/14/16, each to intermediate depth 15 and 19. */
286 #define SCALE_FUNCS(filter_n, opt) \
287  SCALE_FUNC(filter_n, 8, 15, opt); \
288  SCALE_FUNC(filter_n, 9, 15, opt); \
289  SCALE_FUNC(filter_n, 10, 15, opt); \
290  SCALE_FUNC(filter_n, 12, 15, opt); \
291  SCALE_FUNC(filter_n, 14, 15, opt); \
292  SCALE_FUNC(filter_n, 16, 15, opt); \
293  SCALE_FUNC(filter_n, 8, 19, opt); \
294  SCALE_FUNC(filter_n, 9, 19, opt); \
295  SCALE_FUNC(filter_n, 10, 19, opt); \
296  SCALE_FUNC(filter_n, 12, 19, opt); \
297  SCALE_FUNC(filter_n, 14, 19, opt); \
298  SCALE_FUNC(filter_n, 16, 19, opt)
299 
/* MMX set: filter-size variants 4, 8 and X. */
300 #define SCALE_FUNCS_MMX(opt) \
301  SCALE_FUNCS(4, opt); \
302  SCALE_FUNCS(8, opt); \
303  SCALE_FUNCS(X, opt)
304 
/* SSE set: filter-size variants 4, 8, X4 and X8. */
305 #define SCALE_FUNCS_SSE(opt) \
306  SCALE_FUNCS(4, opt); \
307  SCALE_FUNCS(8, opt); \
308  SCALE_FUNCS(X4, opt); \
309  SCALE_FUNCS(X8, opt)
310 
/* The MMX prototypes are only declared for 32-bit x86 builds. */
311 #if ARCH_X86_32
312 SCALE_FUNCS_MMX(mmx);
313 #endif
314 SCALE_FUNCS_SSE(sse2);
315 SCALE_FUNCS_SSE(ssse3);
316 SCALE_FUNCS_SSE(sse4);
317 
/*
 * Prototype generators for the multi-tap vertical scalers: VSCALEX_FUNC
 * declares ff_yuv2planeX_<size>_<opt>() with the same parameter list as
 * yuv2yuvX_sse3() above. Declarations only; no definitions in this file.
 */
318 #define VSCALEX_FUNC(size, opt) \
319 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
320  const int16_t **src, uint8_t *dest, int dstW, \
321  const uint8_t *dither, int offset)
/* Declares the 8-, 9- and 10-bit variants for one instruction set. */
322 #define VSCALEX_FUNCS(opt) \
323  VSCALEX_FUNC(8, opt); \
324  VSCALEX_FUNC(9, opt); \
325  VSCALEX_FUNC(10, opt)
326 
/* MMXEXT variants are declared for 32-bit x86 only; a 16-bit variant is
 * declared for sse4 only. */
327 #if ARCH_X86_32
328 VSCALEX_FUNCS(mmxext);
329 #endif
330 VSCALEX_FUNCS(sse2);
331 VSCALEX_FUNCS(sse4);
332 VSCALEX_FUNC(16, sse4);
333 VSCALEX_FUNCS(avx);
334 
/*
 * Prototype generators for the vertical scalers that take a single source
 * line: VSCALE_FUNC declares ff_yuv2plane1_<size>_<opt>(). Declarations
 * only; no definitions in this file.
 */
335 #define VSCALE_FUNC(size, opt) \
336 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
337  const uint8_t *dither, int offset)
/* The 8- and 16-bit variants take the suffix opt1; the 9- and 10-bit
 * variants take opt2. */
338 #define VSCALE_FUNCS(opt1, opt2) \
339  VSCALE_FUNC(8, opt1); \
340  VSCALE_FUNC(9, opt2); \
341  VSCALE_FUNC(10, opt2); \
342  VSCALE_FUNC(16, opt1)
343 
/* On 32-bit x86 the 8/16-bit variants are declared as mmx and the 9/10-bit
 * variants as mmxext. sse4 adds only a 16-bit variant. */
344 #if ARCH_X86_32
345 VSCALE_FUNCS(mmx, mmxext);
346 #endif
347 VSCALE_FUNCS(sse2, sse2);
348 VSCALE_FUNC(16, sse4);
349 VSCALE_FUNCS(avx, avx);
350 
/*
 * Prototype generators for the input-format readers. INPUT_Y_FUNC declares
 * ff_<fmt>ToY_<opt>() and INPUT_UV_FUNC declares ff_<fmt>ToUV_<opt>().
 * Parameters named unused* exist to match a common signature. Declarations
 * only; no definitions in this file.
 */
351 #define INPUT_Y_FUNC(fmt, opt) \
352 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
353  const uint8_t *unused1, const uint8_t *unused2, \
354  int w, uint32_t *unused)
355 #define INPUT_UV_FUNC(fmt, opt) \
356 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
357  const uint8_t *unused0, \
358  const uint8_t *src1, \
359  const uint8_t *src2, \
360  int w, uint32_t *unused)
/* Declares both the Y and the UV reader for one format. */
361 #define INPUT_FUNC(fmt, opt) \
362  INPUT_Y_FUNC(fmt, opt); \
363  INPUT_UV_FUNC(fmt, opt)
/* Full set for one instruction set. nv12 and nv21 get a UV reader only;
 * every other format gets both readers. */
364 #define INPUT_FUNCS(opt) \
365  INPUT_FUNC(uyvy, opt); \
366  INPUT_FUNC(yuyv, opt); \
367  INPUT_UV_FUNC(nv12, opt); \
368  INPUT_UV_FUNC(nv21, opt); \
369  INPUT_FUNC(rgba, opt); \
370  INPUT_FUNC(bgra, opt); \
371  INPUT_FUNC(argb, opt); \
372  INPUT_FUNC(abgr, opt); \
373  INPUT_FUNC(rgb24, opt); \
374  INPUT_FUNC(bgr24, opt)
375 
/* The mmx set is declared for 32-bit x86 only. */
376 #if ARCH_X86_32
377 INPUT_FUNCS(mmx);
378 #endif
379 INPUT_FUNCS(sse2);
380 INPUT_FUNCS(ssse3);
381 INPUT_FUNCS(avx);
382 
/*
 * 64-bit only: YUV2NV_DECL declares ff_yuv2<fmt>cX_<opt>(), a chroma writer
 * that takes separate u and v source-line lists and a single dst pointer.
 * Declarations only; the init code later in this file assigns the avx2
 * variants to c->yuv2nv12cX.
 */
383 #if ARCH_X86_64
384 #define YUV2NV_DECL(fmt, opt) \
385 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
386  const int16_t *filter, int filterSize, \
387  const int16_t **u, const int16_t **v, \
388  uint8_t *dst, int dstWidth)
389 
390 YUV2NV_DECL(nv12, avx2);
391 YUV2NV_DECL(nv21, avx2);
392 #endif
393 
/*
 * x86 initialisation: reads the CPU feature flags and installs function
 * pointers on the context `c` (hyScale, hcScale, yuv2plane1, yuv2planeX,
 * lumToYV12, chrToYV12, alpToYV12, yuv2nv12cX). The feature checks run in
 * order from older to newer instruction sets and assign the same fields, so
 * an assignment made for a later set replaces one made for an earlier set.
 *
 * NOTE(review): the function signature is missing from this text. The
 * cross-reference list at the end of the page suggests
 * `av_cold void ff_sws_init_swscale_x86(SwsContext *c)` -- TODO confirm.
 */
395 {
396  int cpu_flags = av_get_cpu_flags();
397 
/* Inline-assembly paths first. The init functions called here are not
 * defined in the visible text; presumably they come from the
 * swscale_template.c inclusions. */
398 #if HAVE_MMX_INLINE
399  if (INLINE_MMX(cpu_flags))
400  sws_init_swscale_mmx(c);
401 #endif
402 #if HAVE_MMXEXT_INLINE
403  if (INLINE_MMXEXT(cpu_flags))
404  sws_init_swscale_mmxext(c);
/* The SSE3 vertical filter is only installed when the context uses the MMX
 * vertical-filter tables and accurate rounding is not requested. */
405  if (cpu_flags & AV_CPU_FLAG_SSE3){
406  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
407  c->yuv2planeX = yuv2yuvX_sse3;
408  }
409 #endif
410 
/* ASSIGN_SCALE_FUNC2: choose the horizontal scaler for one filter-size
 * variant from c->srcBpc. For each source depth, dstBpc <= 14 selects the
 * "to15" function with suffix opt2, otherwise the "to19" function with
 * suffix opt1. PAL8 and RGB sources whose first component is below 16 bits
 * use the 14-bit functions; any remaining case must be 16-bit (asserted). */
411 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
412  if (c->srcBpc == 8) { \
413  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
414  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
415  } else if (c->srcBpc == 9) { \
416  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
417  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
418  } else if (c->srcBpc == 10) { \
419  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
420  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
421  } else if (c->srcBpc == 12) { \
422  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
423  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
424  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
425  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
426  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
427  } else { /* c->srcBpc == 16 */ \
428  av_assert0(c->srcBpc == 16);\
429  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
430  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
431  } \
432 } while (0)
/* ASSIGN_MMX_SCALE_FUNC: filter size 4 and 8 get dedicated variants, every
 * other size uses the generic X variant. */
433 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
434  switch (filtersize) { \
435  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
436  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
437  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
438  }
/* ASSIGN_VSCALEX_FUNC: choose the multi-tap vertical scaler from c->dstBpc.
 * The 16-bit case is supplied by the caller (do_16_case, may be empty).
 * 9/10-bit are skipped for big-endian output (10-bit also for P010LE), and
 * the 8-bit case additionally requires condition_8bit and that the MMX
 * vertical filter is not in use. */
439 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
440 switch(c->dstBpc){ \
441  case 16: do_16_case; break; \
442  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
443  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
444  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
445  }
/* ASSIGN_VSCALE_FUNC: same idea for the single-line vertical scaler. 8- and
 * 16-bit use suffix opt1; 9/10-bit use opt2 and also require opt2chk. Any
 * other depth must be above 8 bits (asserted). */
446 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
447  switch(c->dstBpc){ \
448  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
449  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
450  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
451  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
452  default: av_assert0(c->dstBpc>8); \
453  }
/* case_rgb: switch case for one RGB source format. The luma reader is always
 * installed; the chroma reader only when the source chroma is not
 * horizontally subsampled. */
454 #define case_rgb(x, X, opt) \
455  case AV_PIX_FMT_ ## X: \
456  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
457  if (!c->chrSrcHSubSample) \
458  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
459  break
/* MMX / MMXEXT function set: 32-bit x86 builds only. */
460 #if ARCH_X86_32
461  if (EXTERNAL_MMX(cpu_flags)) {
462  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
463  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
464  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
465 
466  switch (c->srcFormat) {
/* YA8 reuses the packed-YUV readers: yuyvToY for the gray samples and
 * uyvyToY for alpha (presumably because they pick the even and the odd
 * bytes respectively -- TODO confirm). */
467  case AV_PIX_FMT_YA8:
468  c->lumToYV12 = ff_yuyvToY_mmx;
469  if (c->needAlpha)
470  c->alpToYV12 = ff_uyvyToY_mmx;
471  break;
472  case AV_PIX_FMT_YUYV422:
473  c->lumToYV12 = ff_yuyvToY_mmx;
474  c->chrToYV12 = ff_yuyvToUV_mmx;
475  break;
476  case AV_PIX_FMT_UYVY422:
477  c->lumToYV12 = ff_uyvyToY_mmx;
478  c->chrToYV12 = ff_uyvyToUV_mmx;
479  break;
480  case AV_PIX_FMT_NV12:
481  c->chrToYV12 = ff_nv12ToUV_mmx;
482  break;
483  case AV_PIX_FMT_NV21:
484  c->chrToYV12 = ff_nv21ToUV_mmx;
485  break;
486  case_rgb(rgb24, RGB24, mmx);
487  case_rgb(bgr24, BGR24, mmx);
488  case_rgb(bgra, BGRA, mmx);
489  case_rgb(rgba, RGBA, mmx);
490  case_rgb(abgr, ABGR, mmx);
491  case_rgb(argb, ARGB, mmx);
492  default:
493  break;
494  }
495  }
496  if (EXTERNAL_MMXEXT(cpu_flags)) {
497  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
498  }
499 #endif /* ARCH_X86_32 */
/* ASSIGN_SSE_SCALE_FUNC: like the MMX selector, but other filter sizes are
 * split into the X4 variant (when bit 2 of the size is set) and the X8
 * variant. */
500 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
501  switch (filtersize) { \
502  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
503  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
504  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
505  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
506  break; \
507  }
/* SSE2 function set. The 8-bit yuv2planeX variant is only taken when the
 * build has an aligned stack or targets x86-64. */
508  if (EXTERNAL_SSE2(cpu_flags)) {
509  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
510  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
511  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
512  HAVE_ALIGNED_STACK || ARCH_X86_64);
513  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
514 
515  switch (c->srcFormat) {
516  case AV_PIX_FMT_YA8:
517  c->lumToYV12 = ff_yuyvToY_sse2;
518  if (c->needAlpha)
519  c->alpToYV12 = ff_uyvyToY_sse2;
520  break;
521  case AV_PIX_FMT_YUYV422:
522  c->lumToYV12 = ff_yuyvToY_sse2;
523  c->chrToYV12 = ff_yuyvToUV_sse2;
524  break;
525  case AV_PIX_FMT_UYVY422:
526  c->lumToYV12 = ff_uyvyToY_sse2;
527  c->chrToYV12 = ff_uyvyToUV_sse2;
528  break;
529  case AV_PIX_FMT_NV12:
530  c->chrToYV12 = ff_nv12ToUV_sse2;
531  break;
532  case AV_PIX_FMT_NV21:
533  c->chrToYV12 = ff_nv21ToUV_sse2;
534  break;
535  case_rgb(rgb24, RGB24, sse2);
536  case_rgb(bgr24, BGR24, sse2);
537  case_rgb(bgra, BGRA, sse2);
538  case_rgb(rgba, RGBA, sse2);
539  case_rgb(abgr, ABGR, sse2);
540  case_rgb(argb, ARGB, sse2);
541  default:
542  break;
543  }
544  }
/* SSSE3: horizontal scalers plus the 24-bit RGB readers only. */
545  if (EXTERNAL_SSSE3(cpu_flags)) {
546  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
547  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
548  switch (c->srcFormat) {
549  case_rgb(rgb24, RGB24, ssse3);
550  case_rgb(bgr24, BGR24, ssse3);
551  default:
552  break;
553  }
554  }
/* SSE4: opt1 = sse4 is used for the "to19" scalers, opt2 = ssse3 for the
 * "to15" scalers. */
555  if (EXTERNAL_SSE4(cpu_flags)) {
556  /* Xto15 don't need special sse4 functions */
557  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
558  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
/* NOTE(review): the next two lines look like the trailing arguments of an
 * ASSIGN_VSCALEX_FUNC(...) invocation (a do_16_case statement followed by
 * the 8-bit condition, as in the SSE2 block); its opening line (embedded
 * line 559) is missing from this text. */
560  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
561  HAVE_ALIGNED_STACK || ARCH_X86_64);
562  if (c->dstBpc == 16 && !isBE(c->dstFormat))
563  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
564  }
565 
566  if (EXTERNAL_AVX(cpu_flags)) {
/* NOTE(review): the next line is likewise the tail of a macro invocation
 * whose opening line (embedded line 567) is missing from this text. */
568  HAVE_ALIGNED_STACK || ARCH_X86_64);
569  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
570 
/* AVX replaces only the chroma reader for the packed-YUV and NV formats;
 * lumToYV12 keeps whatever an earlier block installed. */
571  switch (c->srcFormat) {
572  case AV_PIX_FMT_YUYV422:
573  c->chrToYV12 = ff_yuyvToUV_avx;
574  break;
575  case AV_PIX_FMT_UYVY422:
576  c->chrToYV12 = ff_uyvyToUV_avx;
577  break;
578  case AV_PIX_FMT_NV12:
579  c->chrToYV12 = ff_nv12ToUV_avx;
580  break;
581  case AV_PIX_FMT_NV21:
582  c->chrToYV12 = ff_nv21ToUV_avx;
583  break;
584  case_rgb(rgb24, RGB24, avx);
585  case_rgb(bgr24, BGR24, avx);
586  case_rgb(bgra, BGRA, avx);
587  case_rgb(rgba, RGBA, avx);
588  case_rgb(abgr, ABGR, avx);
589  case_rgb(argb, ARGB, avx);
590  default:
591  break;
592  }
593  }
594 
/* AVX2 (64-bit only): interleaved-chroma writers, chosen by destination
 * format. NV24 shares the nv12 writer and NV42 shares the nv21 writer. */
595 #if ARCH_X86_64
596  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
597  switch (c->dstFormat) {
598  case AV_PIX_FMT_NV12:
599  case AV_PIX_FMT_NV24:
600  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
601  break;
602  case AV_PIX_FMT_NV21:
603  case AV_PIX_FMT_NV42:
604  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
605  break;
606  default:
607  break;
608  }
609  }
610 #endif
611 }
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
#define NULL
Definition: coverity.c:32
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
int chrSrcH
Height of source chroma planes.
8 bits gray, 8 bits alpha
Definition: pixfmt.h:143
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:335
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:300
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
uint64_t redDither
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differen...
static atomic_int cpu_flags
Definition: cpu.c:50
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:39
int dstY
Last destination vertical line output from last slice.
uint64_t blueDither
#define case_rgb(x, X, opt)
Macro definitions for various function/variable attributes.
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
int srcH
Height of source luma/alpha planes.
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:338
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination i...
uint8_t
#define av_cold
Definition: attributes.h:88
uint8_t ** line
line buffer
int vChrFilterSize
Vertical filter size for chroma pixels.
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:394
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
external API header
enum AVPixelFormat dstFormat
Destination pixel format.
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:322
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define U(x)
Definition: vp56_arith.h:37
#define src
Definition: vp8dsp.c:254
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
#define INLINE_MMX(flags)
Definition: cpu.h:86
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
static const uint8_t dither[8][8]
Definition: vf_fspp.c:57
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
simple assert() macros that are a bit more flexible than ISO C assert().
Slice plane.
SwsPlane plane[MAX_SLICE_PLANES]
color planes
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
Definition: mem.h:114
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:348
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
#define APCK_SIZE
#define FFMIN(a, b)
Definition: common.h:96
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
Definition: cpu.h:40
yuv2planar1_fn yuv2plane1
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:305
yuv2interleavedX_fn yuv2nv12cX
int32_t
#define XMM_CLOBBERS(...)
Definition: asm.h:98
#define s(width, name)
Definition: cbs_vp9.c:257
int dstW
Width of destination luma/alpha planes.
if(ret)
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:392
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
const uint64_t ff_dither4[2]
Definition: swscale.c:32
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
yuv2planarX_fn yuv2planeX
#define DECLARE_ASM_ALIGNED(n, t, v)
Declare an aligned variable appropriate for use in inline assembly code.
Definition: mem.h:113
struct SwsSlice * slice
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
#define APCK_COEF
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
as above, but U and V bytes are swapped
Definition: pixfmt.h:349
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
#define SWS_ACCURATE_RND
Definition: swscale.h:83
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
#define INPUT_FUNCS(opt)
Definition: swscale.c:364
#define flags(name, subs,...)
Definition: cbs_av1.c:560
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:318
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
enum AVPixelFormat srcFormat
Source pixel format.
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:387
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
uint64_t greenDither
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
uint8_t ** tmp
Tmp line buffer used by mmx code.
int x86_reg
Definition: asm.h:72
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
#define APCK_PTR2
int sliceY
index of first line
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
for(j=16;j >0;--j)
int i
Definition: input.c:406
#define av_unused
Definition: attributes.h:131
const uint64_t ff_dither8[2]
Definition: swscale.c:36