FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/pixdesc.h"
31 
32 #if HAVE_INLINE_ASM
33 
/* Compile-time switch; it is not referenced in the visible part of this file
 * (presumably consumed by the included swscale_template.c -- TODO confirm). */
34 #define DITHER1XBPP
35 
/* 8-byte-aligned 64-bit constants for use from inline assembly.
 * bF8 / bFC: every byte is 0xF8 (upper 5 bits set) / 0xFC (upper 6 bits set).
 * w10 / w02: every 16-bit word is 0x0010 / 0x0002. */
36 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
37 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
38 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
39 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
40 
/* Two-entry dither tables. In this file they are indexed with (dstY & 1) or
 * ((dstY + 1) & 1), i.e. the entry alternates between consecutive output
 * lines. */
41 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
42  0x0103010301030103LL,
43  0x0200020002000200LL,};
44 
45 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
46  0x0602060206020602LL,
47  0x0004000400040004LL,};
48 
/* Per-16-bit-word bit-field masks.
 * "16" set: bits 0-4, 5-10 and 11-15 (a 5/6/5 split).
 * "15" set: bits 0-4, 5-9 and 10-14 (a 5/5/5 split). */
49 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
50 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
51 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
52 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
53 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
54 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
55 
/* Masks that select every third byte of a 64-bit word, at three different
 * phases (presumably for 24-bit packed pixels -- TODO confirm against users). */
56 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
57 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
58 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
59 
/* ff_bgr2YOffset: 0x10 (16) in every byte; ff_bgr2UVOffset: 0x80 (128) in
 * every byte; ff_w1111: 1 in every 16-bit word. */
60 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
61 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
62 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
63 
64 
/* swscale_template.c is included once per enabled inline-asm instruction set.
 * Before each inclusion RENAME() is redefined so that RENAME(a) pastes a
 * distinct suffix (_mmx or _mmxext) onto the identifier, and
 * COMPILE_TEMPLATE_MMXEXT is set to 0 or 1 for that inclusion. */
65 //MMX versions
66 #if HAVE_MMX_INLINE
67 #undef RENAME
68 #define COMPILE_TEMPLATE_MMXEXT 0
69 #define RENAME(a) a ## _mmx
70 #include "swscale_template.c"
71 #endif
72 
73 // MMXEXT versions
74 #if HAVE_MMXEXT_INLINE
75 #undef RENAME
/* COMPILE_TEMPLATE_MMXEXT may already be defined by the MMX pass above, so it
 * is undefined before being redefined to 1. */
76 #undef COMPILE_TEMPLATE_MMXEXT
77 #define COMPILE_TEMPLATE_MMXEXT 1
78 #define RENAME(a) a ## _mmxext
79 #include "swscale_template.c"
80 #endif
81 
83  int lastInLumBuf, int lastInChrBuf)
84 {
85  const int dstH= c->dstH;
86  const int flags= c->flags;
87 
88  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
89  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
90  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
91 
92  int hasAlpha = c->needAlpha;
95  int16_t *vLumFilter= c->vLumFilter;
96  int16_t *vChrFilter= c->vChrFilter;
100  const int vLumFilterSize= c->vLumFilterSize;
101  const int vChrFilterSize= c->vChrFilterSize;
102  const int chrDstY= dstY>>c->chrDstVSubSample;
103  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
104  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
105 
106  c->blueDither= ff_dither8[dstY&1];
108  c->greenDither= ff_dither8[dstY&1];
109  else
110  c->greenDither= ff_dither4[dstY&1];
111  c->redDither= ff_dither8[(dstY+1)&1];
112  if (dstY < dstH - 2) {
113  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
114  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
115  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
116 
117  int i;
118  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
119  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
120 
121  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
122  for (i = 0; i < neg; i++)
123  tmpY[i] = lumSrcPtr[neg];
124  for ( ; i < end; i++)
125  tmpY[i] = lumSrcPtr[i];
126  for ( ; i < vLumFilterSize; i++)
127  tmpY[i] = tmpY[i-1];
128  lumSrcPtr = tmpY;
129 
130  if (alpSrcPtr) {
131  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
132  for (i = 0; i < neg; i++)
133  tmpA[i] = alpSrcPtr[neg];
134  for ( ; i < end; i++)
135  tmpA[i] = alpSrcPtr[i];
136  for ( ; i < vLumFilterSize; i++)
137  tmpA[i] = tmpA[i - 1];
138  alpSrcPtr = tmpA;
139  }
140  }
141  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
142  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
143  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
144  for (i = 0; i < neg; i++) {
145  tmpU[i] = chrUSrcPtr[neg];
146  }
147  for ( ; i < end; i++) {
148  tmpU[i] = chrUSrcPtr[i];
149  }
150  for ( ; i < vChrFilterSize; i++) {
151  tmpU[i] = tmpU[i - 1];
152  }
153  chrUSrcPtr = tmpU;
154  }
155 
156  if (flags & SWS_ACCURATE_RND) {
157  int s= APCK_SIZE / 8;
158  for (i=0; i<vLumFilterSize; i+=2) {
159  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
160  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
161  lumMmxFilter[s*i+APCK_COEF/4 ]=
162  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
163  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
164  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
165  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
166  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
167  alpMmxFilter[s*i+APCK_COEF/4 ]=
168  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
169  }
170  }
171  for (i=0; i<vChrFilterSize; i+=2) {
172  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
173  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
174  chrMmxFilter[s*i+APCK_COEF/4 ]=
175  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
176  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
177  }
178  } else {
179  for (i=0; i<vLumFilterSize; i++) {
180  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
181  lumMmxFilter[4*i+2]=
182  lumMmxFilter[4*i+3]=
183  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
184  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
185  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
186  alpMmxFilter[4*i+2]=
187  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
188  }
189  }
190  for (i=0; i<vChrFilterSize; i++) {
191  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
192  chrMmxFilter[4*i+2]=
193  chrMmxFilter[4*i+3]=
194  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
195  }
196  }
197  }
198 }
199 
200 #if HAVE_MMXEXT
/**
 * Vertical filter producing 8-bit output, 16 pixels per iteration, using
 * SSE2/SSE3 inline assembly.
 *
 * @param filter     read by the asm as a table of 16-byte entries: a source
 *                   line pointer at byte 0 and the 64 bits at byte 8 used as
 *                   the coefficient (broadcast with movddup). The table is
 *                   walked until an entry whose pointer is NULL.
 * @param filterSize number of taps; only (filterSize - 1) is used by the asm,
 *                   to build the initial accumulator value.
 * @param src        not referenced by the asm (source lines are taken from
 *                   the filter table); only forwarded to the fallback.
 * @param dest       output bytes. If not 16-byte aligned, the call is
 *                   forwarded unchanged to yuv2yuvX_mmxext().
 * @param dstW       output width; the loop runs while the pixel index is
 *                   below dstW + offset, in steps of 16, and always executes
 *                   at least once, so stores may extend past dstW up to the
 *                   next 16-pixel step.
 * @param dither     8 dither bytes, read as one 64-bit value.
 * @param offset     starting pixel index; when non-zero the 64-bit dither
 *                   value is rotated right by 24 bits before use.
 *
 * Source samples are loaded with movdqa and results stored with movntdq,
 * both of which require 16-byte-aligned addresses.
 * NOTE(review): alignment of the source line pointers is assumed here, not
 * checked -- confirm against the caller.
 * NOTE(review): the asm statements store through operand %1 but declare no
 * output operand and no "memory" clobber -- confirm this is intentional.
 */
201 static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
202  const int16_t **src, uint8_t *dest, int dstW,
203  const uint8_t *dither, int offset)
204 {
 /* Unaligned destination: hand the identical arguments to the MMXEXT
  * routine (presumably generated from swscale_template.c via RENAME). */
205  if(((uintptr_t)dest) & 15){
206  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
207  return;
208  }
 /* The asm reads this decremented value as operand %4. */
209  filterSize--;
/*
 * MAIN_FUNCTION expects the 8 dither bytes in the low half of xmm3.
 * Operands: %0 filter table, %1 dest - offset, %2 dstW + offset,
 *           %3 offset, %4 filterSize - 1.
 *  - The dither bytes are zero-extended to 8 words; 8 * (filterSize - 1) is
 *    added to each word and the sum is shifted right by 4. The result is
 *    the initial accumulator, kept in xmm7 and copied to xmm3/xmm4.
 *  - The pixel counter (REG_c) starts at offset.
 *  - Inner loop: for each table entry, 16 int16 samples are loaded from
 *    entry pointer + 2 * counter, multiplied by the coefficient with pmulhw
 *    and added to xmm3/xmm4; it ends when the next entry pointer is NULL.
 *  - The sums are shifted right by 3, packed to 16 unsigned saturated bytes
 *    and written with a non-temporal store at (%1 + counter).
 *  - The counter advances by 16, the accumulators are reset from xmm7, the
 *    table pointer is rewound, and the loop repeats while counter < %2.
 * Both the inner and the outer loop branch back to the same label "1".
 */
210 #define MAIN_FUNCTION \
211  "pxor %%xmm0, %%xmm0 \n\t" \
212  "punpcklbw %%xmm0, %%xmm3 \n\t" \
213  "movd %4, %%xmm1 \n\t" \
214  "punpcklwd %%xmm1, %%xmm1 \n\t" \
215  "punpckldq %%xmm1, %%xmm1 \n\t" \
216  "punpcklqdq %%xmm1, %%xmm1 \n\t" \
217  "psllw $3, %%xmm1 \n\t" \
218  "paddw %%xmm1, %%xmm3 \n\t" \
219  "psraw $4, %%xmm3 \n\t" \
220  "movdqa %%xmm3, %%xmm4 \n\t" \
221  "movdqa %%xmm3, %%xmm7 \n\t" \
222  "movl %3, %%ecx \n\t" \
223  "mov %0, %%"FF_REG_d" \n\t"\
224  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
225  ".p2align 4 \n\t" /* FIXME Unroll? */\
226  "1: \n\t"\
227  "movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
228  "movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
229  "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
230  "add $16, %%"FF_REG_d" \n\t"\
231  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
232  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
233  "pmulhw %%xmm0, %%xmm2 \n\t"\
234  "pmulhw %%xmm0, %%xmm5 \n\t"\
235  "paddw %%xmm2, %%xmm3 \n\t"\
236  "paddw %%xmm5, %%xmm4 \n\t"\
237  " jnz 1b \n\t"\
238  "psraw $3, %%xmm3 \n\t"\
239  "psraw $3, %%xmm4 \n\t"\
240  "packuswb %%xmm4, %%xmm3 \n\t"\
241  "movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
242  "add $16, %%"FF_REG_c" \n\t"\
243  "cmp %2, %%"FF_REG_c" \n\t"\
244  "movdqa %%xmm7, %%xmm3 \n\t" \
245  "movdqa %%xmm7, %%xmm4 \n\t" \
246  "mov %0, %%"FF_REG_d" \n\t"\
247  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
248  "jb 1b \n\t"
249 
 /* Non-zero offset: load the 64-bit dither value (%5) and rotate it right
  * by 24 bits ((d >> 24) | (d << 40)) before running MAIN_FUNCTION.
  * Zero offset: load it unchanged. The operand and clobber lists of the two
  * asm statements are identical. */
250  if (offset) {
251  __asm__ volatile(
252  "movq %5, %%xmm3 \n\t"
253  "movdqa %%xmm3, %%xmm4 \n\t"
254  "psrlq $24, %%xmm3 \n\t"
255  "psllq $40, %%xmm4 \n\t"
256  "por %%xmm4, %%xmm3 \n\t"
257  MAIN_FUNCTION
258  :: "g" (filter),
259  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
260  "m"(filterSize), "m"(((uint64_t *) dither)[0])
261  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
262  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
263  );
264  } else {
265  __asm__ volatile(
266  "movq %5, %%xmm3 \n\t"
267  MAIN_FUNCTION
268  :: "g" (filter),
269  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
270  "m"(filterSize), "m"(((uint64_t *) dither)[0])
271  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
272  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
273  );
274  }
275 }
276 #endif
277 
278 #endif /* HAVE_INLINE_ASM */
279 
/* Expands to the prototype (without trailing semicolon) of one horizontal
 * scaler named ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. No function
 * body is provided in this part of the file (presumably implemented in
 * external assembly -- TODO confirm). */
280 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
281 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
282  SwsContext *c, int16_t *data, \
283  int dstW, const uint8_t *src, \
284  const int16_t *filter, \
285  const int32_t *filterPos, int filterSize)
286 
/* Declares the 12 prototypes for one filter-size variant and one instruction
 * set: from_bpc in {8, 9, 10, 12, 14, 16} combined with to_bpc in {15, 19}. */
287 #define SCALE_FUNCS(filter_n, opt) \
288  SCALE_FUNC(filter_n, 8, 15, opt); \
289  SCALE_FUNC(filter_n, 9, 15, opt); \
290  SCALE_FUNC(filter_n, 10, 15, opt); \
291  SCALE_FUNC(filter_n, 12, 15, opt); \
292  SCALE_FUNC(filter_n, 14, 15, opt); \
293  SCALE_FUNC(filter_n, 16, 15, opt); \
294  SCALE_FUNC(filter_n, 8, 19, opt); \
295  SCALE_FUNC(filter_n, 9, 19, opt); \
296  SCALE_FUNC(filter_n, 10, 19, opt); \
297  SCALE_FUNC(filter_n, 12, 19, opt); \
298  SCALE_FUNC(filter_n, 14, 19, opt); \
299  SCALE_FUNC(filter_n, 16, 19, opt)
300 
/* MMX set: filter-size variants 4, 8 and X. */
301 #define SCALE_FUNCS_MMX(opt) \
302  SCALE_FUNCS(4, opt); \
303  SCALE_FUNCS(8, opt); \
304  SCALE_FUNCS(X, opt)
305 
/* SSE sets: filter-size variants 4, 8, X4 and X8. */
306 #define SCALE_FUNCS_SSE(opt) \
307  SCALE_FUNCS(4, opt); \
308  SCALE_FUNCS(8, opt); \
309  SCALE_FUNCS(X4, opt); \
310  SCALE_FUNCS(X8, opt)
311 
/* The MMX prototypes are declared on 32-bit x86 only; the SSE2, SSSE3 and
 * SSE4 prototypes are declared unconditionally. */
312 #if ARCH_X86_32
313 SCALE_FUNCS_MMX(mmx);
314 #endif
315 SCALE_FUNCS_SSE(sse2);
316 SCALE_FUNCS_SSE(ssse3);
317 SCALE_FUNCS_SSE(sse4);
318 
/* Expands to the prototype (without trailing semicolon) of a multi-tap
 * vertical scaler named ff_yuv2planeX_<size>_<opt>. The parameter list is the
 * same as that of yuv2yuvX_sse3() in this file. */
319 #define VSCALEX_FUNC(size, opt) \
320 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
321  const int16_t **src, uint8_t *dest, int dstW, \
322  const uint8_t *dither, int offset)
/* Declares the size 8, 9 and 10 variants for one instruction set. */
323 #define VSCALEX_FUNCS(opt) \
324  VSCALEX_FUNC(8, opt); \
325  VSCALEX_FUNC(9, opt); \
326  VSCALEX_FUNC(10, opt)
327 
/* mmxext prototypes exist on 32-bit x86 only. A size-16 variant is declared
 * for sse4 only. */
328 #if ARCH_X86_32
329 VSCALEX_FUNCS(mmxext);
330 #endif
331 VSCALEX_FUNCS(sse2);
332 VSCALEX_FUNCS(sse4);
333 VSCALEX_FUNC(16, sse4);
334 VSCALEX_FUNCS(avx);
335 
/* Expands to the prototype (without trailing semicolon) of a vertical output
 * routine named ff_yuv2plane1_<size>_<opt>; unlike ff_yuv2planeX_* it takes a
 * single source line and no filter table. */
336 #define VSCALE_FUNC(size, opt) \
337 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
338  const uint8_t *dither, int offset)
/* Sizes 8 and 16 use the opt1 suffix; sizes 9 and 10 use the opt2 suffix. */
339 #define VSCALE_FUNCS(opt1, opt2) \
340  VSCALE_FUNC(8, opt1); \
341  VSCALE_FUNC(9, opt2); \
342  VSCALE_FUNC(10, opt2); \
343  VSCALE_FUNC(16, opt1)
344 
/* On 32-bit x86 the 8/16 variants are declared with the mmx suffix and the
 * 9/10 variants with the mmxext suffix. An additional size-16 sse4 variant
 * is declared separately. */
345 #if ARCH_X86_32
346 VSCALE_FUNCS(mmx, mmxext);
347 #endif
348 VSCALE_FUNCS(sse2, sse2);
349 VSCALE_FUNC(16, sse4);
350 VSCALE_FUNCS(avx, avx);
351 
/* Expands to the prototype (without trailing semicolon) of an input converter
 * named ff_<fmt>ToY_<opt>: one destination, one used source pointer, a width,
 * and three parameters named as unused. */
352 #define INPUT_Y_FUNC(fmt, opt) \
353 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
354  const uint8_t *unused1, const uint8_t *unused2, \
355  int w, uint32_t *unused)
/* Expands to the prototype of ff_<fmt>ToUV_<opt>: two destinations (dstU,
 * dstV), two used source pointers (src1, src2), a width, and two parameters
 * named as unused. */
356 #define INPUT_UV_FUNC(fmt, opt) \
357 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
358  const uint8_t *unused0, \
359  const uint8_t *src1, \
360  const uint8_t *src2, \
361  int w, uint32_t *unused)
/* Declares both the ToY and the ToUV prototype for one format. */
362 #define INPUT_FUNC(fmt, opt) \
363  INPUT_Y_FUNC(fmt, opt); \
364  INPUT_UV_FUNC(fmt, opt)
/* Full prototype set for one instruction set. nv12 and nv21 get only a ToUV
 * prototype; every other listed format gets both ToY and ToUV. */
365 #define INPUT_FUNCS(opt) \
366  INPUT_FUNC(uyvy, opt); \
367  INPUT_FUNC(yuyv, opt); \
368  INPUT_UV_FUNC(nv12, opt); \
369  INPUT_UV_FUNC(nv21, opt); \
370  INPUT_FUNC(rgba, opt); \
371  INPUT_FUNC(bgra, opt); \
372  INPUT_FUNC(argb, opt); \
373  INPUT_FUNC(abgr, opt); \
374  INPUT_FUNC(rgb24, opt); \
375  INPUT_FUNC(bgr24, opt)
376 
/* mmx prototypes exist on 32-bit x86 only; sse2, ssse3 and avx prototypes are
 * declared unconditionally. */
377 #if ARCH_X86_32
378 INPUT_FUNCS(mmx);
379 #endif
380 INPUT_FUNCS(sse2);
381 INPUT_FUNCS(ssse3);
382 INPUT_FUNCS(avx);
383 
385 {
386  int cpu_flags = av_get_cpu_flags();
387 
388 #if HAVE_MMX_INLINE
389  if (INLINE_MMX(cpu_flags))
390  sws_init_swscale_mmx(c);
391 #endif
392 #if HAVE_MMXEXT_INLINE
393  if (INLINE_MMXEXT(cpu_flags))
394  sws_init_swscale_mmxext(c);
395  if (cpu_flags & AV_CPU_FLAG_SSE3){
396  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
397  c->yuv2planeX = yuv2yuvX_sse3;
398  }
399 #endif
400 
401 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
402  if (c->srcBpc == 8) { \
403  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
404  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
405  } else if (c->srcBpc == 9) { \
406  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
407  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
408  } else if (c->srcBpc == 10) { \
409  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
410  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
411  } else if (c->srcBpc == 12) { \
412  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
413  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
414  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
415  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
416  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
417  } else { /* c->srcBpc == 16 */ \
418  av_assert0(c->srcBpc == 16);\
419  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
420  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
421  } \
422 } while (0)
423 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
424  switch (filtersize) { \
425  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
426  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
427  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
428  }
429 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
430 switch(c->dstBpc){ \
431  case 16: do_16_case; break; \
432  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
433  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
434  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
435  }
436 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
437  switch(c->dstBpc){ \
438  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
439  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
440  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
441  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
442  default: av_assert0(c->dstBpc>8); \
443  }
444 #define case_rgb(x, X, opt) \
445  case AV_PIX_FMT_ ## X: \
446  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
447  if (!c->chrSrcHSubSample) \
448  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
449  break
450 #if ARCH_X86_32
451  if (EXTERNAL_MMX(cpu_flags)) {
452  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
453  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
454  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
455 
456  switch (c->srcFormat) {
457  case AV_PIX_FMT_YA8:
458  c->lumToYV12 = ff_yuyvToY_mmx;
459  if (c->needAlpha)
460  c->alpToYV12 = ff_uyvyToY_mmx;
461  break;
462  case AV_PIX_FMT_YUYV422:
463  c->lumToYV12 = ff_yuyvToY_mmx;
464  c->chrToYV12 = ff_yuyvToUV_mmx;
465  break;
466  case AV_PIX_FMT_UYVY422:
467  c->lumToYV12 = ff_uyvyToY_mmx;
468  c->chrToYV12 = ff_uyvyToUV_mmx;
469  break;
470  case AV_PIX_FMT_NV12:
471  c->chrToYV12 = ff_nv12ToUV_mmx;
472  break;
473  case AV_PIX_FMT_NV21:
474  c->chrToYV12 = ff_nv21ToUV_mmx;
475  break;
476  case_rgb(rgb24, RGB24, mmx);
477  case_rgb(bgr24, BGR24, mmx);
478  case_rgb(bgra, BGRA, mmx);
479  case_rgb(rgba, RGBA, mmx);
480  case_rgb(abgr, ABGR, mmx);
481  case_rgb(argb, ARGB, mmx);
482  default:
483  break;
484  }
485  }
486  if (EXTERNAL_MMXEXT(cpu_flags)) {
487  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
488  }
489 #endif /* ARCH_X86_32 */
490 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
491  switch (filtersize) { \
492  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
493  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
494  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
495  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
496  break; \
497  }
498  if (EXTERNAL_SSE2(cpu_flags)) {
499  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
500  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
501  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
502  HAVE_ALIGNED_STACK || ARCH_X86_64);
503  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
504 
505  switch (c->srcFormat) {
506  case AV_PIX_FMT_YA8:
507  c->lumToYV12 = ff_yuyvToY_sse2;
508  if (c->needAlpha)
509  c->alpToYV12 = ff_uyvyToY_sse2;
510  break;
511  case AV_PIX_FMT_YUYV422:
512  c->lumToYV12 = ff_yuyvToY_sse2;
513  c->chrToYV12 = ff_yuyvToUV_sse2;
514  break;
515  case AV_PIX_FMT_UYVY422:
516  c->lumToYV12 = ff_uyvyToY_sse2;
517  c->chrToYV12 = ff_uyvyToUV_sse2;
518  break;
519  case AV_PIX_FMT_NV12:
520  c->chrToYV12 = ff_nv12ToUV_sse2;
521  break;
522  case AV_PIX_FMT_NV21:
523  c->chrToYV12 = ff_nv21ToUV_sse2;
524  break;
525  case_rgb(rgb24, RGB24, sse2);
526  case_rgb(bgr24, BGR24, sse2);
527  case_rgb(bgra, BGRA, sse2);
528  case_rgb(rgba, RGBA, sse2);
529  case_rgb(abgr, ABGR, sse2);
530  case_rgb(argb, ARGB, sse2);
531  default:
532  break;
533  }
534  }
535  if (EXTERNAL_SSSE3(cpu_flags)) {
536  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
537  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
538  switch (c->srcFormat) {
539  case_rgb(rgb24, RGB24, ssse3);
540  case_rgb(bgr24, BGR24, ssse3);
541  default:
542  break;
543  }
544  }
545  if (EXTERNAL_SSE4(cpu_flags)) {
546  /* Xto15 don't need special sse4 functions */
547  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
548  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
550  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
551  HAVE_ALIGNED_STACK || ARCH_X86_64);
552  if (c->dstBpc == 16 && !isBE(c->dstFormat))
553  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
554  }
555 
556  if (EXTERNAL_AVX(cpu_flags)) {
558  HAVE_ALIGNED_STACK || ARCH_X86_64);
559  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
560 
561  switch (c->srcFormat) {
562  case AV_PIX_FMT_YUYV422:
563  c->chrToYV12 = ff_yuyvToUV_avx;
564  break;
565  case AV_PIX_FMT_UYVY422:
566  c->chrToYV12 = ff_uyvyToUV_avx;
567  break;
568  case AV_PIX_FMT_NV12:
569  c->chrToYV12 = ff_nv12ToUV_avx;
570  break;
571  case AV_PIX_FMT_NV21:
572  c->chrToYV12 = ff_nv21ToUV_avx;
573  break;
574  case_rgb(rgb24, RGB24, avx);
575  case_rgb(bgr24, BGR24, avx);
576  case_rgb(bgra, BGRA, avx);
577  case_rgb(rgba, RGBA, avx);
578  case_rgb(abgr, ABGR, avx);
579  case_rgb(argb, ARGB, avx);
580  default:
581  break;
582  }
583  }
584 }
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
#define NULL
Definition: coverity.c:32
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
const uint64_t ff_dither8[2]
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
int chrSrcH
Height of source chroma planes.
8 bits gray, 8 bits alpha
Definition: pixfmt.h:143
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:336
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:301
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
uint64_t redDither
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differen...
static atomic_int cpu_flags
Definition: cpu.c:50
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:39
#define src
Definition: vp8dsp.c:254
int dstY
Last destination vertical line output from last slice.
uint64_t blueDither
#define case_rgb(x, X, opt)
Macro definitions for various function/variable attributes.
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
int srcH
Height of source luma/alpha planes.
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:339
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination i...
uint8_t
#define av_cold
Definition: attributes.h:82
uint8_t ** line
line buffer
int vChrFilterSize
Vertical filter size for chroma pixels.
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it be. (vf offset)
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
Definition: undefined.txt:32
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:384
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
external API header
enum AVPixelFormat dstFormat
Destination pixel format.
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:323
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define U(x)
Definition: vp56_arith.h:37
#define INLINE_MMX(flags)
Definition: cpu.h:86
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs output only at least one per link. The additional frames can be left buffered in the filter
const uint64_t ff_dither4[2]
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
static const uint8_t dither[8][8]
Definition: vf_fspp.c:57
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
simple assert() macros that are a bit more flexible than ISO C assert().
Slice plane.
SwsPlane plane[MAX_SLICE_PLANES]
color planes
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
Definition: mem.h:114
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
#define APCK_SIZE
#define FFMIN(a, b)
Definition: common.h:96
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
Definition: cpu.h:40
yuv2planar1_fn yuv2plane1
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:306
int32_t
#define XMM_CLOBBERS(...)
Definition: asm.h:98
#define s(width, name)
Definition: cbs_vp9.c:257
int dstW
Width of destination luma/alpha planes.
if(ret)
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:380
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
yuv2planarX_fn yuv2planeX
#define DECLARE_ASM_ALIGNED(n, t, v)
Declare an aligned variable appropriate for use in inline assembly code.
Definition: mem.h:113
struct SwsSlice * slice
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
#define APCK_COEF
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
#define SWS_ACCURATE_RND
Definition: swscale.h:83
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
#define INPUT_FUNCS(opt)
Definition: swscale.c:365
#define flags(name, subs,...)
Definition: cbs_av1.c:564
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:319
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
enum AVPixelFormat srcFormat
Source pixel format.
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:375
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
uint64_t greenDither
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
uint8_t ** tmp
Tmp line buffer used by mmx code.
int x86_reg
Definition: asm.h:72
void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
#define APCK_PTR2
int sliceY
index of first line
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
for(j=16;j >0;--j)
#define av_unused
Definition: attributes.h:125