FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
31 
32 #if HAVE_INLINE_ASM
33 
/* Defined ahead of the swscale_template.c inclusions further down. Nothing
 * visible in this file tests it; presumably the template does (TODO confirm). */
#define DITHER1XBPP

/* Byte patterns replicated over all eight bytes of a quadword:
 * 0xF8 keeps the upper 5 bits of a byte, 0xFC the upper 6 bits. */
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
/* Small constants replicated over the four 16-bit words: 0x10 and 0x02. */
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;

/* Two-entry dither tables. ff_updateMMXDitherTables() picks an entry by the
 * parity of the destination row (dstY & 1, or (dstY + 1) & 1 for red) and
 * copies it into the context's blue/green/red dither fields. */
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};

/* Bit-field masks replicated over the four 16-bit words.
 * "16" set: bits 4-0 (b), 10-5 (g), 15-11 (r).
 * "15" set: bits 4-0 (b),  9-5 (g), 14-10 (r). */
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;

/* Every-third-byte selectors (byte 0 = least significant):
 * A selects bytes 0, 3, 6; B selects bytes 1, 4, 7; C selects bytes 2, 5. */
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;

/* 0x10 in every byte, 0x80 in every byte, and 0x0001 in every 16-bit word.
 * None of the three is referenced by code visible in this file. */
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
63 
64 
//MMX versions
/* First pass over the template: COMPILE_TEMPLATE_MMXEXT is 0 and RENAME()
 * pastes the "_mmx" suffix onto whatever identifier it is given. */
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif

// MMXEXT versions
/* Second pass over the same template: both macros are reset first, then
 * COMPILE_TEMPLATE_MMXEXT becomes 1 and RENAME() pastes "_mmxext". */
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
81 
83  int lastInLumBuf, int lastInChrBuf)
84 {
85  const int dstH= c->dstH;
86  const int flags= c->flags;
87 
88  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
89  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
90  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
91 
92  int hasAlpha = c->needAlpha;
93  int32_t *vLumFilterPos= c->vLumFilterPos;
94  int32_t *vChrFilterPos= c->vChrFilterPos;
95  int16_t *vLumFilter= c->vLumFilter;
96  int16_t *vChrFilter= c->vChrFilter;
97  int32_t *lumMmxFilter= c->lumMmxFilter;
98  int32_t *chrMmxFilter= c->chrMmxFilter;
99  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
100  const int vLumFilterSize= c->vLumFilterSize;
101  const int vChrFilterSize= c->vChrFilterSize;
102  const int chrDstY= dstY>>c->chrDstVSubSample;
103  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
104  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
105 
106  c->blueDither= ff_dither8[dstY&1];
107  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
108  c->greenDither= ff_dither8[dstY&1];
109  else
110  c->greenDither= ff_dither4[dstY&1];
111  c->redDither= ff_dither8[(dstY+1)&1];
112  if (dstY < dstH - 2) {
113  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
114  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
115  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
116 
117  int i;
118  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
119  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
120 
121  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
122  for (i = 0; i < neg; i++)
123  tmpY[i] = lumSrcPtr[neg];
124  for ( ; i < end; i++)
125  tmpY[i] = lumSrcPtr[i];
126  for ( ; i < vLumFilterSize; i++)
127  tmpY[i] = tmpY[i-1];
128  lumSrcPtr = tmpY;
129 
130  if (alpSrcPtr) {
131  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
132  for (i = 0; i < neg; i++)
133  tmpA[i] = alpSrcPtr[neg];
134  for ( ; i < end; i++)
135  tmpA[i] = alpSrcPtr[i];
136  for ( ; i < vLumFilterSize; i++)
137  tmpA[i] = tmpA[i - 1];
138  alpSrcPtr = tmpA;
139  }
140  }
141  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
142  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
143  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
144  for (i = 0; i < neg; i++) {
145  tmpU[i] = chrUSrcPtr[neg];
146  }
147  for ( ; i < end; i++) {
148  tmpU[i] = chrUSrcPtr[i];
149  }
150  for ( ; i < vChrFilterSize; i++) {
151  tmpU[i] = tmpU[i - 1];
152  }
153  chrUSrcPtr = tmpU;
154  }
155 
156  if (flags & SWS_ACCURATE_RND) {
157  int s= APCK_SIZE / 8;
158  for (i=0; i<vLumFilterSize; i+=2) {
159  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
160  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
163  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
164  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
165  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
166  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
169  }
170  }
171  for (i=0; i<vChrFilterSize; i+=2) {
172  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
173  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
176  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
177  }
178  } else {
179  for (i=0; i<vLumFilterSize; i++) {
180  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
181  lumMmxFilter[4*i+2]=
182  lumMmxFilter[4*i+3]=
183  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
184  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
185  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
186  alpMmxFilter[4*i+2]=
187  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
188  }
189  }
190  for (i=0; i<vChrFilterSize; i++) {
191  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
192  chrMmxFilter[4*i+2]=
193  chrMmxFilter[4*i+3]=
194  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
195  }
196  }
197  }
198 }
199 
200 #if HAVE_MMXEXT
/**
 * Vertical filter producing 8-bit output, 16 pixels per outer iteration.
 *
 * If dest is not 16-byte aligned the call is forwarded unchanged to
 * yuv2yuvX_mmxext(). Otherwise the inline asm below does the work.
 *
 * @param filter      walked by the asm as a table of 16-byte entries: a line
 *                    pointer at byte 0 and coefficient data at byte 8; the
 *                    walk stops at the first entry whose pointer is NULL
 * @param filterSize  number of taps; only (filterSize - 1) * 8 enters the
 *                    asm, as a bias added to the dither words
 * @param src         only forwarded to the mmxext fallback; the SSE3 path
 *                    takes its line pointers from the filter table
 * @param dest        output bytes; written with movntdq at dest[i] for the
 *                    counter i running from offset up to dstW + offset in
 *                    steps of 16
 * @param dstW        output width
 * @param dither      first 8 bytes are loaded as the initial accumulator
 * @param offset      starting value of the pixel counter; when non-zero the
 *                    8 dither bytes are also rotated right by 3 bytes
 *
 * NOTE(review): the asm stores through dest but the clobber lists name only
 * registers; confirm whether a "memory" clobber is required here.
 */
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset)
{
    /* movntdq below needs a 16-byte aligned destination. */
    if(((uintptr_t)dest) & 15){
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
        return;
    }
    /* The asm reads filterSize (operand %4) after this decrement. */
    filterSize--;
/*
 * Shared asm body. Expects the 8 dither bytes in the low half of xmm3.
 * Operands: %0 filter, %1 dest - offset, %2 dstW + offset, %3 offset,
 *           %4 filterSize (already decremented), %5 dither quadword.
 *
 * Prologue: zero-extend the dither bytes to 8 words, add filterSize * 8 to
 * each word, shift right by 4, and keep a copy in xmm7 so both accumulators
 * (xmm3, xmm4) can be reset for every group of 16 pixels.
 *
 * Inner loop (label 1): for each table entry, load 16 int16 samples from the
 * entry's line at index FF_REG_c, multiply by the entry's coefficient with
 * pmulhw (high 16 bits of the product) and accumulate; continue while the
 * next entry's pointer is non-NULL.
 *
 * Epilogue: shift the sums right by 3, pack to unsigned bytes with
 * saturation, store 16 bytes non-temporally, advance the counter by 16 and
 * repeat while it is below %2 (unsigned compare).
 */
#define MAIN_FUNCTION \
        "pxor %%xmm0, %%xmm0 \n\t" \
        "punpcklbw %%xmm0, %%xmm3 \n\t" \
        "movd %4, %%xmm1 \n\t" \
        "punpcklwd %%xmm1, %%xmm1 \n\t" \
        "punpckldq %%xmm1, %%xmm1 \n\t" \
        "punpcklqdq %%xmm1, %%xmm1 \n\t" \
        "psllw $3, %%xmm1 \n\t" \
        "paddw %%xmm1, %%xmm3 \n\t" \
        "psraw $4, %%xmm3 \n\t" \
        "movdqa %%xmm3, %%xmm4 \n\t" \
        "movdqa %%xmm3, %%xmm7 \n\t" \
        "movl %3, %%ecx \n\t" \
        "mov %0, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        ".p2align 4 \n\t" /* FIXME Unroll? */\
        "1: \n\t"\
        "movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
        "movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
        "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
        "add $16, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
        "pmulhw %%xmm0, %%xmm2 \n\t"\
        "pmulhw %%xmm0, %%xmm5 \n\t"\
        "paddw %%xmm2, %%xmm3 \n\t"\
        "paddw %%xmm5, %%xmm4 \n\t"\
        " jnz 1b \n\t"\
        "psraw $3, %%xmm3 \n\t"\
        "psraw $3, %%xmm4 \n\t"\
        "packuswb %%xmm4, %%xmm3 \n\t"\
        "movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
        "add $16, %%"FF_REG_c" \n\t"\
        "cmp %2, %%"FF_REG_c" \n\t"\
        "movdqa %%xmm7, %%xmm3 \n\t" \
        "movdqa %%xmm7, %%xmm4 \n\t" \
        "mov %0, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "jb 1b \n\t"

    if (offset) {
        /* Non-zero offset: rotate the dither quadword right by 24 bits
         * (psrlq 24 | psllq 40) before entering the shared body. */
        __asm__ volatile(
            "movq %5, %%xmm3 \n\t"
            "movdqa %%xmm3, %%xmm4 \n\t"
            "psrlq $24, %%xmm3 \n\t"
            "psllq $40, %%xmm4 \n\t"
            "por %%xmm4, %%xmm3 \n\t"
            MAIN_FUNCTION
            :: "g" (filter),
            "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
            "m"(filterSize), "m"(((uint64_t *) dither)[0])
            : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
            "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
            );
    } else {
        /* Zero offset: the dither quadword is used as loaded. */
        __asm__ volatile(
            "movq %5, %%xmm3 \n\t"
            MAIN_FUNCTION
            :: "g" (filter),
            "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
            "m"(filterSize), "m"(((uint64_t *) dither)[0])
            : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
            "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
            );
    }
}
276 #endif
277 
278 #endif /* HAVE_INLINE_ASM */
279 
/* Declares the prototype of one horizontal scaler named
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. Only declarations are
 * generated here; the definitions are not in this file (presumably external
 * assembly — confirm against the build). */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

/* Twelve prototypes for one filter_n/opt pair: source depths 8, 9, 10, 12,
 * 14 and 16, each with a "15" and a "19" target variant. */
#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

/* MMX family: filter-size variants 4, 8 and X. */
#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

/* SSE family: filter-size variants 4, 8, X4 and X8. */
#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

/* The mmx prototypes are only declared for 32-bit x86 builds. */
#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
318 
/* Declares the prototype ff_yuv2planeX_<size>_<opt>, which has the same
 * parameter list as yuv2yuvX_sse3() in this file. */
#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
/* The 8-, 9- and 10-bit prototypes for one instruction set. */
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)

/* mmxext is 32-bit only; a 16-bit variant is declared for sse4 alone. */
#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);
335 
/* Declares the prototype ff_yuv2plane1_<size>_<opt>: a single source line in,
 * with no filter arguments. */
#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
/* Four prototypes: the 8- and 16-bit ones take the opt1 suffix, the 9- and
 * 10-bit ones take the opt2 suffix. */
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)

/* On 32-bit x86 the 8/16-bit variants are mmx and the 9/10-bit variants are
 * mmxext. A separate sse4 prototype is declared for 16-bit only. */
#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
351 
/* Declares the prototype ff_<fmt>ToY_<opt>: one destination pointer, one
 * source pointer, two unused pointers, a width and an unused uint32_t
 * pointer. */
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
                                const uint8_t *unused1, const uint8_t *unused2, \
                                int w, uint32_t *unused)
/* Declares the prototype ff_<fmt>ToUV_<opt>: two destination pointers (U and
 * V), one unused pointer, two source pointers, a width and an unused
 * uint32_t pointer. */
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
/* Both the ToY and the ToUV prototype for one format. */
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
/* Every input-conversion prototype for one instruction set. nv12 and nv21
 * get a ToUV prototype only; all other formats get both. */
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)

/* The mmx prototypes are only declared for 32-bit x86 builds. */
#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
383 
385 {
386  int cpu_flags = av_get_cpu_flags();
387 
388 #if HAVE_MMX_INLINE
389  if (INLINE_MMX(cpu_flags))
390  sws_init_swscale_mmx(c);
391 #endif
392 #if HAVE_MMXEXT_INLINE
394  sws_init_swscale_mmxext(c);
396  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
397  c->yuv2planeX = yuv2yuvX_sse3;
398  }
399 #endif
400 
401 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
402  if (c->srcBpc == 8) { \
403  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
404  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
405  } else if (c->srcBpc == 9) { \
406  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
407  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
408  } else if (c->srcBpc == 10) { \
409  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
410  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
411  } else if (c->srcBpc == 12) { \
412  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
413  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
414  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
415  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
416  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
417  } else { /* c->srcBpc == 16 */ \
418  av_assert0(c->srcBpc == 16);\
419  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
420  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
421  } \
422 } while (0)
423 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
424  switch (filtersize) { \
425  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
426  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
427  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
428  }
429 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
430 switch(c->dstBpc){ \
431  case 16: do_16_case; break; \
432  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
433  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
434  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
435  }
436 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
437  switch(c->dstBpc){ \
438  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
439  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
440  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
441  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
442  default: av_assert0(c->dstBpc>8); \
443  }
444 #define case_rgb(x, X, opt) \
445  case AV_PIX_FMT_ ## X: \
446  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
447  if (!c->chrSrcHSubSample) \
448  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
449  break
450 #if ARCH_X86_32
451  if (EXTERNAL_MMX(cpu_flags)) {
452  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
453  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
454  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
455 
456  switch (c->srcFormat) {
457  case AV_PIX_FMT_YA8:
458  c->lumToYV12 = ff_yuyvToY_mmx;
459  if (c->needAlpha)
460  c->alpToYV12 = ff_uyvyToY_mmx;
461  break;
462  case AV_PIX_FMT_YUYV422:
463  c->lumToYV12 = ff_yuyvToY_mmx;
464  c->chrToYV12 = ff_yuyvToUV_mmx;
465  break;
466  case AV_PIX_FMT_UYVY422:
467  c->lumToYV12 = ff_uyvyToY_mmx;
468  c->chrToYV12 = ff_uyvyToUV_mmx;
469  break;
470  case AV_PIX_FMT_NV12:
471  c->chrToYV12 = ff_nv12ToUV_mmx;
472  break;
473  case AV_PIX_FMT_NV21:
474  c->chrToYV12 = ff_nv21ToUV_mmx;
475  break;
476  case_rgb(rgb24, RGB24, mmx);
477  case_rgb(bgr24, BGR24, mmx);
478  case_rgb(bgra, BGRA, mmx);
479  case_rgb(rgba, RGBA, mmx);
480  case_rgb(abgr, ABGR, mmx);
481  case_rgb(argb, ARGB, mmx);
482  default:
483  break;
484  }
485  }
486  if (EXTERNAL_MMXEXT(cpu_flags)) {
487  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
488  }
489 #endif /* ARCH_X86_32 */
490 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
491  switch (filtersize) { \
492  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
493  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
494  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
495  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
496  break; \
497  }
498  if (EXTERNAL_SSE2(cpu_flags)) {
499  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
500  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
501  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
502  HAVE_ALIGNED_STACK || ARCH_X86_64);
503  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
504 
505  switch (c->srcFormat) {
506  case AV_PIX_FMT_YA8:
507  c->lumToYV12 = ff_yuyvToY_sse2;
508  if (c->needAlpha)
509  c->alpToYV12 = ff_uyvyToY_sse2;
510  break;
511  case AV_PIX_FMT_YUYV422:
512  c->lumToYV12 = ff_yuyvToY_sse2;
513  c->chrToYV12 = ff_yuyvToUV_sse2;
514  break;
515  case AV_PIX_FMT_UYVY422:
516  c->lumToYV12 = ff_uyvyToY_sse2;
517  c->chrToYV12 = ff_uyvyToUV_sse2;
518  break;
519  case AV_PIX_FMT_NV12:
520  c->chrToYV12 = ff_nv12ToUV_sse2;
521  break;
522  case AV_PIX_FMT_NV21:
523  c->chrToYV12 = ff_nv21ToUV_sse2;
524  break;
525  case_rgb(rgb24, RGB24, sse2);
526  case_rgb(bgr24, BGR24, sse2);
527  case_rgb(bgra, BGRA, sse2);
528  case_rgb(rgba, RGBA, sse2);
529  case_rgb(abgr, ABGR, sse2);
530  case_rgb(argb, ARGB, sse2);
531  default:
532  break;
533  }
534  }
535  if (EXTERNAL_SSSE3(cpu_flags)) {
536  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
537  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
538  switch (c->srcFormat) {
539  case_rgb(rgb24, RGB24, ssse3);
540  case_rgb(bgr24, BGR24, ssse3);
541  default:
542  break;
543  }
544  }
545  if (EXTERNAL_SSE4(cpu_flags)) {
546  /* Xto15 don't need special sse4 functions */
547  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
548  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
549  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
550  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
551  HAVE_ALIGNED_STACK || ARCH_X86_64);
552  if (c->dstBpc == 16 && !isBE(c->dstFormat))
553  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
554  }
555 
556  if (EXTERNAL_AVX(cpu_flags)) {
557  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
558  HAVE_ALIGNED_STACK || ARCH_X86_64);
559  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
560 
561  switch (c->srcFormat) {
562  case AV_PIX_FMT_YUYV422:
563  c->chrToYV12 = ff_yuyvToUV_avx;
564  break;
565  case AV_PIX_FMT_UYVY422:
566  c->chrToYV12 = ff_uyvyToUV_avx;
567  break;
568  case AV_PIX_FMT_NV12:
569  c->chrToYV12 = ff_nv12ToUV_avx;
570  break;
571  case AV_PIX_FMT_NV21:
572  c->chrToYV12 = ff_nv21ToUV_avx;
573  break;
574  case_rgb(rgb24, RGB24, avx);
575  case_rgb(bgr24, BGR24, avx);
576  case_rgb(bgra, BGRA, avx);
577  case_rgb(rgba, RGBA, avx);
578  case_rgb(abgr, ABGR, avx);
579  case_rgb(argb, ARGB, avx);
580  default:
581  break;
582  }
583  }
584 }
INLINE_MMX
#define INLINE_MMX(flags)
Definition: cpu.h:86
SwsContext::lastInChrBuf
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
Definition: swscale_internal.h:352
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:384
ASSIGN_MMX_SCALE_FUNC
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_CPU_FLAG_SSE3
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
Definition: cpu.h:40
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:56
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:934
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:143
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:480
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem.h:114
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:396
av_unused
#define av_unused
Definition: attributes.h:125
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:377
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:478
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:376
filter
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs output only at least one per link. The additional frames can be left buffered in the filter.
Definition: filter_design.txt:228
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem.h:113
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:365
U
#define U(x)
Definition: vp56_arith.h:37
src
#define src
Definition: vp8dsp.c:254
ff_dither4
const uint64_t ff_dither4[2]
avassert.h
av_cold
#define av_cold
Definition: attributes.h:84
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:257
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:57
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:935
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:339
XMM_CLOBBERS
#define XMM_CLOBBERS(...)
Definition: asm.h:98
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:380
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
int32_t
int32_t
Definition: audio_convert.c:194
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:319
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:384
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
SwsPlane
Slice plane.
Definition: swscale_internal.h:929
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:486
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c [...]
Definition: undefined.txt:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:381
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:660
cpu.h
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:380
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:385
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:83
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:112
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:375
swscale_internal.h
uint8_t
uint8_t
Definition: audio_convert.c:194
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
swscale_template.c
SwsContext::lumBufIndex
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
Definition: swscale_internal.h:353
ff_dither8
const uint64_t ff_dither8[2]
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
SwsContext::chrBufIndex
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
Definition: swscale_internal.h:354
config.h
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:479
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:932
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:336
x86_reg
int x86_reg
Definition: asm.h:72
case_rgb
#define case_rgb(x, X, opt)
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:39
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:565
SCALE_FUNCS_MMX
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:301
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:280
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:306
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:293
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:58
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:323
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
swscale.h
SwsContext::lastInLumBuf
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
Definition: swscale_internal.h:351
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:57