FFmpeg
yuv2rgb_template.c
Go to the documentation of this file.
1 /*
2  * software YUV to RGB converter
3  *
4  * Copyright (C) 2001-2007 Michael Niedermayer
5  * (c) 2010 Konstantin Shishkov
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
#include <stdint.h>

#include "libavutil/x86/asm.h"
#include "libswscale/swscale_internal.h"
28 
/* Store/fence mnemonics for this instantiation of the template.
 * The template may be included more than once, so earlier definitions are
 * dropped first.  MMXEXT builds store with "movntq" and finish with "sfence";
 * other builds store with "movq" and emit a "# nop" placeholder instead of a
 * fence.  EMMS is only undefined here; this file spells "emms" literally. */
#undef MOVNTQ
#undef EMMS
#undef SFENCE

#if COMPILE_TEMPLATE_MMXEXT
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
40 
/* MMX register numbers, as string fragments, for each colour channel.
 * They are pasted after "%%mm" by the packing macros; blue/red/green match
 * the registers YUV2RGB leaves its results in (mm0, mm1, mm2). */
#define REG_BLUE  "0"
#define REG_RED   "1"
#define REG_GREEN "2"
#define REG_ALPHA "3"
45 
/* Function prologue and start of the per-row loop.
 *
 * depth is the number of destination bytes per pixel.
 *
 * - h_size is the destination width rounded up to a multiple of 8; if that
 *   many pixels would not fit in |dstStride[0]| bytes it is reduced by 8.
 * - vshift is 0 for AV_PIX_FMT_YUV422P input and 1 otherwise; the chroma row
 *   used for luma row y is (y >> vshift).
 * - mm4 is cleared once up front; YUV2RGB relies on it being zero.
 * - index starts at -h_size / 2 and is advanced towards zero by
 *   YUV2RGB_ENDLOOP.
 *
 * The macro deliberately leaves the `for` body open; YUV2RGB_OPERANDS or
 * YUV2RGB_OPERANDS_ALPHA supplies the closing brace.  It expects c, src,
 * srcStride, srcSliceY, srcSliceH, dst, dstStride, y, h_size and vshift to be
 * in scope. */
#define YUV2RGB_LOOP(depth)                                          \
    h_size = (c->dstW + 7) & ~7;                                     \
    if (h_size * (depth) > FFABS(dstStride[0]))                      \
        h_size -= 8;                                                 \
                                                                     \
    vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                     \
                                                                     \
    __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \
    for (y = 0; y < srcSliceH; y++) {                                \
        uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
        const uint8_t *py = src[0] +               y * srcStride[0]; \
        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
        x86_reg index = -h_size / 2;
60 
/* Opens the per-row asm statement and loads the first group of samples:
 * 8 bytes via operand 5 into mm6 and 4 bytes each via operands 2 and 3 into
 * mm0 and mm1 (Y, U and V per the operand list in YUV2RGB_OPERANDS).
 * Label 1 marks the top of the pixel loop that YUV2RGB_ENDLOOP jumps back to.
 * The statement is left open; the operand macros close it. */
#define YUV2RGB_INITIAL_LOAD          \
    __asm__ volatile (                \
    "movq (%5, %0, 2), %%mm6\n\t"     \
    "movd (%2, %0), %%mm0\n\t"        \
    "movd (%3, %0), %%mm1\n\t"        \
    "1: \n\t"
67 
/* YUV2RGB core
 * Conversion is performed in the usual way:
 * R = Y' * Ycoef + Vred * V'
 * G = Y' * Ycoef + Vgreen * V' + Ugreen * U'
 * B = Y' * Ycoef + Ublue * U'
 *
 * where X' = X * 8 - Xoffset (the multiplication is performed to increase
 * precision a bit).
 * Each chroma sample is shared by two horizontally adjacent pixels, so the
 * Y component is additionally split into Y1 and Y2 for even and odd pixels.
 *
 * Offsets and coefficients are read relative to operand 4.
 *
 * Input:
 * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register
 * Output (16-bit words, not yet saturated to bytes):
 * even pixels: mm1 - R, mm2 - G, mm0 - B
 * odd pixels:  mm5 - R, mm7 - G, mm3 - B
 * mm6 still holds the scaled Y1 term on exit.
 */
#define YUV2RGB \
    /* convert Y, U, V into Y1', Y2', U', V' */ \
    "movq %%mm6, %%mm7\n\t" \
    "punpcklbw %%mm4, %%mm0\n\t" \
    "punpcklbw %%mm4, %%mm1\n\t" \
    "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \
    "psrlw $8, %%mm7\n\t" \
    "psllw $3, %%mm0\n\t" \
    "psllw $3, %%mm1\n\t" \
    "psllw $3, %%mm6\n\t" \
    "psllw $3, %%mm7\n\t" \
    "psubsw "U_OFFSET"(%4), %%mm0\n\t" \
    "psubsw "V_OFFSET"(%4), %%mm1\n\t" \
    "psubw "Y_OFFSET"(%4), %%mm6\n\t" \
    "psubw "Y_OFFSET"(%4), %%mm7\n\t" \
\
    /* multiply by coefficients */ \
    "movq %%mm0, %%mm2\n\t" \
    "movq %%mm1, %%mm3\n\t" \
    "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \
    "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \
    "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \
    "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \
    "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \
    "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \
    "paddsw %%mm3, %%mm2\n\t" \
    /* now: mm0 = UB, mm1 = VR, mm2 = CG */ \
    /* mm6 = Y1, mm7 = Y2 */ \
\
    /* produce RGB: odd pixels first (they need copies of Y2), then even */ \
    "movq %%mm7, %%mm3\n\t" \
    "movq %%mm7, %%mm5\n\t" \
    "paddsw %%mm0, %%mm3\n\t" \
    "paddsw %%mm1, %%mm5\n\t" \
    "paddsw %%mm2, %%mm7\n\t" \
    "paddsw %%mm6, %%mm0\n\t" \
    "paddsw %%mm6, %%mm1\n\t" \
    "paddsw %%mm6, %%mm2\n\t"
122 
/* Saturate the YUV2RGB word results to bytes and put even/odd pixels back
 * into pixel order.
 *
 * Input:  even pixels in mm0 (B), mm1 (R), mm2 (G);
 *         odd pixels  in mm3 (B), mm5 (R), mm7 (G)  -- 4 words each.
 * Output: mm0 = B0..B7, mm1 = R0..R7, mm2 = G0..G7   -- 8 bytes each.
 * mm3 and mm7 are overwritten with packed intermediates.
 *
 * Every instruction is terminated with "\n\t" like the rest of the file. */
#define RGB_PACK_INTERLEAVE \
    /* pack and interleave even/odd pixels */ \
    "packuswb %%mm1, %%mm0\n\t" \
    "packuswb %%mm5, %%mm3\n\t" \
    "packuswb %%mm2, %%mm2\n\t" \
    "movq %%mm0, %%mm1\n\t" \
    "packuswb %%mm7, %%mm7\n\t" \
    "punpcklbw %%mm3, %%mm0\n\t" \
    "punpckhbw %%mm3, %%mm1\n\t" \
    "punpcklbw %%mm7, %%mm2\n\t"
133 
/* Tail of the pixel loop.
 *
 * Preloads the samples for the next iteration (Y 8 bytes ahead, U and V
 * 4 bytes ahead), advances the destination pointer (operand 1) by depth * 8
 * bytes, adds 4 to the index (operand 0) and jumps back to label 1 while the
 * index is still negative.
 *
 * NOTE(review): the preload is executed on the last iteration as well, so
 * each row reads one group of samples beyond the span that is converted. */
#define YUV2RGB_ENDLOOP(depth) \
    "movq 8 (%5, %0, 2), %%mm6\n\t" \
    "movd 4 (%3, %0), %%mm1\n\t" \
    "movd 4 (%2, %0), %%mm0\n\t" \
    "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \
    "add $4, %0\n\t" \
    "js 1b\n\t"
141 
/* Extra asm operands needed by the MMXEXT flavour of RGB_PACK24_B, which
 * references the mask constants by name; empty for the other flavour.
 * Undefined first because the template may be included more than once. */
#undef RGB_PACK24_B_OPERANDS
#if COMPILE_TEMPLATE_MMXEXT
#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
#else
#define RGB_PACK24_B_OPERANDS
#endif
149 
/* Operand list that closes the asm statement opened by YUV2RGB_INITIAL_LOAD,
 * followed by the brace that closes the row loop opened by YUV2RGB_LOOP.
 *
 *   %0  index            (read/write)
 *   %1  image            (read/write, destination pointer)
 *   %2  pu - index
 *   %3  pv - index
 *   %4  &c->redDither    (base for the context-relative constants)
 *   %5  py - 2 * index
 *
 * The plane pointers are biased by the (negative) start index so that
 * base + index addresses the start of the row. */
#define YUV2RGB_OPERANDS                                              \
        : "+r" (index), "+r" (image)                                  \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither),     \
          "r" (py - 2*index)                                          \
          NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
          RGB_PACK24_B_OPERANDS                                       \
        : "memory"                                                    \
        );                                                            \
    }
159 
/* Variant of the operand list for sources with an alpha plane: identical to
 * YUV2RGB_OPERANDS for %0..%5 and adds
 *
 *   %6  pa - 2 * index   (used by LOAD_ALPHA)
 *
 * Only mmx_00ffw is passed as a named constant here.  Like the non-alpha
 * variant it closes both the asm statement and the row loop; `pa` must be
 * declared inside the loop body by the caller. */
#define YUV2RGB_OPERANDS_ALPHA                                        \
        : "+r" (index), "+r" (image)                                  \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither),     \
          "r" (py - 2*index), "r" (pa - 2*index)                      \
          NAMED_CONSTRAINTS_ADD(mmx_00ffw)                            \
        : "memory"                                                    \
        );                                                            \
    }
168 
/* Function epilogue: emit the store fence selected by SFENCE, leave MMX
 * state with emms, and report the number of rows processed. */
#define YUV2RGB_ENDFUNC                 \
    __asm__ volatile (SFENCE"\n\t"      \
                      "emms \n\t");     \
    return srcSliceH;
173 
/* Compile-time conditionals: IF0 discards its argument, IF1 keeps it.
 * RGB_PACK16 selects one of them by pasting its is15 argument onto "IF". */
#define IF0(x)
#define IF1(x) x
176 
/* Pack 8 pixels into 16 bits each and store 16 bytes at operand 1.
 *
 * gmask  name of the constant applied to the green bits that end up in the
 *        high byte of each pixel
 * is15   1 or 0; selects the shift amounts and (via IF1/IF0) whether the
 *        extra one-bit right shift of mm1 is emitted
 *
 * Input: mm0 = B, mm1 = R, mm2 = G as produced by RGB_PACK_INTERLEAVE.
 * mm0 becomes the low byte and mm1 the high byte of every pixel; green is
 * split across both.  mm2 and mm3 are used as scratch. */
#define RGB_PACK16(gmask, is15) \
    "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \
    "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \
    "movq %%mm2, %%mm3\n\t" \
    "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \
    "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \
    "psrlw $3, %%mm0\n\t" \
    IF##is15("psrlw $1, %%mm1\n\t") \
    "pand "MANGLE(pb_e0)", %%mm2\n\t" \
    "pand "MANGLE(gmask)", %%mm3\n\t" \
    "por %%mm2, %%mm0\n\t" \
    "por %%mm3, %%mm1\n\t" \
    /* interleave low and high bytes into two groups of four pixels */ \
    "movq %%mm0, %%mm2\n\t" \
    "punpcklbw %%mm1, %%mm0\n\t" \
    "punpckhbw %%mm1, %%mm2\n\t" \
    MOVNTQ " %%mm0, (%1)\n\t" \
    MOVNTQ " %%mm2, 8(%1)\n\t"
194 
/* Add the per-row dither values to the packed B (mm0), G (mm2) and R (mm1)
 * bytes with unsigned saturation.  The values are read relative to
 * operand 4. */
#define DITHER_RGB \
    "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \
    "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
    "paddusb "RED_DITHER"(%4), %%mm1\n\t"
199 
200 #if !COMPILE_TEMPLATE_MMXEXT
201 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
202  int srcStride[],
203  int srcSliceY, int srcSliceH,
204  uint8_t *dst[], int dstStride[])
205 {
206  int y, h_size, vshift;
207 
208  YUV2RGB_LOOP(2)
209 
210 #ifdef DITHER1XBPP
211  c->blueDither = ff_dither8[y & 1];
212  c->greenDither = ff_dither8[y & 1];
213  c->redDither = ff_dither8[(y + 1) & 1];
214 #endif
215 
217  YUV2RGB
219 #ifdef DITHER1XBPP
220  DITHER_RGB
221 #endif
222  RGB_PACK16(pb_03, 1)
223 
224  YUV2RGB_ENDLOOP(2)
227 }
228 
229 static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
230  int srcStride[],
231  int srcSliceY, int srcSliceH,
232  uint8_t *dst[], int dstStride[])
233 {
234  int y, h_size, vshift;
235 
236  YUV2RGB_LOOP(2)
237 
238 #ifdef DITHER1XBPP
239  c->blueDither = ff_dither8[y & 1];
240  c->greenDither = ff_dither4[y & 1];
241  c->redDither = ff_dither8[(y + 1) & 1];
242 #endif
243 
245  YUV2RGB
247 #ifdef DITHER1XBPP
248  DITHER_RGB
249 #endif
250  RGB_PACK16(pb_07, 0)
251 
252  YUV2RGB_ENDLOOP(2)
255 }
256 #endif /* !COMPILE_TEMPLATE_MMXEXT */
257 
/* First half of the 24-bit packer; works directly on the YUV2RGB word
 * results (RGB_PACK_INTERLEAVE is not used on this path).
 *
 * blue, red  register-number strings.  The register named by `red` supplies
 *            the first byte of every output pixel and the one named by
 *            `blue` the third; green (mm2) is always the middle byte.
 *
 * The channel letters in the per-line comments describe the case red = "0",
 * blue = "1"; with the arguments swapped R and B trade places.
 *
 * On exit mm3, mm5 and mm2 hold the partially assembled pixels that
 * RGB_PACK24_B finishes and stores. */
#define RGB_PACK24(blue, red)\
    "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
    "packuswb %%mm5, %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\
    "packuswb %%mm7, %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\
    "movq %%mm"red", %%mm3 \n"\
    "movq %%mm"blue", %%mm6 \n"\
    "psrlq $32, %%mm"red" \n" /* R1 R3 R5 R7 */\
    "punpcklbw %%mm2, %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\
    "punpcklbw %%mm"red", %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\
    "movq %%mm3, %%mm5 \n"\
    "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\
    "punpcklwd %%mm6, %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\
    "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
    RGB_PACK24_B
272 
/* Second half of the 24-bit packer: turns the three registers left by
 * RGB_PACK24 (mm3, mm5, mm2) into 24 contiguous bytes at operand 1. */
#if COMPILE_TEMPLATE_MMXEXT
/* Word-granular select masks; the digits in each name list the words that
 * are kept (1) or cleared (0), in initializer order. */
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1};
DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0};
#undef RGB_PACK24_B
/* MMXEXT flavour: pshufw moves the words into place, the masks select them,
 * and the result is written with three 8-byte MOVNTQ stores. */
#define RGB_PACK24_B\
    "pshufw $0xc6, %%mm2, %%mm1 \n"\
    "pshufw $0x84, %%mm3, %%mm6 \n"\
    "pshufw $0x38, %%mm5, %%mm7 \n"\
    "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\
    "movq %%mm1, %%mm0 \n"\
    "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\
    "movq %%mm1, %%mm2 \n"\
    "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\
    "psrlq $48, %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\
    "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\
    "psllq $32, %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\
    "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\
    "por %%mm3, %%mm1 \n"\
    "por %%mm6, %%mm0 \n"\
    "por %%mm5, %%mm1 \n"\
    "por %%mm7, %%mm2 \n"\
    MOVNTQ" %%mm0, (%1) \n"\
    MOVNTQ" %%mm1, 8(%1) \n"\
    MOVNTQ" %%mm2, 16(%1) \n"

#else
#undef RGB_PACK24_B
/* Plain MMX flavour: a series of 4-byte movd stores at overlapping offsets.
 * Later stores overwrite the surplus bytes of earlier ones, so the store
 * order must not be changed (in particular 18(%1) has to come last). */
#define RGB_PACK24_B\
    "movd %%mm3, (%1) \n" /* R0 G0 B0 R1 */\
    "movd %%mm2, 4(%1) \n" /* G1 B1 */\
    "psrlq $32, %%mm3 \n"\
    "psrlq $16, %%mm2 \n"\
    "movd %%mm3, 6(%1) \n" /* R2 G2 B2 R3 */\
    "movd %%mm2, 10(%1) \n" /* G3 B3 */\
    "psrlq $16, %%mm2 \n"\
    "movd %%mm5, 12(%1) \n" /* R4 G4 B4 R5 */\
    "movd %%mm2, 16(%1) \n" /* G5 B5 */\
    "psrlq $32, %%mm5 \n"\
    "movd %%mm2, 20(%1) \n" /* -- -- G7 B7 */\
    "movd %%mm5, 18(%1) \n" /* R6 G6 B6 R7 */

#endif
318 
319 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
320  int srcStride[],
321  int srcSliceY, int srcSliceH,
322  uint8_t *dst[], int dstStride[])
323 {
324  int y, h_size, vshift;
325 
326  YUV2RGB_LOOP(3)
327 
329  YUV2RGB
331 
332  YUV2RGB_ENDLOOP(3)
335 }
336 
337 static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
338  int srcStride[],
339  int srcSliceY, int srcSliceH,
340  uint8_t *dst[], int dstStride[])
341 {
342  int y, h_size, vshift;
343 
344  YUV2RGB_LOOP(3)
345 
347  YUV2RGB
349 
350  YUV2RGB_ENDLOOP(3)
353 }
354 
355 
/* Alpha for sources without an alpha plane: comparing the register with
 * itself sets every bit, i.e. every alpha byte becomes 0xFF. */
#define SET_EMPTY_ALPHA \
    "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */

/* Alpha for sources with an alpha plane: load 8 alpha bytes for the current
 * pixel group through operand 6, which only YUV2RGB_OPERANDS_ALPHA provides. */
#define LOAD_ALPHA \
    "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t"
361 
/* Interleave four 8-byte channel registers into 8 four-byte pixels and store
 * the 32 bytes at operand 1.
 *
 * red, green, blue, alpha  register-number strings.  Within each pixel the
 *     bytes are written in the order blue, green, red, alpha (by argument
 *     name), so callers swap the red/blue arguments to get the other layout.
 *
 * mm5 and mm6 are used as scratch and all four argument registers are
 * overwritten. */
#define RGB_PACK32(red, green, blue, alpha) \
    "movq %%mm"blue", %%mm5\n\t" \
    "movq %%mm"red", %%mm6\n\t" \
    "punpckhbw %%mm"green", %%mm5\n\t" \
    "punpcklbw %%mm"green", %%mm"blue"\n\t" \
    "punpckhbw %%mm"alpha", %%mm6\n\t" \
    "punpcklbw %%mm"alpha", %%mm"red"\n\t" \
    "movq %%mm"blue", %%mm"green"\n\t" \
    "movq %%mm5, %%mm"alpha"\n\t" \
    "punpcklwd %%mm"red", %%mm"blue"\n\t" \
    "punpckhwd %%mm"red", %%mm"green"\n\t" \
    "punpcklwd %%mm6, %%mm5\n\t" \
    "punpckhwd %%mm6, %%mm"alpha"\n\t" \
    MOVNTQ " %%mm"blue", 0(%1)\n\t" \
    MOVNTQ " %%mm"green", 8(%1)\n\t" \
    MOVNTQ " %%mm5, 16(%1)\n\t" \
    MOVNTQ " %%mm"alpha", 24(%1)\n\t"
379 
380 #if !COMPILE_TEMPLATE_MMXEXT
381 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
382  int srcStride[],
383  int srcSliceY, int srcSliceH,
384  uint8_t *dst[], int dstStride[])
385 {
386  int y, h_size, vshift;
387 
388  YUV2RGB_LOOP(4)
389 
391  YUV2RGB
395 
396  YUV2RGB_ENDLOOP(4)
399 }
400 
401 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
402 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
403  int srcStride[],
404  int srcSliceY, int srcSliceH,
405  uint8_t *dst[], int dstStride[])
406 {
407  int y, h_size, vshift;
408 
409  YUV2RGB_LOOP(4)
410 
411  const uint8_t *pa = src[3] + y * srcStride[3];
413  YUV2RGB
415  LOAD_ALPHA
417 
418  YUV2RGB_ENDLOOP(4)
421 }
422 #endif
423 
424 static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
425  int srcStride[],
426  int srcSliceY, int srcSliceH,
427  uint8_t *dst[], int dstStride[])
428 {
429  int y, h_size, vshift;
430 
431  YUV2RGB_LOOP(4)
432 
434  YUV2RGB
438 
439  YUV2RGB_ENDLOOP(4)
442 }
443 
444 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
445 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
446  int srcStride[],
447  int srcSliceY, int srcSliceH,
448  uint8_t *dst[], int dstStride[])
449 {
450  int y, h_size, vshift;
451 
452  YUV2RGB_LOOP(4)
453 
454  const uint8_t *pa = src[3] + y * srcStride[3];
456  YUV2RGB
458  LOAD_ALPHA
460 
461  YUV2RGB_ENDLOOP(4)
464 }
465 #endif
466 
467 #endif /* !COMPILE_TEMPLATE_MMXEXT */
RGB_PACK_INTERLEAVE
#define RGB_PACK_INTERLEAVE
Definition: yuv2rgb_template.c:123
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem.h:114
YUV2RGB_OPERANDS_ALPHA
#define YUV2RGB_OPERANDS_ALPHA
Definition: yuv2rgb_template.c:160
DITHER_RGB
#define DITHER_RGB
Definition: yuv2rgb_template.c:195
REG_GREEN
#define REG_GREEN
Definition: yuv2rgb_template.c:43
YUV2RGB_LOOP
#define YUV2RGB_LOOP(depth)
Definition: yuv2rgb_template.c:46
RGB_PACK16
#define RGB_PACK16(gmask, is15)
Definition: yuv2rgb_template.c:177
src
#define src
Definition: vp8dsp.c:254
ff_dither4
const uint64_t ff_dither4[2]
LOAD_ALPHA
#define LOAD_ALPHA
Definition: yuv2rgb_template.c:359
YUV2RGB
#define YUV2RGB
Definition: yuv2rgb_template.c:84
YUV2RGB_OPERANDS
#define YUV2RGB_OPERANDS
Definition: yuv2rgb_template.c:150
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
REG_ALPHA
#define REG_ALPHA
Definition: yuv2rgb_template.c:44
REG_BLUE
#define REG_BLUE
Definition: yuv2rgb_template.c:41
asm.h
YUV2RGB_ENDLOOP
#define YUV2RGB_ENDLOOP(depth)
Definition: yuv2rgb_template.c:134
RGB_PACK24
#define RGB_PACK24(blue, red)
Definition: yuv2rgb_template.c:258
SET_EMPTY_ALPHA
#define SET_EMPTY_ALPHA
Definition: yuv2rgb_template.c:356
RENAME
#define RENAME(name)
Definition: ffv1.h:197
swscale_internal.h
uint8_t
uint8_t
Definition: audio_convert.c:194
RGB_PACK32
#define RGB_PACK32(red, green, blue, alpha)
Definition: yuv2rgb_template.c:362
REG_RED
#define REG_RED
Definition: yuv2rgb_template.c:42
ff_dither8
const uint64_t ff_dither8[2]
YUV2RGB_INITIAL_LOAD
#define YUV2RGB_INITIAL_LOAD
Definition: yuv2rgb_template.c:61
YUV2RGB_ENDFUNC
#define YUV2RGB_ENDFUNC
Definition: yuv2rgb_template.c:169
SwsContext
Definition: swscale_internal.h:280