/*
 * Broadcast the conversion constants held in the context `c` into LSX
 * vectors: one __m128i per offset (y/u/v) and per coefficient
 * (ug, vg, y, ub, vr), each built with __lsx_vreplgr2vr_d. The expansion
 * declares these names in the enclosing scope; the arithmetic in YUV2RGB
 * reads them by name.
 *
 * The statements after the coefficient loads fetch source data: one
 * vector load each from py_1 and py_2, and one replicated doubleword load
 * each from pu and pv. m_u and m_v are first interleaved with themselves,
 * then Y, U and V are interleaved with `zero` into a high part (the *_h
 * variables) and a low part (the original names).
 *
 * NOTE(review): the embedded numbering jumps from 36 to 39, so the load
 * statements most likely belong to a separate macro whose #define line is
 * not part of this excerpt -- confirm against the full file.
 * NOTE(review): interleaving a vector with itself presumably duplicates
 * each chroma byte, and interleaving with `zero` presumably widens bytes
 * to unsigned 16-bit lanes; verify operand order in the LSX manual.
 * NOTE(review): DUP2_ARG2 is defined outside this excerpt; it is assumed
 * to apply the intrinsic to two operand pairs and assign two outputs.
 */
26 #define YUV2RGB_LOAD_COE \
28 __m128i y_offset = __lsx_vreplgr2vr_d(c->yOffset); \
29 __m128i u_offset = __lsx_vreplgr2vr_d(c->uOffset); \
30 __m128i v_offset = __lsx_vreplgr2vr_d(c->vOffset); \
32 __m128i ug_coeff = __lsx_vreplgr2vr_d(c->ugCoeff); \
33 __m128i vg_coeff = __lsx_vreplgr2vr_d(c->vgCoeff); \
34 __m128i y_coeff = __lsx_vreplgr2vr_d(c->yCoeff); \
35 __m128i ub_coeff = __lsx_vreplgr2vr_d(c->ubCoeff); \
36 __m128i vr_coeff = __lsx_vreplgr2vr_d(c->vrCoeff); \
39 m_y1 = __lsx_vld(py_1, 0); \
40 m_y2 = __lsx_vld(py_2, 0); \
41 m_u = __lsx_vldrepl_d(pu, 0); \
42 m_v = __lsx_vldrepl_d(pv, 0); \
43 DUP2_ARG2(__lsx_vilvl_b, m_u, m_u, m_v, m_v, m_u, m_v); \
44 DUP2_ARG2(__lsx_vilvh_b, zero, m_u, zero, m_v, m_u_h, m_v_h); \
45 DUP2_ARG2(__lsx_vilvl_b, zero, m_u, zero, m_v, m_u, m_v); \
46 DUP2_ARG2(__lsx_vilvh_b, zero, m_y1, zero, m_y2, m_y1_h, m_y2_h); \
47 DUP2_ARG2(__lsx_vilvl_b, zero, m_y1, zero, m_y2, m_y1, m_y2); \
/*
 * Convert two rows of Y lanes and one shared set of U/V lanes into R, G
 * and B vectors, using 16-bit lane arithmetic throughout.
 *
 *   y1, y2, u, v : input vectors; all four are overwritten (shifted left
 *                  by 3, then reduced by y_offset / u_offset / v_offset).
 *   r1, g1, b1   : outputs for the y1 row.
 *   r2, g2, b2   : outputs for the y2 row.
 *
 * Steps: shift left by 3, subtract the offsets, multiply by the
 * coefficients with __lsx_vmuh_h, combine with saturating adds, and
 * finally pass every result through __lsx_vclip255_h.
 *
 * The macro relies on names from the expansion site: y_offset, u_offset,
 * v_offset, y_coeff, ug_coeff, ub_coeff, vr_coeff, vg_coeff, and the
 * temporaries y_1, y_2, u2g, u2b, v2r, v2g. v2g is reused: after the
 * first saturating add it holds the combined U+V green term, which both
 * g1 and g2 then consume, so the statement order matters.
 *
 * NOTE(review): __lsx_vmuh_h presumably keeps the high 16 bits of each
 * signed 16x16 product -- confirm against the LSX manual.
 */
59 #define YUV2RGB(y1, y2, u, v, r1, g1, b1, r2, g2, b2) \
61 y1 = __lsx_vslli_h(y1, 3); \
62 y2 = __lsx_vslli_h(y2, 3); \
63 u = __lsx_vslli_h(u, 3); \
64 v = __lsx_vslli_h(v, 3); \
65 y1 = __lsx_vsub_h(y1, y_offset); \
66 y2 = __lsx_vsub_h(y2, y_offset); \
67 u = __lsx_vsub_h(u, u_offset); \
68 v = __lsx_vsub_h(v, v_offset); \
69 y_1 = __lsx_vmuh_h(y1, y_coeff); \
70 y_2 = __lsx_vmuh_h(y2, y_coeff); \
71 u2g = __lsx_vmuh_h(u, ug_coeff); \
72 u2b = __lsx_vmuh_h(u, ub_coeff); \
73 v2r = __lsx_vmuh_h(v, vr_coeff); \
74 v2g = __lsx_vmuh_h(v, vg_coeff); \
75 r1 = __lsx_vsadd_h(y_1, v2r); \
76 v2g = __lsx_vsadd_h(v2g, u2g); \
77 g1 = __lsx_vsadd_h(y_1, v2g); \
78 b1 = __lsx_vsadd_h(y_1, u2b); \
79 r2 = __lsx_vsadd_h(y_2, v2r); \
80 g2 = __lsx_vsadd_h(y_2, v2g); \
81 b2 = __lsx_vsadd_h(y_2, u2b); \
82 DUP4_ARG1(__lsx_vclip255_h, r1, g1, b1, r2, r1, g1, b1, r2); \
83 DUP2_ARG1(__lsx_vclip255_h, g2, b2, g2, b2); \
/*
 * Pack separate r, g and b vectors into the two output vectors rgb_l and
 * rgb_h. r and g are first merged into `rg` with __lsx_vpackev_b; b and rg
 * are then run through two byte shuffles controlled by shuf2 (producing
 * rgb_l) and shuf3 (producing rgb_h).
 *
 * NOTE(review): `rg` is not declared in the visible lines (the embedded
 * numbering skips 87-88) and shuf2/shuf3 come from the expansion site;
 * YUV2RGBFUNC declares shuffle constants with those names.
 * NOTE(review): presumably the result is a packed 3-byte-per-pixel layout
 * in the argument order given by the caller -- confirm.
 */
86 #define RGB_PACK(r, g, b, rgb_l, rgb_h) \
89 rg = __lsx_vpackev_b(g, r); \
90 DUP2_ARG3(__lsx_vshuf_b, b, rg, shuf2, b, rg, shuf3, rgb_l, rgb_h); \
/*
 * Pack four channel vectors (a, r, g, b) into the two output vectors
 * rgb_l and rgb_h. The channels are paired with __lsx_vpackev_b
 * (r with a into `ra`, b with g into `bg`), then the two pair vectors are
 * interleaved at halfword granularity: the low interleave goes to rgb_l,
 * the high interleave to rgb_h.
 *
 * NOTE(review): `ra` and `bg` are not declared in the visible lines (the
 * embedded numbering skips 94-95) -- presumably declared there.
 * NOTE(review): the resulting per-pixel byte order presumably follows the
 * parameter order a, r, g, b; verify the operand order of the intrinsics.
 */
93 #define RGB32_PACK(a, r, g, b, rgb_l, rgb_h) \
96 ra = __lsx_vpackev_b(r, a); \
97 bg = __lsx_vpackev_b(b, g); \
98 rgb_l = __lsx_vilvl_h(bg, ra); \
99 rgb_h = __lsx_vilvh_h(bg, ra); \
/*
 * Write packed output to `image` with three doubleword element stores:
 * element 0 of rgb_l at byte offset 0, element 1 of rgb_l at offset 8 and
 * element 0 of rgb_h at offset 16. Element 1 of rgb_h is not stored, so
 * the visible stores cover 24 bytes.
 */
102 #define RGB_STORE(rgb_l, rgb_h, image) \
104 __lsx_vstelm_d(rgb_l, image, 0, 0); \
105 __lsx_vstelm_d(rgb_l, image, 8, 1); \
106 __lsx_vstelm_d(rgb_h, image, 16, 0); \
/*
 * Write packed output to `image` with two full-vector stores: rgb_l at
 * byte offset 0 and rgb_h at byte offset 16.
 */
109 #define RGB32_STORE(rgb_l, rgb_h, image) \
111 __lsx_vst(rgb_l, image, 0); \
112 __lsx_vst(rgb_h, image, 16); \
/*
 * Open the definition of a slice converter named func_name.
 *
 * The expansion emits the function signature, the local declarations and
 * the heads of two nested loops: the outer loop walks the slice two rows
 * at a time, the inner loop runs h_size times per row pair. The loop
 * bodies and the closing braces are supplied after the expansion.
 *
 *   func_name : name of the generated function (returns int).
 *   dst_type  : element type of the image1/image2 and r/g/b pointers.
 *   alpha     : not referenced in the visible lines.
 *
 * Locals set up here:
 *   h_size = dst_w >> 4          iteration count of the inner loop
 *   res    = (dst_w & 15) >> 1   half of the width remainder modulo 16
 *   vshift = 1 unless the source format is AV_PIX_FMT_YUV422P, so the
 *            chroma row is y >> 1 in the general case and y for 4:2:2
 *   shuf2, shuf3                 constants with the names RGB_PACK uses
 *   zero                         vector produced by __lsx_vldi(0)
 *
 * NOTE(review): image2 is computed as image1 + dstStride[0] on a dst_type
 * pointer, so the stride is scaled by sizeof(dst_type). That equals a byte
 * offset only when dst_type is one byte wide; YUV2RGBFUNC32 derives image2
 * from dst[0] instead. Confirm that every instantiation of this macro
 * passes a 1-byte dst_type.
 * NOTE(review): the embedded numbering skips 119 and 128-130, which
 * presumably hold the opening brace and further setup -- not visible here.
 */
115 #define YUV2RGBFUNC(func_name, dst_type, alpha) \
116 int func_name(SwsInternal *c, const uint8_t *const src[], \
117 const int srcStride[], int srcSliceY, int srcSliceH, \
118 uint8_t *const dst[], const int dstStride[]) \
120 int x, y, h_size, vshift, res; \
121 __m128i m_y1, m_y2, m_u, m_v; \
122 __m128i m_y1_h, m_y2_h, m_u_h, m_v_h; \
123 __m128i y_1, y_2, u2g, v2g, u2b, v2r, rgb1_l, rgb1_h; \
124 __m128i rgb2_l, rgb2_h, r1, g1, b1, r2, g2, b2; \
125 __m128i shuf2 = {0x0504120302100100, 0x0A18090816070614}; \
126 __m128i shuf3 = {0x1E0F0E1C0D0C1A0B, 0x0101010101010101}; \
127 __m128i zero = __lsx_vldi(0); \
131 h_size = c->opts.dst_w >> 4; \
132 res = (c->opts.dst_w & 15) >> 1; \
133 vshift = c->opts.src_format != AV_PIX_FMT_YUV422P; \
134 for (y = 0; y < srcSliceH; y += 2) { \
135 dst_type av_unused *r, *g, *b; \
136 dst_type *image1 = (dst_type *)(dst[0] + (y + srcSliceY) * dstStride[0]);\
137 dst_type *image2 = (dst_type *)(image1 + dstStride[0]);\
138 const uint8_t *py_1 = src[0] + y * srcStride[0]; \
139 const uint8_t *py_2 = py_1 + srcStride[0]; \
140 const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
141 const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
142 for(x = 0; x < h_size; x++) { \
/*
 * Open the definition of a slice converter named func_name; this variant
 * additionally declares the vector `a`, matching the extra first argument
 * that RGB32_PACK takes.
 *
 * The expansion emits the function signature, the local declarations and
 * the heads of two nested loops: the outer loop walks the slice two rows
 * at a time, the inner loop runs h_size times per row pair. The loop
 * bodies and the closing braces are supplied after the expansion.
 *
 *   func_name : name of the generated function (returns int).
 *   dst_type  : element type of the image1/image2 and r/g/b pointers.
 *   alpha     : not referenced in the visible lines.
 *
 * Locals set up here:
 *   h_size = dst_w >> 4          iteration count of the inner loop
 *   res    = (dst_w & 15) >> 1   half of the width remainder modulo 16
 *   vshift = 1 unless the source format is AV_PIX_FMT_YUV422P, so the
 *            chroma row is y >> 1 in the general case and y for 4:2:2
 *   yd     = y + srcSliceY       destination row of the first output line
 *
 * Both image1 and image2 are derived from the byte pointer dst[0] before
 * the cast, so dstStride[0] is applied as a byte stride regardless of
 * sizeof(dst_type).
 *
 * NOTE(review): __lsx_vldi(0xFF) presumably fills every byte of `a` with
 * 0xFF -- confirm the immediate encoding in the LSX manual.
 * NOTE(review): the embedded numbering skips 148 and 156-158, which
 * presumably hold the opening brace and further setup -- not visible here.
 */
144 #define YUV2RGBFUNC32(func_name, dst_type, alpha) \
145 int func_name(SwsInternal *c, const uint8_t *const src[], \
146 const int srcStride[], int srcSliceY, int srcSliceH, \
147 uint8_t *const dst[], const int dstStride[]) \
149 int x, y, h_size, vshift, res; \
150 __m128i m_y1, m_y2, m_u, m_v; \
151 __m128i m_y1_h, m_y2_h, m_u_h, m_v_h; \
152 __m128i y_1, y_2, u2g, v2g, u2b, v2r, rgb1_l, rgb1_h; \
153 __m128i rgb2_l, rgb2_h, r1, g1, b1, r2, g2, b2; \
154 __m128i a = __lsx_vldi(0xFF); \
155 __m128i zero = __lsx_vldi(0); \
159 h_size = c->opts.dst_w >> 4; \
160 res = (c->opts.dst_w & 15) >> 1; \
161 vshift = c->opts.src_format != AV_PIX_FMT_YUV422P; \
162 for (y = 0; y < srcSliceH; y += 2) { \
163 int yd = y + srcSliceY; \
164 dst_type av_unused *r, *g, *b; \
165 dst_type *image1 = (dst_type *)(dst[0] + (yd) * dstStride[0]); \
166 dst_type *image2 = (dst_type *)(dst[0] + (yd + 1) * dstStride[0]); \
167 const uint8_t *py_1 = src[0] + y * srcStride[0]; \
168 const uint8_t *py_2 = py_1 + srcStride[0]; \
169 const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
170 const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
171 for(x = 0; x < h_size; x++) { \
/*
 * Start a scalar loop of `res` iterations. Each iteration declares U, V
 * and Y and points r, g and b into the context lookup tables: r comes from
 * table_rV[V], b from table_bU[U], and g from the sum of the table_gU[U]
 * and table_gV[V] entries. Every index is biased by
 * YUVRGB_TABLE_HEADROOM.
 *
 * The last visible line has no trailing backslash, so the macro ends there
 * with the loop still open; the expansion site is expected to close it.
 *
 * NOTE(review): the statements that assign U and V (embedded lines
 * 183-184) and whatever precedes the loop (174-180) are not visible in
 * this excerpt. Presumably this loop handles the pixels left over after
 * the vector loop -- confirm against the full file.
 */
173 #define DEALYUV2RGBREMAIN \
181 for (x = 0; x < res; x++) { \
182 int av_unused U, V, Y; \
185 r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
186 g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] \
187 + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
188 b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
/*
 * Start a scalar loop of `res` iterations; the visible statements match
 * DEALYUV2RGBREMAIN. Each iteration declares U, V and Y and points r, g
 * and b into the context lookup tables: r comes from table_rV[V], b from
 * table_bU[U], and g from the sum of the table_gU[U] and table_gV[V]
 * entries. Every index is biased by YUVRGB_TABLE_HEADROOM.
 *
 * NOTE(review): the statements that assign U and V (embedded lines
 * 200-201) and whatever precedes the loop (191-197) are not visible in
 * this excerpt. The last visible line ends with a continuation backslash,
 * so the macro text may extend past what is shown here.
 */
190 #define DEALYUV2RGBREMAIN32 \
198 for (x = 0; x < res; x++) { \
199 int av_unused U, V, Y; \
202 r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
203 g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] \
204 + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
205 b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; \
/*
 * Function-like macro taking (dst, src).
 * NOTE(review): only the #define line is visible; the body (embedded lines
 * 208-216) is not part of this excerpt, so its behaviour cannot be
 * described from here.
 */
207 #define PUTRGB24(dst, src) \
/*
 * Function-like macro taking (dst, src).
 * NOTE(review): only the #define line is visible; the body (embedded lines
 * 218-226) is not part of this excerpt, so its behaviour cannot be
 * described from here.
 */
217 #define PUTBGR24(dst, src) \
/*
 * Write two output elements, dst[0] and dst[1], each as the sum of the
 * r, g and b table entries selected by Y. The r, g, b pointers and Y come
 * from the expansion site.
 *
 * NOTE(review): `src` is not referenced in the visible lines; the embedded
 * numbering skips 228 and 230, which presumably assign Y from src before
 * each store -- confirm against the full file.
 */
227 #define PUTRGB(dst, src) \
229 dst[0] = r[Y] + g[Y] + b[Y]; \
231 dst[1] = r[Y] + g[Y] + b[Y]; \
/*
 * Call sites of YUV2RGB. They come in pairs: the first call of a pair
 * converts the m_y1 / m_y2 / m_u / m_v vectors, the second converts the
 * m_y1_h / m_y2_h / m_u_h / m_v_h vectors. Both calls of a pair write the
 * same r1, g1, b1, r2, g2, b2 outputs, so the results of the first call
 * must be consumed before the second call runs.
 *
 * NOTE(review): the enclosing function definitions and the pack/store
 * statements between the calls are not visible in this excerpt;
 * presumably each pair belongs to one generated converter.
 */
257 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
262 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);
275 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
280 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);
293 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
298 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);
311 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
316 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);
329 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
334 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);
347 YUV2RGB(m_y1, m_y2, m_u, m_v, r1, g1,
b1, r2, g2,
b2);
352 YUV2RGB(m_y1_h, m_y2_h, m_u_h, m_v_h, r1, g1,
b1, r2, g2,
b2);