FFmpeg
Macros | Functions
yuv2rgb_lasx.c File Reference
#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"

Go to the source code of this file.

Macros

#define YUV2RGB_LOAD_COE
 
#define LOAD_YUV_16
 
#define YUV2RGB
 
#define YUV2RGB_RES
 
#define RGB_PACK(r, g, b, rgb_l, rgb_h)
 
#define RGB32_PACK(a, r, g, b, rgb_l, rgb_h)
 
#define RGB_STORE_RES(rgb_l, rgb_h, image_1, image_2)
 
#define RGB_STORE(rgb_l, rgb_h, image)
 
#define RGB32_STORE(rgb_l, rgb_h, image)
 
#define RGB32_STORE_RES(rgb_l, rgb_h, image_1, image_2)
 
#define YUV2RGBFUNC(func_name, dst_type, alpha)
 
#define YUV2RGBFUNC32(func_name, dst_type, alpha)
 
#define DEALYUV2RGBREMAIN
 
#define DEALYUV2RGBREMAIN32
 
#define END_FUNC()
 

Functions

LOAD_YUV_16 YUV2RGB RGB_PACK (r1, g1, b1, rgb1_l, rgb1_h)
 
 RGB_PACK (r2, g2, b2, rgb2_l, rgb2_h)
 
 RGB_STORE (rgb1_l, rgb1_h, image1)
 
 RGB_STORE (rgb2_l, rgb2_h, image2)
 
 RGB_STORE_RES (rgb1_l, rgb1_h, image1, image2)
 
LOAD_YUV_16 YUV2RGB RGB_PACK (b1, g1, r1, rgb1_l, rgb1_h)
 
 RGB_PACK (b2, g2, r2, rgb2_l, rgb2_h)
 
LOAD_YUV_16 YUV2RGB RGB32_PACK (r1, g1, b1, a, rgb1_l, rgb1_h)
 
 RGB32_PACK (r2, g2, b2, a, rgb2_l, rgb2_h)
 
 RGB32_STORE (rgb1_l, rgb1_h, image1)
 
 RGB32_STORE (rgb2_l, rgb2_h, image2)
 
 RGB32_STORE_RES (rgb1_l, rgb1_h, image1, image2)
 
LOAD_YUV_16 YUV2RGB RGB32_PACK (b1, g1, r1, a, rgb1_l, rgb1_h)
 
 RGB32_PACK (b2, g2, r2, a, rgb2_l, rgb2_h)
 
LOAD_YUV_16 YUV2RGB RGB32_PACK (a, r1, g1, b1, rgb1_l, rgb1_h)
 
 RGB32_PACK (a, r2, g2, b2, rgb2_l, rgb2_h)
 
LOAD_YUV_16 YUV2RGB RGB32_PACK (a, b1, g1, r1, rgb1_l, rgb1_h)
 
 RGB32_PACK (a, b2, g2, r2, rgb2_l, rgb2_h)
 

Macro Definition Documentation

◆ YUV2RGB_LOAD_COE

#define YUV2RGB_LOAD_COE
Value:
/* Load x_offset */ \
__m256i y_offset = __lasx_xvreplgr2vr_d(c->yOffset); \
__m256i u_offset = __lasx_xvreplgr2vr_d(c->uOffset); \
__m256i v_offset = __lasx_xvreplgr2vr_d(c->vOffset); \
/* Load x_coeff */ \
__m256i ug_coeff = __lasx_xvreplgr2vr_d(c->ugCoeff); \
__m256i vg_coeff = __lasx_xvreplgr2vr_d(c->vgCoeff); \
__m256i y_coeff = __lasx_xvreplgr2vr_d(c->yCoeff); \
__m256i ub_coeff = __lasx_xvreplgr2vr_d(c->ubCoeff); \
__m256i vr_coeff = __lasx_xvreplgr2vr_d(c->vrCoeff); \

Definition at line 25 of file yuv2rgb_lasx.c.

◆ LOAD_YUV_16

#define LOAD_YUV_16
Value:
m_y1 = __lasx_xvld(py_1, 0); \
m_y2 = __lasx_xvld(py_2, 0); \
m_u = __lasx_xvldrepl_d(pu, 0); \
m_v = __lasx_xvldrepl_d(pv, 0); \
m_u = __lasx_xvilvl_b(m_u, m_u); \
m_v = __lasx_xvilvl_b(m_v, m_v); \
DUP4_ARG1(__lasx_vext2xv_hu_bu, m_y1, m_y2, m_u, m_v, \
m_y1, m_y2, m_u, m_v); \

Definition at line 37 of file yuv2rgb_lasx.c.

◆ YUV2RGB

#define YUV2RGB
Value:
m_y1 = __lasx_xvslli_h(m_y1, 3); \
m_y2 = __lasx_xvslli_h(m_y2, 3); \
m_u = __lasx_xvslli_h(m_u, 3); \
m_v = __lasx_xvslli_h(m_v, 3); \
m_y1 = __lasx_xvsub_h(m_y1, y_offset); \
m_y2 = __lasx_xvsub_h(m_y2, y_offset); \
m_u = __lasx_xvsub_h(m_u, u_offset); \
m_v = __lasx_xvsub_h(m_v, v_offset); \
y_1 = __lasx_xvmuh_h(m_y1, y_coeff); \
y_2 = __lasx_xvmuh_h(m_y2, y_coeff); \
u2g = __lasx_xvmuh_h(m_u, ug_coeff); \
u2b = __lasx_xvmuh_h(m_u, ub_coeff); \
v2r = __lasx_xvmuh_h(m_v, vr_coeff); \
v2g = __lasx_xvmuh_h(m_v, vg_coeff); \
r1 = __lasx_xvsadd_h(y_1, v2r); \
v2g = __lasx_xvsadd_h(v2g, u2g); \
g1 = __lasx_xvsadd_h(y_1, v2g); \
b1 = __lasx_xvsadd_h(y_1, u2b); \
r2 = __lasx_xvsadd_h(y_2, v2r); \
g2 = __lasx_xvsadd_h(y_2, v2g); \
b2 = __lasx_xvsadd_h(y_2, u2b); \
DUP4_ARG1(__lasx_xvclip255_h, r1, g1, b1, r2, r1, g1, b1, r2); \
DUP2_ARG1(__lasx_xvclip255_h, g2, b2, g2, b2); \

Definition at line 57 of file yuv2rgb_lasx.c.

◆ YUV2RGB_RES

#define YUV2RGB_RES
Value:
m_y1 = __lasx_xvldrepl_d(py_1, 0); \
m_y2 = __lasx_xvldrepl_d(py_2, 0); \
m_u = __lasx_xvldrepl_w(pu, 0); \
m_v = __lasx_xvldrepl_w(pv, 0); \
m_y1 = __lasx_xvilvl_d(m_y2, m_y1); \
m_u = __lasx_xvilvl_b(m_u, m_u); \
m_v = __lasx_xvilvl_b(m_v, m_v); \
m_y1 = __lasx_vext2xv_hu_bu(m_y1); \
m_u = __lasx_vext2xv_hu_bu(m_u); \
m_v = __lasx_vext2xv_hu_bu(m_v); \
m_y1 = __lasx_xvslli_h(m_y1, 3); \
m_u = __lasx_xvslli_h(m_u, 3); \
m_v = __lasx_xvslli_h(m_v, 3); \
m_y1 = __lasx_xvsub_h(m_y1, y_offset); \
m_u = __lasx_xvsub_h(m_u, u_offset); \
m_v = __lasx_xvsub_h(m_v, v_offset); \
y_1 = __lasx_xvmuh_h(m_y1, y_coeff); \
u2g = __lasx_xvmuh_h(m_u, ug_coeff); \
u2b = __lasx_xvmuh_h(m_u, ub_coeff); \
v2r = __lasx_xvmuh_h(m_v, vr_coeff); \
v2g = __lasx_xvmuh_h(m_v, vg_coeff); \
r1 = __lasx_xvsadd_h(y_1, v2r); \
v2g = __lasx_xvsadd_h(v2g, u2g); \
g1 = __lasx_xvsadd_h(y_1, v2g); \
b1 = __lasx_xvsadd_h(y_1, u2b); \
r1 = __lasx_xvclip255_h(r1); \
g1 = __lasx_xvclip255_h(g1); \
b1 = __lasx_xvclip255_h(b1); \

Definition at line 82 of file yuv2rgb_lasx.c.

◆ RGB_PACK

#define RGB_PACK (   r,
  g,
  b,
  rgb_l,
  rgb_h 
)
Value:
{ \
__m256i rg; \
rg = __lasx_xvpackev_b(g, r); \
DUP2_ARG3(__lasx_xvshuf_b, b, rg, shuf2, b, rg, shuf3, rgb_l, rgb_h); \
}

Definition at line 112 of file yuv2rgb_lasx.c.

◆ RGB32_PACK

#define RGB32_PACK (   a,
  r,
  g,
  b,
  rgb_l,
  rgb_h 
)
Value:
{ \
__m256i ra, bg, tmp0, tmp1; \
ra = __lasx_xvpackev_b(r, a); \
bg = __lasx_xvpackev_b(b, g); \
tmp0 = __lasx_xvilvl_h(bg, ra); \
tmp1 = __lasx_xvilvh_h(bg, ra); \
rgb_l = __lasx_xvpermi_q(tmp1, tmp0, 0x20); \
rgb_h = __lasx_xvpermi_q(tmp1, tmp0, 0x31); \
}

Definition at line 119 of file yuv2rgb_lasx.c.

◆ RGB_STORE_RES

#define RGB_STORE_RES (   rgb_l,
  rgb_h,
  image_1,
  image_2 
)
Value:
{ \
__lasx_xvstelm_d(rgb_l, image_1, 0, 0); \
__lasx_xvstelm_d(rgb_l, image_1, 8, 1); \
__lasx_xvstelm_d(rgb_h, image_1, 16, 0); \
__lasx_xvstelm_d(rgb_l, image_2, 0, 2); \
__lasx_xvstelm_d(rgb_l, image_2, 8, 3); \
__lasx_xvstelm_d(rgb_h, image_2, 16, 2); \
}

Definition at line 130 of file yuv2rgb_lasx.c.

◆ RGB_STORE

#define RGB_STORE (   rgb_l,
  rgb_h,
  image 
)
Value:
{ \
__lasx_xvstelm_d(rgb_l, image, 0, 0); \
__lasx_xvstelm_d(rgb_l, image, 8, 1); \
__lasx_xvstelm_d(rgb_h, image, 16, 0); \
__lasx_xvstelm_d(rgb_l, image, 24, 2); \
__lasx_xvstelm_d(rgb_l, image, 32, 3); \
__lasx_xvstelm_d(rgb_h, image, 40, 2); \
}

Definition at line 140 of file yuv2rgb_lasx.c.

◆ RGB32_STORE

#define RGB32_STORE (   rgb_l,
  rgb_h,
  image 
)
Value:
{ \
__lasx_xvst(rgb_l, image, 0); \
__lasx_xvst(rgb_h, image, 32); \
}

Definition at line 150 of file yuv2rgb_lasx.c.

◆ RGB32_STORE_RES

#define RGB32_STORE_RES (   rgb_l,
  rgb_h,
  image_1,
  image_2 
)
Value:
{ \
__lasx_xvst(rgb_l, image_1, 0); \
__lasx_xvst(rgb_h, image_2, 0); \
}

Definition at line 156 of file yuv2rgb_lasx.c.

◆ YUV2RGBFUNC

#define YUV2RGBFUNC (   func_name,
  dst_type,
  alpha 
)
Value:
int func_name(SwsContext *c, const uint8_t *src[], \
int srcStride[], int srcSliceY, int srcSliceH, \
uint8_t *dst[], int dstStride[]) \
{ \
int x, y, h_size, vshift, res; \
__m256i m_y1, m_y2, m_u, m_v; \
__m256i y_1, y_2, u2g, v2g, u2b, v2r, rgb1_l, rgb1_h; \
__m256i rgb2_l, rgb2_h, r1, g1, b1, r2, g2, b2; \
__m256i shuf2 = {0x0504120302100100, 0x0A18090816070614, \
0x0504120302100100, 0x0A18090816070614}; \
__m256i shuf3 = {0x1E0F0E1C0D0C1A0B, 0x0101010101010101, \
0x1E0F0E1C0D0C1A0B, 0x0101010101010101}; \
YUV2RGB_LOAD_COE \
y = (c->dstW + 7) & ~7; \
h_size = y >> 4; \
res = y & 15; \
\
vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
for (y = 0; y < srcSliceH; y += 2) { \
dst_type *image1 = (dst_type *)(dst[0] + (y + srcSliceY) * dstStride[0]);\
dst_type *image2 = (dst_type *)(image1 + dstStride[0]);\
const uint8_t *py_1 = src[0] + y * srcStride[0]; \
const uint8_t *py_2 = py_1 + srcStride[0]; \
const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
for(x = 0; x < h_size; x++) { \

Definition at line 162 of file yuv2rgb_lasx.c.

◆ YUV2RGBFUNC32

#define YUV2RGBFUNC32 (   func_name,
  dst_type,
  alpha 
)
Value:
int func_name(SwsContext *c, const uint8_t *src[], \
int srcStride[], int srcSliceY, int srcSliceH, \
uint8_t *dst[], int dstStride[]) \
{ \
int x, y, h_size, vshift, res; \
__m256i m_y1, m_y2, m_u, m_v; \
__m256i y_1, y_2, u2g, v2g, u2b, v2r, rgb1_l, rgb1_h; \
__m256i rgb2_l, rgb2_h, r1, g1, b1, r2, g2, b2; \
__m256i a = __lasx_xvldi(0xFF); \
\
YUV2RGB_LOAD_COE \
y = (c->dstW + 7) & ~7; \
h_size = y >> 4; \
res = y & 15; \
\
vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
for (y = 0; y < srcSliceH; y += 2) { \
int yd = y + srcSliceY; \
dst_type av_unused *r, *g, *b; \
dst_type *image1 = (dst_type *)(dst[0] + (yd) * dstStride[0]); \
dst_type *image2 = (dst_type *)(dst[0] + (yd + 1) * dstStride[0]); \
const uint8_t *py_1 = src[0] + y * srcStride[0]; \
const uint8_t *py_2 = py_1 + srcStride[0]; \
const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
for(x = 0; x < h_size; x++) { \

Definition at line 190 of file yuv2rgb_lasx.c.

◆ DEALYUV2RGBREMAIN

#define DEALYUV2RGBREMAIN
Value:
py_1 += 16; \
py_2 += 16; \
pu += 8; \
pv += 8; \
image1 += 48; \
image2 += 48; \
} \
if (res) { \

Definition at line 218 of file yuv2rgb_lasx.c.

◆ DEALYUV2RGBREMAIN32

#define DEALYUV2RGBREMAIN32
Value:
py_1 += 16; \
py_2 += 16; \
pu += 8; \
pv += 8; \
image1 += 16; \
image2 += 16; \
} \
if (res) { \

Definition at line 228 of file yuv2rgb_lasx.c.

◆ END_FUNC

#define END_FUNC ( )
Value:
} \
} \
return srcSliceH; \
}

Definition at line 239 of file yuv2rgb_lasx.c.

Function Documentation

◆ RGB_PACK() [1/4]

RGB_PACK ( r1  ,
g1  ,
b1  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB_PACK() [2/4]

RGB_PACK ( r2  ,
g2  ,
b2  ,
rgb2_l  ,
rgb2_h   
)

◆ RGB_STORE() [1/2]

RGB_STORE ( rgb1_l  ,
rgb1_h  ,
image1   
)

◆ RGB_STORE() [2/2]

RGB_STORE ( rgb2_l  ,
rgb2_h  ,
image2   
)

◆ RGB_STORE_RES()

RGB_STORE_RES ( rgb1_l  ,
rgb1_h  ,
image1  ,
image2   
)

◆ RGB_PACK() [3/4]

RGB_PACK ( b1  ,
g1  ,
r1  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB_PACK() [4/4]

RGB_PACK ( b2  ,
g2  ,
r2  ,
rgb2_l  ,
rgb2_h   
)

◆ RGB32_PACK() [1/8]

RGB32_PACK ( r1  ,
g1  ,
b1  ,
a  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB32_PACK() [2/8]

RGB32_PACK ( r2  ,
g2  ,
b2  ,
a  ,
rgb2_l  ,
rgb2_h   
)

◆ RGB32_STORE() [1/2]

RGB32_STORE ( rgb1_l  ,
rgb1_h  ,
image1   
)

◆ RGB32_STORE() [2/2]

RGB32_STORE ( rgb2_l  ,
rgb2_h  ,
image2   
)

◆ RGB32_STORE_RES()

RGB32_STORE_RES ( rgb1_l  ,
rgb1_h  ,
image1  ,
image2   
)

◆ RGB32_PACK() [3/8]

RGB32_PACK ( b1  ,
g1  ,
r1  ,
a  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB32_PACK() [4/8]

RGB32_PACK ( b2  ,
g2  ,
r2  ,
a  ,
rgb2_l  ,
rgb2_h   
)

◆ RGB32_PACK() [5/8]

RGB32_PACK ( a  ,
r1  ,
g1  ,
b1  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB32_PACK() [6/8]

RGB32_PACK ( a  ,
r2  ,
g2  ,
b2  ,
rgb2_l  ,
rgb2_h   
)

◆ RGB32_PACK() [7/8]

RGB32_PACK ( a  ,
b1  ,
g1  ,
r1  ,
rgb1_l  ,
rgb1_h   
)

◆ RGB32_PACK() [8/8]

RGB32_PACK ( a  ,
b2  ,
g2  ,
r2  ,
rgb2_l  ,
rgb2_h   
)
r
const char * r
Definition: vf_curves.c:126
av_unused
#define av_unused
Definition: attributes.h:131
b
#define b
Definition: input.c:41
ra
#define ra
Definition: regdef.h:57
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2035
g
const char * g
Definition: vf_curves.c:127
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program, it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
srcSliceH
return srcSliceH
Definition: yuv2rgb_template.c:87
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:2036
a
The reader does not expect b to be semantically -5 here and if the code is changed by maybe adding a cast, a division or other the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced, "SUINT" is the C "unsigned" type but it holds a signed "int". To use the same example: SUINT a
Definition: undefined.txt:41
pv
#define pv
Definition: regdef.h:60
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
SwsContext
Definition: swscale_internal.h:299