FFmpeg
Macros | Functions
swscale_lasx.c File Reference
#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"
#include "libavutil/intreadwrite.h"

Go to the source code of this file.

Macros

#define SCALE_8_16(_sh)
 
#define SCALE_8_8(_sh)
 
#define SCALE_8_4(_sh)
 
#define SCALE_8_2(_sh)
 
#define SCALE_4_16(_sh)
 
#define SCALE_4_8(_sh)
 
#define SCALE_4_4(_sh)
 
#define SCALE_4_2(_sh)
 
#define SCALE_16
 
#define SCALE_8
 
#define SCALE_16
 

Functions

void ff_hscale_8_to_15_lasx (SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
 
void ff_hscale_8_to_19_lasx (SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
 
void ff_hscale_16_to_15_lasx (SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
 
void ff_hscale_16_to_19_lasx (SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
 

Macro Definition Documentation

◆ SCALE_8_16

#define SCALE_8_16 (   _sh)

Definition at line 26 of file swscale_lasx.c.

◆ SCALE_8_8

#define SCALE_8_8 (   _sh)

Definition at line 91 of file swscale_lasx.c.

◆ SCALE_8_4

#define SCALE_8_4 (   _sh)
Value:
{ \
src0 = __lasx_xvldrepl_d(src + filterPos[0], 0); \
src1 = __lasx_xvldrepl_d(src + filterPos[1], 0); \
src2 = __lasx_xvldrepl_d(src + filterPos[2], 0); \
src3 = __lasx_xvldrepl_d(src + filterPos[3], 0); \
filter0 = __lasx_xvld(filter, 0); \
filter1 = __lasx_xvld(filter, 32); \
filterPos += 4; \
filter += 32; \
src0 = __lasx_xvilvl_d(src1, src0); \
src2 = __lasx_xvilvl_d(src3, src2); \
src0 = __lasx_vext2xv_hu_bu(src0); \
src2 = __lasx_vext2xv_hu_bu(src2); \
src0 = __lasx_xvdp2_w_h(src0, filter0); \
src1 = __lasx_xvdp2_w_h(src2, filter1); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src1 = __lasx_xvhaddw_d_w(src1, src1); \
src0 = __lasx_xvpickev_w(src1, src0); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src0 = __lasx_xvpickev_w(src0, src0); \
src0 = __lasx_xvsrai_w(src0, _sh); \
src0 = __lasx_xvmin_w(src0, vmax); \
src0 = __lasx_xvperm_w(src0, shuf); \
}

Definition at line 125 of file swscale_lasx.c.

◆ SCALE_8_2

#define SCALE_8_2 (   _sh)
Value:
{ \
src0 = __lasx_xvldrepl_d(src + filterPos[0], 0); \
src1 = __lasx_xvldrepl_d(src + filterPos[1], 0); \
filter0 = __lasx_xvld(filter, 0); \
src0 = __lasx_xvilvl_d(src1, src0); \
src0 = __lasx_vext2xv_hu_bu(src0); \
src0 = __lasx_xvdp2_w_h(filter0, src0); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src0 = __lasx_xvhaddw_q_d(src0, src0); \
src0 = __lasx_xvsrai_w(src0, _sh); \
src0 = __lasx_xvmin_w(src0, vmax); \
dst[0] = __lasx_xvpickve2gr_w(src0, 0); \
dst[1] = __lasx_xvpickve2gr_w(src0, 4); \
filterPos += 2; \
filter += 16; \
dst += 2; \
}

Definition at line 151 of file swscale_lasx.c.

◆ SCALE_4_16

#define SCALE_4_16 (   _sh)

Definition at line 170 of file swscale_lasx.c.

◆ SCALE_4_8

#define SCALE_4_8 (   _sh)
Value:
{ \
src0 = __lasx_xvldrepl_w(src + filterPos[0], 0); \
src1 = __lasx_xvldrepl_w(src + filterPos[1], 0); \
src2 = __lasx_xvldrepl_w(src + filterPos[2], 0); \
src3 = __lasx_xvldrepl_w(src + filterPos[3], 0); \
src4 = __lasx_xvldrepl_w(src + filterPos[4], 0); \
src5 = __lasx_xvldrepl_w(src + filterPos[5], 0); \
src6 = __lasx_xvldrepl_w(src + filterPos[6], 0); \
src7 = __lasx_xvldrepl_w(src + filterPos[7], 0); \
filter0 = __lasx_xvld(filter, 0); \
filter1 = __lasx_xvld(filter, 32); \
filterPos += 8; \
filter += 32; \
DUP4_ARG2(__lasx_xvilvl_w, src1, src0, src3, src2, src5, \
src4, src7, src6, src0, src2, src4, src6); \
src0 = __lasx_xvilvl_d(src2, src0); \
src1 = __lasx_xvilvl_d(src6, src4); \
\
src0 = __lasx_vext2xv_hu_bu(src0); \
src1 = __lasx_vext2xv_hu_bu(src1); \
src0 = __lasx_xvdp2_w_h(filter0, src0); \
src1 = __lasx_xvdp2_w_h(filter1, src1); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src1 = __lasx_xvhaddw_d_w(src1, src1); \
src0 = __lasx_xvpickev_w(src1, src0); \
src0 = __lasx_xvsrai_w(src0, _sh); \
src0 = __lasx_xvmin_w(src0, vmax); \
}

Definition at line 218 of file swscale_lasx.c.

◆ SCALE_4_4

#define SCALE_4_4 (   _sh)
Value:
{ \
src0 = __lasx_xvldrepl_w(src + filterPos[0], 0); \
src1 = __lasx_xvldrepl_w(src + filterPos[1], 0); \
src2 = __lasx_xvldrepl_w(src + filterPos[2], 0); \
src3 = __lasx_xvldrepl_w(src + filterPos[3], 0); \
filter0 = __lasx_xvld(filter, 0); \
filterPos += 4; \
filter += 16; \
src0 = __lasx_xvilvl_w(src1, src0); \
src1 = __lasx_xvilvl_w(src3, src2); \
\
src0 = __lasx_xvilvl_d(src1, src0); \
src0 = __lasx_vext2xv_hu_bu(src0); \
src0 = __lasx_xvdp2_w_h(filter0, src0); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src0 = __lasx_xvsrai_w(src0, _sh); \
src0 = __lasx_xvmin_w(src0, vmax); \
src0 = __lasx_xvpickev_w(src0, src0); \
src0 = __lasx_xvpermi_d(src0, 0xd8); \
}

Definition at line 248 of file swscale_lasx.c.

◆ SCALE_4_2

#define SCALE_4_2 (   _sh)
Value:
{ \
src0 = __lasx_xvldrepl_w(src + filterPos[0], 0); \
src1 = __lasx_xvldrepl_w(src + filterPos[1], 0); \
filter0 = __lasx_xvld(filter, 0); \
src0 = __lasx_xvilvl_w(src1, src0); \
src0 = __lasx_vext2xv_hu_bu(src0); \
src0 = __lasx_xvdp2_w_h(filter0, src0); \
src0 = __lasx_xvhaddw_d_w(src0, src0); \
src0 = __lasx_xvsrai_w(src0, _sh); \
src0 = __lasx_xvmin_w(src0, vmax); \
dst[0] = __lasx_xvpickve2gr_w(src0, 0); \
dst[1] = __lasx_xvpickve2gr_w(src0, 2); \
filterPos += 2; \
filter += 8; \
dst += 2; \
}

Definition at line 270 of file swscale_lasx.c.

◆ SCALE_16 [1/2]

#define SCALE_16
Value:
{ \
int dex = j << 1; \
src0 = __lasx_xvldrepl_d((srcPos1 + j), 0); \
src1 = __lasx_xvldrepl_d((srcPos2 + j), 0); \
src2 = __lasx_xvldrepl_d((srcPos3 + j), 0); \
src3 = __lasx_xvldrepl_d((srcPos4 + j), 0); \
DUP4_ARG2(__lasx_xvldx, filterStart1, dex, filterStart2, dex, \
filterStart3, dex, filterStart4, dex, filter0, \
filter1, filter2, filter3); \
src0 = __lasx_xvpermi_q(src0, src1, 0x02); \
src1 = __lasx_xvpermi_q(src2, src3, 0x02); \
filter0 = __lasx_xvpermi_q(filter0, filter1, 0x02); \
filter1 = __lasx_xvpermi_q(filter2, filter3, 0x02); \
src0 = __lasx_xvilvl_b(zero, src0); \
src1 = __lasx_xvilvl_b(zero, src1); \
out0 = __lasx_xvdp2_w_h(filter0, src0); \
out1 = __lasx_xvdp2_w_h(filter1, src1); \
src0 = __lasx_xvhaddw_d_w(out0, out0); \
src1 = __lasx_xvhaddw_d_w(out1, out1); \
out0 = __lasx_xvpackev_d(src1, src0); \
out1 = __lasx_xvpackod_d(src1, src0); \
out0 = __lasx_xvadd_w(out0, out1); \
out = __lasx_xvadd_w(out, out0); \
}

Definition at line 654 of file swscale_lasx.c.

◆ SCALE_8

#define SCALE_8
Value:
{ \
__m256i src0, src1, src2, src3, filter0, filter1, out0, out1; \
DUP4_ARG2(__lasx_xvld, src + filterPos[0], 0, src + filterPos[1], 0, \
src + filterPos[2], 0, src + filterPos[3], 0, src0, src1, src2,\
src3); \
filter0 = __lasx_xvld(filter, 0); \
filter1 = __lasx_xvld(filter, 32); \
src0 = __lasx_xvpermi_q(src0, src1, 0x02); \
src2 = __lasx_xvpermi_q(src2, src3, 0x02); \
out0 = __lasx_xvdp2_w_hu_h(src0, filter0); \
out1 = __lasx_xvdp2_w_hu_h(src2, filter1); \
src0 = __lasx_xvhaddw_d_w(out0, out0); \
src1 = __lasx_xvhaddw_d_w(out1, out1); \
out0 = __lasx_xvpackev_d(src1, src0); \
out1 = __lasx_xvpackod_d(src1, src0); \
out0 = __lasx_xvadd_w(out0, out1); \
out0 = __lasx_xvsra_w(out0, shift); \
out0 = __lasx_xvmin_w(out0, v_max); \
dst[0] = __lasx_xvpickve2gr_w(out0, 0); \
dst[1] = __lasx_xvpickve2gr_w(out0, 4); \
dst[2] = __lasx_xvpickve2gr_w(out0, 2); \
dst[3] = __lasx_xvpickve2gr_w(out0, 6); \
filterPos += 4; \
filter += 32; \
dst += 4; \
}

Definition at line 626 of file swscale_lasx.c.

◆ SCALE_16 [2/2]

#define SCALE_16
Value:
{ \
int dex = j << 1; \
DUP4_ARG2(__lasx_xvldx, srcPos1, dex, srcPos2, dex, srcPos3, dex, \
srcPos4, dex, src0, src1, src2, src3); \
DUP4_ARG2(__lasx_xvldx, filterStart1, dex, filterStart2, dex, \
filterStart3, dex, filterStart4, dex, filter0, \
filter1, filter2, filter3); \
src0 = __lasx_xvpermi_q(src0, src1, 0x02); \
src1 = __lasx_xvpermi_q(src2, src3, 0x02); \
filter0 = __lasx_xvpermi_q(filter0, filter1, 0x02); \
filter1 = __lasx_xvpermi_q(filter2, filter3, 0x02); \
out0 = __lasx_xvdp2_w_hu_h(src0, filter0); \
out1 = __lasx_xvdp2_w_hu_h(src1, filter1); \
src0 = __lasx_xvhaddw_d_w(out0, out0); \
src1 = __lasx_xvhaddw_d_w(out1, out1); \
out0 = __lasx_xvpackev_d(src1, src0); \
out1 = __lasx_xvpackod_d(src1, src0); \
out0 = __lasx_xvadd_w(out0, out1); \
out = __lasx_xvadd_w(out, out0); \
}

Definition at line 654 of file swscale_lasx.c.

Function Documentation

◆ ff_hscale_8_to_15_lasx()

void ff_hscale_8_to_15_lasx ( SwsContext *  c,
int16_t *  dst,
int  dstW,
const uint8_t *  src,
const int16_t *  filter,
const int32_t *  filterPos,
int  filterSize 
)

Definition at line 314 of file swscale_lasx.c.

Referenced by ff_sws_init_swscale_loongarch().

◆ ff_hscale_8_to_19_lasx()

void ff_hscale_8_to_19_lasx ( SwsContext *  c,
int16_t *  _dst,
int  dstW,
const uint8_t *  src,
const int16_t *  filter,
const int32_t *  filterPos,
int  filterSize 
)

Definition at line 474 of file swscale_lasx.c.

Referenced by ff_sws_init_swscale_loongarch().

◆ ff_hscale_16_to_15_lasx()

void ff_hscale_16_to_15_lasx ( SwsContext *  c,
int16_t *  dst,
int  dstW,
const uint8_t *  _src,
const int16_t *  filter,
const int32_t *  filterPos,
int  filterSize 
)

Definition at line 676 of file swscale_lasx.c.

Referenced by ff_sws_init_swscale_loongarch().

◆ ff_hscale_16_to_19_lasx()

void ff_hscale_16_to_19_lasx ( SwsContext *  c,
int16_t *  _dst,
int  dstW,
const uint8_t *  _src,
const int16_t *  filter,
const int32_t *  filterPos,
int  filterSize 
)

Definition at line 823 of file swscale_lasx.c.

Referenced by ff_sws_init_swscale_loongarch().

out
FILE * out
Definition: movenc.c:55
filter1
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:360
src1
const pixel * src1
Definition: h264pred_template.c:421
filter
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link. The additional frames can be left buffered in the filter.
Definition: filter_design.txt:228
shift
static int shift(int a, int b)
Definition: bonk.c:261
src2
const pixel * src2
Definition: h264pred_template.c:422
src0
const pixel *const src0
Definition: h264pred_template.c:420
filter0
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:352
zero
#define zero
Definition: regdef.h:64
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418