[FFmpeg-devel] [PATCH 2/4] lavc/vp8dsp: R-V V loop_filter_simple
Rémi Denis-Courmont
remi at remlab.net
Mon Jun 24 23:19:24 EEST 2024
Le lauantaina 22. kesäkuuta 2024, 18.58.04 EEST uk7b at foxmail.com a écrit :
> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
> C908 X60
> vp8_loop_filter_simple_h_c : 7.0 6.0
> vp8_loop_filter_simple_h_rvv_i32 : 3.2 2.7
> vp8_loop_filter_simple_v_c : 7.2 6.5
> vp8_loop_filter_simple_v_rvv_i32 : 1.7 1.2
> ---
> libavcodec/riscv/vp8dsp_init.c | 18 ++++++-
> libavcodec/riscv/vp8dsp_rvv.S | 87 ++++++++++++++++++++++++++++++++++
> 2 files changed, 104 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
> index dcb6307d5b..8c5b2c8b04 100644
> --- a/libavcodec/riscv/vp8dsp_init.c
> +++ b/libavcodec/riscv/vp8dsp_init.c
> @@ -49,6 +49,9 @@ VP8_BILIN(16, rvv256);
> VP8_BILIN(8, rvv256);
> VP8_BILIN(4, rvv256);
>
> +VP8_LF(rvv128);
> +VP8_LF(rvv256);
> +
> av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
> {
> #if HAVE_RV
> @@ -147,9 +150,15 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
> av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
> {
> #if HAVE_RVV
> + int vlenb = ff_get_rv_vlenb();
> +
> +#define init_loop_filter(vlen) \
> + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_rvv##vlen; \
> + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_rvv##vlen;
> +
> int flags = av_get_cpu_flags();
>
> - if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
> + if (flags & AV_CPU_FLAG_RVV_I32 && vlenb >= 16) {
> #if __riscv_xlen >= 64
> if (flags & AV_CPU_FLAG_RVV_I64)
> c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_rvv;
> @@ -159,6 +168,13 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
> c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
> if (flags & AV_CPU_FLAG_RVV_I64)
> c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_rvv;
> +
> + if (vlenb >= 32) {
> + init_loop_filter(256);
> + } else {
> + init_loop_filter(128);
> + }
> }
> +#undef init_loop_filter
> #endif
> }
> diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
> index 0cbf1672f7..b5f8bb31b4 100644
> --- a/libavcodec/riscv/vp8dsp_rvv.S
> +++ b/libavcodec/riscv/vp8dsp_rvv.S
> @@ -275,6 +275,93 @@ func ff_vp78_idct_dc_add4uv_rvv, zve64x
> ret
> endfunc
>
> +.macro filter_fmin len, vlen, a, f1, p0f2, q0f1
> + vsetvlstatic16 \len, \vlen
> + vsext.vf2 \q0f1, \a
> + vmin.vx \p0f2, \q0f1, a7
> + vmin.vx \q0f1, \q0f1, t3
> + vadd.vi \p0f2, \p0f2, 3
> + vadd.vi \q0f1, \q0f1, 4
> + vsra.vi \p0f2, \p0f2, 3
> + vsra.vi \f1, \q0f1, 3
vssra.vi
> + vadd.vv \p0f2, \p0f2, v8
> + vsub.vv \q0f1, v16, \f1
> + vmax.vx \p0f2, \p0f2, zero
> + vmax.vx \q0f1, \q0f1, zero
> +.endm
> +
> +.macro filter len, vlen, type, normal, inner, dst, stride, fE, fI, thresh
> +.ifc \type,v
> + slli a6, \stride, 1
> + sub t2, \dst, a6
> + add t4, \dst, \stride
> + sub t1, \dst, \stride
> + vle8.v v1, (t2)
> + vle8.v v11, (t4)
> + vle8.v v17, (t1)
> + vle8.v v22, (\dst)
> +.else
> + addi t1, \dst, -1
> + addi a6, \dst, -2
> + addi t4, \dst, 1
> + vlse8.v v1, (a6), \stride
> + vlse8.v v11, (t4), \stride
> + vlse8.v v17, (t1), \stride
> + vlse8.v v22, (\dst), \stride
vlsseg4e8.v
> +.endif
> + vwsubu.vv v12, v1, v11 // p1-q1
> + vwsubu.vv v24, v22, v17 // q0-p0
> + vnclip.wi v23, v12, 0
I can't find where VXRM is initialised for that.
> + vsetvlstatic16 \len, \vlen
> + // vp8_simple_limit(dst + i, stride, flim)
> + li a7, 2
> + vneg.v v18, v12
> + vmax.vv v18, v18, v12
> + vneg.v v8, v24
> + vmax.vv v8, v8, v24
> + vsrl.vi v18, v18, 1
> + vmacc.vx v18, a7, v8
> + vmsleu.vx v0, v18, \fE
> +
> + li t5, 3
> + li a7, 124
> + li t3, 123
> + vmul.vx v30, v24, t5
> + vsext.vf2 v4, v23
> + vzext.vf2 v8, v17 // p0
> + vzext.vf2 v16, v22 // q0
> + vadd.vv v12, v30, v4
vwadd.wv
> + vsetvlstatic8 \len, \vlen
> + vnclip.wi v11, v12, 0
> + filter_fmin \len, \vlen, v11, v24, v4, v6
> + vsetvlstatic8 \len, \vlen
> + vnclipu.wi v4, v4, 0
> + vnclipu.wi v6, v6, 0
> +
> +.ifc \type,v
> + vse8.v v4, (t1), v0.t
> + vse8.v v6, (\dst), v0.t
> +.else
> + vsse8.v v4, (t1), \stride, v0.t
> + vsse8.v v6, (\dst), \stride, v0.t
vsseg2e8.v
> +.endif
> +
> +.endm
> +
> +.irp vlen,256,128
> +func ff_vp8_v_loop_filter16_simple_rvv\vlen, zve32x
> + vsetvlstatic8 16, \vlen
> + filter 16, \vlen, v, 0, 0, a0, a1, a2, a3, a4
> + ret
> +endfunc
> +
> +func ff_vp8_h_loop_filter16_simple_rvv\vlen, zve32x
> + vsetvlstatic8 16, \vlen
> + filter 16, \vlen, h, 0, 0, a0, a1, a2, a3, a4
> + ret
> +endfunc
> +.endr
> +
> .macro bilin_load_h dst mn
> addi t5, a2, 1
> vle8.v \dst, (a2)
--
レミ・デニ-クールモン
http://www.remlab.net/
More information about the ffmpeg-devel mailing list