[FFmpeg-devel] [PATCH 3/5] lavc/vvc_mc: R-V V put_uni_pixels
flow gg
hlefthleft at gmail.com
Mon Oct 28 19:12:58 EET 2024
> Up to 64-bit rows, you can use strided loads and stores here.
Because of the SRC_OFFSET used in testing, only e8 and e16 loads work; e32
loads fail with a bus error.
Since the width ranges from 4 to 128, it seems that strided loads may not
be possible.
> Though for memory copying, unaligned scalar accesses might be just as
> fast.
> Or perhaps not if the vectors are not aligned but vectors should not be
> necessary here. This is especially true on the BPi whose memory bus is
> rather slow, so even scalar copy can saturate it.
I agree in theory, but the test results do seem to show some effect, so
it would be great if we could improve the testing to confirm that it
actually has no effect...
<uk7b at foxmail.com> 于2024年10月29日周二 01:08写道:
> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
> k230
> banana_f3
> put_uni_pixels_chroma_8_4x4_c: 128.3 ( 1.00x)
> 90.5 ( 1.00x)
> put_uni_pixels_chroma_8_4x4_rvv_i32: 17.6 ( 7.30x)
> 17.4 ( 5.18x)
> put_uni_pixels_chroma_8_8x8_c: 295.1 ( 1.00x)
> 163.2 ( 1.00x)
> put_uni_pixels_chroma_8_8x8_rvv_i32: 35.8 ( 8.24x)
> 27.9 ( 5.84x)
> put_uni_pixels_chroma_8_16x16_c: 619.3 ( 1.00x)
> 267.4 ( 1.00x)
> put_uni_pixels_chroma_8_16x16_rvv_i32: 72.8 ( 8.50x)
> 48.7 ( 5.49x)
> put_uni_pixels_chroma_8_32x32_c: 1433.8 ( 1.00x)
> 538.2 ( 1.00x)
> put_uni_pixels_chroma_8_32x32_rvv_i32: 230.3 ( 6.23x)
> 236.2 ( 2.28x)
> put_uni_pixels_chroma_8_64x64_c: 3517.3 ( 1.00x)
> 1455.0 ( 1.00x)
> put_uni_pixels_chroma_8_64x64_rvv_i32: 813.6 ( 4.32x)
> 590.2 ( 2.47x)
> put_uni_pixels_chroma_8_128x128_c: 10174.6 ( 1.00x)
> 5798.7 ( 1.00x)
> put_uni_pixels_chroma_8_128x128_rvv_i32: 2989.3 ( 3.40x)
> 2371.4 ( 2.45x)
> put_uni_pixels_luma_8_4x4_c: 128.6 ( 1.00x)
> 90.5 ( 1.00x)
> put_uni_pixels_luma_8_4x4_rvv_i32: 17.3 ( 7.42x)
> 17.4 ( 5.18x)
> put_uni_pixels_luma_8_8x8_c: 295.1 ( 1.00x)
> 142.4 ( 1.00x)
> put_uni_pixels_luma_8_8x8_rvv_i32: 26.6 (11.10x)
> 27.9 ( 5.10x)
> put_uni_pixels_luma_8_16x16_c: 600.6 ( 1.00x)
> 277.7 ( 1.00x)
> put_uni_pixels_luma_8_16x16_rvv_i32: 82.1 ( 7.32x)
> 48.7 ( 5.70x)
> put_uni_pixels_luma_8_32x32_c: 1406.1 ( 1.00x)
> 528.0 ( 1.00x)
> put_uni_pixels_luma_8_32x32_rvv_i32: 230.3 ( 6.10x)
> 131.9 ( 4.00x)
> put_uni_pixels_luma_8_64x64_c: 4600.6 ( 1.00x)
> 1309.2 ( 1.00x)
> put_uni_pixels_luma_8_64x64_rvv_i32: 1073.1 ( 4.29x)
> 382.2 ( 3.43x)
> put_uni_pixels_luma_8_128x128_c: 11350.3 ( 1.00x)
> 3506.9 ( 1.00x)
> put_uni_pixels_luma_8_128x128_rvv_i32: 3119.1 ( 3.64x)
> 2017.5 ( 1.74x)
> ---
> libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++
> libavcodec/riscv/h26x/h2656dsp.h | 33 +++++++++++++++
> libavcodec/riscv/vvc/Makefile | 3 +-
> libavcodec/riscv/vvc/vvcdsp_init.c | 5 +++
> 4 files changed, 93 insertions(+), 1 deletion(-)
> create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S
> create mode 100644 libavcodec/riscv/h26x/h2656dsp.h
>
> diff --git a/libavcodec/riscv/h26x/h2656_inter_rvv.S
> b/libavcodec/riscv/h26x/h2656_inter_rvv.S
> new file mode 100644
> index 0000000000..6692e33acf
> --- /dev/null
> +++ b/libavcodec/riscv/h26x/h2656_inter_rvv.S
> @@ -0,0 +1,53 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "libavcodec/riscv/h26x/asm.S"
> +
> +.macro put_uni_pixels w, vlen, id
> +\id\w\vlen:
> +.if \w == 128 && \vlen == 128
> + li t0, \w
> + vsetvli zero, t0, e8, m8, ta, ma
> +.else
> + vsetvlstatic8 \w, \vlen
> +.endif
> +1:
> + vle8.v v0, (a2)
> + addi a4, a4, -1
> + vse8.v v0, (a0)
> + add a2, a2, a3
> + add a0, a0, a1
> + bnez a4, 1b
> + ret
> +.endm
> +
> +.macro func_put_uni_pixels vlen
> +func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba
> + lpad 0
> + POW2_JMP_TABLE 4, \vlen
> + POW2_J \vlen, 4, a7
> + .irp w,2,4,8,16,32,64,128
> + put_uni_pixels \w, \vlen, 4
> + .endr
> +endfunc
> +.endm
> +
> +func_put_uni_pixels 256
> +func_put_uni_pixels 128
> diff --git a/libavcodec/riscv/h26x/h2656dsp.h
> b/libavcodec/riscv/h26x/h2656dsp.h
> new file mode 100644
> index 0000000000..41ba6bc331
> --- /dev/null
> +++ b/libavcodec/riscv/h26x/h2656dsp.h
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_RISCV_H26X_H2656DSP_H
> +#define AVCODEC_RISCV_H26X_H2656DSP_H
> +
> +#define H2656_PEL_PROTOTYPE(name, D, opt) \
> +void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst,
> ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int
> height, const int8_t *hf, const int8_t *vf, int width) \
> +
> +#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \
> + H2656_PEL_PROTOTYPE(fname, bitd, opt); \
> +
> +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_256);
> +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_128);
> +
> +#endif
> diff --git a/libavcodec/riscv/vvc/Makefile b/libavcodec/riscv/vvc/Makefile
> index 582b051579..ec116aebc1 100644
> --- a/libavcodec/riscv/vvc/Makefile
> +++ b/libavcodec/riscv/vvc/Makefile
> @@ -1,2 +1,3 @@
> OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
> -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
> +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
> + riscv/h26x/h2656_inter_rvv.o
> diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c
> b/libavcodec/riscv/vvc/vvcdsp_init.c
> index bee892cb7c..9dea70f392 100644
> --- a/libavcodec/riscv/vvc/vvcdsp_init.c
> +++ b/libavcodec/riscv/vvc/vvcdsp_init.c
> @@ -25,6 +25,7 @@
> #include "libavutil/riscv/cpu.h"
> #include "libavcodec/vvc/dsp.h"
> #include "libavcodec/vvc/dec.h"
> +#include "libavcodec/riscv/h26x/h2656dsp.h"
>
> #define bf(fn, bd, opt) fn##_##bd##_##opt
>
> @@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256)
> c->inter.dst[C][w][idx1][idx2] = a;
> \
> } while (0)
> \
>
> +#define DIR_FUNCS(d, C, opt)
> \
> + PEL_FUNC(put_##d, C, 0, 0, ff_h2656_put_##d##_pixels_8_##opt);
> \
> +
> #define FUNCS(C, opt)
> \
> PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt);
> \
> + DIR_FUNCS(uni, C, opt);
> \
>
> void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
> {
> --
> 2.47.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list