[FFmpeg-devel] [PATCH] avcodec/mips: Fixed rnd_val variable to 6 in hevc uni mc msa functions
Manojkumar Bhosale
Manojkumar.Bhosale at imgtec.com
Mon Sep 18 15:16:49 EEST 2017
LGTM
-----Original Message-----
From: ffmpeg-devel [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf Of kaustubh.raste at imgtec.com
Sent: Monday, September 18, 2017 1:49 PM
To: ffmpeg-devel at ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: Fixed rnd_val variable to 6 in hevc uni mc msa functions
From: Kaustubh Raste <kaustubh.raste at imgtec.com>
Signed-off-by: Kaustubh Raste <kaustubh.raste at imgtec.com>
---
libavcodec/mips/hevc_mc_uni_msa.c | 372 +++++++++++++------------------------
1 file changed, 133 insertions(+), 239 deletions(-)
diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c
index 754fbdb..cf22e7f 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale at imgtec.com)
+ * Copyright (c) 2015 - 2017 Manojkumar Bhosale (Manojkumar.Bhosale at imgtec.com)
*
* This file is part of FFmpeg.
*
@@ -359,16 +359,14 @@ static const uint8_t mc_filt_mask_arr[16 * 3] = {
static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16u8 mask0, mask1, mask2, mask3, out;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v8i16 filt, out0, out1;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -382,7 +380,7 @@ static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1);
- SRAR_H2_SH(out0, out1, rnd_vec);
+ SRARI_H2_SH(out0, out1, 6);
SAT_SH2_SH(out0, out1, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -390,17 +388,15 @@ static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_4x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 filt0, filt1, filt2, filt3;
v16i8 src0, src1, src2, src3;
v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -419,7 +415,7 @@ static void common_hz_8t_4x8_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -430,16 +426,14 @@ static void common_hz_8t_4x8_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_4x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16u8 mask0, mask1, mask2, mask3, out;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -459,7 +453,7 @@ static void common_hz_8t_4x16_msa(uint8_t *src, int32_t src_stride,
src += (4 * src_stride);
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -479,7 +473,7 @@ static void common_hz_8t_4x16_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -490,30 +484,27 @@ static void common_hz_8t_4x16_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_4w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height, uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if (4 == height) {
- common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter);
} else if (8 == height) {
- common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter);
} else if (16 == height) {
- common_hz_8t_4x16_msa(src, src_stride, dst, dst_stride, filter,
- rnd_val);
+ common_hz_8t_4x16_msa(src, src_stride, dst, dst_stride, filter);
}
}
static void common_hz_8t_8x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -528,7 +519,7 @@ static void common_hz_8t_8x4_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1,
out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
@@ -537,18 +528,15 @@ static void common_hz_8t_8x4_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_8x8mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -565,7 +553,7 @@ static void common_hz_8t_8x8mult_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0,
out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
@@ -576,32 +564,28 @@ static void common_hz_8t_8x8mult_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_8w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if (4 == height) {
- common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
} else {
common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
- height, rnd_val);
+ height);
}
}
static void common_hz_8t_12w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint8_t *src1_ptr, *dst1;
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v8i16 filt, out0, out1, out2, out3;
v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask00, tmp0, tmp1;
- v8i16 rnd_vec;
mask00 = LD_UB(&mc_filt_mask_arr[0]);
mask0 = LD_UB(&mc_filt_mask_arr[16]);
- rnd_vec = __msa_fill_h(rnd_val);
src1_ptr = src - 3;
dst1 = dst;
@@ -628,7 +612,7 @@ static void common_hz_8t_12w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask00, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0,
out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
@@ -642,7 +626,7 @@ static void common_hz_8t_12w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask4, mask5,
mask6, filt0, filt1, filt2, filt3, out0,
out1);
- SRAR_H2_SH(out0, out1, rnd_vec);
+ SRARI_H2_SH(out0, out1, 6);
SAT_SH2_SH(out0, out1, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(tmp0, tmp0, 0, 1, 2, 3, dst, dst_stride);
@@ -652,18 +636,15 @@ static void common_hz_8t_12w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_16w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -681,7 +662,7 @@ static void common_hz_8t_16w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0,
out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST_UB(out, dst);
@@ -694,8 +675,7 @@ static void common_hz_8t_16w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_24w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
@@ -704,11 +684,9 @@ static void common_hz_8t_24w_msa(uint8_t *src, int32_t src_stride,
v16i8 vec11;
v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9, out10;
v8i16 out11, filt;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -754,8 +732,8 @@ static void common_hz_8t_24w_msa(uint8_t *src, int32_t src_stride,
ADDS_SH4_SH(out0, out4, out8, out10, out2, out6, out9, out11, out0,
out8, out2, out9);
ADDS_SH2_SH(out1, out5, out3, out7, out1, out3);
- SRAR_H4_SH(out0, out8, out2, out9, rnd_vec);
- SRAR_H2_SH(out1, out3, rnd_vec);
+ SRARI_H4_SH(out0, out8, out2, out9, 6);
+ SRARI_H2_SH(out1, out3, 6);
SAT_SH4_SH(out0, out8, out2, out9, 7);
SAT_SH2_SH(out1, out3, 7);
out = PCKEV_XORI128_UB(out8, out9);
@@ -771,18 +749,15 @@ static void common_hz_8t_24w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_32w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -802,7 +777,7 @@ static void common_hz_8t_32w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0,
out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
src0 = LD_SB(src);
@@ -821,7 +796,7 @@ static void common_hz_8t_32w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0,
out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST_UB(out, dst);
@@ -833,18 +808,15 @@ static void common_hz_8t_32w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_48w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3, vec0, vec1, vec2;
v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7, out;
v8i16 filt, out0, out1, out2, out3, out4, out5, out6;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -879,8 +851,8 @@ static void common_hz_8t_48w_msa(uint8_t *src, int32_t src_stride,
out5 = __msa_dpadd_s_h(out5, vec2, filt3);
ADDS_SH2_SH(out0, out3, out1, out4, out0, out1);
out2 = __msa_adds_s_h(out2, out5);
- SRAR_H2_SH(out0, out1, rnd_vec);
- out6 = __msa_srar_h(out2, rnd_vec);
+ SRARI_H2_SH(out0, out1, 6);
+ out6 = __msa_srari_h(out2, 6);
SAT_SH3_SH(out0, out1, out6, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST_UB(out, dst);
@@ -905,7 +877,8 @@ static void common_hz_8t_48w_msa(uint8_t *src, int32_t src_stride,
out5 = __msa_dpadd_s_h(out5, vec2, filt3);
ADDS_SH2_SH(out0, out3, out1, out4, out3, out4);
out5 = __msa_adds_s_h(out2, out5);
- SRAR_H3_SH(out3, out4, out5, rnd_vec);
+ SRARI_H2_SH(out3, out4, 6);
+ out5 = __msa_srari_h(out5, 6);
SAT_SH3_SH(out3, out4, out5, 7);
out = PCKEV_XORI128_UB(out6, out3);
ST_UB(out, dst + 16);
@@ -917,18 +890,15 @@ static void common_hz_8t_48w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_8t_64w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
int32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -948,7 +918,7 @@ static void common_hz_8t_64w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
mask2, mask3, filt0, filt1, filt2, filt3,
out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST_UB(out, dst);
@@ -965,7 +935,7 @@ static void common_hz_8t_64w_msa(uint8_t *src, int32_t src_stride,
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
mask2, mask3, filt0, filt1, filt2, filt3,
out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST_UB(out, dst + 32);
@@ -977,8 +947,7 @@ static void common_hz_8t_64w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_4w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
@@ -987,10 +956,8 @@ static void common_vt_8t_4w_msa(uint8_t *src, int32_t src_stride,
v16i8 src10998, filt0, filt1, filt2, filt3;
v16u8 out;
v8i16 filt, out10, out32;
- v8i16 rnd_vec;
src -= (3 * src_stride);
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1017,7 +984,7 @@ static void common_vt_8t_4w_msa(uint8_t *src, int32_t src_stride,
filt1, filt2, filt3);
out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
filt1, filt2, filt3);
- SRAR_H2_SH(out10, out32, rnd_vec);
+ SRARI_H2_SH(out10, out32, 6);
SAT_SH2_SH(out10, out32, 7);
out = PCKEV_XORI128_UB(out10, out32);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -1032,8 +999,7 @@ static void common_vt_8t_4w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_8w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
@@ -1041,10 +1007,8 @@ static void common_vt_8t_8w_msa(uint8_t *src, int32_t src_stride,
v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
v16u8 tmp0, tmp1;
v8i16 filt, out0_r, out1_r, out2_r, out3_r;
- v8i16 rnd_vec;
src -= (3 * src_stride);
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1071,7 +1035,7 @@ static void common_vt_8t_8w_msa(uint8_t *src, int32_t src_stride,
filt1, filt2, filt3);
out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
filt1, filt2, filt3);
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
@@ -1090,8 +1054,7 @@ static void common_vt_8t_8w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_12w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
int32_t loop_cnt;
uint32_t out2, out3;
@@ -1100,11 +1063,9 @@ static void common_vt_8t_12w_msa(uint8_t *src, int32_t src_stride,
v16i8 res2, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
v8i16 vec01, vec23, vec45, vec67, tmp0, tmp1, tmp2;
v8i16 filt, filt0, filt1, filt2, filt3;
- v8i16 rnd_vec;
v4i32 mask = { 2, 6, 2, 6 };
src -= (3 * src_stride);
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter_y */
filt = LD_SH(filter);
@@ -1140,7 +1101,8 @@ static void common_vt_8t_12w_msa(uint8_t *src, int32_t src_stride,
vec45, vec67);
tmp2 = FILT_8TAP_DPADD_S_H(vec01, vec23, vec45, vec67, filt0, filt1,
filt2, filt3);
- SRAR_H3_SH(tmp0, tmp1, tmp2, rnd_vec);
+ SRARI_H2_SH(tmp0, tmp1, 6);
+ tmp2 = __msa_srari_h(tmp2, 6);
SAT_SH3_SH(tmp0, tmp1, tmp2, 7);
PCKEV_B3_SB(tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, res0, res1, res2);
XORI_B3_128_SB(res0, res1, res2);
@@ -1174,8 +1136,7 @@ static void common_vt_8t_12w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_16w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
@@ -1185,10 +1146,8 @@ static void common_vt_8t_16w_msa(uint8_t *src, int32_t src_stride,
v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
v16u8 tmp0, tmp1, tmp2, tmp3;
v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
- v8i16 rnd_vec;
src -= (3 * src_stride);
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1228,8 +1187,8 @@ static void common_vt_8t_16w_msa(uint8_t *src, int32_t src_stride,
filt1, filt2, filt3);
out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
filt1, filt2, filt3);
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
- SRAR_H4_SH(out0_l, out1_l, out2_l, out3_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
@@ -1257,7 +1216,7 @@ static void common_vt_8t_16w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_16w_mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
const int8_t *filter, int32_t height,
- uint8_t rnd_val, int32_t width)
+ int32_t width)
{
uint8_t *src_tmp;
uint8_t *dst_tmp;
@@ -1269,10 +1228,8 @@ static void common_vt_8t_16w_mult_msa(uint8_t *src, int32_t src_stride,
v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
v16u8 tmp0, tmp1, tmp2, tmp3;
v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
- v8i16 rnd_vec;
src -= (3 * src_stride);
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1315,8 +1272,8 @@ static void common_vt_8t_16w_mult_msa(uint8_t *src, int32_t src_stride,
filt0, filt1, filt2, filt3);
out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
filt0, filt1, filt2, filt3);
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
- SRAR_H4_SH(out0_l, out1_l, out2_l, out3_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
@@ -1347,37 +1304,37 @@ static void common_vt_8t_16w_mult_msa(uint8_t *src, int32_t src_stride,
static void common_vt_8t_24w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height, uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- rnd_val, 16);
+ 16);
common_vt_8t_8w_msa(src + 16, src_stride, dst + 16, dst_stride, filter,
- height, rnd_val);
+ height);
}
static void common_vt_8t_32w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height, uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- rnd_val, 32);
+ 32);
}
static void common_vt_8t_48w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height, uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- rnd_val, 48);
+ 48);
}
static void common_vt_8t_64w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height, uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- rnd_val, 64);
+ 64);
}
static void hevc_hv_uni_8t_4w_msa(uint8_t *src,
@@ -1736,16 +1693,14 @@ static void hevc_hv_uni_8t_64w_msa(uint8_t *src,
static void common_hz_4t_4x2_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 filt0, filt1, src0, src1, mask0, mask1, vec0, vec1;
v16u8 out;
v8i16 filt, res0;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[16]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -1757,7 +1712,7 @@ static void common_hz_4t_4x2_msa(uint8_t *src, int32_t src_stride,
XORI_B2_128_SB(src0, src1);
VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
res0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1);
- res0 = __msa_srar_h(res0, rnd_vec);
+ res0 = __msa_srari_h(res0, 6);
res0 = __msa_sat_s_h(res0, 7);
out = PCKEV_XORI128_UB(res0, res0);
ST4x2_UB(out, dst, dst_stride);
@@ -1765,16 +1720,14 @@ static void common_hz_4t_4x2_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_4x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
v8i16 filt, out0, out1;
v16u8 out;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[16]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -1786,7 +1739,7 @@ static void common_hz_4t_4x4_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
filt0, filt1, out0, out1);
- SRAR_H2_SH(out0, out1, rnd_vec);
+ SRARI_H2_SH(out0, out1, 6);
SAT_SH2_SH(out0, out1, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -1794,16 +1747,14 @@ static void common_hz_4t_4x4_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_4x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
v16u8 out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[16]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -1821,7 +1772,7 @@ static void common_hz_4t_4x8_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
filt0, filt1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -1832,17 +1783,15 @@ static void common_hz_4t_4x8_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_4x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
v16i8 filt0, filt1, mask0, mask1;
v16u8 out;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[16]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -1857,7 +1806,7 @@ static void common_hz_4t_4x16_msa(uint8_t *src, int32_t src_stride,
filt0, filt1, out0, out1);
HORIZ_4TAP_4WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
filt0, filt1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -1873,7 +1822,7 @@ static void common_hz_4t_4x16_msa(uint8_t *src, int32_t src_stride,
filt0, filt1, out0, out1);
HORIZ_4TAP_4WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
filt0, filt1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out = PCKEV_XORI128_UB(out0, out1);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -1884,35 +1833,30 @@ static void common_hz_4t_4x16_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_4w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if (2 == height) {
- common_hz_4t_4x2_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_4t_4x2_msa(src, src_stride, dst, dst_stride, filter);
} else if (4 == height) {
- common_hz_4t_4x4_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_4t_4x4_msa(src, src_stride, dst, dst_stride, filter);
} else if (8 == height) {
- common_hz_4t_4x8_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_hz_4t_4x8_msa(src, src_stride, dst, dst_stride, filter);
} else if (16 == height) {
- common_hz_4t_4x16_msa(src, src_stride, dst, dst_stride, filter,
- rnd_val);
+ common_hz_4t_4x16_msa(src, src_stride, dst, dst_stride, filter);
}
}
static void common_hz_4t_6w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
v16u8 out4, out5;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -1927,7 +1871,7 @@ static void common_hz_4t_6w_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
filt1, out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
out4 = PCKEV_XORI128_UB(out0, out1);
@@ -1939,18 +1883,15 @@ static void common_hz_4t_6w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_8x2mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, filt0, filt1, mask0, mask1;
v16u8 out;
v8i16 filt, vec0, vec1, vec2, vec3;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -1966,7 +1907,7 @@ static void common_hz_4t_8x2mult_msa(uint8_t *src, int32_t src_stride,
DOTP_SB2_SH(vec0, vec1, filt0, filt0, vec0, vec1);
VSHF_B2_SH(src0, src0, src1, src1, mask1, mask1, vec2, vec3);
DPADD_SB2_SH(vec2, vec3, filt1, filt1, vec0, vec1);
- SRAR_H2_SH(vec0, vec1, rnd_vec);
+ SRARI_H2_SH(vec0, vec1, 6);
SAT_SH2_SH(vec0, vec1, 7);
out = PCKEV_XORI128_UB(vec0, vec1);
ST8x2_UB(out, dst, dst_stride);
@@ -1976,18 +1917,15 @@ static void common_hz_4t_8x2mult_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
v16u8 tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -2002,7 +1940,7 @@ static void common_hz_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
filt1, out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
@@ -2013,22 +1951,20 @@ static void common_hz_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_8w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if ((2 == height) || (6 == height)) {
common_hz_4t_8x2mult_msa(src, src_stride, dst, dst_stride, filter,
- height, rnd_val);
+ height);
} else {
common_hz_4t_8x4mult_msa(src, src_stride, dst, dst_stride, filter,
- height, rnd_val);
+ height);
}
}
static void common_hz_4t_12w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1, mask2, mask3;
@@ -2036,7 +1972,6 @@ static void common_hz_4t_12w_msa(uint8_t *src, int32_t src_stride,
v16i8 vec10, vec11;
v16u8 tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3, out4, out5;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
mask2 = LD_SB(&mc_filt_mask_arr[32]);
@@ -2050,8 +1985,6 @@ static void common_hz_4t_12w_msa(uint8_t *src, int32_t src_stride,
mask1 = mask0 + 2;
mask3 = mask2 + 2;
- rnd_vec = __msa_fill_h(rnd_val);
-
for (loop_cnt = (height >> 2); loop_cnt--;) {
LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
@@ -2069,8 +2002,8 @@ static void common_hz_4t_12w_msa(uint8_t *src, int32_t src_stride,
DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt1, filt1, filt1, filt1,
out2, out3, out4, out5);
DPADD_SB2_SH(vec2, vec3, filt1, filt1, out0, out1);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
- SRAR_H2_SH(out4, out5, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
+ SRARI_H2_SH(out4, out5, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
SAT_SH2_SH(out4, out5, 7);
tmp0 = PCKEV_XORI128_UB(out2, out3);
@@ -2084,19 +2017,16 @@ static void common_hz_4t_12w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_16w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
v16i8 filt0, filt1, mask0, mask1;
v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
v16u8 out;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -2114,8 +2044,8 @@ static void common_hz_4t_16w_msa(uint8_t *src, int32_t src_stride,
filt1, out0, out1, out2, out3);
HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, filt0,
filt1, out4, out5, out6, out7);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
- SRAR_H4_SH(out4, out5, out6, out7, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
+ SRARI_H4_SH(out4, out5, out6, out7, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
SAT_SH4_SH(out4, out5, out6, out7, 7);
out = PCKEV_XORI128_UB(out0, out1);
@@ -2135,8 +2065,7 @@ static void common_hz_4t_16w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint8_t *dst1 = dst + 16;
uint32_t loop_cnt;
@@ -2145,11 +2074,9 @@ static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
v16i8 filt0, filt1, mask0, mask1, mask00, mask11;
v8i16 filt, out0, out1, out2, out3;
v16u8 tmp0, tmp1;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -2173,7 +2100,7 @@ static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
out0, out1, out2, out3);
DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1,
out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
ST_UB(tmp0, dst);
@@ -2190,7 +2117,7 @@ static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
out0, out1, out2, out3);
DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1,
out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
ST_UB(tmp0, dst);
@@ -2210,7 +2137,7 @@ static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1,
out0, out1, out2, out3);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
@@ -2221,19 +2148,16 @@ static void common_hz_4t_24w_msa(uint8_t *src, int32_t src_stride,
static void common_hz_4t_32w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
v16i8 filt0, filt1, mask0, mask1;
v16u8 out;
v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
- v8i16 rnd_vec;
mask0 = LD_SB(&mc_filt_mask_arr[0]);
src -= 1;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter */
filt = LD_SH(filter);
@@ -2257,8 +2181,8 @@ static void common_hz_4t_32w_msa(uint8_t *src, int32_t src_stride,
filt0, filt1, out0, out1, out2, out3);
HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
filt0, filt1, out4, out5, out6, out7);
- SRAR_H4_SH(out0, out1, out2, out3, rnd_vec);
- SRAR_H4_SH(out4, out5, out6, out7, rnd_vec);
+ SRARI_H4_SH(out0, out1, out2, out3, 6);
+ SRARI_H4_SH(out4, out5, out6, out7, 6);
SAT_SH4_SH(out0, out1, out2, out3, 7);
SAT_SH4_SH(out4, out5, out6, out7, 7);
out = PCKEV_XORI128_UB(out0, out1);
@@ -2276,16 +2200,14 @@ static void common_hz_4t_32w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_4x2_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, src4, src10_r, src32_r, src21_r, src43_r;
v16i8 src2110, src4332, filt0, filt1;
v16u8 out;
v8i16 filt, out10;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -2301,7 +2223,7 @@ static void common_vt_4t_4x2_msa(uint8_t *src, int32_t src_stride,
src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
out10 = FILT_4TAP_DPADD_S_H(src2110, src4332, filt0, filt1);
- out10 = __msa_srar_h(out10, rnd_vec);
+ out10 = __msa_srari_h(out10, 6);
out10 = __msa_sat_s_h(out10, 7);
out = PCKEV_XORI128_UB(out10, out10);
ST4x2_UB(out, dst, dst_stride);
@@ -2309,8 +2231,7 @@ static void common_vt_4t_4x2_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_4x4multiple_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5;
@@ -2318,10 +2239,8 @@ static void common_vt_4t_4x4multiple_msa(uint8_t *src, int32_t src_stride,
v16i8 src2110, src4332, filt0, filt1;
v8i16 filt, out10, out32;
v16u8 out;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -2348,7 +2267,7 @@ static void common_vt_4t_4x4multiple_msa(uint8_t *src, int32_t src_stride,
src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_r, (v2i64) src54_r);
src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
out32 = FILT_4TAP_DPADD_S_H(src4332, src2110, filt0, filt1);
- SRAR_H2_SH(out10, out32, rnd_vec);
+ SRARI_H2_SH(out10, out32, 6);
SAT_SH2_SH(out10, out32, 7);
out = PCKEV_XORI128_UB(out10, out32);
ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
@@ -2358,30 +2277,26 @@ static void common_vt_4t_4x4multiple_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_4w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if (2 == height) {
- common_vt_4t_4x2_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_vt_4t_4x2_msa(src, src_stride, dst, dst_stride, filter);
} else {
common_vt_4t_4x4multiple_msa(src, src_stride, dst, dst_stride, filter,
- height, rnd_val);
+ height);
}
}
static void common_vt_4t_6w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, out0, out1;
v8i16 vec01, vec12, vec23, vec30, tmp0, tmp1, tmp2, tmp3;
v8i16 filt, filt0, filt1;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter_y */
filt = LD_SH(filter);
@@ -2414,7 +2329,7 @@ static void common_vt_4t_6w_msa(uint8_t *src, int32_t src_stride,
vec12 = (v8i16) __msa_ilvr_b((v16i8) vec2, (v16i8) vec1);
tmp3 = FILT_4TAP_DPADD_S_H(vec30, vec12, filt0, filt1);
- SRAR_H4_SH(tmp0, tmp1, tmp2, tmp3, rnd_vec);
+ SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
@@ -2425,15 +2340,13 @@ static void common_vt_4t_6w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_8x2_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
v16i8 src0, src1, src2, src3, src4;
v8i16 src01, src12, src23, src34, tmp0, tmp1, filt, filt0, filt1;
v16u8 out;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter_y */
filt = LD_SH(filter);
@@ -2445,7 +2358,7 @@ static void common_vt_4t_8x2_msa(uint8_t *src, int32_t src_stride,
tmp0 = FILT_4TAP_DPADD_S_H(src01, src23, filt0, filt1);
ILVR_B2_SH(src2, src1, src4, src3, src12, src34);
tmp1 = FILT_4TAP_DPADD_S_H(src12, src34, filt0, filt1);
- SRAR_H2_SH(tmp0, tmp1, rnd_vec);
+ SRARI_H2_SH(tmp0, tmp1, 6);
SAT_SH2_SH(tmp0, tmp1, 7);
out = PCKEV_XORI128_UB(tmp0, tmp1);
ST8x2_UB(out, dst, dst_stride);
@@ -2453,17 +2366,15 @@ static void common_vt_4t_8x2_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_8x6_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, uint8_t rnd_val)
+ const int8_t *filter)
{
uint32_t loop_cnt;
uint64_t out0, out1, out2;
v16i8 src0, src1, src2, src3, src4, src5;
v8i16 vec0, vec1, vec2, vec3, vec4, tmp0, tmp1, tmp2;
v8i16 filt, filt0, filt1;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
/* rearranging filter_y */
filt = LD_SH(filter);
@@ -2484,7 +2395,8 @@ static void common_vt_4t_8x6_msa(uint8_t *src, int32_t src_stride,
tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1);
tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt0, filt1);
tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec4, filt0, filt1);
- SRAR_H3_SH(tmp0, tmp1, tmp2, rnd_vec);
+ SRARI_H2_SH(tmp0, tmp1, 6);
+ tmp2 = __msa_srari_h(tmp2, 6);
SAT_SH3_SH(tmp0, tmp1, tmp2, 7);
PCKEV_B2_SH(tmp1, tmp0, tmp2, tmp2, tmp0, tmp2);
XORI_B2_128_SH(tmp0, tmp2);
@@ -2507,18 +2419,15 @@ static void common_vt_4t_8x6_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src7, src8, src9, src10;
v16i8 src10_r, src72_r, src98_r, src21_r, src87_r, src109_r, filt0, filt1;
v16u8 tmp0, tmp1;
v8i16 filt, out0_r, out1_r, out2_r, out3_r;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -2540,7 +2449,7 @@ static void common_vt_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
out1_r = FILT_4TAP_DPADD_S_H(src21_r, src87_r, filt0, filt1);
out2_r = FILT_4TAP_DPADD_S_H(src72_r, src98_r, filt0, filt1);
out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
@@ -2555,23 +2464,21 @@ static void common_vt_4t_8x4mult_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_8w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
if (2 == height) {
- common_vt_4t_8x2_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_vt_4t_8x2_msa(src, src_stride, dst, dst_stride, filter);
} else if (6 == height) {
- common_vt_4t_8x6_msa(src, src_stride, dst, dst_stride, filter, rnd_val);
+ common_vt_4t_8x6_msa(src, src_stride, dst, dst_stride, filter);
} else {
common_vt_4t_8x4mult_msa(src, src_stride, dst, dst_stride,
- filter, height, rnd_val);
+ filter, height);
}
}
static void common_vt_4t_12w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6;
@@ -2580,14 +2487,11 @@ static void common_vt_4t_12w_msa(uint8_t *src, int32_t src_stride,
v8i16 src10, src21, src32, src43, src54, src65, src87, src109, src1211;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, filt, filt0, filt1;
v4u32 mask = { 2, 6, 2, 6 };
- v8i16 rnd_vec;
/* rearranging filter_y */
filt = LD_SH(filter);
SPLATI_H2_SH(filt, 0, 1, filt0, filt1);
- rnd_vec = __msa_fill_h(rnd_val);
-
src -= src_stride;
LD_SB3(src, src_stride, src0, src1, src2);
@@ -2613,8 +2517,8 @@ static void common_vt_4t_12w_msa(uint8_t *src, int32_t src_stride,
ILVR_B3_SH(vec1, vec0, vec3, vec2, vec5, vec4, src87, src109, src1211);
tmp4 = FILT_4TAP_DPADD_S_H(src87, src109, filt0, filt1);
tmp5 = FILT_4TAP_DPADD_S_H(src109, src1211, filt0, filt1);
- SRAR_H4_SH(tmp0, tmp1, tmp2, tmp3, rnd_vec);
- SRAR_H2_SH(tmp4, tmp5, rnd_vec);
+ SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+ SRARI_H2_SH(tmp4, tmp5, 6);
SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
SAT_SH2_SH(tmp4, tmp5, 7);
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
@@ -2635,8 +2539,7 @@ static void common_vt_4t_12w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_16w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6;
@@ -2644,10 +2547,8 @@ static void common_vt_4t_16w_msa(uint8_t *src, int32_t src_stride,
v16i8 src32_l, src54_l, src21_l, src43_l, src65_l, filt0, filt1;
v16u8 tmp0, tmp1, tmp2, tmp3;
v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
- v8i16 rnd_vec;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -2676,8 +2577,8 @@ static void common_vt_4t_16w_msa(uint8_t *src, int32_t src_stride,
out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
out2_l = FILT_4TAP_DPADD_S_H(src32_l, src54_l, filt0, filt1);
out3_l = FILT_4TAP_DPADD_S_H(src43_l, src65_l, filt0, filt1);
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
- SRAR_H4_SH(out0_l, out1_l, out2_l, out3_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
@@ -2696,8 +2597,7 @@ static void common_vt_4t_16w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_24w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
uint32_t loop_cnt;
uint64_t out0, out1;
@@ -2707,15 +2607,12 @@ static void common_vt_4t_24w_msa(uint8_t *src, int32_t src_stride,
v16i8 src109_r, src10_l, src32_l, src21_l, src43_l;
v16u8 out;
v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l;
- v8i16 rnd_vec;
src -= src_stride;
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
- rnd_vec = __msa_fill_h(rnd_val);
-
/* 16 width */
LD_SB3(src, src_stride, src0, src1, src2);
XORI_B3_128_SB(src0, src1, src2);
@@ -2752,8 +2649,8 @@ static void common_vt_4t_24w_msa(uint8_t *src, int32_t src_stride,
out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
/* 16 + 8 width */
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
- SRAR_H2_SH(out0_l, out1_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+ SRARI_H2_SH(out0_l, out1_l, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
SAT_SH2_SH(out0_l, out1_l, 7);
out = PCKEV_XORI128_UB(out0_r, out0_l);
@@ -2792,8 +2689,8 @@ static void common_vt_4t_24w_msa(uint8_t *src, int32_t src_stride,
out3_r = FILT_4TAP_DPADD_S_H(src109_r, src87_r, filt0, filt1);
/* 16 + 8 width */
- SRAR_H4_SH(out0_r, out1_r, out2_r, out3_r, rnd_vec);
- SRAR_H2_SH(out0_l, out1_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+ SRARI_H2_SH(out0_l, out1_l, 6);
SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
SAT_SH2_SH(out0_l, out1_l, 7);
out = PCKEV_XORI128_UB(out0_r, out0_l);
@@ -2812,7 +2709,7 @@ static void common_vt_4t_24w_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
const int8_t *filter, int32_t height,
- uint8_t rnd_val, int32_t width)
+ int32_t width)
{
uint32_t loop_cnt, cnt;
uint8_t *dst_tmp, *src_tmp;
@@ -2824,11 +2721,9 @@ static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
v16i8 src21_l, src43_l, src87_l, src109_l;
v8i16 filt;
v16i8 filt0, filt1;
- v8i16 rnd_vec;
v16u8 out;
src -= src_stride;
- rnd_vec = __msa_fill_h(rnd_val);
filt = LD_SH(filter);
SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
@@ -2866,7 +2761,7 @@ static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
/* 16 width */
- SRAR_H4_SH(out0_r, out1_r, out0_l, out1_l, rnd_vec);
+ SRARI_H4_SH(out0_r, out1_r, out0_l, out1_l, 6);
SAT_SH4_SH(out0_r, out1_r, out0_l, out1_l, 7);
out = PCKEV_XORI128_UB(out0_r, out0_l);
ST_UB(out, dst_tmp);
@@ -2893,7 +2788,7 @@ static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
out3_l = FILT_4TAP_DPADD_S_H(src87_l, src109_l, filt0, filt1);
/* next 16 width */
- SRAR_H4_SH(out2_r, out3_r, out2_l, out3_l, rnd_vec);
+ SRARI_H4_SH(out2_r, out3_r, out2_l, out3_l, 6);
SAT_SH4_SH(out2_r, out3_r, out2_l, out3_l, 7);
out = PCKEV_XORI128_UB(out2_r, out2_l);
ST_UB(out, dst_tmp + 16);
@@ -2916,11 +2811,10 @@ static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
static void common_vt_4t_32w_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- const int8_t *filter, int32_t height,
- uint8_t rnd_val)
+ const int8_t *filter, int32_t height)
{
common_vt_4t_32w_mult_msa(src, src_stride, dst, dst_stride,
- filter, height, rnd_val, 32);
+ filter, height, 32);
}
static void hevc_hv_uni_4t_4x2_msa(uint8_t *src,
@@ -3885,7 +3779,7 @@ void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \
const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1]; \
\
common_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride, \
- filter, height, 6); \
+ filter, height); \
}
UNI_MC(qpel, h, 4, 8, hz, mx);
--
1.7.9.5
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel at ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel
mailing list