[FFmpeg-devel] [PATCH] avutil/mips: Avoid instruction exception caused by gssqc1/gslqc1.
Reimar Döffinger
Reimar.Doeffinger at gmx.de
Wed Jul 24 02:34:13 EEST 2019
Why is "block" not aligned? Does the code for other architectures also use unaligned instructions for these?
On 23.07.2019, at 09:27, Shiyou Yin <yinshiyou-hf at loongson.cn> wrote:
> Ensure the addresses accessed by gssqc1/gslqc1 are 16-byte aligned.
> ---
> libavcodec/mips/h264dsp_mmi.c | 48 +++++++++++++-----------------------
> libavcodec/mips/simple_idct_mmi.c | 51 +++++++++++++++++++++++++--------------
> libavutil/mips/mmiutils.h | 2 +-
> 3 files changed, 51 insertions(+), 50 deletions(-)
>
> diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
> index ac65a20..a85d782 100644
> --- a/libavcodec/mips/h264dsp_mmi.c
> +++ b/libavcodec/mips/h264dsp_mmi.c
> @@ -38,6 +38,11 @@ void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
> MMI_LDC1(%[ftmp2], %[src], 0x08)
> MMI_LDC1(%[ftmp3], %[src], 0x10)
> MMI_LDC1(%[ftmp4], %[src], 0x18)
> + /* memset(src, 0, 32); */
> + MMI_USDC1(%[ftmp0], %[src], 0x00)
> + MMI_USDC1(%[ftmp0], %[src], 0x08)
> + MMI_USDC1(%[ftmp0], %[src], 0x10)
> + MMI_USDC1(%[ftmp0], %[src], 0x18)
> MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
> MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
> MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
> @@ -58,11 +63,6 @@ void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
> MMI_SWC1(%[ftmp2], %[dst1], 0x00)
> MMI_SWC1(%[ftmp3], %[dst2], 0x00)
> MMI_SWC1(%[ftmp4], %[dst3], 0x00)
> -
> - /* memset(src, 0, 32); */
> - "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[src]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[src]) \n\t"
> : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> @@ -85,15 +85,21 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
> DECLARE_VAR_ADDRT;
>
> __asm__ volatile (
> - "dli %[tmp0], 0x01 \n\t"
> MMI_LDC1(%[ftmp0], %[block], 0x00)
> - "mtc1 %[tmp0], %[ftmp8] \n\t"
> MMI_LDC1(%[ftmp1], %[block], 0x08)
> - "dli %[tmp0], 0x06 \n\t"
> MMI_LDC1(%[ftmp2], %[block], 0x10)
> + MMI_LDC1(%[ftmp3], %[block], 0x18)
> + /* memset(block, 0, 32) */
> + "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
> + MMI_USDC1(%[ftmp4], %[block], 0x00)
> + MMI_USDC1(%[ftmp4], %[block], 0x08)
> + MMI_USDC1(%[ftmp4], %[block], 0x10)
> + MMI_USDC1(%[ftmp4], %[block], 0x18)
> + "dli %[tmp0], 0x01 \n\t"
> + "mtc1 %[tmp0], %[ftmp8] \n\t"
> + "dli %[tmp0], 0x06 \n\t"
> "mtc1 %[tmp0], %[ftmp9] \n\t"
> "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
> - MMI_LDC1(%[ftmp3], %[block], 0x18)
> "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
> "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
> "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
> @@ -121,15 +127,11 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
> "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
> "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
> "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
> - "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
> "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
> - MMI_SDC1(%[ftmp7], %[block], 0x00)
> - MMI_SDC1(%[ftmp7], %[block], 0x08)
> - MMI_SDC1(%[ftmp7], %[block], 0x10)
> - MMI_SDC1(%[ftmp7], %[block], 0x18)
> MMI_ULWC1(%[ftmp2], %[dst], 0x00)
> - "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
> MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
> + "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
> + "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
> "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
> "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
> "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
> @@ -153,11 +155,6 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
> MMI_SWC1(%[ftmp2], %[dst], 0x00)
> "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
> MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
> -
> - /* memset(block, 0, 32) */
> - "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[block]) \n\t"
> : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> @@ -620,17 +617,6 @@ void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
> MMI_SWC1(%[ftmp6], %[addr0], 0x00)
> MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
> PTR_ADDIU "$29, $29, 0x20 \n\t"
> -
> - /* memset(block, 0, 128) */
> - "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x20(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x30(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x40(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x50(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x60(%[block]) \n\t"
> - "gssqc1 %[ftmp0], %[ftmp0], 0x70(%[block]) \n\t"
> : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> diff --git a/libavcodec/mips/simple_idct_mmi.c b/libavcodec/mips/simple_idct_mmi.c
> index 7f4bb74..f54f9ea 100644
> --- a/libavcodec/mips/simple_idct_mmi.c
> +++ b/libavcodec/mips/simple_idct_mmi.c
> @@ -39,7 +39,7 @@
> #define COL_SHIFT 20
> #define DC_SHIFT 3
>
> -DECLARE_ALIGNED(8, const int16_t, W_arr)[46] = {
> +DECLARE_ALIGNED(16, const int16_t, W_arr)[46] = {
> W4, W2, W4, W6,
> W1, W3, W5, W7,
> W4, W6, -W4, -W2,
> @@ -147,14 +147,22 @@ void ff_simple_idct_8_mmi(int16_t *block)
> "gslqc1 $f25, $f24, 0x30(%[w_arr]) \n\t"
> "gslqc1 $f17, $f16, 0x40(%[w_arr]) \n\t"
> /* load source in block */
> - "gslqc1 $f1, $f0, 0x00(%[block]) \n\t"
> - "gslqc1 $f3, $f2, 0x10(%[block]) \n\t"
> - "gslqc1 $f5, $f4, 0x20(%[block]) \n\t"
> - "gslqc1 $f7, $f6, 0x30(%[block]) \n\t"
> - "gslqc1 $f9, $f8, 0x40(%[block]) \n\t"
> - "gslqc1 $f11, $f10, 0x50(%[block]) \n\t"
> - "gslqc1 $f13, $f12, 0x60(%[block]) \n\t"
> - "gslqc1 $f15, $f14, 0x70(%[block]) \n\t"
> + MMI_ULDC1($f0, %[block], 0x00)
> + MMI_ULDC1($f1, %[block], 0x08)
> + MMI_ULDC1($f2, %[block], 0x10)
> + MMI_ULDC1($f3, %[block], 0x18)
> + MMI_ULDC1($f4, %[block], 0x20)
> + MMI_ULDC1($f5, %[block], 0x28)
> + MMI_ULDC1($f6, %[block], 0x30)
> + MMI_ULDC1($f7, %[block], 0x38)
> + MMI_ULDC1($f8, %[block], 0x40)
> + MMI_ULDC1($f9, %[block], 0x48)
> + MMI_ULDC1($f10, %[block], 0x50)
> + MMI_ULDC1($f11, %[block], 0x58)
> + MMI_ULDC1($f12, %[block], 0x60)
> + MMI_ULDC1($f13, %[block], 0x68)
> + MMI_ULDC1($f14, %[block], 0x70)
> + MMI_ULDC1($f15, %[block], 0x78)
>
> /* $9: mask ; $f17: ROW_SHIFT */
> "dmfc1 $9, $f17 \n\t"
> @@ -394,15 +402,22 @@ void ff_simple_idct_8_mmi(int16_t *block)
> "punpcklwd $f11, $f27, $f29 \n\t"
> "punpckhwd $f15, $f27, $f29 \n\t"
> /* Store */
> - "gssqc1 $f1, $f0, 0x00(%[block]) \n\t"
> - "gssqc1 $f5, $f4, 0x10(%[block]) \n\t"
> - "gssqc1 $f9, $f8, 0x20(%[block]) \n\t"
> - "gssqc1 $f13, $f12, 0x30(%[block]) \n\t"
> - "gssqc1 $f3, $f2, 0x40(%[block]) \n\t"
> - "gssqc1 $f7, $f6, 0x50(%[block]) \n\t"
> - "gssqc1 $f11, $f10, 0x60(%[block]) \n\t"
> - "gssqc1 $f15, $f14, 0x70(%[block]) \n\t"
> -
> + MMI_USDC1($f0, %[block], 0X00)
> + MMI_USDC1($f1, %[block], 0X08)
> + MMI_USDC1($f4, %[block], 0X10)
> + MMI_USDC1($f5, %[block], 0X18)
> + MMI_USDC1($f8, %[block], 0X20)
> + MMI_USDC1($f9, %[block], 0X28)
> + MMI_USDC1($f12, %[block], 0X30)
> + MMI_USDC1($f13, %[block], 0X38)
> + MMI_USDC1($f2, %[block], 0X40)
> + MMI_USDC1($f3, %[block], 0X48)
> + MMI_USDC1($f6, %[block], 0X50)
> + MMI_USDC1($f7, %[block], 0X58)
> + MMI_USDC1($f10, %[block], 0X60)
> + MMI_USDC1($f11, %[block], 0X68)
> + MMI_USDC1($f14, %[block], 0X70)
> + MMI_USDC1($f15, %[block], 0X78)
> : [block]"+&r"(block)
> : [w_arr]"r"(W_arr)
> : "memory"
> diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
> index 05f6b31..bfa6d8b 100644
> --- a/libavutil/mips/mmiutils.h
> +++ b/libavutil/mips/mmiutils.h
> @@ -205,7 +205,7 @@
> * backup register
> */
> #define BACKUP_REG \
> - double temp_backup_reg[8]; \
> + double __attribute__ ((aligned (16))) temp_backup_reg[8]; \
> if (_MIPS_SIM == _ABI64) \
> __asm__ volatile ( \
> "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
> --
> 2.1.0
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel mailing list