[FFmpeg-devel] [PATCH 07/11] avcodec/mips: loongson optimize h264qpel with mmi v2

Tue May 24 03:43:54 CEST 2016

That is my fault, and thank you for pointing out the mistake. It should be:

diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
index d641a51..737c68c 100644
--- a/libavcodec/mips/h264qpel_mmi.c
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -1901,9 +1901,9 @@ static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
             : "memory"
         );
 
-        src8  += 2L * src8Stride;
+        src8  += 2 * src8Stride;
         src16 += 48;
-        dst   += 2L * dstStride;
+        dst   += 2 * dstStride;
     } while (h -= 2);
 }
 
@@ -2260,9 +2260,9 @@ static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
             : "memory"
         );
 
-        src8  += 2L * src8Stride;
+        src8  += 2 * src8Stride;
         src16 += 48;
-        dst   += 2L * dstStride;
+        dst   += 2 * dstStride;
     } while (b -= 2);
 }










At 2016-05-24 03:47:30, "Michael Niedermayer" <michael at niedermayer.cc> wrote:
>On Tue, May 17, 2016 at 03:08:13PM +0800, 周晓勇 wrote:
>> avcodec/mips/h264qpel_mmi: Version 2 of the optimizations for loongson mmi
>>     
>>     1. no longer use the register names directly and optimized code format
>>     2. to be compatible with O32, specify type of address variable with mips_reg and handle the address variable with PTR_ operator
>>     3. temporarily annotated func put_(avg_)h264_qpel16_hv_lowpass_mmi and related funcs which couldn't pass fate testing in O32 ABI
>>     4. use uld and mtc1 to workaround cpu 3A2000 gslwlc1 bug (gslwlc1 instruction extension bug in O32 ABI)
>>     5. put_pixels_ and avg_pixels_ functions use hpeldsp optimizations instead
>
>[...]
>> @@ -1373,161 +1412,589 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
>>      }
>>  }
>>  
>> -static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
>> -        int dstStride, int srcStride)
>> -{
>> -    int16_t _tmp[104];
>> -    int16_t *tmp = _tmp;
>> -    int i;
>> -    src -= 2*srcStride;
>> +static inline void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
>> +        const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
>> +{
>> +    int w = (size + 8) >> 2;
>> +    double ftmp[11];
>> +    uint64_t tmp0;
>> +    uint64_t low32;
>> +
>> +    src -= 2 * srcStride + 2;
>[...]
>
>> +        src8  += 2L * src8Stride;
>> +        src16 += 48;
>> +        dst   += 2L * dstStride;
>
>Why does this use long types instead of ints while other code uses
>ints?
>
>> +    } while (h -= 2);
>> +}
>> +
>> +static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
>> +        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
>> +{
>> +    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
>> +    put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
>> +            src2Stride);
>> +
>> +    src += 8 * dstStride;
>> +    dst += 8 * dstStride;
>> +    src2 += 8 * src2Stride;
>
>
>
>[...]
>
>-- 
>Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>I do not agree with what you have to say, but I'll defend to the death your
>right to say it. -- Voltaire