00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "config.h"
00048 #include "libavcodec/dsputil.h"
00049
00050 #if HAVE_INLINE_ASM
00051 static void vector_fmul_window_mips(float *dst, const float *src0,
00052 const float *src1, const float *win, int len)
00053 {
00054 int i, j;
00055
00056
00057
00058 float * dst_i, * dst_j, * dst_i2, * dst_j2;
00059 float temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
00060
00061 dst += len;
00062 win += len;
00063 src0 += len;
00064
00065 for (i = -len, j = len - 1; i < 0; i += 8, j -= 8) {
00066
00067 dst_i = dst + i;
00068 dst_j = dst + j;
00069
00070 dst_i2 = dst + i + 4;
00071 dst_j2 = dst + j - 4;
00072
00073 __asm__ volatile (
00074 "mul.s %[temp], %[s1], %[wi] \n\t"
00075 "mul.s %[temp1], %[s1], %[wj] \n\t"
00076 "mul.s %[temp2], %[s11], %[wi1] \n\t"
00077 "mul.s %[temp3], %[s11], %[wj1] \n\t"
00078
00079 "msub.s %[temp], %[temp], %[s0], %[wj] \n\t"
00080 "madd.s %[temp1], %[temp1], %[s0], %[wi] \n\t"
00081 "msub.s %[temp2], %[temp2], %[s01], %[wj1] \n\t"
00082 "madd.s %[temp3], %[temp3], %[s01], %[wi1] \n\t"
00083
00084 "swc1 %[temp], 0(%[dst_i]) \n\t"
00085 "swc1 %[temp1], 0(%[dst_j]) \n\t"
00086 "swc1 %[temp2], 4(%[dst_i]) \n\t"
00087 "swc1 %[temp3], -4(%[dst_j]) \n\t"
00088
00089 "mul.s %[temp4], %[s12], %[wi2] \n\t"
00090 "mul.s %[temp5], %[s12], %[wj2] \n\t"
00091 "mul.s %[temp6], %[s13], %[wi3] \n\t"
00092 "mul.s %[temp7], %[s13], %[wj3] \n\t"
00093
00094 "msub.s %[temp4], %[temp4], %[s02], %[wj2] \n\t"
00095 "madd.s %[temp5], %[temp5], %[s02], %[wi2] \n\t"
00096 "msub.s %[temp6], %[temp6], %[s03], %[wj3] \n\t"
00097 "madd.s %[temp7], %[temp7], %[s03], %[wi3] \n\t"
00098
00099 "swc1 %[temp4], 8(%[dst_i]) \n\t"
00100 "swc1 %[temp5], -8(%[dst_j]) \n\t"
00101 "swc1 %[temp6], 12(%[dst_i]) \n\t"
00102 "swc1 %[temp7], -12(%[dst_j]) \n\t"
00103 : [temp]"=&f"(temp), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
00104 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
00105 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7)
00106 : [dst_j]"r"(dst_j), [dst_i]"r" (dst_i),
00107 [s0] "f"(src0[i]), [wj] "f"(win[j]), [s1] "f"(src1[j]),
00108 [wi] "f"(win[i]), [s01]"f"(src0[i + 1]),[wj1]"f"(win[j - 1]),
00109 [s11]"f"(src1[j - 1]), [wi1]"f"(win[i + 1]), [s02]"f"(src0[i + 2]),
00110 [wj2]"f"(win[j - 2]), [s12]"f"(src1[j - 2]),[wi2]"f"(win[i + 2]),
00111 [s03]"f"(src0[i + 3]), [wj3]"f"(win[j - 3]), [s13]"f"(src1[j - 3]),
00112 [wi3]"f"(win[i + 3])
00113 : "memory"
00114 );
00115
00116 __asm__ volatile (
00117 "mul.s %[temp], %[s1], %[wi] \n\t"
00118 "mul.s %[temp1], %[s1], %[wj] \n\t"
00119 "mul.s %[temp2], %[s11], %[wi1] \n\t"
00120 "mul.s %[temp3], %[s11], %[wj1] \n\t"
00121
00122 "msub.s %[temp], %[temp], %[s0], %[wj] \n\t"
00123 "madd.s %[temp1], %[temp1], %[s0], %[wi] \n\t"
00124 "msub.s %[temp2], %[temp2], %[s01], %[wj1] \n\t"
00125 "madd.s %[temp3], %[temp3], %[s01], %[wi1] \n\t"
00126
00127 "swc1 %[temp], 0(%[dst_i2]) \n\t"
00128 "swc1 %[temp1], 0(%[dst_j2]) \n\t"
00129 "swc1 %[temp2], 4(%[dst_i2]) \n\t"
00130 "swc1 %[temp3], -4(%[dst_j2]) \n\t"
00131
00132 "mul.s %[temp4], %[s12], %[wi2] \n\t"
00133 "mul.s %[temp5], %[s12], %[wj2] \n\t"
00134 "mul.s %[temp6], %[s13], %[wi3] \n\t"
00135 "mul.s %[temp7], %[s13], %[wj3] \n\t"
00136
00137 "msub.s %[temp4], %[temp4], %[s02], %[wj2] \n\t"
00138 "madd.s %[temp5], %[temp5], %[s02], %[wi2] \n\t"
00139 "msub.s %[temp6], %[temp6], %[s03], %[wj3] \n\t"
00140 "madd.s %[temp7], %[temp7], %[s03], %[wi3] \n\t"
00141
00142 "swc1 %[temp4], 8(%[dst_i2]) \n\t"
00143 "swc1 %[temp5], -8(%[dst_j2]) \n\t"
00144 "swc1 %[temp6], 12(%[dst_i2]) \n\t"
00145 "swc1 %[temp7], -12(%[dst_j2]) \n\t"
00146 : [temp]"=&f"(temp),
00147 [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
00148 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
00149 [temp7] "=&f" (temp7)
00150 : [dst_j2]"r"(dst_j2), [dst_i2]"r"(dst_i2),
00151 [s0] "f"(src0[i + 4]), [wj] "f"(win[j - 4]), [s1] "f"(src1[j - 4]),
00152 [wi] "f"(win[i + 4]), [s01]"f"(src0[i + 5]),[wj1]"f"(win[j - 5]),
00153 [s11]"f"(src1[j - 5]), [wi1]"f"(win[i + 5]), [s02]"f"(src0[i + 6]),
00154 [wj2]"f"(win[j - 6]), [s12]"f"(src1[j - 6]),[wi2]"f"(win[i + 6]),
00155 [s03]"f"(src0[i + 7]), [wj3]"f"(win[j - 7]), [s13]"f"(src1[j - 7]),
00156 [wi3]"f"(win[i + 7])
00157 : "memory"
00158 );
00159 }
00160 }
00161 #endif
00162
00163 av_cold void ff_dsputil_init_mips( DSPContext* c, AVCodecContext *avctx )
00164 {
00165 #if HAVE_INLINE_ASM
00166 c->vector_fmul_window = vector_fmul_window_mips;
00167 #endif
00168 }