00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "config.h"
00048 #include "libavcodec/dsputil.h"
00049
#if defined(__mips__)
/**
 * Window four consecutive sample pairs using MIPS FPU instructions.
 *
 * For k = 0..3 this stores
 *     dst_i[ k] = s0[k] * wj[-k] - s1[-k] * wi[ k]
 *     dst_j[-k] = s0[k] * wi[ k] + s1[-k] * wj[-k]
 *
 * s0, wi and dst_i are walked upwards; s1, wj and dst_j are walked downwards.
 * All inputs are passed to the assembly by value in FPU registers, so every
 * input is read before the first store is issued.
 */
static inline void fmul_window_quad_mips(float *dst_i, float *dst_j,
                                         const float *s0, const float *s1,
                                         const float *wi, const float *wj)
{
    /* scratch FPU registers used by the assembly below */
    float temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

    /*
     * "msub.s fd, fr, fs, ft" yields fs * ft - fr and
     * "madd.s fd, fr, fs, ft" yields fs * ft + fr, so each mul/msub and
     * mul/madd pair below forms one difference / sum of two products.
     */
    __asm__ volatile (
        "mul.s   %[temp],   %[s1],    %[wi]            \n\t"
        "mul.s   %[temp1],  %[s1],    %[wj]            \n\t"
        "mul.s   %[temp2],  %[s11],   %[wi1]           \n\t"
        "mul.s   %[temp3],  %[s11],   %[wj1]           \n\t"

        "msub.s  %[temp],   %[temp],  %[s0],  %[wj]    \n\t"
        "madd.s  %[temp1],  %[temp1], %[s0],  %[wi]    \n\t"
        "msub.s  %[temp2],  %[temp2], %[s01], %[wj1]   \n\t"
        "madd.s  %[temp3],  %[temp3], %[s01], %[wi1]   \n\t"

        "swc1    %[temp],   0(%[dst_i])                \n\t"
        "swc1    %[temp1],  0(%[dst_j])                \n\t"
        "swc1    %[temp2],  4(%[dst_i])                \n\t"
        "swc1    %[temp3], -4(%[dst_j])                \n\t"

        "mul.s   %[temp4],  %[s12],   %[wi2]           \n\t"
        "mul.s   %[temp5],  %[s12],   %[wj2]           \n\t"
        "mul.s   %[temp6],  %[s13],   %[wi3]           \n\t"
        "mul.s   %[temp7],  %[s13],   %[wj3]           \n\t"

        "msub.s  %[temp4],  %[temp4], %[s02], %[wj2]   \n\t"
        "madd.s  %[temp5],  %[temp5], %[s02], %[wi2]   \n\t"
        "msub.s  %[temp6],  %[temp6], %[s03], %[wj3]   \n\t"
        "madd.s  %[temp7],  %[temp7], %[s03], %[wi3]   \n\t"

        "swc1    %[temp4],  8(%[dst_i])                \n\t"
        "swc1    %[temp5], -8(%[dst_j])                \n\t"
        "swc1    %[temp6],  12(%[dst_i])               \n\t"
        "swc1    %[temp7], -12(%[dst_j])               \n\t"
        : [temp]"=&f"(temp),   [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7)
        : [dst_j]"r"(dst_j),   [dst_i]"r"(dst_i),
          [s0] "f"(s0[0]),  [wj] "f"(wj[0]),  [s1] "f"(s1[0]),  [wi] "f"(wi[0]),
          [s01]"f"(s0[1]),  [wj1]"f"(wj[-1]), [s11]"f"(s1[-1]), [wi1]"f"(wi[1]),
          [s02]"f"(s0[2]),  [wj2]"f"(wj[-2]), [s12]"f"(s1[-2]), [wi2]"f"(wi[2]),
          [s03]"f"(s0[3]),  [wj3]"f"(wj[-3]), [s13]"f"(s1[-3]), [wi3]"f"(wi[3])
        : "memory"   /* the swc1 stores write through dst_i / dst_j */
    );
}
#endif /* defined(__mips__) */

/**
 * Windowed overlap of two vectors.
 *
 * With the pointers re-based as in the body (dst, win and src0 advanced by
 * len), for every i in [-len, -1] and j = -1 - i this computes
 *     dst[i] = src0[i] * win[j] - src1[j] * win[i]
 *     dst[j] = src0[i] * win[i] + src1[j] * win[j]
 *
 * @param dst  output, 2 * len floats are written
 * @param src0 first input, len floats are read
 * @param src1 second input, len floats are read (paired in reverse order)
 * @param win  window, 2 * len floats are read
 * @param len  number of sample pairs; nothing is done when len <= 0
 *
 * The assembly kernel consumes 8 pairs per iteration. It is only entered
 * while at least 8 pairs remain; any remainder is finished by the scalar
 * loop, so len does not have to be a multiple of 8 and no element outside
 * the ranges above is touched.
 */
static void vector_fmul_window_mips(float *dst, const float *src0,
                                    const float *src1, const float *win, int len)
{
    int i, j;

    dst  += len;
    win  += len;
    src0 += len;

    i = -len;
    j = len - 1;

#if defined(__mips__)
    /* unrolled path: two groups of four pairs per iteration */
    for (; i <= -8; i += 8, j -= 8) {
        fmul_window_quad_mips(dst + i, dst + j,
                              src0 + i, src1 + j,
                              win + i, win + j);
        fmul_window_quad_mips(dst + i + 4, dst + j - 4,
                              src0 + i + 4, src1 + j - 4,
                              win + i + 4, win + j - 4);
    }
#endif

    /* scalar path: leftover pairs (and everything on non-MIPS targets) */
    for (; i < 0; i++, j--) {
        /* read all four inputs before storing either result */
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];

        dst[i] = s0 * wj - s1 * wi;
        dst[j] = s0 * wi + s1 * wj;
    }
}
00160
00161 av_cold void ff_dsputil_init_mips( DSPContext* c, AVCodecContext *avctx )
00162 {
00163 c->vector_fmul_window = vector_fmul_window_mips;
00164 }