00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00054 #include "libavutil/attributes.h"
00055 #include "libavutil/common.h"
00056 #include "libavcodec/celp_filters.h"
00057
00058 static void ff_celp_lp_synthesis_filterf_mips(float *out,
00059 const float *filter_coeffs,
00060 const float* in, int buffer_length,
00061 int filter_length)
00062 {
00063 int i,n;
00064
00065 float out0, out1, out2, out3;
00066 float old_out0, old_out1, old_out2, old_out3;
00067 float a,b,c;
00068 const float *p_filter_coeffs;
00069 float *p_out;
00070
00071 a = filter_coeffs[0];
00072 b = filter_coeffs[1];
00073 c = filter_coeffs[2];
00074 b -= filter_coeffs[0] * filter_coeffs[0];
00075 c -= filter_coeffs[1] * filter_coeffs[0];
00076 c -= filter_coeffs[0] * b;
00077
00078 old_out0 = out[-4];
00079 old_out1 = out[-3];
00080 old_out2 = out[-2];
00081 old_out3 = out[-1];
00082 for (n = 0; n <= buffer_length - 4; n+=4) {
00083 p_filter_coeffs = filter_coeffs;
00084 p_out = out;
00085
00086 out0 = in[0];
00087 out1 = in[1];
00088 out2 = in[2];
00089 out3 = in[3];
00090
00091 __asm__ volatile(
00092 "lwc1 $f2, 8(%[filter_coeffs]) \n\t"
00093 "lwc1 $f1, 4(%[filter_coeffs]) \n\t"
00094 "lwc1 $f0, 0(%[filter_coeffs]) \n\t"
00095 "nmsub.s %[out0], %[out0], $f2, %[old_out1] \n\t"
00096 "nmsub.s %[out1], %[out1], $f2, %[old_out2] \n\t"
00097 "nmsub.s %[out2], %[out2], $f2, %[old_out3] \n\t"
00098 "lwc1 $f3, 12(%[filter_coeffs]) \n\t"
00099 "nmsub.s %[out0], %[out0], $f1, %[old_out2] \n\t"
00100 "nmsub.s %[out1], %[out1], $f1, %[old_out3] \n\t"
00101 "nmsub.s %[out2], %[out2], $f3, %[old_out2] \n\t"
00102 "nmsub.s %[out0], %[out0], $f0, %[old_out3] \n\t"
00103 "nmsub.s %[out3], %[out3], $f3, %[old_out3] \n\t"
00104 "nmsub.s %[out1], %[out1], $f3, %[old_out1] \n\t"
00105 "nmsub.s %[out0], %[out0], $f3, %[old_out0] \n\t"
00106
00107 : [out0]"+f"(out0), [out1]"+f"(out1),
00108 [out2]"+f"(out2), [out3]"+f"(out3)
00109 : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
00110 [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
00111 [filter_coeffs]"r"(filter_coeffs)
00112 : "$f0", "$f1", "$f2", "$f3", "$f4"
00113 );
00114
00115 for (i = 5; i <= filter_length; i += 2) {
00116 __asm__ volatile(
00117 "lwc1 %[old_out3], -20(%[p_out]) \n\t"
00118 "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
00119 "addiu %[p_out], -8 \n\t"
00120 "addiu %[p_filter_coeffs], 8 \n\t"
00121 "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
00122 "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
00123 "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
00124 "lwc1 %[old_out2], -16(%[p_out]) \n\t"
00125 "nmsub.s %[out0], %[out0], $f5, %[old_out3] \n\t"
00126 "nmsub.s %[out2], %[out2], $f5, %[old_out1] \n\t"
00127 "nmsub.s %[out1], %[out1], $f4, %[old_out3] \n\t"
00128 "nmsub.s %[out3], %[out3], $f4, %[old_out1] \n\t"
00129 "mov.s %[old_out1], %[old_out3] \n\t"
00130 "nmsub.s %[out0], %[out0], $f4, %[old_out2] \n\t"
00131 "nmsub.s %[out2], %[out2], $f4, %[old_out0] \n\t"
00132
00133 : [out0]"+f"(out0), [out1]"+f"(out1),
00134 [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
00135 [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
00136 [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
00137 [p_out]"+r"(p_out)
00138 :
00139 : "$f4", "$f5"
00140 );
00141 FFSWAP(float, old_out0, old_out2);
00142 }
00143
00144 __asm__ volatile(
00145 "nmsub.s %[out3], %[out3], %[a], %[out2] \n\t"
00146 "nmsub.s %[out2], %[out2], %[a], %[out1] \n\t"
00147 "nmsub.s %[out3], %[out3], %[b], %[out1] \n\t"
00148 "nmsub.s %[out1], %[out1], %[a], %[out0] \n\t"
00149 "nmsub.s %[out2], %[out2], %[b], %[out0] \n\t"
00150 "nmsub.s %[out3], %[out3], %[c], %[out0] \n\t"
00151
00152 : [out0]"+f"(out0), [out1]"+f"(out1),
00153 [out2]"+f"(out2), [out3]"+f"(out3)
00154 : [a]"f"(a), [b]"f"(b), [c]"f"(c)
00155 );
00156
00157 out[0] = out0;
00158 out[1] = out1;
00159 out[2] = out2;
00160 out[3] = out3;
00161
00162 old_out0 = out0;
00163 old_out1 = out1;
00164 old_out2 = out2;
00165 old_out3 = out3;
00166
00167 out += 4;
00168 in += 4;
00169 }
00170
00171 out -= n;
00172 in -= n;
00173 for (; n < buffer_length; n++) {
00174 float out_val, out_val_i, fc_val;
00175 p_filter_coeffs = filter_coeffs;
00176 p_out = &out[n];
00177 out_val = in[n];
00178 for (i = 1; i <= filter_length; i++) {
00179 __asm__ volatile(
00180 "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
00181 "lwc1 %[out_val_i], -4(%[p_out]) \n\t"
00182 "addiu %[p_filter_coeffs], 4 \n\t"
00183 "addiu %[p_out], -4 \n\t"
00184 "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
00185
00186 : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
00187 [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
00188 [p_filter_coeffs]"+r"(p_filter_coeffs)
00189 );
00190 }
00191 out[n] = out_val;
00192 }
00193 }
00194
00195 static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
00196 const float *filter_coeffs,
00197 const float *in, int buffer_length,
00198 int filter_length)
00199 {
00200 int i,n;
00201 float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
00202 float sum_out3, sum_out2, sum_out1;
00203 const float *p_filter_coeffs, *p_in;
00204
00205 for (n = 0; n < buffer_length; n+=8) {
00206 p_in = &in[n];
00207 p_filter_coeffs = filter_coeffs;
00208 sum_out8 = in[n+7];
00209 sum_out7 = in[n+6];
00210 sum_out6 = in[n+5];
00211 sum_out5 = in[n+4];
00212 sum_out4 = in[n+3];
00213 sum_out3 = in[n+2];
00214 sum_out2 = in[n+1];
00215 sum_out1 = in[n];
00216 i = filter_length;
00217
00218
00219
00220
00221
00222 __asm__ volatile(
00223 "filt_lp_inner%=: \n\t"
00224 "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
00225 "lwc1 $f7, 6*4(%[p_in]) \n\t"
00226 "lwc1 $f6, 5*4(%[p_in]) \n\t"
00227 "lwc1 $f5, 4*4(%[p_in]) \n\t"
00228 "lwc1 $f4, 3*4(%[p_in]) \n\t"
00229 "lwc1 $f3, 2*4(%[p_in]) \n\t"
00230 "lwc1 $f2, 4(%[p_in]) \n\t"
00231 "lwc1 $f1, 0(%[p_in]) \n\t"
00232 "lwc1 $f0, -4(%[p_in]) \n\t"
00233 "addiu %[i], -2 \n\t"
00234 "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f7 \n\t"
00235 "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f6 \n\t"
00236 "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f5 \n\t"
00237 "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f4 \n\t"
00238 "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f3 \n\t"
00239 "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f2 \n\t"
00240 "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f1 \n\t"
00241 "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
00242 "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
00243 "lwc1 $f7, -8(%[p_in]) \n\t"
00244 "addiu %[p_filter_coeffs], 8 \n\t"
00245 "addiu %[p_in], -8 \n\t"
00246 "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
00247 "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
00248 "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"
00249 "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f3 \n\t"
00250 "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f2 \n\t"
00251 "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f1 \n\t"
00252 "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f0 \n\t"
00253 "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f7 \n\t"
00254 "bgtz %[i], filt_lp_inner%= \n\t"
00255
00256 : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
00257 [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
00258 [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
00259 [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
00260 [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
00261 [p_in]"+r"(p_in), [i]"+r"(i)
00262 :
00263 : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7"
00264 );
00265
00266 out[n+7] = sum_out8;
00267 out[n+6] = sum_out7;
00268 out[n+5] = sum_out6;
00269 out[n+4] = sum_out5;
00270 out[n+3] = sum_out4;
00271 out[n+2] = sum_out3;
00272 out[n+1] = sum_out2;
00273 out[n] = sum_out1;
00274 }
00275 }
00276
00277 void ff_celp_filter_init_mips(CELPFContext *c)
00278 {
00279 c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf_mips;
00280 c->celp_lp_zero_synthesis_filterf = ff_celp_lp_zero_synthesis_filterf_mips;
00281 }