00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00054 #include "config.h"
00055 #include "libavutil/attributes.h"
00056 #include "libavutil/common.h"
00057 #include "libavcodec/celp_filters.h"
00058
00059 #if HAVE_INLINE_ASM
00060 static void ff_celp_lp_synthesis_filterf_mips(float *out,
00061 const float *filter_coeffs,
00062 const float* in, int buffer_length,
00063 int filter_length)
00064 {
00065 int i,n;
00066
00067 float out0, out1, out2, out3;
00068 float old_out0, old_out1, old_out2, old_out3;
00069 float a,b,c;
00070 const float *p_filter_coeffs;
00071 float *p_out;
00072
00073 a = filter_coeffs[0];
00074 b = filter_coeffs[1];
00075 c = filter_coeffs[2];
00076 b -= filter_coeffs[0] * filter_coeffs[0];
00077 c -= filter_coeffs[1] * filter_coeffs[0];
00078 c -= filter_coeffs[0] * b;
00079
00080 old_out0 = out[-4];
00081 old_out1 = out[-3];
00082 old_out2 = out[-2];
00083 old_out3 = out[-1];
00084 for (n = 0; n <= buffer_length - 4; n+=4) {
00085 p_filter_coeffs = filter_coeffs;
00086 p_out = out;
00087
00088 out0 = in[0];
00089 out1 = in[1];
00090 out2 = in[2];
00091 out3 = in[3];
00092
00093 __asm__ volatile(
00094 "lwc1 $f2, 8(%[filter_coeffs]) \n\t"
00095 "lwc1 $f1, 4(%[filter_coeffs]) \n\t"
00096 "lwc1 $f0, 0(%[filter_coeffs]) \n\t"
00097 "nmsub.s %[out0], %[out0], $f2, %[old_out1] \n\t"
00098 "nmsub.s %[out1], %[out1], $f2, %[old_out2] \n\t"
00099 "nmsub.s %[out2], %[out2], $f2, %[old_out3] \n\t"
00100 "lwc1 $f3, 12(%[filter_coeffs]) \n\t"
00101 "nmsub.s %[out0], %[out0], $f1, %[old_out2] \n\t"
00102 "nmsub.s %[out1], %[out1], $f1, %[old_out3] \n\t"
00103 "nmsub.s %[out2], %[out2], $f3, %[old_out2] \n\t"
00104 "nmsub.s %[out0], %[out0], $f0, %[old_out3] \n\t"
00105 "nmsub.s %[out3], %[out3], $f3, %[old_out3] \n\t"
00106 "nmsub.s %[out1], %[out1], $f3, %[old_out1] \n\t"
00107 "nmsub.s %[out0], %[out0], $f3, %[old_out0] \n\t"
00108
00109 : [out0]"+f"(out0), [out1]"+f"(out1),
00110 [out2]"+f"(out2), [out3]"+f"(out3)
00111 : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
00112 [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
00113 [filter_coeffs]"r"(filter_coeffs)
00114 : "$f0", "$f1", "$f2", "$f3", "$f4"
00115 );
00116
00117 for (i = 5; i <= filter_length; i += 2) {
00118 __asm__ volatile(
00119 "lwc1 %[old_out3], -20(%[p_out]) \n\t"
00120 "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
00121 "addiu %[p_out], -8 \n\t"
00122 "addiu %[p_filter_coeffs], 8 \n\t"
00123 "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
00124 "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
00125 "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
00126 "lwc1 %[old_out2], -16(%[p_out]) \n\t"
00127 "nmsub.s %[out0], %[out0], $f5, %[old_out3] \n\t"
00128 "nmsub.s %[out2], %[out2], $f5, %[old_out1] \n\t"
00129 "nmsub.s %[out1], %[out1], $f4, %[old_out3] \n\t"
00130 "nmsub.s %[out3], %[out3], $f4, %[old_out1] \n\t"
00131 "mov.s %[old_out1], %[old_out3] \n\t"
00132 "nmsub.s %[out0], %[out0], $f4, %[old_out2] \n\t"
00133 "nmsub.s %[out2], %[out2], $f4, %[old_out0] \n\t"
00134
00135 : [out0]"+f"(out0), [out1]"+f"(out1),
00136 [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
00137 [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
00138 [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
00139 [p_out]"+r"(p_out)
00140 :
00141 : "$f4", "$f5"
00142 );
00143 FFSWAP(float, old_out0, old_out2);
00144 }
00145
00146 __asm__ volatile(
00147 "nmsub.s %[out3], %[out3], %[a], %[out2] \n\t"
00148 "nmsub.s %[out2], %[out2], %[a], %[out1] \n\t"
00149 "nmsub.s %[out3], %[out3], %[b], %[out1] \n\t"
00150 "nmsub.s %[out1], %[out1], %[a], %[out0] \n\t"
00151 "nmsub.s %[out2], %[out2], %[b], %[out0] \n\t"
00152 "nmsub.s %[out3], %[out3], %[c], %[out0] \n\t"
00153
00154 : [out0]"+f"(out0), [out1]"+f"(out1),
00155 [out2]"+f"(out2), [out3]"+f"(out3)
00156 : [a]"f"(a), [b]"f"(b), [c]"f"(c)
00157 );
00158
00159 out[0] = out0;
00160 out[1] = out1;
00161 out[2] = out2;
00162 out[3] = out3;
00163
00164 old_out0 = out0;
00165 old_out1 = out1;
00166 old_out2 = out2;
00167 old_out3 = out3;
00168
00169 out += 4;
00170 in += 4;
00171 }
00172
00173 out -= n;
00174 in -= n;
00175 for (; n < buffer_length; n++) {
00176 float out_val, out_val_i, fc_val;
00177 p_filter_coeffs = filter_coeffs;
00178 p_out = &out[n];
00179 out_val = in[n];
00180 for (i = 1; i <= filter_length; i++) {
00181 __asm__ volatile(
00182 "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
00183 "lwc1 %[out_val_i], -4(%[p_out]) \n\t"
00184 "addiu %[p_filter_coeffs], 4 \n\t"
00185 "addiu %[p_out], -4 \n\t"
00186 "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
00187
00188 : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
00189 [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
00190 [p_filter_coeffs]"+r"(p_filter_coeffs)
00191 );
00192 }
00193 out[n] = out_val;
00194 }
00195 }
00196
00197 static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
00198 const float *filter_coeffs,
00199 const float *in, int buffer_length,
00200 int filter_length)
00201 {
00202 int i,n;
00203 float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
00204 float sum_out3, sum_out2, sum_out1;
00205 const float *p_filter_coeffs, *p_in;
00206
00207 for (n = 0; n < buffer_length; n+=8) {
00208 p_in = &in[n];
00209 p_filter_coeffs = filter_coeffs;
00210 sum_out8 = in[n+7];
00211 sum_out7 = in[n+6];
00212 sum_out6 = in[n+5];
00213 sum_out5 = in[n+4];
00214 sum_out4 = in[n+3];
00215 sum_out3 = in[n+2];
00216 sum_out2 = in[n+1];
00217 sum_out1 = in[n];
00218 i = filter_length;
00219
00220
00221
00222
00223
00224 __asm__ volatile(
00225 "filt_lp_inner%=: \n\t"
00226 "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
00227 "lwc1 $f7, 6*4(%[p_in]) \n\t"
00228 "lwc1 $f6, 5*4(%[p_in]) \n\t"
00229 "lwc1 $f5, 4*4(%[p_in]) \n\t"
00230 "lwc1 $f4, 3*4(%[p_in]) \n\t"
00231 "lwc1 $f3, 2*4(%[p_in]) \n\t"
00232 "lwc1 $f2, 4(%[p_in]) \n\t"
00233 "lwc1 $f1, 0(%[p_in]) \n\t"
00234 "lwc1 $f0, -4(%[p_in]) \n\t"
00235 "addiu %[i], -2 \n\t"
00236 "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f7 \n\t"
00237 "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f6 \n\t"
00238 "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f5 \n\t"
00239 "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f4 \n\t"
00240 "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f3 \n\t"
00241 "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f2 \n\t"
00242 "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f1 \n\t"
00243 "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
00244 "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
00245 "lwc1 $f7, -8(%[p_in]) \n\t"
00246 "addiu %[p_filter_coeffs], 8 \n\t"
00247 "addiu %[p_in], -8 \n\t"
00248 "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
00249 "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
00250 "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"
00251 "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f3 \n\t"
00252 "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f2 \n\t"
00253 "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f1 \n\t"
00254 "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f0 \n\t"
00255 "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f7 \n\t"
00256 "bgtz %[i], filt_lp_inner%= \n\t"
00257
00258 : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
00259 [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
00260 [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
00261 [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
00262 [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
00263 [p_in]"+r"(p_in), [i]"+r"(i)
00264 :
00265 : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7"
00266 );
00267
00268 out[n+7] = sum_out8;
00269 out[n+6] = sum_out7;
00270 out[n+5] = sum_out6;
00271 out[n+4] = sum_out5;
00272 out[n+3] = sum_out4;
00273 out[n+2] = sum_out3;
00274 out[n+1] = sum_out2;
00275 out[n] = sum_out1;
00276 }
00277 }
00278 #endif
00279
00280 void ff_celp_filter_init_mips(CELPFContext *c)
00281 {
00282 #if HAVE_INLINE_ASM
00283 c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf_mips;
00284 c->celp_lp_zero_synthesis_filterf = ff_celp_lp_zero_synthesis_filterf_mips;
00285 #endif
00286 }