/* End pointer for the loop below: src advanced by count elements. */
67 const float *loop_end =
src + count;
/* Unrolled body, part 1: load eight consecutive 32-bit words from src
 * (byte offsets 0..28) into the eight scratch operands. */
78 "lw %[temp0], 0(%[src]) \n\t"
79 "lw %[temp1], 4(%[src]) \n\t"
80 "lw %[temp2], 8(%[src]) \n\t"
81 "lw %[temp3], 12(%[src]) \n\t"
82 "lw %[temp4], 16(%[src]) \n\t"
83 "lw %[temp5], 20(%[src]) \n\t"
84 "lw %[temp6], 24(%[src]) \n\t"
85 "lw %[temp7], 28(%[src]) \n\t"
/* Unrolled body, part 2: store the same eight words to dst at the matching
 * byte offsets. */
87 "sw %[temp0], 0(%[dst]) \n\t"
88 "sw %[temp1], 4(%[dst]) \n\t"
89 "sw %[temp2], 8(%[dst]) \n\t"
90 "sw %[temp3], 12(%[dst]) \n\t"
91 "sw %[temp4], 16(%[dst]) \n\t"
92 "sw %[temp5], 20(%[dst]) \n\t"
93 "sw %[temp6], 24(%[dst]) \n\t"
94 "sw %[temp7], 28(%[dst]) \n\t"
/* Jump back to local label 1 while src differs from loop_end.
 * NOTE(review): the instructions that advance src and dst, and the label
 * itself, are not part of this excerpt - confirm against the full file. */
95 "bne %[src], %[loop_end], 1b \n\t"
/* Output operands: temp[0..7] are early-clobber ("=&r") register scratch
 * values; src and dst are read-write ("+r") because the asm updates them. */
99 : [temp0]
"=&r"(
temp[0]), [temp1]
"=&r"(
temp[1]),
100 [temp2]
"=&r"(
temp[2]), [temp3]
"=&r"(
temp[3]),
101 [temp4]
"=&r"(
temp[4]), [temp5]
"=&r"(
temp[5]),
102 [temp6]
"=&r"(
temp[6]), [temp7]
"=&r"(
temp[7]),
103 [
src]
"+r"(
src), [dst]
"+r"(dst)
/* Input operand: the end pointer, only read by the asm. */
104 : [loop_end]
"r"(loop_end)
/* One linear-congruential step: previous_val is multiplied by 1664525 and
 * 1013904223 is added. The brace initializer sets the union's first member
 * (the unsigned one); the int member overlays the same storage.
 * NOTE(review): how v is read afterwards is not visible in this excerpt.
 * NOTE(review): the `u` that starts the second initializer line looks like
 * the unsigned suffix of 1664525 separated by line wrapping - confirm. */
111 union {
unsigned u;
int s; } v = { previous_val * 1664525
u + 1013904223 };
/* Local alias for the saved buffer held by sce. */
120 float *saved = sce->
saved;
/* Walks i over 0, 128, ..., 896 (eight steps of 128).
 * NOTE(review): the loop body is not part of this excerpt. */
128 for (
i = 0;
i < 1024;
i += 128)
/* Copy the first 448 floats of saved to the start of out. */
143 float_copy(
out, saved, 448);
/* Scratch values and cursors for the 64-iteration loop below.
 * dst0 starts at out + 448 and dst1 at dst0 + 127; dst2 starts at saved + 63.
 * win0/win1 and win0_prev/win1_prev point at offsets 0 and 127 of swindow and
 * swindow_prev respectively (the casts keep the declared float* type).
 * src0_prev starts at saved + 448, src1_prev at buf + 63, src0 at buf + 64
 * and src1 at buf + 191. */
150 float temp0, temp1, temp2, temp3;
151 float *dst0 =
out + 448 + 0*128;
152 float *dst1 = dst0 + 64 + 63;
153 float *dst2 = saved + 63;
154 float *win0 = (
float*)swindow;
155 float *win1 = win0 + 64 + 63;
156 float *win0_prev = (
float*)swindow_prev;
157 float *win1_prev = win0_prev + 64 + 63;
158 float *src0_prev = saved + 448;
159 float *src1_prev = buf + 0*128 + 63;
160 float *
src0 = buf + 0*128 + 64;
161 float *
src1 = buf + 1*128 + 63;
/* 64 iterations. */
163 for(
i = 0;
i < 64;
i++)
/* First input pair of the iteration. */
165 temp0 = src0_prev[0];
166 temp1 = src1_prev[0];
/* Every output pair below has the same form:
 *   first  = a * wj - b * wi
 *   second = a * wi + b * wj
 * with (a, b) being (temp0, temp1) or (temp2, temp3). Pairs land at dst0/dst1
 * offsets 0, 128, 256 and 384; the last pair writes dst0[512] and dst2[0].
 * NOTE(review): the statements that load wi, wj, temp2, temp3, reload
 * temp0/temp1 between pairs and step the cursors are not in this excerpt. */
171 dst0[0] = temp0 * wj - temp1 * wi;
172 dst1[0] = temp0 * wi + temp1 * wj;
179 dst0[128] = temp2 * wj - temp3 * wi;
180 dst1[128] = temp2 * wi + temp3 * wj;
184 dst0[256] = temp0 * wj - temp1 * wi;
185 dst1[256] = temp0 * wi + temp1 * wj;
186 dst0[384] = temp2 * wj - temp3 * wi;
187 dst1[384] = temp2 * wi + temp3 * wj;
191 dst0[512] = temp0 * wj - temp1 * wi;
192 dst2[0] = temp0 * wi + temp1 * wj;
/* Copy 448 floats from buf + 64 to out + 576. */
209 float_copy(
out + 576, buf + 64, 448);
/* Refill saved from buf. Three variants appear below: only the last 64 floats
 * (from buf + 7*128 + 64); the first 448 floats (from buf + 512) plus the same
 * last 64; or all 512 floats from buf + 512.
 * NOTE(review): the conditions selecting between them are not in this
 * excerpt. */
218 float_copy(saved + 448, buf + 7*128 + 64, 64);
220 float_copy(saved, buf + 512, 448);
221 float_copy(saved + 448, buf + 7*128 + 64, 64);
223 float_copy(saved, buf + 512, 512);
/* predTime aliases the ret buffer held by sce. */
235 float *predTime = sce->
ret;
/* Number of entries handled by the loop over i below; starts at 2048. */
238 int16_t num_samples = 2048;
/* Replace it with lag + 1024.
 * NOTE(review): the condition guarding this assignment is not in this
 * excerpt. */
241 num_samples = ltp->
lag + 1024;
/* Split the remaining 2048 - num_samples entries into j whole groups of four
 * and k leftover entries (0..3). */
242 j = (2048 - num_samples) >> 2;
243 k = (2048 - num_samples) & 3;
/* Cursor at the first entry after the num_samples entries. */
244 p_predTime = &predTime[num_samples];
/* One pass per entry in [0, num_samples).
 * NOTE(review): the loop body is not part of this excerpt. */
246 for (
i = 0;
i < num_samples;
i++)
/* Store the MIPS zero register ($0) to four consecutive 32-bit words and move
 * the cursor forward by 16 bytes.
 * NOTE(review): the loop that repeats this, presumably j times, is not in
 * this excerpt; PTR_ADDIU is a macro defined elsewhere. */
252 "sw $0, 0(%[p_predTime]) \n\t"
253 "sw $0, 4(%[p_predTime]) \n\t"
254 "sw $0, 8(%[p_predTime]) \n\t"
255 "sw $0, 12(%[p_predTime]) \n\t"
256 PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t"
/* The cursor is a read-write operand because the asm advances it. */
258 : [p_predTime]
"+r"(p_predTime)
/* Leftover pass: k iterations, each clearing one word and advancing the
 * cursor by 4 bytes. */
263 for (
i = 0;
i < k;
i++) {
266 "sw $0, 0(%[p_predTime]) \n\t"
267 PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t"
269 : [p_predTime]
"+r"(p_predTime)
/* Runs while count is positive, consuming four elements per iteration. */
299 for (; count > 0; count -= 4){
/* Load four single-precision values from ptr2 at byte offsets 0, -4, -8, -12
 * (walking backwards in memory). */
304 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
305 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
306 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
307 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
/* Load four values from ptr3 at the same backwards offsets. */
308 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
309 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
310 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
311 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
/* Multiply the loaded values pairwise: temp8..temp11 = temp0..3 * temp4..7. */
312 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
313 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
314 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
315 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
/* Store the four products to ptr1 at ascending byte offsets 0, 4, 8, 12.
 * NOTE(review): the instructions that move ptr1, ptr2 and ptr3 between
 * iterations are not part of this excerpt. */
316 "swc1 %[temp8], 0(%[ptr1]) \n\t"
317 "swc1 %[temp9], 4(%[ptr1]) \n\t"
318 "swc1 %[temp10], 8(%[ptr1]) \n\t"
319 "swc1 %[temp11], 12(%[ptr1]) \n\t"
/* Output operands: temp[0..11] are early-clobber floating-point register
 * scratch values ("=&f"). ptr1, ptr2 and ptr3 are read-write bindings of dst,
 * src0 and src1. */
324 : [temp0]
"=&f"(
temp[0]), [temp1]
"=&f"(
temp[1]),
325 [temp2]
"=&f"(
temp[2]), [temp3]
"=&f"(
temp[3]),
326 [temp4]
"=&f"(
temp[4]), [temp5]
"=&f"(
temp[5]),
327 [temp6]
"=&f"(
temp[6]), [temp7]
"=&f"(
temp[7]),
328 [temp8]
"=&f"(
temp[8]), [temp9]
"=&f"(
temp[9]),
329 [temp10]
"=&f"(
temp[10]), [temp11]
"=&f"(
temp[11]),
330 [ptr1]
"+r"(dst), [ptr2]
"+r"(
src0), [ptr3]
"+r"(
src1)
/* Local aliases: saved is the saved buffer of sce, saved_ltp its coeffs
 * buffer. */
340 float *saved = sce->
saved;
341 float *saved_ltp = sce->
coeffs;
/* Integer scratch values used as asm operands further down. */
344 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
/* Cursor and end pointer for the zero-fill loop: elements 576 up to (not
 * including) 1024 of saved_ltp. */
347 float *p_saved_ltp = saved_ltp + 576;
348 float *loop_end1 = p_saved_ltp + 448;
/* Copy the first 512 floats of saved into saved_ltp. */
350 float_copy(saved_ltp, saved, 512);
/* Zero-fill loop: store the MIPS zero register ($0) to eight consecutive
 * 32-bit words, advance the cursor by 32 bytes and branch back to label 1
 * until it reaches loop_end1.
 * NOTE(review): the label and any .set directives around this loop are not in
 * this excerpt; PTR_ADDIU is a macro defined elsewhere. */
355 "sw $0, 0(%[p_saved_ltp]) \n\t"
356 "sw $0, 4(%[p_saved_ltp]) \n\t"
357 "sw $0, 8(%[p_saved_ltp]) \n\t"
358 "sw $0, 12(%[p_saved_ltp]) \n\t"
359 "sw $0, 16(%[p_saved_ltp]) \n\t"
360 "sw $0, 20(%[p_saved_ltp]) \n\t"
361 "sw $0, 24(%[p_saved_ltp]) \n\t"
362 "sw $0, 28(%[p_saved_ltp]) \n\t"
363 PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t"
364 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
/* The cursor is read-write; the end pointer is a plain input. */
366 : [p_saved_ltp]
"+r"(p_saved_ltp)
367 : [loop_end1]
"r"(loop_end1)
/* Fill 64 floats at saved_ltp + 512 from buf_mdct + 960 and swindow.
 * NOTE(review): the condition selecting this call is not in this excerpt. */
372 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 960, swindow, 64);
/* Cursors for the combined copy-and-clear loop: read from saved, write to
 * saved_ltp, stop after 448 floats of saved. */
374 float *buff0 = saved;
375 float *buff1 = saved_ltp;
376 float *loop_end = saved + 448;
/* Ask the assembler to emit the following instructions in the written order. */
381 ".set noreorder \n\t"
/* Load eight consecutive 32-bit words from src (byte offsets 0..28). */
383 "lw %[temp0], 0(%[src]) \n\t"
384 "lw %[temp1], 4(%[src]) \n\t"
385 "lw %[temp2], 8(%[src]) \n\t"
386 "lw %[temp3], 12(%[src]) \n\t"
387 "lw %[temp4], 16(%[src]) \n\t"
388 "lw %[temp5], 20(%[src]) \n\t"
389 "lw %[temp6], 24(%[src]) \n\t"
390 "lw %[temp7], 28(%[src]) \n\t"
/* Store them to dst at the matching offsets. */
392 "sw %[temp0], 0(%[dst]) \n\t"
393 "sw %[temp1], 4(%[dst]) \n\t"
394 "sw %[temp2], 8(%[dst]) \n\t"
395 "sw %[temp3], 12(%[dst]) \n\t"
396 "sw %[temp4], 16(%[dst]) \n\t"
397 "sw %[temp5], 20(%[dst]) \n\t"
398 "sw %[temp6], 24(%[dst]) \n\t"
399 "sw %[temp7], 28(%[dst]) \n\t"
/* In the same pass, clear eight words starting 2304 bytes (576 four-byte
 * words) past dst. */
400 "sw $0, 2304(%[dst]) \n\t"
401 "sw $0, 2308(%[dst]) \n\t"
402 "sw $0, 2312(%[dst]) \n\t"
403 "sw $0, 2316(%[dst]) \n\t"
404 "sw $0, 2320(%[dst]) \n\t"
405 "sw $0, 2324(%[dst]) \n\t"
406 "sw $0, 2328(%[dst]) \n\t"
407 "sw $0, 2332(%[dst]) \n\t"
/* Branch back to label 1 while src differs from loop_end.
 * NOTE(review): the instructions advancing src and dst are not part of this
 * excerpt - confirm against the full file. */
408 "bne %[src], %[loop_end], 1b \n\t"
/* Outputs: temp0..temp7 are early-clobber register scratch values; src and
 * dst are read-write bindings of buff0 and buff1. Input: loop_end. */
412 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
413 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
414 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
415 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
416 [
src]
"+r"(buff0), [dst]
"+r"(buff1)
417 : [loop_end]
"r"(loop_end)
/* Two further fills of saved_ltp + 512: 64 floats from buf_mdct + 960 with
 * swindow, or 512 floats from buf_mdct + 512 with lwindow.
 * NOTE(review): the conditions selecting each call are not in this excerpt. */
421 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 960, swindow, 64);
424 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 512, lwindow, 512);
/* Copy the 1024 floats of saved_ltp to ltp_state starting at index 2048. */
429 float_copy(sce->
ltp_state + 2048, saved_ltp, 1024);
/* Point the imdct_and_windowing, apply_ltp and update_ltp members of c at the
 * corresponding *_mips routines.
 * NOTE(review): the type of c and any guard around these assignments are not
 * visible in this excerpt. */
438 c->imdct_and_windowing = imdct_and_windowing_mips;
439 c->apply_ltp = apply_ltp_mips;
440 c->update_ltp = update_ltp_mips;