63 union {
unsigned u;
int s; }
v = { previous_val * 1664525
u + 1013904223 };
72 float *saved = sce->
saved;
80 for (i = 0; i < 1024; i += 128)
98 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
104 ".set noreorder \n\t"
105 "addiu %[loop_end], %[src], 1792 \n\t"
107 "lw %[temp0], 0(%[src]) \n\t"
108 "lw %[temp1], 4(%[src]) \n\t"
109 "lw %[temp2], 8(%[src]) \n\t"
110 "lw %[temp3], 12(%[src]) \n\t"
111 "lw %[temp4], 16(%[src]) \n\t"
112 "lw %[temp5], 20(%[src]) \n\t"
113 "lw %[temp6], 24(%[src]) \n\t"
114 "lw %[temp7], 28(%[src]) \n\t"
115 "addiu %[src], %[src], 32 \n\t"
116 "sw %[temp0], 0(%[dst]) \n\t"
117 "sw %[temp1], 4(%[dst]) \n\t"
118 "sw %[temp2], 8(%[dst]) \n\t"
119 "sw %[temp3], 12(%[dst]) \n\t"
120 "sw %[temp4], 16(%[dst]) \n\t"
121 "sw %[temp5], 20(%[dst]) \n\t"
122 "sw %[temp6], 24(%[dst]) \n\t"
123 "sw %[temp7], 28(%[dst]) \n\t"
124 "bne %[src], %[loop_end], 1b \n\t"
125 " addiu %[dst], %[dst], 32 \n\t"
128 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
129 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
130 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
131 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
132 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
144 float temp0, temp1, temp2, temp3;
145 float *dst0 = out + 448 + 0*128;
146 float *dst1 = dst0 + 64 + 63;
147 float *dst2 = saved + 63;
148 float *win0 = (
float*)swindow;
149 float *win1 = win0 + 64 + 63;
150 float *win0_prev = (
float*)swindow_prev;
151 float *win1_prev = win0_prev + 64 + 63;
152 float *src0_prev = saved + 448;
153 float *src1_prev = buf + 0*128 + 63;
154 float *src0 = buf + 0*128 + 64;
155 float *src1 = buf + 1*128 + 63;
157 for(i = 0; i < 64; i++)
159 temp0 = src0_prev[0];
160 temp1 = src1_prev[0];
165 dst0[0] = temp0 * wj - temp1 * wi;
166 dst1[0] = temp0 * wi + temp1 * wj;
173 dst0[128] = temp2 * wj - temp3 * wi;
174 dst1[128] = temp2 * wi + temp3 * wj;
178 dst0[256] = temp0 * wj - temp1 * wi;
179 dst1[256] = temp0 * wi + temp1 * wj;
180 dst0[384] = temp2 * wj - temp3 * wi;
181 dst1[384] = temp2 * wi + temp3 * wj;
185 dst0[512] = temp0 * wj - temp1 * wi;
186 dst2[0] = temp0 * wi + temp1 * wj;
204 float *buf1 = buf + 64;
205 float *buf2 = out + 576;
206 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
212 ".set noreorder \n\t"
213 "addiu %[loop_end], %[src], 1792 \n\t"
215 "lw %[temp0], 0(%[src]) \n\t"
216 "lw %[temp1], 4(%[src]) \n\t"
217 "lw %[temp2], 8(%[src]) \n\t"
218 "lw %[temp3], 12(%[src]) \n\t"
219 "lw %[temp4], 16(%[src]) \n\t"
220 "lw %[temp5], 20(%[src]) \n\t"
221 "lw %[temp6], 24(%[src]) \n\t"
222 "lw %[temp7], 28(%[src]) \n\t"
223 "addiu %[src], %[src], 32 \n\t"
224 "sw %[temp0], 0(%[dst]) \n\t"
225 "sw %[temp1], 4(%[dst]) \n\t"
226 "sw %[temp2], 8(%[dst]) \n\t"
227 "sw %[temp3], 12(%[dst]) \n\t"
228 "sw %[temp4], 16(%[dst]) \n\t"
229 "sw %[temp5], 20(%[dst]) \n\t"
230 "sw %[temp6], 24(%[dst]) \n\t"
231 "sw %[temp7], 28(%[dst]) \n\t"
232 "bne %[src], %[loop_end], 1b \n\t"
233 " addiu %[dst], %[dst], 32 \n\t"
236 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
237 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
238 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
239 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
240 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
255 float *buf1 = buf + 7*128 + 64;
256 float *buf2 = saved + 448;
257 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
263 ".set noreorder \n\t"
264 "addiu %[loop_end], %[src], 256 \n\t"
266 "lw %[temp0], 0(%[src]) \n\t"
267 "lw %[temp1], 4(%[src]) \n\t"
268 "lw %[temp2], 8(%[src]) \n\t"
269 "lw %[temp3], 12(%[src]) \n\t"
270 "lw %[temp4], 16(%[src]) \n\t"
271 "lw %[temp5], 20(%[src]) \n\t"
272 "lw %[temp6], 24(%[src]) \n\t"
273 "lw %[temp7], 28(%[src]) \n\t"
274 "addiu %[src], %[src], 32 \n\t"
275 "sw %[temp0], 0(%[dst]) \n\t"
276 "sw %[temp1], 4(%[dst]) \n\t"
277 "sw %[temp2], 8(%[dst]) \n\t"
278 "sw %[temp3], 12(%[dst]) \n\t"
279 "sw %[temp4], 16(%[dst]) \n\t"
280 "sw %[temp5], 20(%[dst]) \n\t"
281 "sw %[temp6], 24(%[dst]) \n\t"
282 "sw %[temp7], 28(%[dst]) \n\t"
283 "bne %[src], %[loop_end], 1b \n\t"
284 " addiu %[dst], %[dst], 32 \n\t"
287 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
288 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
289 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
290 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
291 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
298 float *buf1 = buf + 512;
300 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
306 ".set noreorder \n\t"
307 "addiu %[loop_end], %[src], 1792 \n\t"
309 "lw %[temp0], 0(%[src]) \n\t"
310 "lw %[temp1], 4(%[src]) \n\t"
311 "lw %[temp2], 8(%[src]) \n\t"
312 "lw %[temp3], 12(%[src]) \n\t"
313 "lw %[temp4], 16(%[src]) \n\t"
314 "lw %[temp5], 20(%[src]) \n\t"
315 "lw %[temp6], 24(%[src]) \n\t"
316 "lw %[temp7], 28(%[src]) \n\t"
317 "addiu %[src], %[src], 32 \n\t"
318 "sw %[temp0], 0(%[dst]) \n\t"
319 "sw %[temp1], 4(%[dst]) \n\t"
320 "sw %[temp2], 8(%[dst]) \n\t"
321 "sw %[temp3], 12(%[dst]) \n\t"
322 "sw %[temp4], 16(%[dst]) \n\t"
323 "sw %[temp5], 20(%[dst]) \n\t"
324 "sw %[temp6], 24(%[dst]) \n\t"
325 "sw %[temp7], 28(%[dst]) \n\t"
326 "bne %[src], %[loop_end], 1b \n\t"
327 " addiu %[dst], %[dst], 32 \n\t"
330 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
331 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
332 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
333 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
334 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
340 float *buf1 = buf + 7*128 + 64;
341 float *buf2 = saved + 448;
342 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
348 ".set noreorder \n\t"
349 "addiu %[loop_end], %[src], 256 \n\t"
351 "lw %[temp0], 0(%[src]) \n\t"
352 "lw %[temp1], 4(%[src]) \n\t"
353 "lw %[temp2], 8(%[src]) \n\t"
354 "lw %[temp3], 12(%[src]) \n\t"
355 "lw %[temp4], 16(%[src]) \n\t"
356 "lw %[temp5], 20(%[src]) \n\t"
357 "lw %[temp6], 24(%[src]) \n\t"
358 "lw %[temp7], 28(%[src]) \n\t"
359 "addiu %[src], %[src], 32 \n\t"
360 "sw %[temp0], 0(%[dst]) \n\t"
361 "sw %[temp1], 4(%[dst]) \n\t"
362 "sw %[temp2], 8(%[dst]) \n\t"
363 "sw %[temp3], 12(%[dst]) \n\t"
364 "sw %[temp4], 16(%[dst]) \n\t"
365 "sw %[temp5], 20(%[dst]) \n\t"
366 "sw %[temp6], 24(%[dst]) \n\t"
367 "sw %[temp7], 28(%[dst]) \n\t"
368 "bne %[src], %[loop_end], 1b \n\t"
369 " addiu %[dst], %[dst], 32 \n\t"
372 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
373 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
374 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
375 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
376 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
383 float *buf1 = buf + 512;
385 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
391 ".set noreorder \n\t"
392 "addiu %[loop_end], %[src], 2048 \n\t"
394 "lw %[temp0], 0(%[src]) \n\t"
395 "lw %[temp1], 4(%[src]) \n\t"
396 "lw %[temp2], 8(%[src]) \n\t"
397 "lw %[temp3], 12(%[src]) \n\t"
398 "lw %[temp4], 16(%[src]) \n\t"
399 "lw %[temp5], 20(%[src]) \n\t"
400 "lw %[temp6], 24(%[src]) \n\t"
401 "lw %[temp7], 28(%[src]) \n\t"
402 "addiu %[src], %[src], 32 \n\t"
403 "sw %[temp0], 0(%[dst]) \n\t"
404 "sw %[temp1], 4(%[dst]) \n\t"
405 "sw %[temp2], 8(%[dst]) \n\t"
406 "sw %[temp3], 12(%[dst]) \n\t"
407 "sw %[temp4], 16(%[dst]) \n\t"
408 "sw %[temp5], 20(%[dst]) \n\t"
409 "sw %[temp6], 24(%[dst]) \n\t"
410 "sw %[temp7], 28(%[dst]) \n\t"
411 "bne %[src], %[loop_end], 1b \n\t"
412 " addiu %[dst], %[dst], 32 \n\t"
415 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
416 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
417 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
418 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
419 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf1),
435 float *predTime = sce->
ret;
438 int16_t num_samples = 2048;
441 num_samples = ltp->
lag + 1024;
442 j = (2048 - num_samples) >> 2;
443 k = (2048 - num_samples) & 3;
444 p_predTime = &predTime[num_samples];
446 for (i = 0; i < num_samples; i++)
448 for (i = 0; i < j; i++) {
452 "sw $0, 0(%[p_predTime]) \n\t"
453 "sw $0, 4(%[p_predTime]) \n\t"
454 "sw $0, 8(%[p_predTime]) \n\t"
455 "sw $0, 12(%[p_predTime]) \n\t"
456 "addiu %[p_predTime], %[p_predTime], 16 \n\t"
458 : [p_predTime]
"+r"(p_predTime)
463 for (i = 0; i < k; i++) {
466 "sw $0, 0(%[p_predTime]) \n\t"
467 "addiu %[p_predTime], %[p_predTime], 4 \n\t"
469 : [p_predTime]
"+r"(p_predTime)
482 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
483 sce->
coeffs[i] += predFreq[i];
491 float *saved = sce->
saved;
492 float *saved_ltp = sce->
coeffs;
496 int loop_end, loop_end1, loop_end2;
497 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
501 float *buf0 = saved_ltp;
502 float *p_saved_ltp = saved_ltp + 576;
503 float *ptr1 = &saved_ltp[512];
505 float *ptr3 = (
float*)&swindow[63];
506 loop_end1 = (int)(p_saved_ltp + 448);
511 ".set noreorder \n\t"
512 "addiu %[loop_end], %[src], 2048 \n\t"
514 "lw %[temp0], 0(%[src]) \n\t"
515 "lw %[temp1], 4(%[src]) \n\t"
516 "lw %[temp2], 8(%[src]) \n\t"
517 "lw %[temp3], 12(%[src]) \n\t"
518 "lw %[temp4], 16(%[src]) \n\t"
519 "lw %[temp5], 20(%[src]) \n\t"
520 "lw %[temp6], 24(%[src]) \n\t"
521 "lw %[temp7], 28(%[src]) \n\t"
522 "addiu %[src], %[src], 32 \n\t"
523 "sw %[temp0], 0(%[dst]) \n\t"
524 "sw %[temp1], 4(%[dst]) \n\t"
525 "sw %[temp2], 8(%[dst]) \n\t"
526 "sw %[temp3], 12(%[dst]) \n\t"
527 "sw %[temp4], 16(%[dst]) \n\t"
528 "sw %[temp5], 20(%[dst]) \n\t"
529 "sw %[temp6], 24(%[dst]) \n\t"
530 "sw %[temp7], 28(%[dst]) \n\t"
531 "bne %[src], %[loop_end], 1b \n\t"
532 " addiu %[dst], %[dst], 32 \n\t"
535 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
536 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
537 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
538 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
539 [loop_end]
"=&r"(loop_end), [src]
"+r"(buf),
548 "sw $0, 0(%[p_saved_ltp]) \n\t"
549 "sw $0, 4(%[p_saved_ltp]) \n\t"
550 "sw $0, 8(%[p_saved_ltp]) \n\t"
551 "sw $0, 12(%[p_saved_ltp]) \n\t"
552 "sw $0, 16(%[p_saved_ltp]) \n\t"
553 "sw $0, 20(%[p_saved_ltp]) \n\t"
554 "sw $0, 24(%[p_saved_ltp]) \n\t"
555 "sw $0, 28(%[p_saved_ltp]) \n\t"
556 "addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
557 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
559 : [p_saved_ltp]
"+r"(p_saved_ltp)
560 : [loop_end1]
"r"(loop_end1)
565 for (i = 0; i < 16; i++){
568 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
569 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
570 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
571 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
572 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
573 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
574 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
575 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
576 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
577 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
578 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
579 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
580 "swc1 %[temp8], 0(%[ptr1]) \n\t"
581 "swc1 %[temp9], 4(%[ptr1]) \n\t"
582 "swc1 %[temp10], 8(%[ptr1]) \n\t"
583 "swc1 %[temp11], 12(%[ptr1]) \n\t"
584 "addiu %[ptr1], %[ptr1], 16 \n\t"
585 "addiu %[ptr2], %[ptr2], -16 \n\t"
586 "addiu %[ptr3], %[ptr3], -16 \n\t"
588 : [temp0]
"=&f"(temp0), [temp1]
"=&f"(temp1),
589 [temp2]
"=&f"(temp2), [temp3]
"=&f"(temp3),
590 [temp4]
"=&f"(temp4), [temp5]
"=&f"(temp5),
591 [temp6]
"=&f"(temp6), [temp7]
"=&f"(temp7),
592 [temp8]
"=&f"(temp8), [temp9]
"=&f"(temp9),
593 [temp10]
"=&f"(temp10), [temp11]
"=&f"(temp11),
594 [ptr1]
"+r"(ptr1), [ptr2]
"+r"(ptr2), [ptr3]
"+r"(ptr3)
600 float *buff0 = saved;
601 float *buff1 = saved_ltp;
602 float *ptr1 = &saved_ltp[512];
604 float *ptr3 = (
float*)&swindow[63];
605 loop_end = (int)(saved + 448);
610 ".set noreorder \n\t"
612 "lw %[temp0], 0(%[src]) \n\t"
613 "lw %[temp1], 4(%[src]) \n\t"
614 "lw %[temp2], 8(%[src]) \n\t"
615 "lw %[temp3], 12(%[src]) \n\t"
616 "lw %[temp4], 16(%[src]) \n\t"
617 "lw %[temp5], 20(%[src]) \n\t"
618 "lw %[temp6], 24(%[src]) \n\t"
619 "lw %[temp7], 28(%[src]) \n\t"
620 "addiu %[src], %[src], 32 \n\t"
621 "sw %[temp0], 0(%[dst]) \n\t"
622 "sw %[temp1], 4(%[dst]) \n\t"
623 "sw %[temp2], 8(%[dst]) \n\t"
624 "sw %[temp3], 12(%[dst]) \n\t"
625 "sw %[temp4], 16(%[dst]) \n\t"
626 "sw %[temp5], 20(%[dst]) \n\t"
627 "sw %[temp6], 24(%[dst]) \n\t"
628 "sw %[temp7], 28(%[dst]) \n\t"
629 "sw $0, 2304(%[dst]) \n\t"
630 "sw $0, 2308(%[dst]) \n\t"
631 "sw $0, 2312(%[dst]) \n\t"
632 "sw $0, 2316(%[dst]) \n\t"
633 "sw $0, 2320(%[dst]) \n\t"
634 "sw $0, 2324(%[dst]) \n\t"
635 "sw $0, 2328(%[dst]) \n\t"
636 "sw $0, 2332(%[dst]) \n\t"
637 "bne %[src], %[loop_end], 1b \n\t"
638 " addiu %[dst], %[dst], 32 \n\t"
641 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
642 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
643 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
644 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
645 [src]
"+r"(buff0), [
dst]
"+r"(buff1)
646 : [loop_end]
"r"(loop_end)
650 for (i = 0; i < 16; i++){
653 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
654 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
655 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
656 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
657 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
658 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
659 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
660 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
661 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
662 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
663 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
664 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
665 "swc1 %[temp8], 0(%[ptr1]) \n\t"
666 "swc1 %[temp9], 4(%[ptr1]) \n\t"
667 "swc1 %[temp10], 8(%[ptr1]) \n\t"
668 "swc1 %[temp11], 12(%[ptr1]) \n\t"
669 "addiu %[ptr1], %[ptr1], 16 \n\t"
670 "addiu %[ptr2], %[ptr2], -16 \n\t"
671 "addiu %[ptr3], %[ptr3], -16 \n\t"
673 : [temp0]
"=&f"(temp0), [temp1]
"=&f"(temp1),
674 [temp2]
"=&f"(temp2), [temp3]
"=&f"(temp3),
675 [temp4]
"=&f"(temp4), [temp5]
"=&f"(temp5),
676 [temp6]
"=&f"(temp6), [temp7]
"=&f"(temp7),
677 [temp8]
"=&f"(temp8), [temp9]
"=&f"(temp9),
678 [temp10]
"=&f"(temp10), [temp11]
"=&f"(temp11),
679 [ptr1]
"+r"(ptr1), [ptr2]
"+r"(ptr2), [ptr3]
"+r"(ptr3)
685 float *ptr1, *ptr2, *ptr3;
688 ptr1 = &saved_ltp[512];
690 ptr3 = (
float*)&lwindow[511];
692 for (i = 0; i < 512; i+=4){
695 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
696 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
697 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
698 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
699 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
700 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
701 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
702 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
703 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
704 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
705 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
706 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
707 "swc1 %[temp8], 0(%[ptr1]) \n\t"
708 "swc1 %[temp9], 4(%[ptr1]) \n\t"
709 "swc1 %[temp10], 8(%[ptr1]) \n\t"
710 "swc1 %[temp11], 12(%[ptr1]) \n\t"
711 "addiu %[ptr1], %[ptr1], 16 \n\t"
712 "addiu %[ptr2], %[ptr2], -16 \n\t"
713 "addiu %[ptr3], %[ptr3], -16 \n\t"
715 : [temp0]
"=&f"(temp0), [temp1]
"=&f"(temp1),
716 [temp2]
"=&f"(temp2), [temp3]
"=&f"(temp3),
717 [temp4]
"=&f"(temp4), [temp5]
"=&f"(temp5),
718 [temp6]
"=&f"(temp6), [temp7]
"=&f"(temp7),
719 [temp8]
"=&f"(temp8), [temp9]
"=&f"(temp9),
720 [temp10]
"=&f"(temp10), [temp11]
"=&f"(temp11),
721 [ptr1]
"+r"(ptr1), [ptr2]
"+r"(ptr2),
732 float *buf3 = sce->
ret;
734 float *buf5 = saved_ltp;
740 ".set noreorder \n\t"
741 "addiu %[loop_end], %[src], 4096 \n\t"
742 "addiu %[loop_end1], %[src1], 4096 \n\t"
743 "addiu %[loop_end2], %[src2], 4096 \n\t"
745 "lw %[temp0], 0(%[src]) \n\t"
746 "lw %[temp1], 4(%[src]) \n\t"
747 "lw %[temp2], 8(%[src]) \n\t"
748 "lw %[temp3], 12(%[src]) \n\t"
749 "lw %[temp4], 16(%[src]) \n\t"
750 "lw %[temp5], 20(%[src]) \n\t"
751 "lw %[temp6], 24(%[src]) \n\t"
752 "lw %[temp7], 28(%[src]) \n\t"
753 "addiu %[src], %[src], 32 \n\t"
754 "sw %[temp0], 0(%[dst]) \n\t"
755 "sw %[temp1], 4(%[dst]) \n\t"
756 "sw %[temp2], 8(%[dst]) \n\t"
757 "sw %[temp3], 12(%[dst]) \n\t"
758 "sw %[temp4], 16(%[dst]) \n\t"
759 "sw %[temp5], 20(%[dst]) \n\t"
760 "sw %[temp6], 24(%[dst]) \n\t"
761 "sw %[temp7], 28(%[dst]) \n\t"
762 "bne %[src], %[loop_end], 1b \n\t"
763 " addiu %[dst], %[dst], 32 \n\t"
765 "lw %[temp0], 0(%[src1]) \n\t"
766 "lw %[temp1], 4(%[src1]) \n\t"
767 "lw %[temp2], 8(%[src1]) \n\t"
768 "lw %[temp3], 12(%[src1]) \n\t"
769 "lw %[temp4], 16(%[src1]) \n\t"
770 "lw %[temp5], 20(%[src1]) \n\t"
771 "lw %[temp6], 24(%[src1]) \n\t"
772 "lw %[temp7], 28(%[src1]) \n\t"
773 "addiu %[src1], %[src1], 32 \n\t"
774 "sw %[temp0], 0(%[dst1]) \n\t"
775 "sw %[temp1], 4(%[dst1]) \n\t"
776 "sw %[temp2], 8(%[dst1]) \n\t"
777 "sw %[temp3], 12(%[dst1]) \n\t"
778 "sw %[temp4], 16(%[dst1]) \n\t"
779 "sw %[temp5], 20(%[dst1]) \n\t"
780 "sw %[temp6], 24(%[dst1]) \n\t"
781 "sw %[temp7], 28(%[dst1]) \n\t"
782 "bne %[src1], %[loop_end1], 2b \n\t"
783 " addiu %[dst1], %[dst1], 32 \n\t"
785 "lw %[temp0], 0(%[src2]) \n\t"
786 "lw %[temp1], 4(%[src2]) \n\t"
787 "lw %[temp2], 8(%[src2]) \n\t"
788 "lw %[temp3], 12(%[src2]) \n\t"
789 "lw %[temp4], 16(%[src2]) \n\t"
790 "lw %[temp5], 20(%[src2]) \n\t"
791 "lw %[temp6], 24(%[src2]) \n\t"
792 "lw %[temp7], 28(%[src2]) \n\t"
793 "addiu %[src2], %[src2], 32 \n\t"
794 "sw %[temp0], 0(%[dst2]) \n\t"
795 "sw %[temp1], 4(%[dst2]) \n\t"
796 "sw %[temp2], 8(%[dst2]) \n\t"
797 "sw %[temp3], 12(%[dst2]) \n\t"
798 "sw %[temp4], 16(%[dst2]) \n\t"
799 "sw %[temp5], 20(%[dst2]) \n\t"
800 "sw %[temp6], 24(%[dst2]) \n\t"
801 "sw %[temp7], 28(%[dst2]) \n\t"
802 "bne %[src2], %[loop_end2], 3b \n\t"
803 " addiu %[dst2], %[dst2], 32 \n\t"
806 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
807 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
808 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
809 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
810 [loop_end]
"=&r"(loop_end), [loop_end1]
"=&r"(loop_end1),
811 [loop_end2]
"=&r"(loop_end2), [src]
"+r"(buf1),
812 [
dst]
"+r"(buf2), [src1]
"+r"(buf3), [dst1]
"+r"(buf4),
813 [src2]
"+r"(buf5), [dst2]
"+r"(buf6)