28 int dstStride,
int srcStride,
int h)
32 "gslwlc1 $f2, 3(%[src]) \r\n"
33 "gslwrc1 $f2, 0(%[src]) \r\n"
34 "gsswlc1 $f2, 3(%[dst]) \r\n"
35 "gsswrc1 $f2, 0(%[dst]) \r\n"
36 "dadd %[src], %[src], %[srcStride] \r\n"
37 "dadd %[dst], %[dst], %[dstStride] \r\n"
38 "daddi %[h], %[h], -1 \r\n"
40 : [dst]
"+&r"(dst),[src]
"+&r"(src)
41 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),[
h]
"r"(
h)
47 int dstStride,
int srcStride,
int h)
51 "gsldlc1 $f2, 7(%[src]) \r\n"
52 "gsldrc1 $f2, 0(%[src]) \r\n"
53 "gssdlc1 $f2, 7(%[dst]) \r\n"
54 "gssdrc1 $f2, 0(%[dst]) \r\n"
55 "dadd %[src], %[src], %[srcStride] \r\n"
56 "dadd %[dst], %[dst], %[dstStride] \r\n"
57 "daddi %[h], %[h], -1 \r\n"
59 : [dst]
"+&r"(dst),[src]
"+&r"(src)
60 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),[
h]
"r"(
h)
66 int dstStride,
int srcStride,
int h)
70 "gsldlc1 $f2, 7(%[src]) \r\n"
71 "gsldrc1 $f2, 0(%[src]) \r\n"
72 "gsldlc1 $f4, 15(%[src]) \r\n"
73 "gsldrc1 $f4, 8(%[src]) \r\n"
74 "gssdlc1 $f2, 7(%[dst]) \r\n"
75 "gssdrc1 $f2, 0(%[dst]) \r\n"
76 "gssdlc1 $f4, 15(%[dst]) \r\n"
77 "gssdrc1 $f4, 8(%[dst]) \r\n"
78 "dadd %[src], %[src], %[srcStride] \r\n"
79 "dadd %[dst], %[dst], %[dstStride] \r\n"
80 "daddi %[h], %[h], -1 \r\n"
82 : [dst]
"+&r"(dst),[src]
"+&r"(src)
83 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),[
h]
"r"(
h)
/*
 * "put" store operation: plainly assigns the value b to the lvalue a.
 * The expansion is an unparenthesized assignment, so it is only meant to
 * be used as a complete statement, not inside a larger expression.
 */
88 #define op_put(a, b) a = b
/*
 * "avg" store operation: replaces the lvalue a with
 * rnd_avg_pixel4(a, b), i.e. it combines the value already stored in a
 * with the new value b.
 * NOTE(review): rnd_avg_pixel4 is defined elsewhere; presumably a rounded
 * per-byte average of four packed pixels -- confirm against its definition.
 * The argument a appears twice in the expansion, so it must not have
 * side effects.
 */
89 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
91 ptrdiff_t line_size,
int h)
95 "gslwlc1 $f2, 3(%[pixels]) \r\n"
96 "gslwrc1 $f2, 0(%[pixels]) \r\n"
97 "gsswlc1 $f2, 3(%[block]) \r\n"
98 "gsswrc1 $f2, 0(%[block]) \r\n"
99 "dadd %[pixels], %[pixels], %[line_size]\r\n"
100 "dadd %[block], %[block], %[line_size] \r\n"
101 "daddi %[h], %[h], -1 \r\n"
104 : [line_size]
"r"(line_size),[h]
"r"(h)
110 ptrdiff_t line_size,
int h)
114 "gsldlc1 $f2, 7(%[pixels]) \r\n"
115 "gsldrc1 $f2, 0(%[pixels]) \r\n"
116 "gssdlc1 $f2, 7(%[block]) \r\n"
117 "gssdrc1 $f2, 0(%[block]) \r\n"
118 "dadd %[pixels], %[pixels], %[line_size]\r\n"
119 "dadd %[block], %[block], %[line_size] \r\n"
120 "daddi %[h], %[h], -1 \r\n"
123 : [line_size]
"r"(line_size),[h]
"r"(h)
129 ptrdiff_t line_size,
int h)
133 "gsldlc1 $f2, 7(%[pixels]) \r\n"
134 "gsldrc1 $f2, 0(%[pixels]) \r\n"
135 "gsldlc1 $f4, 15(%[pixels]) \r\n"
136 "gsldrc1 $f4, 8(%[pixels]) \r\n"
137 "gssdlc1 $f2, 7(%[block]) \r\n"
138 "gssdrc1 $f2, 0(%[block]) \r\n"
139 "gssdlc1 $f4, 15(%[block]) \r\n"
140 "gssdrc1 $f4, 8(%[block]) \r\n"
141 "dadd %[pixels], %[pixels], %[line_size]\r\n"
142 "dadd %[block], %[block], %[line_size] \r\n"
143 "daddi %[h], %[h], -1 \r\n"
146 : [line_size]
"r"(line_size),[h]
"r"(h)
152 ptrdiff_t line_size,
int h)
156 "gslwlc1 $f2, 3(%[pixels]) \r\n"
157 "gslwrc1 $f2, 0(%[pixels]) \r\n"
158 "gslwlc1 $f4, 3(%[block]) \r\n"
159 "gslwrc1 $f4, 0(%[block]) \r\n"
160 "pavgb $f2, $f2, $f4 \r\n"
161 "gsswlc1 $f2, 3(%[block]) \r\n"
162 "gsswrc1 $f2, 0(%[block]) \r\n"
163 "dadd %[pixels], %[pixels], %[line_size]\r\n"
164 "dadd %[block], %[block], %[line_size] \r\n"
165 "daddi %[h], %[h], -1 \r\n"
168 : [line_size]
"r"(line_size),[h]
"r"(h)
174 ptrdiff_t line_size,
int h)
178 "gsldlc1 $f2, 7(%[block]) \r\n"
179 "gsldrc1 $f2, 0(%[block]) \r\n"
180 "gsldlc1 $f4, 7(%[pixels]) \r\n"
181 "gsldrc1 $f4, 0(%[pixels]) \r\n"
182 "pavgb $f2, $f2, $f4 \r\n"
183 "gssdlc1 $f2, 7(%[block]) \r\n"
184 "gssdrc1 $f2, 0(%[block]) \r\n"
185 "dadd %[pixels], %[pixels], %[line_size]\r\n"
186 "dadd %[block], %[block], %[line_size] \r\n"
187 "daddi %[h], %[h], -1 \r\n"
190 : [line_size]
"r"(line_size),[h]
"r"(h)
196 ptrdiff_t line_size,
int h)
200 "gsldlc1 $f2, 7(%[block]) \r\n"
201 "gsldrc1 $f2, 0(%[block]) \r\n"
202 "gsldlc1 $f4, 15(%[block]) \r\n"
203 "gsldrc1 $f4, 8(%[block]) \r\n"
204 "gsldlc1 $f6, 7(%[pixels]) \r\n"
205 "gsldrc1 $f6, 0(%[pixels]) \r\n"
206 "gsldlc1 $f8, 15(%[pixels]) \r\n"
207 "gsldrc1 $f8, 8(%[pixels]) \r\n"
208 "pavgb $f2, $f2, $f6 \r\n"
209 "pavgb $f4, $f4, $f8 \r\n"
210 "gssdlc1 $f2, 7(%[block]) \r\n"
211 "gssdrc1 $f2, 0(%[block]) \r\n"
212 "gssdlc1 $f4, 15(%[block]) \r\n"
213 "gssdrc1 $f4, 8(%[block]) \r\n"
214 "dadd %[pixels], %[pixels], %[line_size]\r\n"
215 "dadd %[block], %[block], %[line_size] \r\n"
216 "daddi %[h], %[h], -1 \r\n"
219 : [line_size]
"r"(line_size),[h]
"r"(h)
220 :
"$f2",
"$f4",
"$f6",
"$f8"
225 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
229 for (i = 0; i <
h; i++) {
231 a =
AV_RN4P(&src1[i * src_stride1]);
232 b =
AV_RN4P(&src2[i * src_stride2]);
238 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
242 for (i = 0; i <
h; i++) {
244 a =
AV_RN4P(&src1[i * src_stride1]);
245 b =
AV_RN4P(&src2[i * src_stride2]);
247 a =
AV_RN4P(&src1[i * src_stride1 + 4]);
248 b =
AV_RN4P(&src2[i * src_stride2 + 4]);
254 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
258 for (i = 0; i <
h; i++) {
260 a =
AV_RN4P(&src1[i * src_stride1]);
261 b =
AV_RN4P(&src2[i * src_stride2]);
263 a =
AV_RN4P(&src1[i * src_stride1 + 4]);
264 b =
AV_RN4P(&src2[i * src_stride2 + 4]);
266 a =
AV_RN4P(&src1[i * src_stride1 + 8]);
267 b =
AV_RN4P(&src2[i * src_stride2 + 8]);
269 a =
AV_RN4P(&src1[i * src_stride1 + 12]);
270 b =
AV_RN4P(&src2[i * src_stride2 + 12]);
276 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
280 for (i = 0; i <
h; i++) {
282 a =
AV_RN4P(&src1[i * src_stride1]);
283 b =
AV_RN4P(&src2[i * src_stride2]);
289 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
293 for (i = 0; i <
h; i++) {
295 a =
AV_RN4P(&src1[i * src_stride1]);
296 b =
AV_RN4P(&src2[i * src_stride2]);
298 a =
AV_RN4P(&src1[i * src_stride1 + 4]);
299 b =
AV_RN4P(&src2[i * src_stride2 + 4]);
305 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
309 for (i = 0; i <
h; i++) {
311 a =
AV_RN4P(&src1[i * src_stride1]);
312 b =
AV_RN4P(&src2[i * src_stride2]);
314 a =
AV_RN4P(&src1[i * src_stride1 + 4]);
315 b =
AV_RN4P(&src2[i * src_stride2 + 4]);
317 a =
AV_RN4P(&src1[i * src_stride1 + 8]);
318 b =
AV_RN4P(&src2[i * src_stride2 + 8]);
320 a =
AV_RN4P(&src1[i * src_stride1 + 12]);
321 b =
AV_RN4P(&src2[i * src_stride2 + 12]);
/*
 * "avg" store for a value b that still carries a 10-bit scale factor:
 * b is rounded (+512) and shifted right by 10, passed through CLIP, and
 * then averaged with the current value of a using (x + y + 1) >> 1.
 * NOTE(review): CLIP is defined elsewhere; presumably it clamps to the
 * valid pixel range -- confirm.
 * The argument a appears twice in the expansion, so it must not have
 * side effects.
 */
329 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
/*
 * "put" store for a value b that still carries a 10-bit scale factor:
 * b is rounded (+512), shifted right by 10, passed through CLIP and the
 * result is assigned to a.
 * NOTE(review): CLIP is defined elsewhere; presumably it clamps to the
 * valid pixel range -- confirm.
 */
330 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
332 int dstStride,
int srcStride)
335 "xor $f0, $f0, $f0 \r\n"
338 "gslwlc1 $f2, 1(%[src]) \r\n"
339 "gslwrc1 $f2, -2(%[src]) \r\n"
340 "gslwlc1 $f4, 2(%[src]) \r\n"
341 "gslwrc1 $f4, -1(%[src]) \r\n"
342 "gslwlc1 $f6, 3(%[src]) \r\n"
343 "gslwrc1 $f6, 0(%[src]) \r\n"
344 "gslwlc1 $f8, 4(%[src]) \r\n"
345 "gslwrc1 $f8, 1(%[src]) \r\n"
346 "gslwlc1 $f10, 5(%[src]) \r\n"
347 "gslwrc1 $f10, 2(%[src]) \r\n"
348 "gslwlc1 $f12, 6(%[src]) \r\n"
349 "gslwrc1 $f12, 3(%[src]) \r\n"
350 "punpcklbh $f2, $f2, $f0 \r\n"
351 "punpcklbh $f4, $f4, $f0 \r\n"
352 "punpcklbh $f6, $f6, $f0 \r\n"
353 "punpcklbh $f8, $f8, $f0 \r\n"
354 "punpcklbh $f10, $f10, $f0 \r\n"
355 "punpcklbh $f12, $f12, $f0 \r\n"
356 "paddsh $f14, $f6, $f8 \r\n"
357 "paddsh $f16, $f4, $f10 \r\n"
358 "paddsh $f18, $f2, $f12 \r\n"
359 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
360 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
361 "psubsh $f14, $f14, $f16 \r\n"
362 "paddsh $f18, $f14, $f18 \r\n"
363 "paddsh $f18, $f18, %[ff_pw_16] \r\n"
364 "psrah $f18, $f18, %[ff_pw_5] \r\n"
365 "packushb $f18, $f18, $f0 \r\n"
366 "gsswlc1 $f18, 3(%[dst]) \r\n"
367 "gsswrc1 $f18, 0(%[dst]) \r\n"
368 "dadd %[dst], %[dst], %[dstStride] \r\n"
369 "dadd %[src], %[src], %[srcStride] \r\n"
370 "daddi $8, $8, -1 \r\n"
372 : [dst]
"+&r"(dst),[src]
"+&r"(src)
373 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),
375 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
381 int dstStride,
int srcStride)
384 "xor $f0, $f0, $f0 \r\n"
387 "gsldlc1 $f2, 5(%[src]) \r\n"
388 "gsldrc1 $f2, -2(%[src]) \r\n"
389 "gsldlc1 $f4, 6(%[src]) \r\n"
390 "gsldrc1 $f4, -1(%[src]) \r\n"
391 "gsldlc1 $f6, 7(%[src]) \r\n"
392 "gsldrc1 $f6, 0(%[src]) \r\n"
393 "gsldlc1 $f8, 8(%[src]) \r\n"
394 "gsldrc1 $f8, 1(%[src]) \r\n"
395 "gsldlc1 $f10, 9(%[src]) \r\n"
396 "gsldrc1 $f10, 2(%[src]) \r\n"
397 "gsldlc1 $f12, 10(%[src]) \r\n"
398 "gsldrc1 $f12, 3(%[src]) \r\n"
399 "punpcklbh $f14, $f6, $f0 \r\n"
400 "punpckhbh $f16, $f6, $f0 \r\n"
401 "punpcklbh $f18, $f8, $f0 \r\n"
402 "punpckhbh $f20, $f8, $f0 \r\n"
403 "paddsh $f6, $f14, $f18 \r\n"
404 "paddsh $f8, $f16, $f20 \r\n"
405 "pmullh $f6, $f6, %[ff_pw_20] \r\n"
406 "pmullh $f8, $f8, %[ff_pw_20] \r\n"
407 "punpcklbh $f14, $f4, $f0 \r\n"
408 "punpckhbh $f16, $f4, $f0 \r\n"
409 "punpcklbh $f18, $f10, $f0 \r\n"
410 "punpckhbh $f20, $f10, $f0 \r\n"
411 "paddsh $f4, $f14, $f18 \r\n"
412 "paddsh $f10, $f16, $f20 \r\n"
413 "pmullh $f4, $f4, %[ff_pw_5] \r\n"
414 "pmullh $f10, $f10, %[ff_pw_5] \r\n"
415 "punpcklbh $f14, $f2, $f0 \r\n"
416 "punpckhbh $f16, $f2, $f0 \r\n"
417 "punpcklbh $f18, $f12, $f0 \r\n"
418 "punpckhbh $f20, $f12, $f0 \r\n"
419 "paddsh $f2, $f14, $f18 \r\n"
420 "paddsh $f12, $f16, $f20 \r\n"
421 "psubsh $f6, $f6, $f4 \r\n"
422 "psubsh $f8, $f8, $f10 \r\n"
423 "paddsh $f6, $f6, $f2 \r\n"
424 "paddsh $f8, $f8, $f12 \r\n"
425 "paddsh $f6, $f6, %[ff_pw_16] \r\n"
426 "paddsh $f8, $f8, %[ff_pw_16] \r\n"
427 "psrah $f6, $f6, %[ff_pw_5] \r\n"
428 "psrah $f8, $f8, %[ff_pw_5] \r\n"
429 "packushb $f18, $f6, $f8 \r\n"
430 "sdc1 $f18, 0(%[dst]) \r\n"
431 "dadd %[dst], %[dst], %[dstStride] \r\n"
432 "dadd %[src], %[src], %[srcStride] \r\n"
433 "daddi $8, $8, -1 \r\n"
435 : [dst]
"+&r"(dst),[src]
"+&r"(src)
436 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),
438 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
444 int dstStride,
int srcStride)
455 int dstStride,
int srcStride)
458 "xor $f0, $f0, $f0 \r\n"
461 "gslwlc1 $f2, 1(%[src]) \r\n"
462 "gslwrc1 $f2, -2(%[src]) \r\n"
463 "gslwlc1 $f4, 2(%[src]) \r\n"
464 "gslwrc1 $f4, -1(%[src]) \r\n"
465 "gslwlc1 $f6, 3(%[src]) \r\n"
466 "gslwrc1 $f6, 0(%[src]) \r\n"
467 "gslwlc1 $f8, 4(%[src]) \r\n"
468 "gslwrc1 $f8, 1(%[src]) \r\n"
469 "gslwlc1 $f10, 5(%[src]) \r\n"
470 "gslwrc1 $f10, 2(%[src]) \r\n"
471 "gslwlc1 $f12, 6(%[src]) \r\n"
472 "gslwrc1 $f12, 3(%[src]) \r\n"
473 "punpcklbh $f2, $f2, $f0 \r\n"
474 "punpcklbh $f4, $f4, $f0 \r\n"
475 "punpcklbh $f6, $f6, $f0 \r\n"
476 "punpcklbh $f8, $f8, $f0 \r\n"
477 "punpcklbh $f10, $f10, $f0 \r\n"
478 "punpcklbh $f12, $f12, $f0 \r\n"
479 "paddsh $f14, $f6, $f8 \r\n"
480 "paddsh $f16, $f4, $f10 \r\n"
481 "paddsh $f18, $f2, $f12 \r\n"
482 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
483 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
484 "psubsh $f14, $f14, $f16 \r\n"
485 "paddsh $f18, $f14, $f18 \r\n"
486 "paddsh $f18, $f18, %[ff_pw_16] \r\n"
487 "psrah $f18, $f18, %[ff_pw_5] \r\n"
488 "packushb $f18, $f18, $f0 \r\n"
489 "lwc1 $f20, 0(%[dst]) \r\n"
490 "pavgb $f18, $f18, $f20 \r\n"
491 "gsswlc1 $f18, 3(%[dst]) \r\n"
492 "gsswrc1 $f18, 0(%[dst]) \r\n"
493 "dadd %[dst], %[dst], %[dstStride] \r\n"
494 "dadd %[src], %[src], %[srcStride] \r\n"
495 "daddi $8, $8, -1 \r\n"
497 : [dst]
"+&r"(dst),[src]
"+&r"(src)
498 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),
500 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
506 int dstStride,
int srcStride)
509 "xor $f0, $f0, $f0 \r\n"
512 "gsldlc1 $f2, 5(%[src]) \r\n"
513 "gsldrc1 $f2, -2(%[src]) \r\n"
514 "gsldlc1 $f4, 6(%[src]) \r\n"
515 "gsldrc1 $f4, -1(%[src]) \r\n"
516 "gsldlc1 $f6, 7(%[src]) \r\n"
517 "gsldrc1 $f6, 0(%[src]) \r\n"
518 "gsldlc1 $f8, 8(%[src]) \r\n"
519 "gsldrc1 $f8, 1(%[src]) \r\n"
520 "gsldlc1 $f10, 9(%[src]) \r\n"
521 "gsldrc1 $f10, 2(%[src]) \r\n"
522 "gsldlc1 $f12, 10(%[src]) \r\n"
523 "gsldrc1 $f12, 3(%[src]) \r\n"
524 "punpcklbh $f14, $f6, $f0 \r\n"
525 "punpckhbh $f16, $f6, $f0 \r\n"
526 "punpcklbh $f18, $f8, $f0 \r\n"
527 "punpckhbh $f20, $f8, $f0 \r\n"
528 "paddsh $f6, $f14, $f18 \r\n"
529 "paddsh $f8, $f16, $f20 \r\n"
530 "pmullh $f6, $f6, %[ff_pw_20] \r\n"
531 "pmullh $f8, $f8, %[ff_pw_20] \r\n"
532 "punpcklbh $f14, $f4, $f0 \r\n"
533 "punpckhbh $f16, $f4, $f0 \r\n"
534 "punpcklbh $f18, $f10, $f0 \r\n"
535 "punpckhbh $f20, $f10, $f0 \r\n"
536 "paddsh $f4, $f14, $f18 \r\n"
537 "paddsh $f10, $f16, $f20 \r\n"
538 "pmullh $f4, $f4, %[ff_pw_5] \r\n"
539 "pmullh $f10, $f10, %[ff_pw_5] \r\n"
540 "punpcklbh $f14, $f2, $f0 \r\n"
541 "punpckhbh $f16, $f2, $f0 \r\n"
542 "punpcklbh $f18, $f12, $f0 \r\n"
543 "punpckhbh $f20, $f12, $f0 \r\n"
544 "paddsh $f2, $f14, $f18 \r\n"
545 "paddsh $f12, $f16, $f20 \r\n"
546 "psubsh $f6, $f6, $f4 \r\n"
547 "psubsh $f8, $f8, $f10 \r\n"
548 "paddsh $f6, $f6, $f2 \r\n"
549 "paddsh $f8, $f8, $f12 \r\n"
550 "paddsh $f6, $f6, %[ff_pw_16] \r\n"
551 "paddsh $f8, $f8, %[ff_pw_16] \r\n"
552 "psrah $f6, $f6, %[ff_pw_5] \r\n"
553 "psrah $f8, $f8, %[ff_pw_5] \r\n"
554 "packushb $f18, $f6, $f8 \r\n"
555 "ldc1 $f20, 0(%[dst]) \r\n"
556 "pavgb $f18, $f18, $f20 \r\n"
557 "sdc1 $f18, 0(%[dst]) \r\n"
558 "dadd %[dst], %[dst], %[dstStride] \r\n"
559 "dadd %[src], %[src], %[srcStride] \r\n"
560 "daddi $8, $8, -1 \r\n"
562 : [dst]
"+&r"(dst),[src]
"+&r"(src)
563 : [dstStride]
"r"(dstStride),[srcStride]
"r"(srcStride),
565 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
571 int dstStride,
int srcStride)
582 int dstStride,
int srcStride)
585 "xor $f0, $f0, $f0 \r\n"
586 "gslwlc1 $f2, 3(%[srcB]) \r\n"
587 "gslwrc1 $f2, 0(%[srcB]) \r\n"
588 "gslwlc1 $f4, 3(%[srcA]) \r\n"
589 "gslwrc1 $f4, 0(%[srcA]) \r\n"
590 "gslwlc1 $f6, 3(%[src0]) \r\n"
591 "gslwrc1 $f6, 0(%[src0]) \r\n"
592 "gslwlc1 $f8, 3(%[src1]) \r\n"
593 "gslwrc1 $f8, 0(%[src1]) \r\n"
594 "gslwlc1 $f10, 3(%[src2]) \r\n"
595 "gslwrc1 $f10, 0(%[src2]) \r\n"
596 "gslwlc1 $f12, 3(%[src3]) \r\n"
597 "gslwrc1 $f12, 0(%[src3]) \r\n"
598 "gslwlc1 $f14, 3(%[src4]) \r\n"
599 "gslwrc1 $f14, 0(%[src4]) \r\n"
600 "gslwlc1 $f16, 3(%[src5]) \r\n"
601 "gslwrc1 $f16, 0(%[src5]) \r\n"
602 "gslwlc1 $f18, 3(%[src6]) \r\n"
603 "gslwrc1 $f18, 0(%[src6]) \r\n"
604 "punpcklbh $f2, $f2, $f0 \r\n"
605 "punpcklbh $f4, $f4, $f0 \r\n"
606 "punpcklbh $f6, $f6, $f0 \r\n"
607 "punpcklbh $f8, $f8, $f0 \r\n"
608 "punpcklbh $f10, $f10, $f0 \r\n"
609 "punpcklbh $f12, $f12, $f0 \r\n"
610 "punpcklbh $f14, $f14, $f0 \r\n"
611 "punpcklbh $f16, $f16, $f0 \r\n"
612 "punpcklbh $f18, $f18, $f0 \r\n"
613 "paddsh $f20, $f6, $f8 \r\n"
614 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
615 "paddsh $f22, $f4, $f10 \r\n"
616 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
617 "psubsh $f24, $f20, $f22 \r\n"
618 "paddsh $f24, $f24, $f2 \r\n"
619 "paddsh $f24, $f24, $f12 \r\n"
620 "paddsh $f20, $f8, $f10 \r\n"
621 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
622 "paddsh $f22, $f6, $f12 \r\n"
623 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
624 "psubsh $f26, $f20, $f22 \r\n"
625 "paddsh $f26, $f26, $f4 \r\n"
626 "paddsh $f26, $f26, $f14 \r\n"
627 "paddsh $f20, $f10, $f12 \r\n"
628 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
629 "paddsh $f22, $f8, $f14 \r\n"
630 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
631 "psubsh $f28, $f20, $f22 \r\n"
632 "paddsh $f28, $f28, $f6 \r\n"
633 "paddsh $f28, $f28, $f16 \r\n"
634 "paddsh $f20, $f12, $f14 \r\n"
635 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
636 "paddsh $f22, $f10, $f16 \r\n"
637 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
638 "psubsh $f30, $f20, $f22 \r\n"
639 "paddsh $f30, $f30, $f8 \r\n"
640 "paddsh $f30, $f30, $f18 \r\n"
641 "paddsh $f24, $f24, %[ff_pw_16] \r\n"
642 "paddsh $f26, $f26, %[ff_pw_16] \r\n"
643 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
644 "paddsh $f30, $f30, %[ff_pw_16] \r\n"
645 "psrah $f24, $f24, %[ff_pw_5] \r\n"
646 "psrah $f26, $f26, %[ff_pw_5] \r\n"
647 "psrah $f28, $f28, %[ff_pw_5] \r\n"
648 "psrah $f30, $f30, %[ff_pw_5] \r\n"
649 "packushb $f24, $f24, $f0 \r\n"
650 "packushb $f26, $f26, $f0 \r\n"
651 "packushb $f28, $f28, $f0 \r\n"
652 "packushb $f30, $f30, $f0 \r\n"
653 "swc1 $f24, 0(%[dst0]) \r\n"
654 "swc1 $f26, 0(%[dst1]) \r\n"
655 "swc1 $f28, 0(%[dst2]) \r\n"
656 "swc1 $f30, 0(%[dst3]) \r\n"
657 ::[dst0]
"r"(dst), [dst1]
"r"(dst+dstStride),
658 [dst2]
"r"(dst+2*dstStride), [dst3]
"r"(dst+3*dstStride),
659 [srcB]
"r"(src-2*srcStride), [srcA]
"r"(src-srcStride),
661 [src2]
"r"(src+2*srcStride), [src3]
"r"(src+3*srcStride),
662 [src4]
"r"(src+4*srcStride), [src5]
"r"(src+5*srcStride),
665 :
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
"$f18",
666 "$f20",
"$f22",
"$f24",
"$f26",
"$f28",
"$f30"
671 int dstStride,
int srcStride)
674 "xor $f0, $f0, $f0 \r\n"
675 "gsldlc1 $f2, 7(%[srcB]) \r\n"
676 "gsldrc1 $f2, 0(%[srcB]) \r\n"
677 "gsldlc1 $f4, 7(%[srcA]) \r\n"
678 "gsldrc1 $f4, 0(%[srcA]) \r\n"
679 "gsldlc1 $f6, 7(%[src0]) \r\n"
680 "gsldrc1 $f6, 0(%[src0]) \r\n"
681 "gsldlc1 $f8, 7(%[src1]) \r\n"
682 "gsldrc1 $f8, 0(%[src1]) \r\n"
683 "gsldlc1 $f10, 7(%[src2]) \r\n"
684 "gsldrc1 $f10, 0(%[src2]) \r\n"
685 "gsldlc1 $f12, 7(%[src3]) \r\n"
686 "gsldrc1 $f12, 0(%[src3]) \r\n"
687 "gsldlc1 $f14, 7(%[src4]) \r\n"
688 "gsldrc1 $f14, 0(%[src4]) \r\n"
689 "gsldlc1 $f16, 7(%[src5]) \r\n"
690 "gsldrc1 $f16, 0(%[src5]) \r\n"
691 "gsldlc1 $f18, 7(%[src6]) \r\n"
692 "gsldrc1 $f18, 0(%[src6]) \r\n"
693 "gsldlc1 $f20, 7(%[src7]) \r\n"
694 "gsldrc1 $f20, 0(%[src7]) \r\n"
695 "gsldlc1 $f22, 7(%[src8]) \r\n"
696 "gsldrc1 $f22, 0(%[src8]) \r\n"
697 "gsldlc1 $f24, 7(%[src9]) \r\n"
698 "gsldrc1 $f24, 0(%[src9]) \r\n"
699 "gsldlc1 $f26, 7(%[src10]) \r\n"
700 "gsldrc1 $f26, 0(%[src10]) \r\n"
701 "punpcklbh $f1, $f2, $f0 \r\n"
702 "punpckhbh $f2, $f2, $f0 \r\n"
703 "punpcklbh $f3, $f4, $f0 \r\n"
704 "punpckhbh $f4, $f4, $f0 \r\n"
705 "punpcklbh $f5, $f6, $f0 \r\n"
706 "punpckhbh $f6, $f6, $f0 \r\n"
707 "punpcklbh $f7, $f8, $f0 \r\n"
708 "punpckhbh $f8, $f8, $f0 \r\n"
709 "punpcklbh $f9, $f10, $f0 \r\n"
710 "punpckhbh $f10, $f10, $f0 \r\n"
711 "punpcklbh $f11, $f12, $f0 \r\n"
712 "punpckhbh $f12, $f12, $f0 \r\n"
713 "punpcklbh $f13, $f14, $f0 \r\n"
714 "punpckhbh $f14, $f14, $f0 \r\n"
715 "punpcklbh $f15, $f16, $f0 \r\n"
716 "punpckhbh $f16, $f16, $f0 \r\n"
717 "punpcklbh $f17, $f18, $f0 \r\n"
718 "punpckhbh $f18, $f18, $f0 \r\n"
719 "punpcklbh $f19, $f20, $f0 \r\n"
720 "punpckhbh $f20, $f20, $f0 \r\n"
721 "punpcklbh $f21, $f22, $f0 \r\n"
722 "punpckhbh $f22, $f22, $f0 \r\n"
723 "punpcklbh $f23, $f24, $f0 \r\n"
724 "punpckhbh $f24, $f24, $f0 \r\n"
725 "punpcklbh $f25, $f26, $f0 \r\n"
726 "punpckhbh $f26, $f26, $f0 \r\n"
727 "paddsh $f27, $f5, $f7 \r\n"
728 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
729 "paddsh $f28, $f6, $f8 \r\n"
730 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
731 "psubsh $f27, $f27, $f3 \r\n"
732 "psubsh $f28, $f28, $f4 \r\n"
733 "psubsh $f27, $f27, $f9 \r\n"
734 "psubsh $f28, $f28, $f10 \r\n"
735 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
736 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
737 "paddsh $f27, $f27, $f1 \r\n"
738 "paddsh $f28, $f28, $f2 \r\n"
739 "paddsh $f27, $f27, $f11 \r\n"
740 "paddsh $f28, $f28, $f12 \r\n"
741 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
742 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
743 "psrah $f27, $f27, %[ff_pw_5] \r\n"
744 "psrah $f28, $f28, %[ff_pw_5] \r\n"
745 "packushb $f27, $f27, $f0 \r\n"
746 "packushb $f28, $f28, $f0 \r\n"
747 "punpcklwd $f2, $f27, $f28 \r\n"
748 "sdc1 $f2, 0(%[dst0]) \r\n"
749 "paddsh $f27, $f7, $f9 \r\n"
750 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
751 "paddsh $f28, $f8, $f10 \r\n"
752 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
753 "psubsh $f27, $f27, $f5 \r\n"
754 "psubsh $f28, $f28, $f6 \r\n"
755 "psubsh $f27, $f27, $f11 \r\n"
756 "psubsh $f28, $f28, $f12 \r\n"
757 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
758 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
759 "paddsh $f27, $f27, $f3 \r\n"
760 "paddsh $f28, $f28, $f4 \r\n"
761 "paddsh $f27, $f27, $f13 \r\n"
762 "paddsh $f28, $f28, $f14 \r\n"
763 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
764 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
765 "psrah $f27, $f27, %[ff_pw_5] \r\n"
766 "psrah $f28, $f28, %[ff_pw_5] \r\n"
767 "packushb $f27, $f27, $f0 \r\n"
768 "packushb $f28, $f28, $f0 \r\n"
769 "punpcklwd $f4, $f27, $f28 \r\n"
770 "sdc1 $f4, 0(%[dst1]) \r\n"
771 "paddsh $f27, $f9, $f11 \r\n"
772 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
773 "paddsh $f28, $f10, $f12 \r\n"
774 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
775 "psubsh $f27, $f27, $f7 \r\n"
776 "psubsh $f28, $f28, $f8 \r\n"
777 "psubsh $f27, $f27, $f13 \r\n"
778 "psubsh $f28, $f28, $f14 \r\n"
779 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
780 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
781 "paddsh $f27, $f27, $f5 \r\n"
782 "paddsh $f28, $f28, $f6 \r\n"
783 "paddsh $f27, $f27, $f15 \r\n"
784 "paddsh $f28, $f28, $f16 \r\n"
785 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
786 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
787 "psrah $f27, $f27, %[ff_pw_5] \r\n"
788 "psrah $f28, $f28, %[ff_pw_5] \r\n"
789 "packushb $f27, $f27, $f0 \r\n"
790 "packushb $f28, $f28, $f0 \r\n"
791 "punpcklwd $f6, $f27, $f28 \r\n"
792 "sdc1 $f6, 0(%[dst2]) \r\n"
793 "paddsh $f27, $f11, $f13 \r\n"
794 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
795 "paddsh $f28, $f12, $f14 \r\n"
796 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
797 "psubsh $f27, $f27, $f9 \r\n"
798 "psubsh $f28, $f28, $f10 \r\n"
799 "psubsh $f27, $f27, $f15 \r\n"
800 "psubsh $f28, $f28, $f16 \r\n"
801 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
802 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
803 "paddsh $f27, $f27, $f7 \r\n"
804 "paddsh $f28, $f28, $f8 \r\n"
805 "paddsh $f27, $f27, $f17 \r\n"
806 "paddsh $f28, $f28, $f18 \r\n"
807 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
808 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
809 "psrah $f27, $f27, %[ff_pw_5] \r\n"
810 "psrah $f28, $f28, %[ff_pw_5] \r\n"
811 "packushb $f27, $f27, $f0 \r\n"
812 "packushb $f28, $f28, $f0 \r\n"
813 "punpcklwd $f8, $f27, $f28 \r\n"
814 "sdc1 $f8, 0(%[dst3]) \r\n"
815 "paddsh $f27, $f13, $f15 \r\n"
816 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
817 "paddsh $f28, $f14, $f16 \r\n"
818 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
819 "psubsh $f27, $f27, $f11 \r\n"
820 "psubsh $f28, $f28, $f12 \r\n"
821 "psubsh $f27, $f27, $f17 \r\n"
822 "psubsh $f28, $f28, $f18 \r\n"
823 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
824 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
825 "paddsh $f27, $f27, $f9 \r\n"
826 "paddsh $f28, $f28, $f10 \r\n"
827 "paddsh $f27, $f27, $f19 \r\n"
828 "paddsh $f28, $f28, $f20 \r\n"
829 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
830 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
831 "psrah $f27, $f27, %[ff_pw_5] \r\n"
832 "psrah $f28, $f28, %[ff_pw_5] \r\n"
833 "packushb $f27, $f27, $f0 \r\n"
834 "packushb $f28, $f28, $f0 \r\n"
835 "punpcklwd $f10, $f27, $f28 \r\n"
836 "sdc1 $f10, 0(%[dst4]) \r\n"
838 "paddsh $f27, $f15, $f17 \r\n"
839 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
840 "paddsh $f28, $f16, $f18 \r\n"
841 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
842 "psubsh $f27, $f27, $f13 \r\n"
843 "psubsh $f28, $f28, $f14 \r\n"
844 "psubsh $f27, $f27, $f19 \r\n"
845 "psubsh $f28, $f28, $f20 \r\n"
846 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
847 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
848 "paddsh $f27, $f27, $f11 \r\n"
849 "paddsh $f28, $f28, $f12 \r\n"
850 "paddsh $f27, $f27, $f21 \r\n"
851 "paddsh $f28, $f28, $f22 \r\n"
852 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
853 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
854 "psrah $f27, $f27, %[ff_pw_5] \r\n"
855 "psrah $f28, $f28, %[ff_pw_5] \r\n"
856 "packushb $f27, $f27, $f0 \r\n"
857 "packushb $f28, $f28, $f0 \r\n"
858 "punpcklwd $f12, $f27, $f28 \r\n"
859 "sdc1 $f12, 0(%[dst5]) \r\n"
860 "paddsh $f27, $f17, $f19 \r\n"
861 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
862 "paddsh $f28, $f18, $f20 \r\n"
863 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
864 "psubsh $f27, $f27, $f15 \r\n"
865 "psubsh $f28, $f28, $f16 \r\n"
866 "psubsh $f27, $f27, $f21 \r\n"
867 "psubsh $f28, $f28, $f22 \r\n"
868 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
869 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
870 "paddsh $f27, $f27, $f13 \r\n"
871 "paddsh $f28, $f28, $f14 \r\n"
872 "paddsh $f27, $f27, $f23 \r\n"
873 "paddsh $f28, $f28, $f24 \r\n"
874 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
875 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
876 "psrah $f27, $f27, %[ff_pw_5] \r\n"
877 "psrah $f28, $f28, %[ff_pw_5] \r\n"
878 "packushb $f27, $f27, $f0 \r\n"
879 "packushb $f28, $f28, $f0 \r\n"
880 "punpcklwd $f14, $f27, $f28 \r\n"
881 "sdc1 $f14, 0(%[dst6]) \r\n"
882 "paddsh $f27, $f19, $f21 \r\n"
883 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
884 "paddsh $f28, $f20, $f22 \r\n"
885 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
886 "psubsh $f27, $f27, $f17 \r\n"
887 "psubsh $f28, $f28, $f18 \r\n"
888 "psubsh $f27, $f27, $f23 \r\n"
889 "psubsh $f28, $f28, $f24 \r\n"
890 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
891 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
892 "paddsh $f27, $f27, $f15 \r\n"
893 "paddsh $f28, $f28, $f16 \r\n"
894 "paddsh $f27, $f27, $f25 \r\n"
895 "paddsh $f28, $f28, $f26 \r\n"
896 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
897 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
898 "psrah $f27, $f27, %[ff_pw_5] \r\n"
899 "psrah $f28, $f28, %[ff_pw_5] \r\n"
900 "packushb $f27, $f27, $f0 \r\n"
901 "packushb $f28, $f28, $f0 \r\n"
902 "punpcklwd $f16, $f27, $f28 \r\n"
903 "sdc1 $f16, 0(%[dst7]) \r\n"
904 ::[dst0]
"r"(dst), [dst1]
"r"(dst+dstStride),
905 [dst2]
"r"(dst+2*dstStride), [dst3]
"r"(dst+3*dstStride),
906 [dst4]
"r"(dst+4*dstStride), [dst5]
"r"(dst+5*dstStride),
907 [dst6]
"r"(dst+6*dstStride), [dst7]
"r"(dst+7*dstStride),
908 [srcB]
"r"(src-2*srcStride), [srcA]
"r"(src-srcStride),
910 [src2]
"r"(src+2*srcStride), [src3]
"r"(src+3*srcStride),
911 [src4]
"r"(src+4*srcStride), [src5]
"r"(src+5*srcStride),
912 [src6]
"r"(src+6*srcStride), [src7]
"r"(src+7*srcStride),
913 [src8]
"r"(src+8*srcStride), [src9]
"r"(src+9*srcStride),
916 :
"$f0",
"$f1",
"$f2",
"$f3",
"$f4",
"$f5",
"$f6",
"$f7",
"$f8",
"$f9",
"$f10",
917 "$f11",
"$f12",
"$f13",
"$f14",
"$f15",
"$f16",
"$f17",
"$f18",
"$f19",
918 "$f20",
"$f21",
"$f22",
"$f23",
"$f24",
"$f25",
"$f26",
"$f27",
"$f28"
923 int dstStride,
int srcStride)
934 int dstStride,
int srcStride)
937 "xor $f0, $f0, $f0 \r\n"
938 "gslwlc1 $f2, 3(%[srcB]) \r\n"
939 "gslwrc1 $f2, 0(%[srcB]) \r\n"
940 "gslwlc1 $f4, 3(%[srcA]) \r\n"
941 "gslwrc1 $f4, 0(%[srcA]) \r\n"
942 "gslwlc1 $f6, 3(%[src0]) \r\n"
943 "gslwrc1 $f6, 0(%[src0]) \r\n"
944 "gslwlc1 $f8, 3(%[src1]) \r\n"
945 "gslwrc1 $f8, 0(%[src1]) \r\n"
946 "gslwlc1 $f10, 3(%[src2]) \r\n"
947 "gslwrc1 $f10, 0(%[src2]) \r\n"
948 "gslwlc1 $f12, 3(%[src3]) \r\n"
949 "gslwrc1 $f12, 0(%[src3]) \r\n"
950 "gslwlc1 $f14, 3(%[src4]) \r\n"
951 "gslwrc1 $f14, 0(%[src4]) \r\n"
952 "gslwlc1 $f16, 3(%[src5]) \r\n"
953 "gslwrc1 $f16, 0(%[src5]) \r\n"
954 "gslwlc1 $f18, 3(%[src6]) \r\n"
955 "gslwrc1 $f18, 0(%[src6]) \r\n"
956 "punpcklbh $f2, $f2, $f0 \r\n"
957 "punpcklbh $f4, $f4, $f0 \r\n"
958 "punpcklbh $f6, $f6, $f0 \r\n"
959 "punpcklbh $f8, $f8, $f0 \r\n"
960 "punpcklbh $f10, $f10, $f0 \r\n"
961 "punpcklbh $f12, $f12, $f0 \r\n"
962 "punpcklbh $f14, $f14, $f0 \r\n"
963 "punpcklbh $f16, $f16, $f0 \r\n"
964 "punpcklbh $f18, $f18, $f0 \r\n"
965 "paddsh $f20, $f6, $f8 \r\n"
966 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
967 "paddsh $f22, $f4, $f10 \r\n"
968 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
969 "psubsh $f24, $f20, $f22 \r\n"
970 "paddsh $f24, $f24, $f2 \r\n"
971 "paddsh $f24, $f24, $f12 \r\n"
972 "paddsh $f20, $f8, $f10 \r\n"
973 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
974 "paddsh $f22, $f6, $f12 \r\n"
975 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
976 "psubsh $f26, $f20, $f22 \r\n"
977 "paddsh $f26, $f26, $f4 \r\n"
978 "paddsh $f26, $f26, $f14 \r\n"
979 "paddsh $f20, $f10, $f12 \r\n"
980 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
981 "paddsh $f22, $f8, $f14 \r\n"
982 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
983 "psubsh $f28, $f20, $f22 \r\n"
984 "paddsh $f28, $f28, $f6 \r\n"
985 "paddsh $f28, $f28, $f16 \r\n"
986 "paddsh $f20, $f12, $f14 \r\n"
987 "pmullh $f20, $f20, %[ff_pw_20] \r\n"
988 "paddsh $f22, $f10, $f16 \r\n"
989 "pmullh $f22, $f22, %[ff_pw_5] \r\n"
990 "psubsh $f30, $f20, $f22 \r\n"
991 "paddsh $f30, $f30, $f8 \r\n"
992 "paddsh $f30, $f30, $f18 \r\n"
993 "paddsh $f24, $f24, %[ff_pw_16] \r\n"
994 "paddsh $f26, $f26, %[ff_pw_16] \r\n"
995 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
996 "paddsh $f30, $f30, %[ff_pw_16] \r\n"
997 "psrah $f24, $f24, %[ff_pw_5] \r\n"
998 "psrah $f26, $f26, %[ff_pw_5] \r\n"
999 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1000 "psrah $f30, $f30, %[ff_pw_5] \r\n"
1001 "packushb $f24, $f24, $f0 \r\n"
1002 "packushb $f26, $f26, $f0 \r\n"
1003 "packushb $f28, $f28, $f0 \r\n"
1004 "packushb $f30, $f30, $f0 \r\n"
1005 "lwc1 $f2, 0(%[dst0]) \r\n"
1006 "lwc1 $f4, 0(%[dst1]) \r\n"
1007 "lwc1 $f6, 0(%[dst2]) \r\n"
1008 "lwc1 $f8, 0(%[dst3]) \r\n"
1009 "pavgb $f24, $f2, $f24 \r\n"
1010 "pavgb $f26, $f4, $f26 \r\n"
1011 "pavgb $f28, $f6, $f28 \r\n"
1012 "pavgb $f30, $f8, $f30 \r\n"
1013 "swc1 $f24, 0(%[dst0]) \r\n"
1014 "swc1 $f26, 0(%[dst1]) \r\n"
1015 "swc1 $f28, 0(%[dst2]) \r\n"
1016 "swc1 $f30, 0(%[dst3]) \r\n"
1017 ::[dst0]
"r"(dst), [dst1]
"r"(dst+dstStride),
1018 [dst2]
"r"(dst+2*dstStride), [dst3]
"r"(dst+3*dstStride),
1019 [srcB]
"r"(src-2*srcStride), [srcA]
"r"(src-srcStride),
1021 [src2]
"r"(src+2*srcStride), [src3]
"r"(src+3*srcStride),
1022 [src4]
"r"(src+4*srcStride), [src5]
"r"(src+5*srcStride),
1025 :
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
"$f18",
1026 "$f20",
"$f22",
"$f24",
"$f26",
"$f28",
"$f30"
1031 int dstStride,
int srcStride)
1034 "xor $f0, $f0, $f0 \r\n"
1035 "gsldlc1 $f2, 7(%[srcB]) \r\n"
1036 "gsldrc1 $f2, 0(%[srcB]) \r\n"
1037 "gsldlc1 $f4, 7(%[srcA]) \r\n"
1038 "gsldrc1 $f4, 0(%[srcA]) \r\n"
1039 "gsldlc1 $f6, 7(%[src0]) \r\n"
1040 "gsldrc1 $f6, 0(%[src0]) \r\n"
1041 "gsldlc1 $f8, 7(%[src1]) \r\n"
1042 "gsldrc1 $f8, 0(%[src1]) \r\n"
1043 "gsldlc1 $f10, 7(%[src2]) \r\n"
1044 "gsldrc1 $f10, 0(%[src2]) \r\n"
1045 "gsldlc1 $f12, 7(%[src3]) \r\n"
1046 "gsldrc1 $f12, 0(%[src3]) \r\n"
1047 "gsldlc1 $f14, 7(%[src4]) \r\n"
1048 "gsldrc1 $f14, 0(%[src4]) \r\n"
1049 "gsldlc1 $f16, 7(%[src5]) \r\n"
1050 "gsldrc1 $f16, 0(%[src5]) \r\n"
1051 "gsldlc1 $f18, 7(%[src6]) \r\n"
1052 "gsldrc1 $f18, 0(%[src6]) \r\n"
1053 "gsldlc1 $f20, 7(%[src7]) \r\n"
1054 "gsldrc1 $f20, 0(%[src7]) \r\n"
1055 "gsldlc1 $f22, 7(%[src8]) \r\n"
1056 "gsldrc1 $f22, 0(%[src8]) \r\n"
1057 "gsldlc1 $f24, 7(%[src9]) \r\n"
1058 "gsldrc1 $f24, 0(%[src9]) \r\n"
1059 "gsldlc1 $f26, 7(%[src10]) \r\n"
1060 "gsldrc1 $f26, 0(%[src10]) \r\n"
1061 "punpcklbh $f1, $f2, $f0 \r\n"
1062 "punpckhbh $f2, $f2, $f0 \r\n"
1063 "punpcklbh $f3, $f4, $f0 \r\n"
1064 "punpckhbh $f4, $f4, $f0 \r\n"
1065 "punpcklbh $f5, $f6, $f0 \r\n"
1066 "punpckhbh $f6, $f6, $f0 \r\n"
1067 "punpcklbh $f7, $f8, $f0 \r\n"
1068 "punpckhbh $f8, $f8, $f0 \r\n"
1069 "punpcklbh $f9, $f10, $f0 \r\n"
1070 "punpckhbh $f10, $f10, $f0 \r\n"
1071 "punpcklbh $f11, $f12, $f0 \r\n"
1072 "punpckhbh $f12, $f12, $f0 \r\n"
1073 "punpcklbh $f13, $f14, $f0 \r\n"
1074 "punpckhbh $f14, $f14, $f0 \r\n"
1075 "punpcklbh $f15, $f16, $f0 \r\n"
1076 "punpckhbh $f16, $f16, $f0 \r\n"
1077 "punpcklbh $f17, $f18, $f0 \r\n"
1078 "punpckhbh $f18, $f18, $f0 \r\n"
1079 "punpcklbh $f19, $f20, $f0 \r\n"
1080 "punpckhbh $f20, $f20, $f0 \r\n"
1081 "punpcklbh $f21, $f22, $f0 \r\n"
1082 "punpckhbh $f22, $f22, $f0 \r\n"
1083 "punpcklbh $f23, $f24, $f0 \r\n"
1084 "punpckhbh $f24, $f24, $f0 \r\n"
1085 "punpcklbh $f25, $f26, $f0 \r\n"
1086 "punpckhbh $f26, $f26, $f0 \r\n"
1087 "paddsh $f27, $f5, $f7 \r\n"
1088 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1089 "paddsh $f28, $f6, $f8 \r\n"
1090 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1091 "psubsh $f27, $f27, $f3 \r\n"
1092 "psubsh $f28, $f28, $f4 \r\n"
1093 "psubsh $f27, $f27, $f9 \r\n"
1094 "psubsh $f28, $f28, $f10 \r\n"
1095 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1096 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1097 "paddsh $f27, $f27, $f1 \r\n"
1098 "paddsh $f28, $f28, $f2 \r\n"
1099 "paddsh $f27, $f27, $f11 \r\n"
1100 "paddsh $f28, $f28, $f12 \r\n"
1101 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1102 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1103 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1104 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1105 "packushb $f27, $f27, $f0 \r\n"
1106 "packushb $f28, $f28, $f0 \r\n"
1107 "punpcklwd $f2, $f27, $f28 \r\n"
1108 "ldc1 $f28, 0(%[dst0]) \r\n"
1109 "pavgb $f2, $f2, $f28 \r\n"
1110 "sdc1 $f2, 0(%[dst0]) \r\n"
1111 "paddsh $f27, $f7, $f9 \r\n"
1112 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1113 "paddsh $f28, $f8, $f10 \r\n"
1114 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1115 "psubsh $f27, $f27, $f5 \r\n"
1116 "psubsh $f28, $f28, $f6 \r\n"
1117 "psubsh $f27, $f27, $f11 \r\n"
1118 "psubsh $f28, $f28, $f12 \r\n"
1119 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1120 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1121 "paddsh $f27, $f27, $f3 \r\n"
1122 "paddsh $f28, $f28, $f4 \r\n"
1123 "paddsh $f27, $f27, $f13 \r\n"
1124 "paddsh $f28, $f28, $f14 \r\n"
1125 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1126 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1127 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1128 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1129 "packushb $f27, $f27, $f0 \r\n"
1130 "packushb $f28, $f28, $f0 \r\n"
1131 "punpcklwd $f4, $f27, $f28 \r\n"
1132 "ldc1 $f28, 0(%[dst1]) \r\n"
1133 "pavgb $f4, $f4, $f28 \r\n"
1134 "sdc1 $f4, 0(%[dst1]) \r\n"
1135 "paddsh $f27, $f9, $f11 \r\n"
1136 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1137 "paddsh $f28, $f10, $f12 \r\n"
1138 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1139 "psubsh $f27, $f27, $f7 \r\n"
1140 "psubsh $f28, $f28, $f8 \r\n"
1141 "psubsh $f27, $f27, $f13 \r\n"
1142 "psubsh $f28, $f28, $f14 \r\n"
1143 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1144 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1145 "paddsh $f27, $f27, $f5 \r\n"
1146 "paddsh $f28, $f28, $f6 \r\n"
1147 "paddsh $f27, $f27, $f15 \r\n"
1148 "paddsh $f28, $f28, $f16 \r\n"
1149 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1150 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1151 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1152 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1153 "packushb $f27, $f27, $f0 \r\n"
1154 "packushb $f28, $f28, $f0 \r\n"
1155 "punpcklwd $f6, $f27, $f28 \r\n"
1156 "ldc1 $f28, 0(%[dst2]) \r\n"
1157 "pavgb $f6, $f6, $f28 \r\n"
1158 "sdc1 $f6, 0(%[dst2]) \r\n"
1159 "paddsh $f27, $f11, $f13 \r\n"
1160 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1161 "paddsh $f28, $f12, $f14 \r\n"
1162 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1163 "psubsh $f27, $f27, $f9 \r\n"
1164 "psubsh $f28, $f28, $f10 \r\n"
1165 "psubsh $f27, $f27, $f15 \r\n"
1166 "psubsh $f28, $f28, $f16 \r\n"
1167 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1168 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1169 "paddsh $f27, $f27, $f7 \r\n"
1170 "paddsh $f28, $f28, $f8 \r\n"
1171 "paddsh $f27, $f27, $f17 \r\n"
1172 "paddsh $f28, $f28, $f18 \r\n"
1173 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1174 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1175 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1176 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1177 "packushb $f27, $f27, $f0 \r\n"
1178 "packushb $f28, $f28, $f0 \r\n"
1179 "punpcklwd $f8, $f27, $f28 \r\n"
1180 "ldc1 $f28, 0(%[dst3]) \r\n"
1181 "pavgb $f8, $f8, $f28 \r\n"
1182 "sdc1 $f8, 0(%[dst3]) \r\n"
1183 "paddsh $f27, $f13, $f15 \r\n"
1184 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1185 "paddsh $f28, $f14, $f16 \r\n"
1186 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1187 "psubsh $f27, $f27, $f11 \r\n"
1188 "psubsh $f28, $f28, $f12 \r\n"
1189 "psubsh $f27, $f27, $f17 \r\n"
1190 "psubsh $f28, $f28, $f18 \r\n"
1191 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1192 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1193 "paddsh $f27, $f27, $f9 \r\n"
1194 "paddsh $f28, $f28, $f10 \r\n"
1195 "paddsh $f27, $f27, $f19 \r\n"
1196 "paddsh $f28, $f28, $f20 \r\n"
1197 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1198 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1199 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1200 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1201 "packushb $f27, $f27, $f0 \r\n"
1202 "packushb $f28, $f28, $f0 \r\n"
1203 "punpcklwd $f10, $f27, $f28 \r\n"
1204 "ldc1 $f28, 0(%[dst4]) \r\n"
1205 "pavgb $f10, $f10, $f28 \r\n"
1206 "sdc1 $f10, 0(%[dst4]) \r\n"
1207 "paddsh $f27, $f15, $f17 \r\n"
1208 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1209 "paddsh $f28, $f16, $f18 \r\n"
1210 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1211 "psubsh $f27, $f27, $f13 \r\n"
1212 "psubsh $f28, $f28, $f14 \r\n"
1213 "psubsh $f27, $f27, $f19 \r\n"
1214 "psubsh $f28, $f28, $f20 \r\n"
1215 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1216 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1217 "paddsh $f27, $f27, $f11 \r\n"
1218 "paddsh $f28, $f28, $f12 \r\n"
1219 "paddsh $f27, $f27, $f21 \r\n"
1220 "paddsh $f28, $f28, $f22 \r\n"
1221 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1222 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1223 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1224 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1225 "packushb $f27, $f27, $f0 \r\n"
1226 "packushb $f28, $f28, $f0 \r\n"
1227 "punpcklwd $f12, $f27, $f28 \r\n"
1228 "ldc1 $f28, 0(%[dst5]) \r\n"
1229 "pavgb $f12, $f12, $f28 \r\n"
1230 "sdc1 $f12, 0(%[dst5]) \r\n"
1231 "paddsh $f27, $f17, $f19 \r\n"
1232 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1233 "paddsh $f28, $f18, $f20 \r\n"
1234 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1235 "psubsh $f27, $f27, $f15 \r\n"
1236 "psubsh $f28, $f28, $f16 \r\n"
1237 "psubsh $f27, $f27, $f21 \r\n"
1238 "psubsh $f28, $f28, $f22 \r\n"
1239 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1240 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1241 "paddsh $f27, $f27, $f13 \r\n"
1242 "paddsh $f28, $f28, $f14 \r\n"
1243 "paddsh $f27, $f27, $f23 \r\n"
1244 "paddsh $f28, $f28, $f24 \r\n"
1245 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1246 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1247 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1248 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1249 "packushb $f27, $f27, $f0 \r\n"
1250 "packushb $f28, $f28, $f0 \r\n"
1251 "punpcklwd $f14, $f27, $f28 \r\n"
1252 "ldc1 $f28, 0(%[dst6]) \r\n"
1253 "pavgb $f14, $f14, $f28 \r\n"
1254 "sdc1 $f14, 0(%[dst6]) \r\n"
1255 "paddsh $f27, $f19, $f21 \r\n"
1256 "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1257 "paddsh $f28, $f20, $f22 \r\n"
1258 "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1259 "psubsh $f27, $f27, $f17 \r\n"
1260 "psubsh $f28, $f28, $f18 \r\n"
1261 "psubsh $f27, $f27, $f23 \r\n"
1262 "psubsh $f28, $f28, $f24 \r\n"
1263 "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1264 "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1265 "paddsh $f27, $f27, $f15 \r\n"
1266 "paddsh $f28, $f28, $f16 \r\n"
1267 "paddsh $f27, $f27, $f25 \r\n"
1268 "paddsh $f28, $f28, $f26 \r\n"
1269 "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1270 "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1271 "psrah $f27, $f27, %[ff_pw_5] \r\n"
1272 "psrah $f28, $f28, %[ff_pw_5] \r\n"
1273 "packushb $f27, $f27, $f0 \r\n"
1274 "packushb $f28, $f28, $f0 \r\n"
1275 "punpcklwd $f16, $f27, $f28 \r\n"
1276 "ldc1 $f28, 0(%[dst7]) \r\n"
1277 "pavgb $f16, $f16, $f28 \r\n"
1278 "sdc1 $f16, 0(%[dst7]) \r\n"
1279 ::[dst0]
"r"(dst), [dst1]
"r"(dst+dstStride),
1280 [dst2]
"r"(dst+2*dstStride), [dst3]
"r"(dst+3*dstStride),
1281 [dst4]
"r"(dst+4*dstStride), [dst5]
"r"(dst+5*dstStride),
1282 [dst6]
"r"(dst+6*dstStride), [dst7]
"r"(dst+7*dstStride),
1283 [srcB]
"r"(src-2*srcStride), [srcA]
"r"(src-srcStride),
1285 [src2]
"r"(src+2*srcStride), [src3]
"r"(src+3*srcStride),
1286 [src4]
"r"(src+4*srcStride), [src5]
"r"(src+5*srcStride),
1287 [src6]
"r"(src+6*srcStride), [src7]
"r"(src+7*srcStride),
1288 [src8]
"r"(src+8*srcStride), [src9]
"r"(src+9*srcStride),
1291 :
"$f0",
"$f1",
"$f2",
"$f3",
"$f4",
"$f5",
"$f6",
"$f7",
"$f8",
"$f9",
"$f10",
1292 "$f11",
"$f12",
"$f13",
"$f14",
"$f15",
"$f16",
"$f17",
"$f18",
"$f19",
1293 "$f20",
"$f21",
"$f22",
"$f23",
"$f24",
"$f25",
"$f26",
"$f27",
"$f28"
1298 int dstStride,
int srcStride)
1309 int dstStride,
int srcStride)
1313 int16_t *tmp = _tmp;
1316 "xor $f0, $f0, $f0 \r\n"
1319 "gslwlc1 $f2, 1(%[src]) \r\n"
1320 "gslwrc1 $f2, -2(%[src]) \r\n"
1321 "gslwlc1 $f4, 2(%[src]) \r\n"
1322 "gslwrc1 $f4, -1(%[src]) \r\n"
1323 "gslwlc1 $f6, 3(%[src]) \r\n"
1324 "gslwrc1 $f6, 0(%[src]) \r\n"
1325 "gslwlc1 $f8, 4(%[src]) \r\n"
1326 "gslwrc1 $f8, 1(%[src]) \r\n"
1327 "gslwlc1 $f10, 5(%[src]) \r\n"
1328 "gslwrc1 $f10, 2(%[src]) \r\n"
1329 "gslwlc1 $f12, 6(%[src]) \r\n"
1330 "gslwrc1 $f12, 3(%[src]) \r\n"
1331 "punpcklbh $f2, $f2, $f0 \r\n"
1332 "punpcklbh $f4, $f4, $f0 \r\n"
1333 "punpcklbh $f6, $f6, $f0 \r\n"
1334 "punpcklbh $f8, $f8, $f0 \r\n"
1335 "punpcklbh $f10, $f10, $f0 \r\n"
1336 "punpcklbh $f12, $f12, $f0 \r\n"
1337 "paddsh $f14, $f6, $f8 \r\n"
1338 "paddsh $f16, $f4, $f10 \r\n"
1339 "paddsh $f18, $f2, $f12 \r\n"
1340 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1341 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1342 "psubsh $f14, $f14, $f16 \r\n"
1343 "paddsh $f18, $f14, $f18 \r\n"
1344 "sdc1 $f18, 0(%[tmp]) \r\n"
1345 "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1346 "dadd %[src], %[src], %[srcStride] \r\n"
1347 "daddi $8, $8, -1 \r\n"
1349 : [tmp]
"+&r"(tmp),[src]
"+&r"(src)
1350 : [tmpStride]
"r"(8),[srcStride]
"r"(srcStride),
1352 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
"$f18"
1357 for(i=0; i<4; i++) {
1358 const int16_t tmpB= tmp[-8];
1359 const int16_t tmpA= tmp[-4];
1360 const int16_t tmp0= tmp[ 0];
1361 const int16_t tmp1= tmp[ 4];
1362 const int16_t tmp2= tmp[ 8];
1363 const int16_t tmp3= tmp[12];
1364 const int16_t tmp4= tmp[16];
1365 const int16_t tmp5= tmp[20];
1366 const int16_t tmp6= tmp[24];
1367 op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1368 op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1369 op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1370 op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1377 int dstStride,
int srcStride)
1380 int16_t *tmp = _tmp;
1385 "xor $f0, $f0, $f0 \r\n"
1388 "gsldlc1 $f2, 5(%[src]) \r\n"
1389 "gsldrc1 $f2, -2(%[src]) \r\n"
1390 "gsldlc1 $f4, 6(%[src]) \r\n"
1391 "gsldrc1 $f4, -1(%[src]) \r\n"
1392 "gsldlc1 $f6, 7(%[src]) \r\n"
1393 "gsldrc1 $f6, 0(%[src]) \r\n"
1394 "gsldlc1 $f8, 8(%[src]) \r\n"
1395 "gsldrc1 $f8, 1(%[src]) \r\n"
1396 "gsldlc1 $f10, 9(%[src]) \r\n"
1397 "gsldrc1 $f10, 2(%[src]) \r\n"
1398 "gsldlc1 $f12, 10(%[src]) \r\n"
1399 "gsldrc1 $f12, 3(%[src]) \r\n"
1400 "punpcklbh $f1, $f2, $f0 \r\n"
1401 "punpcklbh $f3, $f4, $f0 \r\n"
1402 "punpcklbh $f5, $f6, $f0 \r\n"
1403 "punpcklbh $f7, $f8, $f0 \r\n"
1404 "punpcklbh $f9, $f10, $f0 \r\n"
1405 "punpcklbh $f11, $f12, $f0 \r\n"
1406 "punpckhbh $f2, $f2, $f0 \r\n"
1407 "punpckhbh $f4, $f4, $f0 \r\n"
1408 "punpckhbh $f6, $f6, $f0 \r\n"
1409 "punpckhbh $f8, $f8, $f0 \r\n"
1410 "punpckhbh $f10, $f10, $f0 \r\n"
1411 "punpckhbh $f12, $f12, $f0 \r\n"
1412 "paddsh $f13, $f5, $f7 \r\n"
1413 "paddsh $f15, $f3, $f9 \r\n"
1414 "paddsh $f17, $f1, $f11 \r\n"
1415 "pmullh $f13, $f13, %[ff_pw_20] \r\n"
1416 "pmullh $f15, $f15, %[ff_pw_5] \r\n"
1417 "psubsh $f13, $f13, $f15 \r\n"
1418 "paddsh $f17, $f13, $f17 \r\n"
1419 "paddsh $f14, $f6, $f8 \r\n"
1420 "paddsh $f16, $f4, $f10 \r\n"
1421 "paddsh $f18, $f2, $f12 \r\n"
1422 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1423 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1424 "psubsh $f14, $f14, $f16 \r\n"
1425 "paddsh $f18, $f14, $f18 \r\n"
1426 "sdc1 $f17, 0(%[tmp]) \r\n"
1427 "sdc1 $f18, 8(%[tmp]) \r\n"
1428 "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1429 "dadd %[src], %[src], %[srcStride] \r\n"
1430 "daddi $8, $8, -1 \r\n"
1432 : [tmp]
"+&r"(tmp),[src]
"+&r"(src)
1433 : [tmpStride]
"r"(16),[srcStride]
"r"(srcStride),
1435 :
"$8",
"$f0",
"$f1",
"$f2",
"$f3",
"$f4",
"$f5",
"$f6",
"$f7",
"$f8",
"$f9",
1436 "$f10",
"$f11",
"$f12",
"$f13",
"$f14",
"$f15",
"$f16",
"$f17",
"$f18"
1441 for(i=0; i<8; i++) {
1442 const int tmpB= tmp[-16];
1443 const int tmpA= tmp[ -8];
1444 const int tmp0= tmp[ 0];
1445 const int tmp1= tmp[ 8];
1446 const int tmp2= tmp[ 16];
1447 const int tmp3= tmp[ 24];
1448 const int tmp4= tmp[ 32];
1449 const int tmp5= tmp[ 40];
1450 const int tmp6= tmp[ 48];
1451 const int tmp7= tmp[ 56];
1452 const int tmp8= tmp[ 64];
1453 const int tmp9= tmp[ 72];
1454 const int tmp10=tmp[ 80];
1455 op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1456 op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1457 op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1458 op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1459 op2_put(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
1460 op2_put(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
1461 op2_put(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
1462 op2_put(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
1469 int dstStride,
int srcStride)
1480 int dstStride,
int srcStride)
1484 int16_t *tmp = _tmp;
1488 "xor $f0, $f0, $f0 \r\n"
1491 "gslwlc1 $f2, 1(%[src]) \r\n"
1492 "gslwrc1 $f2, -2(%[src]) \r\n"
1493 "gslwlc1 $f4, 2(%[src]) \r\n"
1494 "gslwrc1 $f4, -1(%[src]) \r\n"
1495 "gslwlc1 $f6, 3(%[src]) \r\n"
1496 "gslwrc1 $f6, 0(%[src]) \r\n"
1497 "gslwlc1 $f8, 4(%[src]) \r\n"
1498 "gslwrc1 $f8, 1(%[src]) \r\n"
1499 "gslwlc1 $f10, 5(%[src]) \r\n"
1500 "gslwrc1 $f10, 2(%[src]) \r\n"
1501 "gslwlc1 $f12, 6(%[src]) \r\n"
1502 "gslwrc1 $f12, 3(%[src]) \r\n"
1503 "punpcklbh $f2, $f2, $f0 \r\n"
1504 "punpcklbh $f4, $f4, $f0 \r\n"
1505 "punpcklbh $f6, $f6, $f0 \r\n"
1506 "punpcklbh $f8, $f8, $f0 \r\n"
1507 "punpcklbh $f10, $f10, $f0 \r\n"
1508 "punpcklbh $f12, $f12, $f0 \r\n"
1509 "paddsh $f14, $f6, $f8 \r\n"
1510 "paddsh $f16, $f4, $f10 \r\n"
1511 "paddsh $f18, $f2, $f12 \r\n"
1512 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1513 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1514 "psubsh $f14, $f14, $f16 \r\n"
1515 "paddsh $f18, $f14, $f18 \r\n"
1516 "sdc1 $f18, 0(%[tmp]) \r\n"
1517 "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1518 "dadd %[src], %[src], %[srcStride] \r\n"
1519 "daddi $8, $8, -1 \r\n"
1521 : [tmp]
"+&r"(tmp),[src]
"+&r"(src)
1522 : [tmpStride]
"r"(8),[srcStride]
"r"(srcStride),
1524 :
"$8",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
"$f14",
"$f16",
"$f18"
1531 const int16_t tmpB= tmp[-8];
1532 const int16_t tmpA= tmp[-4];
1533 const int16_t tmp0= tmp[ 0];
1534 const int16_t tmp1= tmp[ 4];
1535 const int16_t tmp2= tmp[ 8];
1536 const int16_t tmp3= tmp[12];
1537 const int16_t tmp4= tmp[16];
1538 const int16_t tmp5= tmp[20];
1539 const int16_t tmp6= tmp[24];
1540 op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1541 op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1542 op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1543 op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1550 int dstStride,
int srcStride)
1553 int16_t *tmp = _tmp;
1558 "xor $f0, $f0, $f0 \r\n"
1561 "gsldlc1 $f2, 5(%[src]) \r\n"
1562 "gsldrc1 $f2, -2(%[src]) \r\n"
1563 "gsldlc1 $f4, 6(%[src]) \r\n"
1564 "gsldrc1 $f4, -1(%[src]) \r\n"
1565 "gsldlc1 $f6, 7(%[src]) \r\n"
1566 "gsldrc1 $f6, 0(%[src]) \r\n"
1567 "gsldlc1 $f8, 8(%[src]) \r\n"
1568 "gsldrc1 $f8, 1(%[src]) \r\n"
1569 "gsldlc1 $f10, 9(%[src]) \r\n"
1570 "gsldrc1 $f10, 2(%[src]) \r\n"
1571 "gsldlc1 $f12, 10(%[src]) \r\n"
1572 "gsldrc1 $f12, 3(%[src]) \r\n"
1573 "punpcklbh $f1, $f2, $f0 \r\n"
1574 "punpcklbh $f3, $f4, $f0 \r\n"
1575 "punpcklbh $f5, $f6, $f0 \r\n"
1576 "punpcklbh $f7, $f8, $f0 \r\n"
1577 "punpcklbh $f9, $f10, $f0 \r\n"
1578 "punpcklbh $f11, $f12, $f0 \r\n"
1579 "punpckhbh $f2, $f2, $f0 \r\n"
1580 "punpckhbh $f4, $f4, $f0 \r\n"
1581 "punpckhbh $f6, $f6, $f0 \r\n"
1582 "punpckhbh $f8, $f8, $f0 \r\n"
1583 "punpckhbh $f10, $f10, $f0 \r\n"
1584 "punpckhbh $f12, $f12, $f0 \r\n"
1585 "paddsh $f13, $f5, $f7 \r\n"
1586 "paddsh $f15, $f3, $f9 \r\n"
1587 "paddsh $f17, $f1, $f11 \r\n"
1588 "pmullh $f13, $f13, %[ff_pw_20] \r\n"
1589 "pmullh $f15, $f15, %[ff_pw_5] \r\n"
1590 "psubsh $f13, $f13, $f15 \r\n"
1591 "paddsh $f17, $f13, $f17 \r\n"
1592 "paddsh $f14, $f6, $f8 \r\n"
1593 "paddsh $f16, $f4, $f10 \r\n"
1594 "paddsh $f18, $f2, $f12 \r\n"
1595 "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1596 "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1597 "psubsh $f14, $f14, $f16 \r\n"
1598 "paddsh $f18, $f14, $f18 \r\n"
1600 "sdc1 $f17, 0(%[tmp]) \r\n"
1601 "sdc1 $f18, 8(%[tmp]) \r\n"
1602 "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1603 "dadd %[src], %[src], %[srcStride] \r\n"
1604 "daddi $8, $8, -1 \r\n"
1606 : [tmp]
"+&r"(tmp),[src]
"+&r"(src)
1607 : [tmpStride]
"r"(16),[srcStride]
"r"(srcStride),
1609 :
"$8",
"$f0",
"$f1",
"$f2",
"$f3",
"$f4",
"$f5",
"$f6",
"$f7",
"$f8",
"$f9",
1610 "$f10",
"$f11",
"$f12",
"$f13",
"$f14",
"$f15",
"$f16",
"$f17",
"$f18"
1615 for(i=0; i<8; i++) {
1616 const int tmpB= tmp[-16];
1617 const int tmpA= tmp[ -8];
1618 const int tmp0= tmp[ 0];
1619 const int tmp1= tmp[ 8];
1620 const int tmp2= tmp[ 16];
1621 const int tmp3= tmp[ 24];
1622 const int tmp4= tmp[ 32];
1623 const int tmp5= tmp[ 40];
1624 const int tmp6= tmp[ 48];
1625 const int tmp7= tmp[ 56];
1626 const int tmp8= tmp[ 64];
1627 const int tmp9= tmp[ 72];
1628 const int tmp10=tmp[ 80];
1629 op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1630 op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1631 op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1632 op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1633 op2_avg(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
1634 op2_avg(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
1635 op2_avg(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
1636 op2_avg(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
1643 int dstStride,
int srcStride){
1685 uint8_t *
const full_mid= full + 8;
1696 uint8_t *
const full_mid= full + 8;
1705 uint8_t *
const full_mid= full + 8;
1716 uint8_t *
const full_mid= full + 8;
1729 uint8_t *
const full_mid= full + 8;
1742 uint8_t *
const full_mid= full + 8;
1755 uint8_t *
const full_mid= full + 8;
1794 uint8_t *
const full_mid= full + 8;
1807 uint8_t *
const full_mid= full + 8;
1849 uint8_t *
const full_mid= full + 8;
1860 uint8_t *
const full_mid= full + 8;
1869 uint8_t *
const full_mid= full + 8;
1880 uint8_t *
const full_mid= full + 8;
1893 uint8_t *
const full_mid= full + 8;
1906 uint8_t *
const full_mid= full + 8;
1919 uint8_t *
const full_mid= full + 8;
1958 uint8_t *
const full_mid= full + 8;
1971 uint8_t *
const full_mid= full + 8;
2013 uint8_t *
const full_mid= full + 16;
2024 uint8_t *
const full_mid= full + 16;
2033 uint8_t *
const full_mid= full + 16;
2044 uint8_t *
const full_mid= full + 16;
2057 uint8_t *
const full_mid= full + 16;
2070 uint8_t *
const full_mid= full + 16;
2083 uint8_t *
const full_mid= full + 16;
2122 uint8_t *
const full_mid= full + 16;
2135 uint8_t *
const full_mid= full + 16;
2177 uint8_t *
const full_mid= full + 16;
2188 uint8_t *
const full_mid= full + 16;
2197 uint8_t *
const full_mid= full + 16;
2208 uint8_t *
const full_mid= full + 16;
2221 uint8_t *
const full_mid= full + 16;
2234 uint8_t *
const full_mid= full + 16;
2247 uint8_t *
const full_mid= full + 16;
2286 uint8_t *
const full_mid= full + 16;
2299 uint8_t *
const full_mid= full + 16;
2341 uint8_t *
const full_mid= full + 32;
2352 uint8_t *
const full_mid= full + 32;
2361 uint8_t *
const full_mid= full + 32;
2372 uint8_t *
const full_mid= full + 32;
2385 uint8_t *
const full_mid= full + 32;
2398 uint8_t *
const full_mid= full + 32;
2411 uint8_t *
const full_mid= full + 32;
2450 uint8_t *
const full_mid= full + 32;
2463 uint8_t *
const full_mid= full + 32;
2505 uint8_t *
const full_mid= full + 32;
2516 uint8_t *
const full_mid= full + 32;
2525 uint8_t *
const full_mid= full + 32;
2536 uint8_t *
const full_mid= full + 32;
2549 uint8_t *
const full_mid= full + 32;
2562 uint8_t *
const full_mid= full + 32;
2575 uint8_t *
const full_mid= full + 32;
2614 uint8_t *
const full_mid= full + 32;
2627 uint8_t *
const full_mid= full + 32;
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels4_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels4_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static void copy_block16_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void copy_block8_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
GLint GLenum GLboolean GLsizei stride
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void copy_block4_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static void avg_pixels16_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)