37 #define GET_PERM(per1, per2, pix) {\ 38 per1 = vec_lvsl(0, pix);\ 39 per2 = vec_add(per1, vec_splat_u8(1));\ 41 #define LOAD_PIX(v, iv, pix, per1, per2) {\ 42 vector unsigned char pix2l = vec_ld(0, pix);\ 43 vector unsigned char pix2r = vec_ld(16, pix);\ 44 v = vec_perm(pix2l, pix2r, per1);\ 45 iv = vec_perm(pix2l, pix2r, per2);\ 48 #define GET_PERM(per1, per2, pix) {} 49 #define LOAD_PIX(v, iv, pix, per1, per2) {\ 50 v = vec_vsx_ld(0, pix);\ 51 iv = vec_vsx_ld(1, pix);\ 58 int __attribute__((
aligned(16)))
s = 0;
59 const vector
unsigned char zero =
60 (const vector
unsigned char) vec_splat_u8(0);
61 vector
unsigned int sad = (vector
unsigned int) vec_splat_u32(0);
62 vector
signed int sumdiffs;
63 vector
unsigned char perm1, perm2, pix2v, pix2iv;
65 GET_PERM(perm1, perm2, pix2);
66 for (i = 0; i < h; i++) {
70 vector
unsigned char pix1v = vec_ld(0, pix1);
71 LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
74 vector
unsigned char avgv = vec_avg(pix2v, pix2iv);
77 vector
unsigned char t5 = vec_sub(vec_max(pix1v, avgv),
78 vec_min(pix1v, avgv));
81 sad = vec_sum4s(t5, sad);
87 sumdiffs = vec_sums((vector
signed int) sad, (vector
signed int)
zero);
88 sumdiffs = vec_splat(sumdiffs, 3);
89 vec_ste(sumdiffs, 0, &
s);
95 ptrdiff_t stride,
int h)
98 int __attribute__((
aligned(16)))
s = 0;
99 const vector
unsigned char zero =
100 (const vector
unsigned char) vec_splat_u8(0);
101 vector
unsigned char pix1v, pix3v, avgv, t5;
102 vector
unsigned int sad = (vector
unsigned int) vec_splat_u32(0);
103 vector
signed int sumdiffs;
114 vector
unsigned char pix2v = VEC_LD(0, pix2);
116 for (i = 0; i < h; i++) {
120 pix1v = vec_ld(0, pix1);
121 pix3v = VEC_LD(0, pix3);
124 avgv = vec_avg(pix2v, pix3v);
127 t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
130 sad = vec_sum4s(t5, sad);
138 sumdiffs = vec_sums((vector
signed int) sad, (vector
signed int)
zero);
139 sumdiffs = vec_splat(sumdiffs, 3);
140 vec_ste(sumdiffs, 0, &
s);
145 ptrdiff_t stride,
int h)
148 int __attribute__((
aligned(16)))
s = 0;
150 const vector
unsigned char zero =
151 (const vector
unsigned char) vec_splat_u8(0);
152 const vector
unsigned short two =
153 (const vector
unsigned short) vec_splat_u16(2);
154 vector
unsigned char avgv, t5;
155 vector
unsigned char pix1v, pix3v, pix3iv;
156 vector
unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
157 vector
unsigned short avghv, avglv;
158 vector
unsigned int sad = (vector
unsigned int) vec_splat_u32(0);
159 vector
signed int sumdiffs;
160 vector
unsigned char perm1, perm2, pix2v, pix2iv;
161 GET_PERM(perm1, perm2, pix2);
170 LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
171 vector
unsigned short pix2hv =
172 (vector
unsigned short) VEC_MERGEH(
zero, pix2v);
173 vector
unsigned short pix2lv =
174 (vector
unsigned short) VEC_MERGEL(zero, pix2v);
175 vector
unsigned short pix2ihv =
176 (vector
unsigned short) VEC_MERGEH(zero, pix2iv);
177 vector
unsigned short pix2ilv =
178 (vector
unsigned short) VEC_MERGEL(zero, pix2iv);
180 vector
unsigned short t1 = vec_add(pix2hv, pix2ihv);
181 vector
unsigned short t2 = vec_add(pix2lv, pix2ilv);
182 vector
unsigned short t3,
t4;
184 for (i = 0; i < h; i++) {
188 pix1v = vec_ld(0, pix1);
189 LOAD_PIX(pix3v, pix3iv, pix3, perm1, perm2);
198 pix3hv = (vector
unsigned short) VEC_MERGEH(zero, pix3v);
199 pix3lv = (vector
unsigned short) VEC_MERGEL(zero, pix3v);
200 pix3ihv = (vector
unsigned short) VEC_MERGEH(zero, pix3iv);
201 pix3ilv = (vector
unsigned short) VEC_MERGEL(zero, pix3iv);
204 t3 = vec_add(pix3hv, pix3ihv);
205 t4 = vec_add(pix3lv, pix3ilv);
207 avghv = vec_sr(vec_add(vec_add(
t1, t3), two), two);
208 avglv = vec_sr(vec_add(vec_add(
t2, t4), two), two);
211 avgv = vec_pack(avghv, avglv);
214 t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
217 sad = vec_sum4s(t5, sad);
226 sumdiffs = vec_sums((vector
signed int) sad, (vector
signed int) zero);
227 sumdiffs = vec_splat(sumdiffs, 3);
228 vec_ste(sumdiffs, 0, &
s);
234 ptrdiff_t stride,
int h)
238 const vector
unsigned int zero =
239 (const vector
unsigned int) vec_splat_u32(0);
240 vector
unsigned int sad = (vector
unsigned int) vec_splat_u32(0);
241 vector
signed int sumdiffs;
243 for (i = 0; i < h; i++) {
245 vector
unsigned char t1 =vec_ld(0, pix1);
246 vector
unsigned char t2 = VEC_LD(0, pix2);
249 vector
unsigned char t3 = vec_max(t1, t2);
250 vector
unsigned char t4 = vec_min(t1, t2);
251 vector
unsigned char t5 = vec_sub(t3, t4);
254 sad = vec_sum4s(t5, sad);
261 sumdiffs = vec_sums((vector
signed int) sad, (vector
signed int) zero);
262 sumdiffs = vec_splat(sumdiffs, 3);
263 vec_ste(sumdiffs, 0, &
s);
269 ptrdiff_t stride,
int h)
273 const vector
unsigned int zero =
274 (const vector
unsigned int) vec_splat_u32(0);
275 const vector
unsigned char permclear =
276 (vector
unsigned char)
277 { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
278 vector
unsigned int sad = (vector
unsigned int) vec_splat_u32(0);
279 vector
signed int sumdiffs;
281 for (i = 0; i <
h; i++) {
285 vector
unsigned char pix1l = VEC_LD(0, pix1);
286 vector
unsigned char pix2l = VEC_LD(0, pix2);
287 vector
unsigned char t1 = vec_and(pix1l, permclear);
288 vector
unsigned char t2 = vec_and(pix2l, permclear);
291 vector
unsigned char t3 = vec_max(t1, t2);
292 vector
unsigned char t4 = vec_min(t1, t2);
293 vector
unsigned char t5 = vec_sub(t3, t4);
296 sad = vec_sum4s(t5, sad);
303 sumdiffs = vec_sums((vector
signed int) sad, (vector
signed int) zero);
304 sumdiffs = vec_splat(sumdiffs, 3);
305 vec_ste(sumdiffs, 0, &
s);
313 ptrdiff_t stride,
int h)
317 const vector
unsigned int zero =
318 (const vector
unsigned int) vec_splat_u32(0);
319 const vector
unsigned char permclear =
320 (vector
unsigned char)
321 { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
322 vector
unsigned int sum = (vector
unsigned int) vec_splat_u32(0);
323 vector
signed int sumsqr;
325 for (i = 0; i <
h; i++) {
329 vector
unsigned char t1 = vec_and(VEC_LD(0, pix1), permclear);
330 vector
unsigned char t2 = vec_and(VEC_LD(0, pix2), permclear);
336 vector
unsigned char t3 = vec_max(t1, t2);
337 vector
unsigned char t4 = vec_min(t1, t2);
338 vector
unsigned char t5 = vec_sub(t3, t4);
341 sum = vec_msum(t5, t5, sum);
348 sumsqr = vec_sums((vector
signed int) sum, (vector
signed int) zero);
349 sumsqr = vec_splat(sumsqr, 3);
350 vec_ste(sumsqr, 0, &
s);
358 ptrdiff_t stride,
int h)
362 const vector
unsigned int zero =
363 (const vector
unsigned int) vec_splat_u32(0);
364 vector
unsigned int sum = (vector
unsigned int) vec_splat_u32(0);
365 vector
signed int sumsqr;
367 for (i = 0; i < h; i++) {
369 vector
unsigned char t1 = vec_ld(0, pix1);
370 vector
unsigned char t2 = VEC_LD(0, pix2);
376 vector
unsigned char t3 = vec_max(t1, t2);
377 vector
unsigned char t4 = vec_min(t1, t2);
378 vector
unsigned char t5 = vec_sub(t3, t4);
381 sum = vec_msum(t5, t5, sum);
388 sumsqr = vec_sums((vector
signed int) sum, (vector
signed int) zero);
389 sumsqr = vec_splat(sumsqr, 3);
391 vec_ste(sumsqr, 0, &
s);
398 int __attribute__((
aligned(16))) sum;
399 register const vector
unsigned char vzero =
400 (const vector
unsigned char) vec_splat_u8(0);
401 register vector
signed short temp0, temp1, temp2, temp3, temp4,
404 register const vector
signed short vprod1 =
405 (
const vector
signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
406 register const vector
signed short vprod2 =
407 (
const vector
signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
408 register const vector
signed short vprod3 =
409 (
const vector
signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
410 register const vector
unsigned char perm1 =
411 (
const vector
unsigned char)
412 { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
413 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
414 register const vector
unsigned char perm2 =
415 (
const vector
unsigned char)
416 { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
417 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
418 register const vector
unsigned char perm3 =
419 (
const vector
unsigned char)
420 { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
421 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
424 #define ONEITERBUTTERFLY(i, res) \ 426 register vector unsigned char srcO = unaligned_load(stride * i, src); \ 427 register vector unsigned char dstO = unaligned_load(stride * i, dst);\ 431 register vector signed short srcV = \ 432 (vector signed short) VEC_MERGEH((vector signed char) vzero, \ 433 (vector signed char) srcO); \ 434 register vector signed short dstV = \ 435 (vector signed short) VEC_MERGEH((vector signed char) vzero, \ 436 (vector signed char) dstO); \ 439 register vector signed short but0 = vec_sub(srcV, dstV); \ 440 register vector signed short op1 = vec_perm(but0, but0, perm1); \ 441 register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ 442 register vector signed short op2 = vec_perm(but1, but1, perm2); \ 443 register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ 444 register vector signed short op3 = vec_perm(but2, but2, perm3); \ 445 res = vec_mladd(but2, vprod3, op3); \ 448 ONEITERBUTTERFLY(0, temp0);
449 ONEITERBUTTERFLY(1, temp1);
450 ONEITERBUTTERFLY(2, temp2);
451 ONEITERBUTTERFLY(3, temp3);
452 ONEITERBUTTERFLY(4, temp4);
453 ONEITERBUTTERFLY(5, temp5);
454 ONEITERBUTTERFLY(6, temp6);
455 ONEITERBUTTERFLY(7, temp7);
457 #undef ONEITERBUTTERFLY 459 register vector
signed int vsum;
460 register vector
signed short line0 = vec_add(temp0, temp1);
461 register vector
signed short line1 = vec_sub(temp0, temp1);
462 register vector
signed short line2 = vec_add(temp2, temp3);
463 register vector
signed short line3 = vec_sub(temp2, temp3);
464 register vector
signed short line4 = vec_add(temp4, temp5);
465 register vector
signed short line5 = vec_sub(temp4, temp5);
466 register vector
signed short line6 = vec_add(temp6, temp7);
467 register vector
signed short line7 = vec_sub(temp6, temp7);
469 register vector
signed short line0B = vec_add(line0, line2);
470 register vector
signed short line2B = vec_sub(line0, line2);
471 register vector
signed short line1B = vec_add(line1, line3);
472 register vector
signed short line3B = vec_sub(line1, line3);
473 register vector
signed short line4B = vec_add(line4, line6);
474 register vector
signed short line6B = vec_sub(line4, line6);
475 register vector
signed short line5B = vec_add(line5, line7);
476 register vector
signed short line7B = vec_sub(line5, line7);
478 register vector
signed short line0C = vec_add(line0B, line4B);
479 register vector
signed short line4C = vec_sub(line0B, line4B);
480 register vector
signed short line1C = vec_add(line1B, line5B);
481 register vector
signed short line5C = vec_sub(line1B, line5B);
482 register vector
signed short line2C = vec_add(line2B, line6B);
483 register vector
signed short line6C = vec_sub(line2B, line6B);
484 register vector
signed short line3C = vec_add(line3B, line7B);
485 register vector
signed short line7C = vec_sub(line3B, line7B);
487 vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
488 vsum = vec_sum4s(vec_abs(line1C), vsum);
489 vsum = vec_sum4s(vec_abs(line2C), vsum);
490 vsum = vec_sum4s(vec_abs(line3C), vsum);
491 vsum = vec_sum4s(vec_abs(line4C), vsum);
492 vsum = vec_sum4s(vec_abs(line5C), vsum);
493 vsum = vec_sum4s(vec_abs(line6C), vsum);
494 vsum = vec_sum4s(vec_abs(line7C), vsum);
495 vsum = vec_sums(vsum, (vector
signed int) vzero);
496 vsum = vec_splat(vsum, 3);
498 vec_ste(vsum, 0, &sum);
522 uint8_t *src, ptrdiff_t stride,
int h)
524 int __attribute__((
aligned(16))) sum;
525 register vector
signed short 534 register vector
signed short 543 register const vector
unsigned char vzero
__asm__ ("v31") =
544 (const vector
unsigned char) vec_splat_u8(0);
546 register const vector
signed short vprod1
__asm__ (
"v16") =
547 (
const vector
signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
549 register const vector
signed short vprod2
__asm__ (
"v17") =
550 (
const vector
signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
552 register const vector
signed short vprod3
__asm__ (
"v18") =
553 (
const vector
signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
555 register const vector
unsigned char perm1
__asm__ (
"v19") =
556 (
const vector
unsigned char)
557 { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
558 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
560 register const vector
unsigned char perm2
__asm__ (
"v20") =
561 (
const vector
unsigned char)
562 { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
563 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
565 register const vector
unsigned char perm3
__asm__ (
"v21") =
566 (
const vector
unsigned char)
567 { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
568 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
570 #define ONEITERBUTTERFLY(i, res1, res2) \ 572 register vector unsigned char srcO __asm__ ("v22") = \ 573 unaligned_load(stride * i, src); \ 574 register vector unsigned char dstO __asm__ ("v23") = \ 575 unaligned_load(stride * i, dst);\ 578 register vector signed short srcV __asm__ ("v24") = \ 579 (vector signed short) VEC_MERGEH((vector signed char) vzero, \ 580 (vector signed char) srcO); \ 581 register vector signed short dstV __asm__ ("v25") = \ 582 (vector signed short) VEC_MERGEH((vector signed char) vzero, \ 583 (vector signed char) dstO); \ 584 register vector signed short srcW __asm__ ("v26") = \ 585 (vector signed short) VEC_MERGEL((vector signed char) vzero, \ 586 (vector signed char) srcO); \ 587 register vector signed short dstW __asm__ ("v27") = \ 588 (vector signed short) VEC_MERGEL((vector signed char) vzero, \ 589 (vector signed char) dstO); \ 592 register vector signed short but0 __asm__ ("v28") = \ 593 vec_sub(srcV, dstV); \ 594 register vector signed short but0S __asm__ ("v29") = \ 595 vec_sub(srcW, dstW); \ 596 register vector signed short op1 __asm__ ("v30") = \ 597 vec_perm(but0, but0, perm1); \ 598 register vector signed short but1 __asm__ ("v22") = \ 599 vec_mladd(but0, vprod1, op1); \ 600 register vector signed short op1S __asm__ ("v23") = \ 601 vec_perm(but0S, but0S, perm1); \ 602 register vector signed short but1S __asm__ ("v24") = \ 603 vec_mladd(but0S, vprod1, op1S); \ 604 register vector signed short op2 __asm__ ("v25") = \ 605 vec_perm(but1, but1, perm2); \ 606 register vector signed short but2 __asm__ ("v26") = \ 607 vec_mladd(but1, vprod2, op2); \ 608 register vector signed short op2S __asm__ ("v27") = \ 609 vec_perm(but1S, but1S, perm2); \ 610 register vector signed short but2S __asm__ ("v28") = \ 611 vec_mladd(but1S, vprod2, op2S); \ 612 register vector signed short op3 __asm__ ("v29") = \ 613 vec_perm(but2, but2, perm3); \ 614 register vector signed short op3S __asm__ ("v30") = \ 615 vec_perm(but2S, but2S, perm3); \ 616 res1 
= vec_mladd(but2, vprod3, op3); \ 617 res2 = vec_mladd(but2S, vprod3, op3S); \ 620 ONEITERBUTTERFLY(0, temp0, temp0S);
621 ONEITERBUTTERFLY(1, temp1, temp1S);
622 ONEITERBUTTERFLY(2, temp2, temp2S);
623 ONEITERBUTTERFLY(3, temp3, temp3S);
624 ONEITERBUTTERFLY(4, temp4, temp4S);
625 ONEITERBUTTERFLY(5, temp5, temp5S);
626 ONEITERBUTTERFLY(6, temp6, temp6S);
627 ONEITERBUTTERFLY(7, temp7, temp7S);
629 #undef ONEITERBUTTERFLY 631 register vector
signed int vsum;
633 register vector
signed short line0 = vec_add(temp0, temp1);
634 register vector
signed short line1 = vec_sub(temp0, temp1);
635 register vector
signed short line2 = vec_add(temp2, temp3);
636 register vector
signed short line3 = vec_sub(temp2, temp3);
637 register vector
signed short line4 = vec_add(temp4, temp5);
638 register vector
signed short line5 = vec_sub(temp4, temp5);
639 register vector
signed short line6 = vec_add(temp6, temp7);
640 register vector
signed short line7 = vec_sub(temp6, temp7);
642 register vector
signed short line0B = vec_add(line0, line2);
643 register vector
signed short line2B = vec_sub(line0, line2);
644 register vector
signed short line1B = vec_add(line1, line3);
645 register vector
signed short line3B = vec_sub(line1, line3);
646 register vector
signed short line4B = vec_add(line4, line6);
647 register vector
signed short line6B = vec_sub(line4, line6);
648 register vector
signed short line5B = vec_add(line5, line7);
649 register vector
signed short line7B = vec_sub(line5, line7);
651 register vector
signed short line0C = vec_add(line0B, line4B);
652 register vector
signed short line4C = vec_sub(line0B, line4B);
653 register vector
signed short line1C = vec_add(line1B, line5B);
654 register vector
signed short line5C = vec_sub(line1B, line5B);
655 register vector
signed short line2C = vec_add(line2B, line6B);
656 register vector
signed short line6C = vec_sub(line2B, line6B);
657 register vector
signed short line3C = vec_add(line3B, line7B);
658 register vector
signed short line7C = vec_sub(line3B, line7B);
660 register vector
signed short line0S = vec_add(temp0S, temp1S);
661 register vector
signed short line1S = vec_sub(temp0S, temp1S);
662 register vector
signed short line2S = vec_add(temp2S, temp3S);
663 register vector
signed short line3S = vec_sub(temp2S, temp3S);
664 register vector
signed short line4S = vec_add(temp4S, temp5S);
665 register vector
signed short line5S = vec_sub(temp4S, temp5S);
666 register vector
signed short line6S = vec_add(temp6S, temp7S);
667 register vector
signed short line7S = vec_sub(temp6S, temp7S);
669 register vector
signed short line0BS = vec_add(line0S, line2S);
670 register vector
signed short line2BS = vec_sub(line0S, line2S);
671 register vector
signed short line1BS = vec_add(line1S, line3S);
672 register vector
signed short line3BS = vec_sub(line1S, line3S);
673 register vector
signed short line4BS = vec_add(line4S, line6S);
674 register vector
signed short line6BS = vec_sub(line4S, line6S);
675 register vector
signed short line5BS = vec_add(line5S, line7S);
676 register vector
signed short line7BS = vec_sub(line5S, line7S);
678 register vector
signed short line0CS = vec_add(line0BS, line4BS);
679 register vector
signed short line4CS = vec_sub(line0BS, line4BS);
680 register vector
signed short line1CS = vec_add(line1BS, line5BS);
681 register vector
signed short line5CS = vec_sub(line1BS, line5BS);
682 register vector
signed short line2CS = vec_add(line2BS, line6BS);
683 register vector
signed short line6CS = vec_sub(line2BS, line6BS);
684 register vector
signed short line3CS = vec_add(line3BS, line7BS);
685 register vector
signed short line7CS = vec_sub(line3BS, line7BS);
687 vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
688 vsum = vec_sum4s(vec_abs(line1C), vsum);
689 vsum = vec_sum4s(vec_abs(line2C), vsum);
690 vsum = vec_sum4s(vec_abs(line3C), vsum);
691 vsum = vec_sum4s(vec_abs(line4C), vsum);
692 vsum = vec_sum4s(vec_abs(line5C), vsum);
693 vsum = vec_sum4s(vec_abs(line6C), vsum);
694 vsum = vec_sum4s(vec_abs(line7C), vsum);
696 vsum = vec_sum4s(vec_abs(line0CS), vsum);
697 vsum = vec_sum4s(vec_abs(line1CS), vsum);
698 vsum = vec_sum4s(vec_abs(line2CS), vsum);
699 vsum = vec_sum4s(vec_abs(line3CS), vsum);
700 vsum = vec_sum4s(vec_abs(line4CS), vsum);
701 vsum = vec_sum4s(vec_abs(line5CS), vsum);
702 vsum = vec_sum4s(vec_abs(line6CS), vsum);
703 vsum = vec_sum4s(vec_abs(line7CS), vsum);
704 vsum = vec_sums(vsum, (vector
signed int) vzero);
705 vsum = vec_splat(vsum, 3);
707 vec_ste(vsum, 0, &sum);
713 uint8_t *src, ptrdiff_t stride,
int h)
715 int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
720 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
732 c->
pix_abs[0][1] = sad16_x2_altivec;
733 c->
pix_abs[0][2] = sad16_y2_altivec;
734 c->
pix_abs[0][3] = sad16_xy2_altivec;
735 c->
pix_abs[0][0] = sad16_altivec;
736 c->
pix_abs[1][0] = sad8_altivec;
738 c->
sad[0] = sad16_altivec;
739 c->
sad[1] = sad8_altivec;
740 c->
sse[0] = sse16_altivec;
741 c->
sse[1] = sse8_altivec;
av_cold void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx)
Macro definitions for various function/variable attributes.
me_cmp_func hadamard8_diff[6]
Undefined Behavior: In C, some operations are undefined — for example signed integer overflow, dereferencing freed memory, or accessing outside an allocated object. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
static int aligned(int val)
#define PPC_ALTIVEC(flags)
me_cmp_func pix_abs[2][4]
Libavcodec external API header.
main external API structure.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains misc utility macros and inline functions.
GLint GLenum GLboolean GLsizei stride
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this — just let it be. vf: default, minimum, maximum, flags; the name is the option name — keep it simple and lowercase; descriptions are short.