41     register vector 
unsigned char pixelsv1, pixelsv2;
 
   42     register vector 
unsigned char pixelsv1B, pixelsv2B;
 
   43     register vector 
unsigned char pixelsv1C, pixelsv2C;
 
   44     register vector 
unsigned char pixelsv1D, pixelsv2D;
 
   46     register vector 
unsigned char perm = vec_lvsl(0, pixels);
 
   48     register ptrdiff_t line_size_2 = line_size << 1;
 
   49     register ptrdiff_t line_size_3 = line_size + line_size_2;
 
   50     register ptrdiff_t line_size_4 = line_size << 2;
 
   57     for (i = 0; i < h; i += 4) {
 
   58         pixelsv1  = vec_ld( 0, pixels);
 
   59         pixelsv2  = vec_ld(15, pixels);
 
   60         pixelsv1B = vec_ld(line_size, pixels);
 
   61         pixelsv2B = vec_ld(15 + line_size, pixels);
 
   62         pixelsv1C = vec_ld(line_size_2, pixels);
 
   63         pixelsv2C = vec_ld(15 + line_size_2, pixels);
 
   64         pixelsv1D = vec_ld(line_size_3, pixels);
 
   65         pixelsv2D = vec_ld(15 + line_size_3, pixels);
 
   66         vec_st(vec_perm(pixelsv1, pixelsv2, perm),
 
   67                0, (
unsigned char*)block);
 
   68         vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
 
   69                line_size, (
unsigned char*)block);
 
   70         vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
 
   71                line_size_2, (
unsigned char*)block);
 
   72         vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
 
   73                line_size_3, (
unsigned char*)block);
 
   /*
    * op_avg(a, b): overwrite a with the byte-wise average of a and b, rounded
    * up, treating the low 32 bits of each operand as four packed 8-bit lanes.
    *
    * Per lane, (a | b) - ((a ^ b) >> 1) equals (a + b + 1) >> 1.  The
    * 0xFEFEFEFE mask clears the lowest bit of every byte before the shift so
    * that no bit crosses from one lane into its neighbour.
    *
    * a must be a modifiable lvalue.  Both arguments are expanded more than
    * once, so they must be free of side effects.
    */
   #define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
   83     register vector 
unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
 
   84     register vector 
unsigned char perm = vec_lvsl(0, pixels);
 
   87     for (i = 0; i < h; i++) {
 
   88         pixelsv1 = vec_ld( 0, pixels);
 
   89         pixelsv2 = vec_ld(16,pixels);
 
   90         blockv = vec_ld(0, block);
 
   91         pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
 
   92         blockv = vec_avg(blockv,pixelsv);
 
   93         vec_st(blockv, 0, (
unsigned char*)block);
 
  100 static void avg_pixels8_altivec(
uint8_t * block, 
const uint8_t * pixels, ptrdiff_t line_size, 
int h)
 
  102     register vector 
unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
 
  105    for (i = 0; i < h; i++) {
 
  108        int rightside = ((
unsigned long)block & 0x0000000F);
 
  110        blockv = vec_ld(0, block);
 
  111        pixelsv1 = vec_ld( 0, pixels);
 
  112        pixelsv2 = vec_ld(16, pixels);
 
  113        pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
 
  116            pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,
s0,
s1));
 
  118            pixelsv = vec_perm(blockv, pixelsv, vcprm(
s0,
s1,2,3));
 
  121        blockv = vec_avg(blockv, pixelsv);
 
  123        vec_st(blockv, 0, block);
 
  131 static void put_pixels8_xy2_altivec(
uint8_t *block, 
const uint8_t *pixels, ptrdiff_t line_size, 
int h)
 
  134     register vector 
unsigned char pixelsv1, pixelsv2, pixelsavg;
 
  135     register vector 
unsigned char blockv, temp1, temp2;
 
  136     register vector 
unsigned short pixelssum1, pixelssum2, temp3;
 
  137     register const vector 
unsigned char vczero = (
const vector 
unsigned char)vec_splat_u8(0);
 
  138     register const vector 
unsigned short vctwo = (
const vector 
unsigned short)vec_splat_u16(2);
 
  140     temp1 = vec_ld(0, pixels);
 
  141     temp2 = vec_ld(16, pixels);
 
  142     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
 
  143     if ((((
unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
 
  146         pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
 
  148     pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  149     pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  150     pixelssum1 = vec_add((vector 
unsigned short)pixelsv1,
 
  151                          (vector 
unsigned short)pixelsv2);
 
  152     pixelssum1 = vec_add(pixelssum1, vctwo);
 
  154     for (i = 0; i < h ; i++) {
 
  155         int rightside = ((
unsigned long)block & 0x0000000F);
 
  156         blockv = vec_ld(0, block);
 
  158         temp1 = vec_ld(line_size, pixels);
 
  159         temp2 = vec_ld(line_size + 16, pixels);
 
  160         pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
 
  161         if (((((
unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
 
  164             pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
 
  167         pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  168         pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  169         pixelssum2 = vec_add((vector 
unsigned short)pixelsv1,
 
  170                              (vector 
unsigned short)pixelsv2);
 
  171         temp3 = vec_add(pixelssum1, pixelssum2);
 
  172         temp3 = vec_sra(temp3, vctwo);
 
  173         pixelssum1 = vec_add(pixelssum2, vctwo);
 
  174         pixelsavg = vec_packsu(temp3, (vector 
unsigned short) vczero);
 
  177             blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, 
s0, 
s1));
 
  179             blockv = vec_perm(blockv, pixelsavg, vcprm(
s0, 
s1, 2, 3));
 
  182         vec_st(blockv, 0, block);
 
  190 static void put_no_rnd_pixels8_xy2_altivec(
uint8_t *block, 
const uint8_t *pixels, ptrdiff_t line_size, 
int h)
 
  193     register vector 
unsigned char pixelsv1, pixelsv2, pixelsavg;
 
  194     register vector 
unsigned char blockv, temp1, temp2;
 
  195     register vector 
unsigned short pixelssum1, pixelssum2, temp3;
 
  196     register const vector 
unsigned char vczero = (
const vector 
unsigned char)vec_splat_u8(0);
 
  197     register const vector 
unsigned short vcone = (
const vector 
unsigned short)vec_splat_u16(1);
 
  198     register const vector 
unsigned short vctwo = (
const vector 
unsigned short)vec_splat_u16(2);
 
  200     temp1 = vec_ld(0, pixels);
 
  201     temp2 = vec_ld(16, pixels);
 
  202     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
 
  203     if ((((
unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
 
  206         pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
 
  208     pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  209     pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  210     pixelssum1 = vec_add((vector 
unsigned short)pixelsv1,
 
  211                          (vector 
unsigned short)pixelsv2);
 
  212     pixelssum1 = vec_add(pixelssum1, vcone);
 
  214     for (i = 0; i < h ; i++) {
 
  215         int rightside = ((
unsigned long)block & 0x0000000F);
 
  216         blockv = vec_ld(0, block);
 
  218         temp1 = vec_ld(line_size, pixels);
 
  219         temp2 = vec_ld(line_size + 16, pixels);
 
  220         pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
 
  221         if (((((
unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
 
  224             pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
 
  227         pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  228         pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  229         pixelssum2 = vec_add((vector 
unsigned short)pixelsv1,
 
  230                              (vector 
unsigned short)pixelsv2);
 
  231         temp3 = vec_add(pixelssum1, pixelssum2);
 
  232         temp3 = vec_sra(temp3, vctwo);
 
  233         pixelssum1 = vec_add(pixelssum2, vcone);
 
  234         pixelsavg = vec_packsu(temp3, (vector 
unsigned short) vczero);
 
  237             blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, 
s0, 
s1));
 
  239             blockv = vec_perm(blockv, pixelsavg, vcprm(
s0, 
s1, 2, 3));
 
  242         vec_st(blockv, 0, block);
 
  250 static void put_pixels16_xy2_altivec(
uint8_t * block, 
const uint8_t * pixels, ptrdiff_t line_size, 
int h)
 
  253     register vector 
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
 
  254     register vector 
unsigned char blockv, temp1, temp2;
 
  255     register vector 
unsigned short temp3, temp4,
 
  256         pixelssum1, pixelssum2, pixelssum3, pixelssum4;
 
  257     register const vector 
unsigned char vczero = (
const vector 
unsigned char)vec_splat_u8(0);
 
  258     register const vector 
unsigned short vctwo = (
const vector 
unsigned short)vec_splat_u16(2);
 
  260     temp1 = vec_ld(0, pixels);
 
  261     temp2 = vec_ld(16, pixels);
 
  262     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
 
  263     if ((((
unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
 
  266         pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
 
  268     pixelsv3 = vec_mergel(vczero, pixelsv1);
 
  269     pixelsv4 = vec_mergel(vczero, pixelsv2);
 
  270     pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  271     pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  272     pixelssum3 = vec_add((vector 
unsigned short)pixelsv3,
 
  273                          (vector 
unsigned short)pixelsv4);
 
  274     pixelssum3 = vec_add(pixelssum3, vctwo);
 
  275     pixelssum1 = vec_add((vector 
unsigned short)pixelsv1,
 
  276                          (vector 
unsigned short)pixelsv2);
 
  277     pixelssum1 = vec_add(pixelssum1, vctwo);
 
  279     for (i = 0; i < h ; i++) {
 
  280         blockv = vec_ld(0, block);
 
  282         temp1 = vec_ld(line_size, pixels);
 
  283         temp2 = vec_ld(line_size + 16, pixels);
 
  284         pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
 
  285         if (((((
unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
 
  288             pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
 
  291         pixelsv3 = vec_mergel(vczero, pixelsv1);
 
  292         pixelsv4 = vec_mergel(vczero, pixelsv2);
 
  293         pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  294         pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  296         pixelssum4 = vec_add((vector 
unsigned short)pixelsv3,
 
  297                              (vector 
unsigned short)pixelsv4);
 
  298         pixelssum2 = vec_add((vector 
unsigned short)pixelsv1,
 
  299                              (vector 
unsigned short)pixelsv2);
 
  300         temp4 = vec_add(pixelssum3, pixelssum4);
 
  301         temp4 = vec_sra(temp4, vctwo);
 
  302         temp3 = vec_add(pixelssum1, pixelssum2);
 
  303         temp3 = vec_sra(temp3, vctwo);
 
  305         pixelssum3 = vec_add(pixelssum4, vctwo);
 
  306         pixelssum1 = vec_add(pixelssum2, vctwo);
 
  308         blockv = vec_packsu(temp3, temp4);
 
  310         vec_st(blockv, 0, block);
 
  318 static void put_no_rnd_pixels16_xy2_altivec(
uint8_t * block, 
const uint8_t * pixels, ptrdiff_t line_size, 
int h)
 
  321     register vector 
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
 
  322     register vector 
unsigned char blockv, temp1, temp2;
 
  323     register vector 
unsigned short temp3, temp4,
 
  324         pixelssum1, pixelssum2, pixelssum3, pixelssum4;
 
  325     register const vector 
unsigned char vczero = (
const vector 
unsigned char)vec_splat_u8(0);
 
  326     register const vector 
unsigned short vcone = (
const vector 
unsigned short)vec_splat_u16(1);
 
  327     register const vector 
unsigned short vctwo = (
const vector 
unsigned short)vec_splat_u16(2);
 
  329     temp1 = vec_ld(0, pixels);
 
  330     temp2 = vec_ld(16, pixels);
 
  331     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
 
  332     if ((((
unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
 
  335         pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
 
  337     pixelsv3 = vec_mergel(vczero, pixelsv1);
 
  338     pixelsv4 = vec_mergel(vczero, pixelsv2);
 
  339     pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  340     pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  341     pixelssum3 = vec_add((vector 
unsigned short)pixelsv3,
 
  342                          (vector 
unsigned short)pixelsv4);
 
  343     pixelssum3 = vec_add(pixelssum3, vcone);
 
  344     pixelssum1 = vec_add((vector 
unsigned short)pixelsv1,
 
  345                          (vector 
unsigned short)pixelsv2);
 
  346     pixelssum1 = vec_add(pixelssum1, vcone);
 
  348     for (i = 0; i < h ; i++) {
 
  349         blockv = vec_ld(0, block);
 
  351         temp1 = vec_ld(line_size, pixels);
 
  352         temp2 = vec_ld(line_size + 16, pixels);
 
  353         pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
 
  354         if (((((
unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
 
  357             pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
 
  360         pixelsv3 = vec_mergel(vczero, pixelsv1);
 
  361         pixelsv4 = vec_mergel(vczero, pixelsv2);
 
  362         pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  363         pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  365         pixelssum4 = vec_add((vector 
unsigned short)pixelsv3,
 
  366                              (vector 
unsigned short)pixelsv4);
 
  367         pixelssum2 = vec_add((vector 
unsigned short)pixelsv1,
 
  368                              (vector 
unsigned short)pixelsv2);
 
  369         temp4 = vec_add(pixelssum3, pixelssum4);
 
  370         temp4 = vec_sra(temp4, vctwo);
 
  371         temp3 = vec_add(pixelssum1, pixelssum2);
 
  372         temp3 = vec_sra(temp3, vctwo);
 
  374         pixelssum3 = vec_add(pixelssum4, vcone);
 
  375         pixelssum1 = vec_add(pixelssum2, vcone);
 
  377         blockv = vec_packsu(temp3, temp4);
 
  379         vec_st(blockv, 0, block);
 
  387 static void avg_pixels8_xy2_altivec(
uint8_t *block, 
const uint8_t *pixels, ptrdiff_t line_size, 
int h)
 
  390     register vector 
unsigned char pixelsv1, pixelsv2, pixelsavg;
 
  391     register vector 
unsigned char blockv, temp1, temp2, blocktemp;
 
  392     register vector 
unsigned short pixelssum1, pixelssum2, temp3;
 
  394     register const vector 
unsigned char vczero = (
const vector 
unsigned char)
 
  396     register const vector 
unsigned short vctwo = (
const vector 
unsigned short)
 
  399     temp1 = vec_ld(0, pixels);
 
  400     temp2 = vec_ld(16, pixels);
 
  401     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
 
  402     if ((((
unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
 
  405         pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
 
  407     pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  408     pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  409     pixelssum1 = vec_add((vector 
unsigned short)pixelsv1,
 
  410                          (vector 
unsigned short)pixelsv2);
 
  411     pixelssum1 = vec_add(pixelssum1, vctwo);
 
  413     for (i = 0; i < h ; i++) {
 
  414         int rightside = ((
unsigned long)block & 0x0000000F);
 
  415         blockv = vec_ld(0, block);
 
  417         temp1 = vec_ld(line_size, pixels);
 
  418         temp2 = vec_ld(line_size + 16, pixels);
 
  419         pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
 
  420         if (((((
unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
 
  423             pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
 
  426         pixelsv1 = vec_mergeh(vczero, pixelsv1);
 
  427         pixelsv2 = vec_mergeh(vczero, pixelsv2);
 
  428         pixelssum2 = vec_add((vector 
unsigned short)pixelsv1,
 
  429                              (vector 
unsigned short)pixelsv2);
 
  430         temp3 = vec_add(pixelssum1, pixelssum2);
 
  431         temp3 = vec_sra(temp3, vctwo);
 
  432         pixelssum1 = vec_add(pixelssum2, vctwo);
 
  433         pixelsavg = vec_packsu(temp3, (vector 
unsigned short) vczero);
 
  436             blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, 
s0, 
s1));
 
  438             blocktemp = vec_perm(blockv, pixelsavg, vcprm(
s0, 
s1, 2, 3));
 
  441         blockv = vec_avg(blocktemp, blockv);
 
  442         vec_st(blockv, 0, block);