00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00027 #include "util_altivec.h"
/* Right-shift applied to the weighted 4-tap sum in this file; the filter
 * coefficients are therefore presumably fixed-point values with this many
 * fractional bits (confirm against the code that builds the filter). */
#define FILTER_BITS 8
00029
00030 typedef union {
00031 vector signed short v;
00032 signed short s[8];
00033 } vec_ss;
00034
00035 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
00036 int wrap, int16_t *filter)
00037 {
00038 int sum, i;
00039 const uint8_t *s;
00040 vector unsigned char *tv, tmp, dstv, zero;
00041 vec_ss srchv[4], srclv[4], fv[4];
00042 vector signed short zeros, sumhv, sumlv;
00043 s = src;
00044
00045 for(i=0;i<4;i++) {
00046
00047
00048
00049
00050
00051
00052 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
00053 fv[i].v = vec_splat(fv[i].v, 0);
00054 }
00055
00056 zero = vec_splat_u8(0);
00057 zeros = vec_splat_s16(0);
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067 i = (-(int)dst) & 0xf;
00068 while(i>0) {
00069 sum = s[0 * wrap] * filter[0] +
00070 s[1 * wrap] * filter[1] +
00071 s[2 * wrap] * filter[2] +
00072 s[3 * wrap] * filter[3];
00073 sum = sum >> FILTER_BITS;
00074 if (sum<0) sum = 0; else if (sum>255) sum=255;
00075 dst[0] = sum;
00076 dst++;
00077 s++;
00078 dst_width--;
00079 i--;
00080 }
00081
00082
00083 while(dst_width>=16) {
00084
00085
00086
00087
00088
00089 tv = (vector unsigned char *) &s[0 * wrap];
00090 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
00091 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
00092 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
00093 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
00094 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
00095
00096 tv = (vector unsigned char *) &s[1 * wrap];
00097 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
00098 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
00099 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
00100 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
00101 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
00102
00103 tv = (vector unsigned char *) &s[2 * wrap];
00104 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
00105 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
00106 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
00107 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
00108 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
00109
00110 tv = (vector unsigned char *) &s[3 * wrap];
00111 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
00112 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
00113 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
00114 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
00115 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
00116
00117
00118
00119 dstv = vec_packsu(sumhv, sumlv) ;
00120 vec_st(dstv, 0, (vector unsigned char *) dst);
00121
00122 dst+=16;
00123 s+=16;
00124 dst_width-=16;
00125 }
00126
00127
00128
00129 while(dst_width>0) {
00130 sum = s[0 * wrap] * filter[0] +
00131 s[1 * wrap] * filter[1] +
00132 s[2 * wrap] * filter[2] +
00133 s[3 * wrap] * filter[3];
00134 sum = sum >> FILTER_BITS;
00135 if (sum<0) sum = 0; else if (sum>255) sum=255;
00136 dst[0] = sum;
00137 dst++;
00138 s++;
00139 dst_width--;
00140 }
00141 }
00142