/*
 * get_pixels_altivec (variant built on vec_vsx_ld / vec_vsx_st).
 *
 * In each of 8 loop iterations this loads 16 bytes from `pixels`, widens
 * the first 8 of them to 16-bit lanes by interleaving them with zero
 * bytes, and stores the resulting vector at byte offset i * 16 in `block`.
 *
 * NOTE(review): this listing looks incomplete. The parameter list is never
 * closed, `i` is used without a visible declaration, nothing visible
 * advances `pixels` between iterations, and no closing braces appear. The
 * leading integers (39, 43, ...) look like line numbers left over from a
 * source listing, and their gaps suggest dropped lines. Confirm against
 * the original file.
 */
39 static void get_pixels_altivec(int16_t *restrict
block,
const uint8_t *pixels,
/*
 * Permute control for vec_perm: indices 0x00..0x07 select bytes 0..7 of the
 * first operand and 0x10..0x17 select bytes 0..7 of the second operand, so
 * the result alternates first-operand byte, second-operand byte.
 */
43 vector
unsigned char perm =
44 (vector
unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
45 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
/* All-zero byte vector; supplies the padding byte of every 16-bit lane. */
46 const vector
unsigned char zero =
47 (
const vector
unsigned char) vec_splat_u8(0);
/* One iteration per output vector: 8 vectors of 16 bytes each. */
49 for (i = 0; i < 8; i++) {
/*
 * Load 16 bytes starting at `pixels`; only bytes 0..7 are referenced by
 * `perm`. Presumably the VSX load is used because it tolerates unaligned
 * addresses -- TODO confirm.
 */
53 vector
unsigned char bytes = vec_vsx_ld(0, pixels);
/*
 * Interleave pixel bytes with zero bytes (pixel byte first, zero second) and
 * reinterpret as 16-bit lanes. NOTE(review): pixel-first ordering presumably
 * targets a little-endian lane layout -- confirm.
 */
57 vector
signed short shorts = (vector
signed short) vec_perm(bytes, zero, perm);
/* Store the 8 widened values at byte offset i * 16 within `block`. */
60 vec_vsx_st(shorts, i * 16, (vector
signed short *) block);
/*
 * get_pixels_altivec (variant built on vec_lvsl / vec_ld / vec_st).
 *
 * In each of 8 loop iterations this assembles 16 possibly unaligned bytes
 * from `pixels` out of two aligned loads, widens bytes to 16-bit lanes with
 * vec_mergeh against a zero vector, and stores the result at byte offset
 * i * 16 in `block`.
 *
 * NOTE(review): this listing looks incomplete. The parameter list is never
 * closed, `i` is used without a visible declaration, the vec_mergeh call
 * is cut off after its first argument, nothing visible advances `pixels`,
 * and no closing braces appear. The leading integers (66, 70, ...) look
 * like leftover listing line numbers. Confirm against the original file.
 */
66 static void get_pixels_altivec(int16_t *restrict block,
const uint8_t *pixels,
/*
 * Permute mask derived from the low address bits of `pixels`, computed once
 * before the loop. NOTE(review): reusing it for every iteration presumably
 * assumes every row shares the same alignment modulo 16 -- confirm against
 * how `pixels` is advanced (not visible here).
 */
70 vector
unsigned char perm = vec_lvsl(0, pixels);
/* All-zero byte vector used as the padding operand when widening. */
71 const vector
unsigned char zero =
72 (
const vector
unsigned char) vec_splat_u8(0);
/* One iteration per output vector: 8 vectors of 16 bytes each. */
74 for (i = 0; i < 8; i++) {
/*
 * vec_ld ignores the low four address bits, so these two loads fetch the
 * aligned 16-byte blocks containing pixels[0] and pixels[7]. vec_perm with
 * the lvsl mask then moves the bytes starting at `pixels` to the front.
 */
78 vector
unsigned char pixl = vec_ld(0, pixels);
79 vector
unsigned char pixr = vec_ld(7, pixels);
80 vector
unsigned char bytes = vec_perm(pixl, pixr, perm);
/*
 * Widen to 16-bit lanes by merging with `zero` (zero byte first).
 * NOTE(review): the second vec_mergeh operand is missing from this listing;
 * presumably it is `bytes`, as in the vec_ld-based diff_pixels code in this
 * file -- confirm.
 */
83 vector
signed short shorts = (vector
signed short) vec_mergeh(zero,
/*
 * Store at byte offset i * 16 within `block`. vec_st also ignores the low
 * four address bits, so `block` is presumably 16-byte aligned -- confirm.
 */
87 vec_st(shorts, i * 16, (vector
signed short *) block);
/*
 * diff_pixels_altivec (variant built on vec_vsx_ld / vec_vsx_st).
 *
 * Loads 16 bytes from `s1` and from `s2`, widens the first 8 bytes of each
 * to 16-bit lanes, subtracts the s2 lanes from the s1 lanes, and stores the
 * 8 differences to `block`. The same sequence appears twice after the
 * `for` header, which counts 4 iterations.
 *
 * NOTE(review): this listing looks incomplete. The parameter list is never
 * closed, `s2` and `i` are used without visible declarations, nothing
 * visible advances `s1`, `s2` or `block` between stores, and no closing
 * braces appear. The leading integers (96, 100, ...) look like leftover
 * listing line numbers, and jumps such as 123 -> 135 suggest dropped
 * lines. Confirm against the original file.
 */
96 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *
s1,
/* All-zero byte vector used as the padding operand when widening. */
100 const vector
unsigned char zero =
101 (
const vector
unsigned char) vec_splat_u8(0);
/* Widened rows from s1 and s2; shorts1 is reused to hold the difference. */
102 vector
signed short shorts1, shorts2;
/* 4 iterations; two load/widen/subtract/store sequences follow the header. */
104 for (i = 0; i < 4; i++) {
/* First sequence: load 16 bytes from s1 (only the high half is merged). */
108 vector
unsigned char bytes = vec_vsx_ld(0, s1);
/*
 * Interleave pixel bytes with zero bytes (pixel byte first) and view them as
 * 16-bit lanes. NOTE(review): the vec_ld-based sibling in this file passes
 * `zero` first; the swapped order here presumably reflects a different
 * endianness -- confirm.
 */
111 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
/* Same load and widening for the s2 row. */
114 bytes =vec_vsx_ld(0, s2);
117 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
/* Lane-wise s1 - s2; inputs are widened bytes, so results fit in 16 bits. */
120 shorts1 = vec_sub(shorts1, shorts2);
/* Store the 8 differences at offset 0 of `block`. */
123 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
/*
 * Second sequence, identical to the first. Any pointer updates between the
 * two sequences are not visible in this listing.
 */
135 bytes = vec_vsx_ld(0, s1);
138 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
141 bytes = vec_vsx_ld(0, s2);
144 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
147 shorts1 = vec_sub(shorts1, shorts2);
150 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
/*
 * diff_pixels_altivec (variant built on vec_lvsl / vec_ld / vec_st).
 *
 * Assembles 16 possibly unaligned bytes from `s1` and from `s2` out of two
 * aligned loads each, widens the first 8 bytes of each to 16-bit lanes,
 * subtracts the s2 lanes from the s1 lanes, and stores the 8 differences
 * to `block`. The same sequence appears twice after the `for` header,
 * which counts 4 iterations.
 *
 * NOTE(review): this listing looks incomplete. The parameter list is never
 * closed, `s2` and `i` are used without visible declarations, nothing
 * visible advances `s1`, `s2` or `block` between stores, and no closing
 * braces appear. The leading integers (158, 162, ...) look like leftover
 * listing line numbers, and jumps such as 191 -> 203 suggest dropped
 * lines. Confirm against the original file.
 */
158 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *s1,
/*
 * Permute masks derived from the low address bits of s1 and s2, computed
 * once before the loop. NOTE(review): reusing them for every row presumably
 * assumes each row keeps the same alignment modulo 16 -- confirm against
 * how the pointers are advanced (not visible here).
 */
162 vector
unsigned char perm1 = vec_lvsl(0, s1);
163 vector
unsigned char perm2 = vec_lvsl(0, s2);
/* All-zero byte vector used as the padding operand when widening. */
164 const vector
unsigned char zero =
165 (
const vector
unsigned char) vec_splat_u8(0);
/* Widened rows from s1 and s2; shorts1 is reused to hold the difference. */
166 vector
signed short shorts1, shorts2;
/* 4 iterations; two load/widen/subtract/store sequences follow the header. */
168 for (i = 0; i < 4; i++) {
/*
 * First sequence. vec_ld ignores the low four address bits, so the two
 * loads fetch the aligned 16-byte blocks containing s1[0] and s1[15];
 * vec_perm with perm1 moves the bytes starting at s1 to the front.
 */
172 vector
unsigned char pixl = vec_ld(0, s1);
173 vector
unsigned char pixr = vec_ld(15, s1);
174 vector
unsigned char bytes = vec_perm(pixl, pixr, perm1);
/* Widen the high 8 bytes to 16-bit lanes: zero byte first, then pixel. */
177 shorts1 = (vector
signed short) vec_mergeh(zero, bytes);
/* Same unaligned load and widening for the s2 row, using perm2. */
180 pixl = vec_ld(0, s2);
181 pixr = vec_ld(15, s2);
182 bytes = vec_perm(pixl, pixr, perm2);
185 shorts2 = (vector
signed short) vec_mergeh(zero, bytes);
/* Lane-wise s1 - s2; inputs are widened bytes, so results fit in 16 bits. */
188 shorts1 = vec_sub(shorts1, shorts2);
/*
 * Store the 8 differences at offset 0 of `block`. vec_st ignores the low
 * four address bits, so `block` is presumably 16-byte aligned -- confirm.
 */
191 vec_st(shorts1, 0, (vector
signed short *) block);
/*
 * Second sequence, identical to the first. Any pointer updates between the
 * two sequences are not visible in this listing.
 */
203 pixl = vec_ld(0, s1);
204 pixr = vec_ld(15, s1);
205 bytes = vec_perm(pixl, pixr, perm1);
208 shorts1 = (vector
signed short) vec_mergeh(zero, bytes);
211 pixl = vec_ld(0, s2);
212 pixr = vec_ld(15, s2);
213 bytes = vec_perm(pixl, pixr, perm2);
216 shorts2 = (vector
signed short) vec_mergeh(zero, bytes);
219 shorts1 = vec_sub(shorts1, shorts2);
222 vec_st(shorts1, 0, (vector
signed short *) block);
236 unsigned high_bit_depth)
244 if (!high_bit_depth) {