24 #ifndef AVUTIL_PPC_UTIL_ALTIVEC_H 25 #define AVUTIL_PPC_UTIL_ALTIVEC_H 34 #define vec_u8 vector unsigned char 35 #define vec_s8 vector signed char 36 #define vec_u16 vector unsigned short 37 #define vec_s16 vector signed short 38 #define vec_u32 vector unsigned int 39 #define vec_s32 vector signed int 40 #define vec_f vector float 45 #define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 ) 47 #define zero_u8v (vec_u8) zerov 48 #define zero_s8v (vec_s8) zerov 49 #define zero_u16v (vec_u16) zerov 50 #define zero_s16v (vec_s16) zerov 51 #define zero_u32v (vec_u32) zerov 52 #define zero_s32v (vec_s32) zerov 59 #define WORD_0 0x00,0x01,0x02,0x03 60 #define WORD_1 0x04,0x05,0x06,0x07 61 #define WORD_2 0x08,0x09,0x0a,0x0b 62 #define WORD_3 0x0c,0x0d,0x0e,0x0f 63 #define WORD_s0 0x10,0x11,0x12,0x13 64 #define WORD_s1 0x14,0x15,0x16,0x17 65 #define WORD_s2 0x18,0x19,0x1a,0x1b 66 #define WORD_s3 0x1c,0x1d,0x1e,0x1f 67 #define vcprm(a,b,c,d) (const vec_u8){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} 69 #define SWP_W2S0 0x02,0x03,0x00,0x01 70 #define SWP_W2S1 0x06,0x07,0x04,0x05 71 #define SWP_W2S2 0x0a,0x0b,0x08,0x09 72 #define SWP_W2S3 0x0e,0x0f,0x0c,0x0d 73 #define SWP_W2Ss0 0x12,0x13,0x10,0x11 74 #define SWP_W2Ss1 0x16,0x17,0x14,0x15 75 #define SWP_W2Ss2 0x1a,0x1b,0x18,0x19 76 #define SWP_W2Ss3 0x1e,0x1f,0x1c,0x1d 77 #define vcswapi2s(a,b,c,d) (const vector unsigned char){SWP_W2S ## a, SWP_W2S ## b, SWP_W2S ## c, SWP_W2S ## d} 80 (const vector unsigned char){0x0f,0x0e,0x0d,0x0c,0x0b,0x0a,0x09,0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00} 84 #define TRANSPOSE8(a,b,c,d,e,f,g,h) \ 86 vec_s16 A1, B1, C1, D1, E1, F1, G1, H1; \ 87 vec_s16 A2, B2, C2, D2, E2, F2, G2, H2; \ 89 A1 = vec_mergeh (a, e); \ 90 B1 = vec_mergel (a, e); \ 91 C1 = vec_mergeh (b, f); \ 92 D1 = vec_mergel (b, f); \ 93 E1 = vec_mergeh (c, g); \ 94 F1 = vec_mergel (c, g); \ 95 G1 = vec_mergeh (d, h); \ 96 H1 = vec_mergel (d, h); \ 98 A2 = vec_mergeh (A1, E1); \ 99 B2 = vec_mergel (A1, E1); \ 100 C2 = vec_mergeh (B1, F1); \ 101 D2 = vec_mergel (B1, F1); \ 102 E2 = vec_mergeh (C1, G1); \ 103 F2 = vec_mergel (C1, G1); \ 104 G2 = vec_mergeh (D1, H1); \ 105 H2 = vec_mergel (D1, H1); \ 107 a = vec_mergeh (A2, E2); \ 108 b = vec_mergel (A2, E2); \ 109 c = vec_mergeh (B2, F2); \ 110 d = vec_mergel (B2, F2); \ 111 e = vec_mergeh (C2, G2); \ 112 f = vec_mergel (C2, G2); \ 113 g = vec_mergeh (D2, H2); \ 114 h = vec_mergel (D2, H2); \ 119 #define VEC_LD(offset,b) \ 120 vec_perm(vec_ld(offset, b), vec_ld((offset)+15, b), vec_lvsl(offset, b)) 122 #define VEC_LD(offset,b) \ 123 vec_vsx_ld(offset, b) 132 register vec_u8 second = vec_ld(offset + 15, src);
134 return vec_perm(first, second, mask);
140 return vec_perm(a, b, perm_vec);
143 #define unaligned_load(a,b) VEC_LD(a,b) 144 #define load_with_perm_vec(a,b,c) VEC_LD(a,b) 153 #define vec_unaligned_load(b) VEC_LD(0, b) 156 #define VEC_MERGEH(a, b) vec_mergeh(a, b) 157 #define VEC_MERGEL(a, b) vec_mergel(a, b) 159 #define VEC_MERGEH(a, b) vec_mergeh(b, a) 160 #define VEC_MERGEL(a, b) vec_mergel(b, a) 164 #define VEC_ST(a,b,c) vec_st(a,b,c) 166 #define VEC_ST(a,b,c) vec_vsx_st(a,b,c) 170 #define VEC_SPLAT16(a,b) vec_splat((vec_s16)(a), b) 172 #define VEC_SPLAT16(a,b) vec_splat((vec_s16)(vec_perm(a, a, vcswapi2s(0,1,2,3))), b) 176 #define VEC_SLD16(a,b,c) vec_sld(a, b, c) 178 #define VEC_SLD16(a,b,c) vec_sld(b, a, c) 185 #define vsx_ld_u8_s16(off, p) \ 186 ((vec_s16)vec_mergeh((vec_u8)vec_splat_u8(0), \ 187 (vec_u8)vec_vsx_ld((off), (p)))) 189 #define vsx_ld_u8_s16(off, p) \ 190 ((vec_s16)vec_mergeh((vec_u8)vec_vsx_ld((off), (p)), \ 191 (vec_u8)vec_splat_u8(0))) The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static const uint16_t mask[17]