FFmpeg
util_altivec.h
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 /**
20  * @file
21  * Contains misc utility macros and inline functions
22  */
23 
24 #ifndef AVUTIL_PPC_UTIL_ALTIVEC_H
25 #define AVUTIL_PPC_UTIL_ALTIVEC_H
26 
27 #include <stdint.h>
28 
29 #include "config.h"
30 
31 /***********************************************************************
32  * Vector types: short aliases for the "vector <element type>" spellings
33  **********************************************************************/
34 #define vec_u8 vector unsigned char
35 #define vec_s8 vector signed char
36 #define vec_u16 vector unsigned short
37 #define vec_s16 vector signed short
38 #define vec_u32 vector unsigned int
39 #define vec_s32 vector signed int
40 #define vec_f vector float
41 
42 /***********************************************************************
43  * Null vector
44  **********************************************************************/
45 #define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 )
46 /* zero_*: typed casts of zerov, so LOAD_ZERO must be in the enclosing scope; parenthesized so the cast binds as one unit */
47 #define zero_u8v ((vec_u8) zerov)
48 #define zero_s8v ((vec_s8) zerov)
49 #define zero_u16v ((vec_u16) zerov)
50 #define zero_s16v ((vec_s16) zerov)
51 #define zero_u32v ((vec_u32) zerov)
52 #define zero_s32v ((vec_s32) zerov)
53 
54 #if HAVE_ALTIVEC
55 #include <altivec.h>
56 
57 // Byte-index groups used to build register permutation vectors through vcprm().
58 // WORD_0..WORD_3 name the words of the first vector; the 's' variants (0x10..0x1f) name words of the _s_econd vector.
59 #define WORD_0 0x00,0x01,0x02,0x03
60 #define WORD_1 0x04,0x05,0x06,0x07
61 #define WORD_2 0x08,0x09,0x0a,0x0b
62 #define WORD_3 0x0c,0x0d,0x0e,0x0f
63 #define WORD_s0 0x10,0x11,0x12,0x13
64 #define WORD_s1 0x14,0x15,0x16,0x17
65 #define WORD_s2 0x18,0x19,0x1a,0x1b
66 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
67 #define vcprm(a,b,c,d) (const vec_u8){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
68 
69 #define SWP_W2S0 0x02,0x03,0x00,0x01 /* SWP_W2S*: the byte indices of one word with its two 16-bit halves exchanged */
70 #define SWP_W2S1 0x06,0x07,0x04,0x05
71 #define SWP_W2S2 0x0a,0x0b,0x08,0x09
72 #define SWP_W2S3 0x0e,0x0f,0x0c,0x0d
73 #define SWP_W2Ss0 0x12,0x13,0x10,0x11
74 #define SWP_W2Ss1 0x16,0x17,0x14,0x15
75 #define SWP_W2Ss2 0x1a,0x1b,0x18,0x19
76 #define SWP_W2Ss3 0x1e,0x1f,0x1c,0x1d
77 #define vcswapi2s(a,b,c,d) (const vec_u8){SWP_W2S ## a, SWP_W2S ## b, SWP_W2S ## c, SWP_W2S ## d}
78 /* vcswapc(): byte-index vector listing 0x0f down to 0x00; both builders use the vec_u8 alias like vcprm() does */
79 #define vcswapc() \
80  (const vec_u8){0x0f,0x0e,0x0d,0x0c,0x0b,0x0a,0x09,0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00}
81 
82 
83 // Transpose 8x8 matrix of 16-bit elements (in-place): a..h are each read twice and then overwritten, so they must be assignable
84 #define TRANSPOSE8(a,b,c,d,e,f,g,h) \
85 do { \
86  vec_s16 A1, B1, C1, D1, E1, F1, G1, H1; \
87  vec_s16 A2, B2, C2, D2, E2, F2, G2, H2; \
88  /* pass 1: merge each argument with the one four positions later (a/e, b/f, c/g, d/h) */ \
89  A1 = vec_mergeh (a, e); \
90  B1 = vec_mergel (a, e); \
91  C1 = vec_mergeh (b, f); \
92  D1 = vec_mergel (b, f); \
93  E1 = vec_mergeh (c, g); \
94  F1 = vec_mergel (c, g); \
95  G1 = vec_mergeh (d, h); \
96  H1 = vec_mergel (d, h); \
97  /* pass 2: the same pairing applied to the pass-1 temporaries */ \
98  A2 = vec_mergeh (A1, E1); \
99  B2 = vec_mergel (A1, E1); \
100  C2 = vec_mergeh (B1, F1); \
101  D2 = vec_mergel (B1, F1); \
102  E2 = vec_mergeh (C1, G1); \
103  F2 = vec_mergel (C1, G1); \
104  G2 = vec_mergeh (D1, H1); \
105  H2 = vec_mergel (D1, H1); \
106  /* pass 3: the same pairing once more, written back into the macro arguments */ \
107  a = vec_mergeh (A2, E2); \
108  b = vec_mergel (A2, E2); \
109  c = vec_mergeh (B2, F2); \
110  d = vec_mergel (B2, F2); \
111  e = vec_mergeh (C2, G2); \
112  f = vec_mergel (C2, G2); \
113  g = vec_mergeh (D2, H2); \
114  h = vec_mergel (D2, H2); \
115 } while (0)
116 
117 
118 #if HAVE_BIGENDIAN /* VEC_LD(offset, b): two vec_ld loads (offset and offset+15) combined by vec_perm with the vec_lvsl mask */
119 #define VEC_LD(offset,b) \
120  vec_perm(vec_ld(offset, b), vec_ld((offset)+15, b), vec_lvsl(offset, b))
121 #else /* !HAVE_BIGENDIAN: a single vec_vsx_ld */
122 #define VEC_LD(offset,b) \
123  vec_vsx_ld(offset, b)
124 #endif /* HAVE_BIGENDIAN */
125 
126 /** @brief loads unaligned vector @a *src with offset @a offset
127  and returns it */
128 #if HAVE_BIGENDIAN
129 static inline vec_u8 unaligned_load(int offset, const uint8_t *src)
130 {
131  const vec_u8 lo = vec_ld(offset, src); /* first of the two loads */
132  const vec_u8 hi = vec_ld(offset + 15, src); /* second load, at offset + 15 */
133  const vec_u8 shift = vec_lvsl(offset, src); /* permute mask derived from offset and src */
134  return vec_perm(lo, hi, shift); /* combine both loads through the mask */
135 }
136 static inline vec_u8 load_with_perm_vec(int offset, const uint8_t *src, vec_u8 perm_vec)
137 {
138  const vec_u8 lo = vec_ld(offset, src); /* first of the two loads */
139  const vec_u8 hi = vec_ld(offset + 15, src); /* second load, at offset + 15 */
140  return vec_perm(lo, hi, perm_vec); /* the permute mask is supplied by the caller */
141 }
142 #else
143 #define unaligned_load(a,b) VEC_LD(a,b) /* !HAVE_BIGENDIAN: forwards straight to VEC_LD */
144 #define load_with_perm_vec(a,b,c) VEC_LD(a,b) /* !HAVE_BIGENDIAN: c (the permute vector) is ignored */
145 #endif
146 
147 
148 /**
149  * load_with_perm_vec(): loads a vector with a known misalignment.
150  * @param perm_vec the alignment permute vector (as produced by lvsl) used to combine the two loads
151  */
152 
153 #define vec_unaligned_load(b) VEC_LD(0, b) /* VEC_LD with a fixed offset of 0 */
154 
155 #if HAVE_BIGENDIAN /* VEC_MERGEH/VEC_MERGEL: wrappers around vec_mergeh/vec_mergel */
156 #define VEC_MERGEH(a, b) vec_mergeh(a, b)
157 #define VEC_MERGEL(a, b) vec_mergel(a, b)
158 #else /* !HAVE_BIGENDIAN: the two operands are passed in swapped order */
159 #define VEC_MERGEH(a, b) vec_mergeh(b, a)
160 #define VEC_MERGEL(a, b) vec_mergel(b, a)
161 #endif /* HAVE_BIGENDIAN */
162 
163 #if HAVE_BIGENDIAN /* VEC_ST(a,b,c): store wrapper, arguments forwarded unchanged */
164 #define VEC_ST(a,b,c) vec_st(a,b,c)
165 #else /* !HAVE_BIGENDIAN: vec_vsx_st is used instead of vec_st */
166 #define VEC_ST(a,b,c) vec_vsx_st(a,b,c)
167 #endif /* HAVE_BIGENDIAN */
168 
169 #if HAVE_BIGENDIAN /* VEC_SPLAT16(a,b): vec_splat of a cast to vec_s16, with b as the element selector */
170 #define VEC_SPLAT16(a,b) vec_splat((vec_s16)(a), b)
171 #else /* !HAVE_BIGENDIAN: a is first permuted with vcswapi2s(0,1,2,3); note that a is expanded twice */
172 #define VEC_SPLAT16(a,b) vec_splat((vec_s16)(vec_perm(a, a, vcswapi2s(0,1,2,3))), b)
173 #endif /* HAVE_BIGENDIAN */
174 
175 #if HAVE_BIGENDIAN /* VEC_SLD16(a,b,c): wrapper around vec_sld */
176 #define VEC_SLD16(a,b,c) vec_sld(a, b, c)
177 #else /* !HAVE_BIGENDIAN: the two vector operands are swapped, c is unchanged */
178 #define VEC_SLD16(a,b,c) vec_sld(b, a, c)
179 #endif /* HAVE_BIGENDIAN */
180 
181 #endif /* HAVE_ALTIVEC */
182 
183 #if HAVE_VSX /* vsx_ld_u8_s16(off, p): vec_vsx_ld result merged (vec_mergeh) with a zero byte vector, then cast to vec_s16 */
184 #if HAVE_BIGENDIAN /* zero bytes are the first merge operand, loaded bytes the second */
185 #define vsx_ld_u8_s16(off, p) \
186  ((vec_s16)vec_mergeh((vec_u8)vec_splat_u8(0), \
187  (vec_u8)vec_vsx_ld((off), (p))))
188 #else /* !HAVE_BIGENDIAN: loaded bytes are the first merge operand, zero bytes the second */
189 #define vsx_ld_u8_s16(off, p) \
190  ((vec_s16)vec_mergeh((vec_u8)vec_vsx_ld((off), (p)), \
191  (vec_u8)vec_splat_u8(0)))
192 #endif /* HAVE_BIGENDIAN */
193 #endif /* HAVE_VSX */
194 
195 #endif /* AVUTIL_PPC_UTIL_ALTIVEC_H */
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
#define src
Definition: vp8dsp.c:254
uint8_t
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static const uint16_t mask[17]
Definition: lzw.c:38
#define b
Definition: input.c:41
#define vec_u8
Definition: util_altivec.h:34