FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cabac.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_X86_CABAC_H
22 #define AVCODEC_X86_CABAC_H
23 
24 #include "libavcodec/cabac.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/x86/asm.h"
27 #include "libavutil/internal.h"
28 #include "config.h"
29 
/*
 * BROKEN_COMPILER selects whether some of the inline-asm functions further
 * down in this header are compiled (they are guarded by !BROKEN_COMPILER).
 *
 * It is defined to 1 when either of these holds:
 *  - the target is i386 and the compiler is clang older than 2.10, or
 *  - the compiler is not clang, defines __llvm__, and reports itself as
 *    GCC 4.2 with a patch level of at most 1.
 * Otherwise it is defined to 0.
 */
30 #if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
31  || ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)
32 # define BROKEN_COMPILER 1
33 #else
34 # define BROKEN_COMPILER 0
35 #endif
36 
37 #if HAVE_INLINE_ASM
38 
39 #ifdef BROKEN_RELOCATIONS
/*
 * Extra asm input operand appended to the operand list of the decoding asm
 * statement: hands the local pointer `tables` to the asm in a register.
 */
40 #define TABLES_ARG , "r"(tables)
41 
42 #if HAVE_FAST_CMOV
/*
 * Range/low/result update step of BRANCHLESS_GET_CABAC, cmov flavour, for
 * the build in which the tables are reached through a base register.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC right before it
 * expands this macro:
 *   range = byte fetched from the table at lps_off
 *   ecx   = previous range minus that byte
 *   tmp   = ecx << 17
 *
 * Instruction by instruction:
 *   cmp/cmova : if tmp is above low (unsigned compare), range = ecx
 *   sbb       : rcx = all ones if tmp is below low (unsigned), else 0
 *   and       : tmp &= ecx (tmp survives only when the mask is set)
 *   xor       : retq ^= rcx (every bit flipped when the mask is set)
 *   sub       : low -= tmp
 *
 * The "ret" argument is not referenced by this flavour.
 */
43 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
44  "cmp "low" , "tmp" \n\t"\
45  "cmova %%ecx , "range" \n\t"\
46  "sbb %%rcx , %%rcx \n\t"\
47  "and %%ecx , "tmp" \n\t"\
48  "xor %%rcx , "retq" \n\t"\
49  "sub "tmp" , "low" \n\t"
50 #else /* HAVE_FAST_CMOV */
/*
 * Same update step written without cmov/sbb; the entry state is the same
 * as for the flavour above.
 *
 *   tmp   = (tmp - low) >> 31, arithmetic shift: all ones when the 32-bit
 *           difference is negative, else 0
 *   range = ecx + ((range - ecx) & tmp)
 *   low  -= (ecx << 17) & tmp      (ecx itself is shifted and masked)
 *   ret  ^= tmp
 *   retq  = ret sign-extended to 64 bits
 */
51 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
52 /* P4 Prescott has crappy cmov,sbb,64bit shift so avoid them */ \
53  "sub "low" , "tmp" \n\t"\
54  "sar $31 , "tmp" \n\t"\
55  "sub %%ecx , "range" \n\t"\
56  "and "tmp" , "range" \n\t"\
57  "add %%ecx , "range" \n\t"\
58  "shl $17 , %%ecx \n\t"\
59  "and "tmp" , %%ecx \n\t"\
60  "sub %%ecx , "low" \n\t"\
61  "xor "tmp" , "ret" \n\t"\
62  "movslq "ret" , "retq" \n\t"
63 #endif /* HAVE_FAST_CMOV */
64 
/*
 * Asm text that decodes one CABAC bin. This variant addresses the lookup
 * tables through the base register passed as "tables" and indexes with rcx.
 *
 * Every argument is a string literal that is pasted into the asm text:
 *   ret / retq      32-bit / 64-bit name of the result register
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low / lowword   32-bit / 16-bit name of the register holding low
 *   range / rangeq  32-bit / 64-bit name of the register holding range
 *   tmp / tmpbyte   32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             accepted but not referenced by this macro body
 *   norm_off, lps_off, mlps_off
 *                   displacements, as text, applied to "tables"
 *   tables          register holding the table base address
 *
 * ecx/rcx is overwritten, so the enclosing asm statement has to list it as
 * clobbered. The local label "2" is used.
 *
 * Steps:
 *  1. ret = *statep
 *     range = tables[lps_off + ret + 2 * (range & 0xC0)]   (one byte)
 *     ecx = previous range - range;  tmp = ecx << 17
 *  2. BRANCHLESS_GET_CABAC_UPDATE chooses the new range and low and may
 *     flip the bits of ret.
 *  3. cl = tables[norm_off + range];  range <<= cl;  low <<= cl
 *     *statep = tables[mlps_off + 128 + ret]                (one byte)
 *  4. Only when the low 16 bits of low are zero: the bytestream pointer is
 *     loaded and advanced by 2 in memory, the 16-bit word w at the old
 *     pointer is read, and
 *       x = (bswap32(w) >> 15) - 0xFFFF
 *       s = 7 - tables[norm_off + ((low ^ (low - 1)) >> 15)]
 *       low += x << s
 */
65 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
66  "movzbl "statep" , "ret" \n\t"\
67  "mov "range" , "tmp" \n\t"\
68  "and $0xC0 , "range" \n\t"\
69  "lea ("ret", "range", 2), %%ecx \n\t"\
70  "movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
71  "sub "range" , "tmp" \n\t"\
72  "mov "tmp" , %%ecx \n\t"\
73  "shl $17 , "tmp" \n\t"\
74  BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
75  "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
76  "shl %%cl , "range" \n\t"\
77  "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
78  "shl %%cl , "low" \n\t"\
79  "mov "tmpbyte" , "statep" \n\t"\
80  "test "lowword" , "lowword" \n\t"\
81  "jnz 2f \n\t"\
82  "mov "byte" , %%"REG_c" \n\t"\
83  "add"OPSIZE" $2 , "byte" \n\t"\
84  "movzwl (%%"REG_c") , "tmp" \n\t"\
85  "lea -1("low") , %%ecx \n\t"\
86  "xor "low" , %%ecx \n\t"\
87  "shr $15 , %%ecx \n\t"\
88  "bswap "tmp" \n\t"\
89  "shr $15 , "tmp" \n\t"\
90  "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
91  "sub $0xFFFF , "tmp" \n\t"\
92  "neg %%ecx \n\t"\
93  "add $7 , %%ecx \n\t"\
94  "shl %%cl , "tmp" \n\t"\
95  "add "tmp" , "low" \n\t"\
96  "2: \n\t"
97 
98 #else /* BROKEN_RELOCATIONS */
/*
 * In this branch the asm names ff_h264_cabac_tables directly through
 * MANGLE(), so TABLES_ARG adds no operand to the asm statement.
 * RIP_ARG is likewise defined empty; it is not referenced in the part of
 * this header shown here.
 */
99 #define TABLES_ARG
100 #define RIP_ARG
101 
102 #if HAVE_FAST_CMOV
/*
 * Range/low/result update step of BRANCHLESS_GET_CABAC, cmov flavour, for
 * the build in which the tables are addressed by symbol.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC:
 *   range = byte fetched from the table at lps_off
 *   tmp   = previous range minus that byte
 *
 * Instruction by instruction:
 *   mov/shl   : ecx = tmp;  tmp <<= 17
 *   cmp/cmova : if tmp is above low (unsigned compare), range = ecx
 *   sbb       : ecx = all ones if tmp is below low (unsigned), else 0
 *   and       : tmp &= ecx
 *   xor       : ret ^= ecx (every bit flipped when the mask is set)
 *   sub       : low -= tmp
 */
103 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
104  "mov "tmp" , %%ecx \n\t"\
105  "shl $17 , "tmp" \n\t"\
106  "cmp "low" , "tmp" \n\t"\
107  "cmova %%ecx , "range" \n\t"\
108  "sbb %%ecx , %%ecx \n\t"\
109  "and %%ecx , "tmp" \n\t"\
110  "xor %%ecx , "ret" \n\t"\
111  "sub "tmp" , "low" \n\t"
112 #else /* HAVE_FAST_CMOV */
/*
 * Same update step written without cmov/sbb; same entry state.
 *
 *   ecx   = tmp;  tmp <<= 17
 *   tmp   = (tmp - low) >> 31, arithmetic shift: all ones when the 32-bit
 *           difference is negative, else 0 (the mask)
 *   range = ecx + ((range - ecx) & mask)
 *   low  -= (ecx << 17) & mask     (ecx itself is shifted and masked)
 *   ret  ^= mask
 */
113 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
114  "mov "tmp" , %%ecx \n\t"\
115  "shl $17 , "tmp" \n\t"\
116  "sub "low" , "tmp" \n\t"\
117  "sar $31 , "tmp" \n\t" /*lps_mask*/\
118  "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
119  "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
120  "add %%ecx , "range" \n\t" /*new range*/\
121  "shl $17 , %%ecx \n\t"\
122  "and "tmp" , %%ecx \n\t"\
123  "sub %%ecx , "low" \n\t"\
124  "xor "tmp" , "ret" \n\t"
125 #endif /* HAVE_FAST_CMOV */
126 
/*
 * Asm text that decodes one CABAC bin. This variant addresses the lookup
 * tables by symbol, MANGLE(ff_h264_cabac_tables) plus a displacement.
 *
 * Every argument is a string literal that is pasted into the asm text:
 *   ret             32-bit name of the result register
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low / lowword   32-bit / 16-bit name of the register holding low
 *   range           32-bit name of the register holding range
 *   tmp / tmpbyte   32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   norm_off, lps_off, mlps_off
 *                   displacements, as text, added to the table symbol
 *   retq, rangeq, end, tables
 *                   accepted but not referenced by this macro body
 *
 * ecx (and REG_c) is overwritten, so the enclosing asm statement has to
 * list it as clobbered. The local label "2" is used.
 *
 * Steps:
 *  1. ret = *statep
 *     range = table[lps_off + ret + 2 * (range & 0xC0)]    (one byte)
 *     tmp = previous range - range
 *  2. BRANCHLESS_GET_CABAC_UPDATE chooses the new range and low and may
 *     flip the bits of ret.
 *  3. cl = table[norm_off + range];  range <<= cl;  low <<= cl
 *     *statep = table[mlps_off + 128 + ret]                 (one byte)
 *  4. Only when the low 16 bits of low are zero: the bytestream pointer is
 *     loaded and advanced by 2 in memory, the 16-bit word w at the old
 *     pointer is read, and
 *       x = (bswap32(w) >> 15) - 0xFFFF
 *       s = 7 - table[norm_off + ((low ^ (low - 1)) >> 15)]
 *       low += x << s
 */
127 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
128  "movzbl "statep" , "ret" \n\t"\
129  "mov "range" , "tmp" \n\t"\
130  "and $0xC0 , "range" \n\t"\
131  "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
132  "sub "range" , "tmp" \n\t"\
133  BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
134  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
135  "shl %%cl , "range" \n\t"\
136  "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
137  "shl %%cl , "low" \n\t"\
138  "mov "tmpbyte" , "statep" \n\t"\
139  "test "lowword" , "lowword" \n\t"\
140  " jnz 2f \n\t"\
141  "mov "byte" , %%"REG_c" \n\t"\
142  "add"OPSIZE" $2 , "byte" \n\t"\
143  "movzwl (%%"REG_c") , "tmp" \n\t"\
144  "lea -1("low") , %%ecx \n\t"\
145  "xor "low" , %%ecx \n\t"\
146  "shr $15 , %%ecx \n\t"\
147  "bswap "tmp" \n\t"\
148  "shr $15 , "tmp" \n\t"\
149  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
150  "sub $0xFFFF , "tmp" \n\t"\
151  "neg %%ecx \n\t"\
152  "add $7 , %%ecx \n\t"\
153  "shl %%cl , "tmp" \n\t"\
154  "add "tmp" , "low" \n\t"\
155  "2: \n\t"
156 
157 #endif /* BROKEN_RELOCATIONS */
158 
159 #if HAVE_7REGS && !BROKEN_COMPILER
160 #define get_cabac_inline get_cabac_inline_x86
/**
 * Decode one context-coded CABAC bin using the BRANCHLESS_GET_CABAC asm.
 *
 * @param c     decoder context; c->low and c->range are read and written
 *              back, and the bytestream pointer stored in *c is advanced
 *              by the asm when it refills
 * @param state state byte of the context; read and rewritten by the asm
 * @return bit 0 of the value the asm leaves in its result register
 */
161 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
162  uint8_t *const state)
163 {
164  int bit, tmp;
165 #ifdef BROKEN_RELOCATIONS
166  void *tables;
167 
    /* Materialise the table base address in a register; it is handed to
     * the main asm statement through TABLES_ARG. */
168  __asm__ volatile(
169  "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
170  : "=&r"(tables)
171  );
172 #endif
173 
    /*
     * Operand map:
     *   %0 bit   %1 c->low   %2 c->range   %3 tmp ("q": needs a byte view)
     *   %4 state %5 c        %6 / %7 offsets of bytestream / bytestream_end
     *   %8 tables (only meaningful when TABLES_ARG adds that operand; the
     *      symbol-addressed macro variant never references it)
     *
     * BRANCHLESS_GET_CABAC takes 15 arguments; the three table
     * displacements (norm_off, lps_off, mlps_off) sit between the "end"
     * operand and the table register and are passed as string literals.
     * The H264_*_OFFSET constants are expected to come from
     * libavcodec/cabac.h.
     */
174  __asm__ volatile(
175  BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
176  "%2", "%q2", "%3", "%b3",
177  "%c6(%5)", "%c7(%5)",
178  AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
179  AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
180  AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
181  "%8")
182  : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
183  : "r"(state), "r"(c),
184  "i"(offsetof(CABACContext, bytestream)),
185  "i"(offsetof(CABACContext, bytestream_end))
186  TABLES_ARG
187  ,"1"(c->low), "2"(c->range)
188  : "%"REG_c, "memory"
189  );
190  return bit & 1;
191 }
192 #endif /* HAVE_7REGS */
193 
194 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
/**
 * Decode one bypass bin and use it to choose the sign of val.
 *
 * Asm operands: %0 val (in ecx, read and written), %1 tmp, %2 c,
 * %c3 / %c4 / %c5 / %c6 = offsets of low / bytestream / bytestream_end /
 * range inside CABACContext. eax and edx are used as scratch.
 *
 * @param c   decoder context; c->low is updated and c->bytestream is
 *            advanced by 2 when a refill happens
 * @param val value whose sign is selected
 * @return val, or -val when doubling low and subtracting (range << 17)
 *         gives a negative 32-bit result
 *
 * NOTE(review): the bytestream_end offset (%c5) is passed in but never
 * referenced by the asm; unlike get_cabac_bypass_x86(), the pointer is
 * advanced without being compared against bytestream_end. Presumably the
 * callers guarantee enough readable bytes - confirm.
 */
195 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
196 {
197  x86_reg tmp;
198  __asm__ volatile(
199  "movl %c6(%2), %k1 \n\t" /* tmp = c->range */
200  "movl %c3(%2), %%eax \n\t" /* eax = c->low */
201  "shl $17, %k1 \n\t" /* tmp = range << 17 */
202  "add %%eax, %%eax \n\t" /* eax = 2 * low */
203  "sub %k1, %%eax \n\t" /* eax -= range << 17 */
204  "cltd \n\t" /* edx = all ones if eax is negative, else 0 */
205  "and %%edx, %k1 \n\t" /* keep range << 17 only when eax went negative */
206  "add %k1, %%eax \n\t" /* ... and add it back in that case */
207  "xor %%edx, %%ecx \n\t" /* val = (val ^ mask) - mask: */
208  "sub %%edx, %%ecx \n\t" /* negated when the mask is all ones */
209  "test %%ax, %%ax \n\t" /* refill only when the low 16 bits are zero */
210  "jnz 1f \n\t"
211  "mov %c4(%2), %1 \n\t" /* tmp = c->bytestream */
212  "subl $0xFFFF, %%eax \n\t"
213  "movzwl (%1), %%edx \n\t" /* read 16 bits from the stream */
214  "bswap %%edx \n\t" /* bswap + shift: the two bytes in */
215  "shrl $15, %%edx \n\t" /* big-endian order, shifted left by 1 */
216  "add $2, %1 \n\t" /* advance the pointer by two bytes */
217  "addl %%edx, %%eax \n\t"
218  "mov %1, %c4(%2) \n\t" /* c->bytestream = tmp */
219  "1: \n\t"
220  "movl %%eax, %c3(%2) \n\t" /* c->low = eax */
221 
222  : "+c"(val), "=&r"(tmp)
223  : "r"(c),
224  "i"(offsetof(CABACContext, low)),
225  "i"(offsetof(CABACContext, bytestream)),
226  "i"(offsetof(CABACContext, bytestream_end)),
227  "i"(offsetof(CABACContext, range))
228  : "%eax", "%edx", "memory"
229  );
230  return val;
231 }
232 
233 #if !BROKEN_COMPILER
234 #define get_cabac_bypass get_cabac_bypass_x86
/**
 * Decode one bypass bin.
 *
 * Asm operands: %0 res (in edx), %1 tmp, %2 c,
 * %c3 / %c4 / %c5 / %c6 = offsets of low / bytestream / bytestream_end /
 * range inside CABACContext. eax and ecx are used as scratch.
 *
 * @param c decoder context; c->low is updated, and c->bytestream is
 *          advanced by 2 on refill as long as it is below bytestream_end
 * @return 0 when doubling low and subtracting (range << 17) gives a
 *         negative 32-bit result, 1 otherwise
 */
235 static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
236 {
237  x86_reg tmp;
238  int res;
239  __asm__ volatile(
240  "movl %c6(%2), %k1 \n\t" /* tmp = c->range */
241  "movl %c3(%2), %%eax \n\t" /* eax = c->low */
242  "shl $17, %k1 \n\t" /* tmp = range << 17 */
243  "add %%eax, %%eax \n\t" /* eax = 2 * low */
244  "sub %k1, %%eax \n\t" /* eax -= range << 17 */
245  "cltd \n\t" /* edx = all ones if eax is negative, else 0 */
246  "and %%edx, %k1 \n\t" /* keep range << 17 only when eax went negative */
247  "add %k1, %%eax \n\t" /* ... and add it back in that case */
248  "inc %%edx \n\t" /* mask -1 / 0 becomes result 0 / 1 */
249  "test %%ax, %%ax \n\t" /* refill only when the low 16 bits are zero */
250  "jnz 1f \n\t"
251  "mov %c4(%2), %1 \n\t" /* tmp = c->bytestream */
252  "subl $0xFFFF, %%eax \n\t"
253  "movzwl (%1), %%ecx \n\t" /* read 16 bits from the stream */
254  "bswap %%ecx \n\t" /* bswap + shift: the two bytes in */
255  "shrl $15, %%ecx \n\t" /* big-endian order, shifted left by 1 */
256  "addl %%ecx, %%eax \n\t"
257  "cmp %c5(%2), %1 \n\t" /* compare bytestream with bytestream_end */
258  "jae 1f \n\t" /* addresses: unsigned test, do not advance at/after end */
259  "add"OPSIZE" $2, %c4(%2) \n\t" /* c->bytestream += 2 */
260  "1: \n\t"
261  "movl %%eax, %c3(%2) \n\t" /* c->low = eax */
262 
263  : "=&d"(res), "=&r"(tmp)
264  : "r"(c),
265  "i"(offsetof(CABACContext, low)),
266  "i"(offsetof(CABACContext, bytestream)),
267  "i"(offsetof(CABACContext, bytestream_end)),
268  "i"(offsetof(CABACContext, range))
269  : "%eax", "%ecx", "memory"
270  );
271  return res;
272 }
273 #endif /* !BROKEN_COMPILER */
274 
275 #endif /* HAVE_INLINE_ASM */
276 #endif /* AVCODEC_X86_CABAC_H */