FFmpeg
cabac.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_X86_CABAC_H
22 #define AVCODEC_X86_CABAC_H
23 
24 #include <stddef.h>
25 
26 #include "libavcodec/cabac.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/macros.h"
29 #include "libavutil/x86/asm.h"
30 #include "config.h"
31 
/*
 * BROKEN_COMPILER is 1 for toolchains on which the inline-asm CABAC readers
 * in this header are compiled out (they are guarded by !BROKEN_COMPILER):
 *  - clang targeting __i386 with a version below 2.10,
 *  - a non-clang compiler that defines __llvm__ and reports itself as
 *    GCC 4.2.0 or 4.2.1,
 *  - a compiler that defines both __INTEL_COMPILER and _MSC_VER.
 * It is 0 for every other compiler.
 */
32 #if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
33  || ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
34  || (defined(__INTEL_COMPILER) && defined(_MSC_VER))
35 # define BROKEN_COMPILER 1
36 #else
37 # define BROKEN_COMPILER 0
38 #endif
39 
40 #if HAVE_INLINE_ASM
41 
/*
 * If the including file has not already defined UNCHECKED_BITSTREAM_READER,
 * default it to the logical negation of CONFIG_SAFE_BITSTREAM_READER.
 */
42 #ifndef UNCHECKED_BITSTREAM_READER
43 #define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
44 #endif
45 
/*
 * END_CHECK(end): asm fragment pasted into BRANCHLESS_GET_CABAC right after
 * the bytestream pointer has been loaded into FF_REG_c.
 *
 * With the unchecked reader it expands to an empty string. Otherwise it
 * compares FF_REG_c against the operand `end` and jumps forward to local
 * label 1 when FF_REG_c >= end (signed comparison), which at the use site
 * skips the instruction that advances the stored bytestream pointer.
 */
46 #if UNCHECKED_BITSTREAM_READER
47 #define END_CHECK(end) ""
48 #else
49 #define END_CHECK(end) \
50  "cmp "end" , %%"FF_REG_c" \n\t"\
51  "jge 1f \n\t"
52 #endif
53 
54 #ifdef BROKEN_RELOCATIONS
/*
 * BROKEN_RELOCATIONS build: append the local variable `tables` to the asm
 * input operand list as an extra register operand.
 */
55 #define TABLES_ARG , "r"(tables)
56 
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp), BROKEN_RELOCATIONS
 * flavour: asm fragment that updates range, low and ret without branching.
 *
 * State expected on entry (set up by BRANCHLESS_GET_CABAC just before the
 * fragment is pasted):
 *   range = byte fetched from the lps_off table,
 *   ecx   = previous range minus that byte,
 *   tmp   = ecx << 17,
 *   ret   = state byte.
 *
 * CMOV version:
 *   - cmp sets flags from tmp - low;
 *   - cmova copies ecx into range when tmp is above low (unsigned), and
 *     leaves the flags untouched;
 *   - sbb turns the borrow of that compare into a mask in rcx
 *     (all ones when tmp is below low, zero otherwise);
 *   - tmp is ANDed with the mask, retq is XORed with it, and the masked tmp
 *     is subtracted from low.
 */
57 #if HAVE_FAST_CMOV
58 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
59  "cmp "low" , "tmp" \n\t"\
60  "cmova %%ecx , "range" \n\t"\
61  "sbb %%rcx , %%rcx \n\t"\
62  "and %%ecx , "tmp" \n\t"\
63  "xor %%rcx , "retq" \n\t"\
64  "sub "tmp" , "low" \n\t"
65 #else /* HAVE_FAST_CMOV */
/*
 * Version without cmov/sbb:
 *   - tmp = (tmp - low) >> 31 (arithmetic), i.e. an all-ones/zero sign mask;
 *   - range = ecx + ((range - ecx) & mask);
 *   - low  -= (ecx << 17) & mask;
 *   - ret  ^= mask, then ret is sign-extended into retq.
 * ecx is destroyed (shifted and masked) in the process.
 */
66 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
67 /* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
68  "sub "low" , "tmp" \n\t"\
69  "sar $31 , "tmp" \n\t"\
70  "sub %%ecx , "range" \n\t"\
71  "and "tmp" , "range" \n\t"\
72  "add %%ecx , "range" \n\t"\
73  "shl $17 , %%ecx \n\t"\
74  "and "tmp" , %%ecx \n\t"\
75  "sub %%ecx , "low" \n\t"\
76  "xor "tmp" , "ret" \n\t"\
77  "movslq "ret" , "retq" \n\t"
78 #endif /* HAVE_FAST_CMOV */
79 
/*
 * BRANCHLESS_GET_CABAC(...), BROKEN_RELOCATIONS flavour: asm text for one
 * CABAC decision, addressing every table relative to the register operand
 * `tables` instead of through an absolute symbol reference.
 *
 * All parameters are string literals naming asm operands:
 *   ret / retq      32-bit / 64-bit view of the result register
 *   statep          memory operand holding the state byte (read and rewritten)
 *   low / lowword   32-bit / 16-bit view of the `low` register
 *   range / rangeq  32-bit / 64-bit view of the `range` register
 *   tmp / tmpbyte   32-bit / 8-bit view of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             operand handed to END_CHECK
 *   norm_off, lps_off, mlps_off   byte offsets of the three lookup tables
 *   tables          register holding the table base address
 *
 * Sequence:
 *   1. ret = state byte; tmp = range; range &= 0xC0;
 *      range = table byte at lps_off + ret + 2 * range.
 *   2. tmp -= range; ecx = tmp; tmp <<= 17; then
 *      BRANCHLESS_GET_CABAC_UPDATE adjusts range, low and ret.
 *   3. cl = table byte at norm_off + range; range and low are both shifted
 *      left by cl.
 *   4. The table byte at mlps_off + 128 + ret is stored back to statep.
 *   5. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *      Otherwise refill: load the bytestream pointer into FF_REG_c, run
 *      END_CHECK, advance the stored pointer by 2, read 16 bits at
 *      FF_REG_c, byte-swap and shift right by 15 (the two bytes taken in
 *      big-endian order, times 2), subtract 0xFFFF, shift the result left by
 *      7 - norm table[(low ^ (low - 1)) >> 15] and add it to low.
 *
 * The register behind FF_REG_c (ecx/rcx) is used as scratch throughout, and
 * local labels 1 and 2 are defined by the fragment.
 */
80 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
81  "movzbl "statep" , "ret" \n\t"\
82  "mov "range" , "tmp" \n\t"\
83  "and $0xC0 , "range" \n\t"\
84  "lea ("ret", "range", 2), %%ecx \n\t"\
85  "movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
86  "sub "range" , "tmp" \n\t"\
87  "mov "tmp" , %%ecx \n\t"\
88  "shl $17 , "tmp" \n\t"\
89  BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
90  "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
91  "shl %%cl , "range" \n\t"\
92  "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
93  "shl %%cl , "low" \n\t"\
94  "mov "tmpbyte" , "statep" \n\t"\
95  "test "lowword" , "lowword" \n\t"\
96  "jnz 2f \n\t"\
97  "mov "byte" , %%"FF_REG_c" \n\t"\
98  END_CHECK(end)\
99  "add"FF_OPSIZE" $2 , "byte" \n\t"\
100  "1: \n\t"\
101  "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
102  "lea -1("low") , %%ecx \n\t"\
103  "xor "low" , %%ecx \n\t"\
104  "shr $15 , %%ecx \n\t"\
105  "bswap "tmp" \n\t"\
106  "shr $15 , "tmp" \n\t"\
107  "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
108  "sub $0xFFFF , "tmp" \n\t"\
109  "neg %%ecx \n\t"\
110  "add $7 , %%ecx \n\t"\
111  "shl %%cl , "tmp" \n\t"\
112  "add "tmp" , "low" \n\t"\
113  "2: \n\t"
114 
115 #else /* BROKEN_RELOCATIONS */
/*
 * Build without BROKEN_RELOCATIONS: the asm references ff_h264_cabac_tables
 * by symbol, so TABLES_ARG only forwards that symbol to
 * NAMED_CONSTRAINTS_ARRAY_ADD (defined outside this file; presumably it adds
 * a named operand when the toolchain needs one -- confirm in asm.h).
 * RIP_ARG is defined empty.
 */
116 #define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
117 #define RIP_ARG
118 
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp), direct-addressing
 * flavour: asm fragment that updates range, low and ret without branching.
 *
 * On entry `range` holds the byte fetched from the LPS range table and `tmp`
 * holds the previous range minus that byte. Both versions start by saving
 * tmp in ecx and shifting tmp left by 17.
 *
 * CMOV version:
 *   - cmp sets flags from tmp - low;
 *   - cmova copies ecx into range when tmp is above low (unsigned), and
 *     leaves the flags untouched;
 *   - sbb turns the borrow of that compare into a mask in ecx
 *     (all ones when tmp is below low, zero otherwise);
 *   - tmp is ANDed with the mask, ret is XORed with it, and the masked tmp
 *     is subtracted from low.
 */
119 #if HAVE_FAST_CMOV
120 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
121  "mov "tmp" , %%ecx \n\t"\
122  "shl $17 , "tmp" \n\t"\
123  "cmp "low" , "tmp" \n\t"\
124  "cmova %%ecx , "range" \n\t"\
125  "sbb %%ecx , %%ecx \n\t"\
126  "and %%ecx , "tmp" \n\t"\
127  "xor %%ecx , "ret" \n\t"\
128  "sub "tmp" , "low" \n\t"
129 #else /* HAVE_FAST_CMOV */
/*
 * Version without cmov/sbb: tmp becomes the sign mask of (tmp << 17) - low,
 * range is blended between ecx and the table value with that mask, the
 * masked (ecx << 17) is subtracted from low, and ret is XORed with the mask.
 */
130 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
131  "mov "tmp" , %%ecx \n\t"\
132  "shl $17 , "tmp" \n\t"\
133  "sub "low" , "tmp" \n\t"\
134  "sar $31 , "tmp" \n\t" /*lps_mask*/\
135  "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
136  "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
137  "add %%ecx , "range" \n\t" /*new range*/\
138  "shl $17 , %%ecx \n\t"\
139  "and "tmp" , %%ecx \n\t"\
140  "sub %%ecx , "low" \n\t"\
141  "xor "tmp" , "ret" \n\t"
142 #endif /* HAVE_FAST_CMOV */
143 
/*
 * BRANCHLESS_GET_CABAC(...), direct-addressing flavour: asm text for one
 * CABAC decision, reaching the lookup tables through
 * MANGLE(ff_h264_cabac_tables) plus a constant offset.
 *
 * The parameter list matches the BROKEN_RELOCATIONS flavour so both can be
 * invoked identically; `retq`, `rangeq` and `tables` are accepted but not
 * used in this expansion.
 *
 * Sequence:
 *   1. ret = state byte; tmp = range; range &= 0xC0;
 *      range = table byte at lps_off + ret + 2 * range; tmp -= range.
 *   2. BRANCHLESS_GET_CABAC_UPDATE adjusts range, low and ret.
 *   3. cl = table byte at norm_off + range; range and low are both shifted
 *      left by cl.
 *   4. The table byte at mlps_off + 128 + ret is stored back to statep.
 *   5. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *      Otherwise refill: load the bytestream pointer into FF_REG_c, run
 *      END_CHECK, advance the stored pointer by 2, read 16 bits at
 *      FF_REG_c, byte-swap and shift right by 15 (the two bytes taken in
 *      big-endian order, times 2), subtract 0xFFFF, shift the result left by
 *      7 - norm table[(low ^ (low - 1)) >> 15] and add it to low.
 *
 * The register behind FF_REG_c (ecx) is used as scratch throughout, and
 * local labels 1 and 2 are defined by the fragment.
 */
144 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
145  "movzbl "statep" , "ret" \n\t"\
146  "mov "range" , "tmp" \n\t"\
147  "and $0xC0 , "range" \n\t"\
148  "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
149  "sub "range" , "tmp" \n\t"\
150  BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
151  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
152  "shl %%cl , "range" \n\t"\
153  "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
154  "shl %%cl , "low" \n\t"\
155  "mov "tmpbyte" , "statep" \n\t"\
156  "test "lowword" , "lowword" \n\t"\
157  " jnz 2f \n\t"\
158  "mov "byte" , %%"FF_REG_c" \n\t"\
159  END_CHECK(end)\
160  "add"FF_OPSIZE" $2 , "byte" \n\t"\
161  "1: \n\t"\
162  "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
163  "lea -1("low") , %%ecx \n\t"\
164  "xor "low" , %%ecx \n\t"\
165  "shr $15 , %%ecx \n\t"\
166  "bswap "tmp" \n\t"\
167  "shr $15 , "tmp" \n\t"\
168  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
169  "sub $0xFFFF , "tmp" \n\t"\
170  "neg %%ecx \n\t"\
171  "add $7 , %%ecx \n\t"\
172  "shl %%cl , "tmp" \n\t"\
173  "add "tmp" , "low" \n\t"\
174  "2: \n\t"
175 
176 #endif /* BROKEN_RELOCATIONS */
177 
/*
 * x86 inline-asm replacement for get_cabac_inline; only built when
 * HAVE_7REGS is set and the compiler is not flagged by BROKEN_COMPILER.
 *
 * NOTE(review): the listing numbers embedded in this copy skip 182, 184, 195
 * and 203-205, so the inlining attribute on each side of the #if/#else, one
 * operand line of the first asm statement and three BRANCHLESS_GET_CABAC
 * arguments appear to be missing here. Restore them from the upstream file
 * before relying on this text.
 */
178 #if HAVE_7REGS && !BROKEN_COMPILER
179 #define get_cabac_inline get_cabac_inline_x86
180 static
181 #if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)
183 #else
185 #endif
/**
 * Decode one CABAC decision using BRANCHLESS_GET_CABAC.
 *
 * @param c     decoder context; c->low and c->range are read and written
 *              back through register operands, while the bytestream and
 *              bytestream_end fields are reached in memory via their
 *              offsetof() displacements from c
 * @param state pointer to the state byte, which the asm reads and rewrites
 * @return      bit 0 of the value left in the result register
 */
186 int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
187 {
188  int bit, tmp;
189 #ifdef BROKEN_RELOCATIONS
190  void *tables;
191 
 /* Materialise the table base address in a register for TABLES_ARG. */
192  __asm__ volatile(
193  "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
194  : "=&r"(tables)
196  );
197 #endif
198 
 /*
  * Operands: %0 bit, %1 c->low, %2 c->range, %3 tmp ("q" constraint because
  * its byte sub-register %b3 is stored to *state), %4 state, %5 c,
  * %6/%7 offsets of bytestream / bytestream_end, then whatever TABLES_ARG
  * adds, then c->low and c->range again as inputs tied to %1 and %2.
  */
199  __asm__ volatile(
200  BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
201  "%2", "%q2", "%3", "%b3",
202  "%c6(%5)", "%c7(%5)",
206  "%8")
207  : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
208  : "r"(state), "r"(c),
209  "i"(offsetof(CABACContext, bytestream)),
210  "i"(offsetof(CABACContext, bytestream_end))
211  TABLES_ARG
212  ,"1"(c->low), "2"(c->range)
213  : "%"FF_REG_c, "memory"
214  );
215  return bit & 1;
216 }
217 #endif /* HAVE_7REGS && !BROKEN_COMPILER */
218 
219 #if !BROKEN_COMPILER
/* Route get_cabac_bypass_sign to the x86 inline-asm implementation below. */
220 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
/**
 * Decode one bypass bin and apply it to val as a sign.
 *
 * The asm computes 2 * c->low - (c->range << 17). When that difference is
 * negative it is undone (the subtracted amount is added back) and val is
 * negated; otherwise val is left unchanged. If the low 16 bits of the new
 * low value are zero, 16 bits are read from c->bytestream to refill it.
 * The result is stored to c->low.
 *
 * @param c   decoder context; low is updated, bytestream may be advanced by 2
 * @param val value whose sign is to be decided (lives in ecx)
 * @return    val, or -val when the difference above was negative
 */
221 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
222 {
223  x86_reg tmp;
 /*
  * Operands: %0 val (ecx), %1 tmp, %2 c, %3 offsetof low,
  * %4 offsetof bytestream, %5 offsetof bytestream_end, %6 offsetof range.
  */
224  __asm__ volatile(
 /* tmp = range << 17; eax = 2 * low - tmp; edx = sign mask of eax (cdq) */
225  "movl %c6(%2), %k1 \n\t"
226  "movl %c3(%2), %%eax \n\t"
227  "shl $17, %k1 \n\t"
228  "add %%eax, %%eax \n\t"
229  "sub %k1, %%eax \n\t"
230  "cdq \n\t"
 /* eax += tmp & mask; val = (val ^ mask) - mask, i.e. negate when mask is -1 */
231  "and %%edx, %k1 \n\t"
232  "add %k1, %%eax \n\t"
233  "xor %%edx, %%ecx \n\t"
234  "sub %%edx, %%ecx \n\t"
 /* refill only when the low 16 bits of eax are zero */
235  "test %%ax, %%ax \n\t"
236  "jnz 1f \n\t"
 /* eax += (two bytestream bytes, big-endian order, times 2) - 0xFFFF */
237  "mov %c4(%2), %1 \n\t"
238  "subl $0xFFFF, %%eax \n\t"
239  "movzwl (%1), %%edx \n\t"
240  "bswap %%edx \n\t"
241  "shrl $15, %%edx \n\t"
242 #if UNCHECKED_BITSTREAM_READER
 /* unchecked reader: always advance the stored pointer by 2 */
243  "add $2, %1 \n\t"
244  "addl %%edx, %%eax \n\t"
245  "mov %1, %c4(%2) \n\t"
246 #else
 /* checked reader: leave the pointer alone once it is >= bytestream_end */
247  "addl %%edx, %%eax \n\t"
248  "cmp %c5(%2), %1 \n\t"
249  "jge 1f \n\t"
250  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
251 #endif
252  "1: \n\t"
253  "movl %%eax, %c3(%2) \n\t"
254 
255  : "+c"(val), "=&r"(tmp)
256  : "r"(c),
257  "i"(offsetof(CABACContext, low)),
258  "i"(offsetof(CABACContext, bytestream)),
259  "i"(offsetof(CABACContext, bytestream_end)),
260  "i"(offsetof(CABACContext, range))
261  : "%eax", "%edx", "memory"
262  );
263  return val;
264 }
265 
/* Route get_cabac_bypass to the x86 inline-asm implementation below. */
266 #define get_cabac_bypass get_cabac_bypass_x86
/**
 * Decode one bypass bin.
 *
 * The asm computes 2 * c->low - (c->range << 17). When that difference is
 * negative it is undone (the subtracted amount is added back) and the result
 * is 0; otherwise the result is 1. If the low 16 bits of the new low value
 * are zero, 16 bits are read from c->bytestream to refill it. The result is
 * stored to c->low.
 *
 * Unlike get_cabac_bypass_sign_x86, this function has no
 * UNCHECKED_BITSTREAM_READER path: the comparison against bytestream_end is
 * always emitted.
 *
 * @param c decoder context; low is updated, bytestream may be advanced by 2
 * @return  the decoded bin, 0 or 1
 */
267 static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
268 {
269  x86_reg tmp;
270  int res;
 /*
  * Operands: %0 res (edx), %1 tmp, %2 c, %3 offsetof low,
  * %4 offsetof bytestream, %5 offsetof bytestream_end, %6 offsetof range.
  */
271  __asm__ volatile(
 /* tmp = range << 17; eax = 2 * low - tmp; edx = sign mask of eax (cdq) */
272  "movl %c6(%2), %k1 \n\t"
273  "movl %c3(%2), %%eax \n\t"
274  "shl $17, %k1 \n\t"
275  "add %%eax, %%eax \n\t"
276  "sub %k1, %%eax \n\t"
277  "cdq \n\t"
 /* eax += tmp & mask; res = mask + 1 (0 when negative, 1 otherwise) */
278  "and %%edx, %k1 \n\t"
279  "add %k1, %%eax \n\t"
280  "inc %%edx \n\t"
 /* refill only when the low 16 bits of eax are zero */
281  "test %%ax, %%ax \n\t"
282  "jnz 1f \n\t"
 /* eax += (two bytestream bytes, big-endian order, times 2) - 0xFFFF */
283  "mov %c4(%2), %1 \n\t"
284  "subl $0xFFFF, %%eax \n\t"
285  "movzwl (%1), %%ecx \n\t"
286  "bswap %%ecx \n\t"
287  "shrl $15, %%ecx \n\t"
288  "addl %%ecx, %%eax \n\t"
 /* advance the stored pointer by 2 unless it is already >= bytestream_end */
289  "cmp %c5(%2), %1 \n\t"
290  "jge 1f \n\t"
291  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
292  "1: \n\t"
293  "movl %%eax, %c3(%2) \n\t"
294 
295  : "=&d"(res), "=&r"(tmp)
296  : "r"(c),
297  "i"(offsetof(CABACContext, low)),
298  "i"(offsetof(CABACContext, bytestream)),
299  "i"(offsetof(CABACContext, bytestream_end)),
300  "i"(offsetof(CABACContext, range))
301  : "%eax", "%ecx", "memory"
302  );
303  return res;
304 }
305 #endif /* !BROKEN_COMPILER */
306 
307 #endif /* HAVE_INLINE_ASM */
308 #endif /* AVCODEC_X86_CABAC_H */
cabac.h
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:58
macros.h
NAMED_CONSTRAINTS_ARRAY
#define NAMED_CONSTRAINTS_ARRAY(...)
Definition: asm.h:151
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
val
static double val(void *priv, double ch)
Definition: aeval.c:77
ff_h264_cabac_tables
const uint8_t ff_h264_cabac_tables[512+4 *2 *64+4 *64+63]
state
static struct @343 state
av_noinline
#define av_noinline
Definition: attributes.h:72
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
asm.h
H264_LPS_RANGE_OFFSET
#define H264_LPS_RANGE_OFFSET
Definition: cabac.h:34
attributes.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
AV_STRINGIFY
#define AV_STRINGIFY(s)
Definition: macros.h:66
H264_NORM_SHIFT_OFFSET
#define H264_NORM_SHIFT_OFFSET
Definition: cabac.h:33
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
MANGLE
#define MANGLE(a)
Definition: asm.h:127
x86_reg
int x86_reg
Definition: asm.h:72
H264_MLPS_STATE_OFFSET
#define H264_MLPS_STATE_OFFSET
Definition: cabac.h:35
CABACContext
Definition: cabac.h:41