Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#ifndef AVCODEC_X86_CABAC_H |
22 |
|
|
#define AVCODEC_X86_CABAC_H |
23 |
|
|
|
24 |
|
|
#include <stddef.h> |
25 |
|
|
|
26 |
|
|
#include "libavcodec/cabac.h" |
27 |
|
|
#include "libavutil/attributes.h" |
28 |
|
|
#include "libavutil/macros.h" |
29 |
|
|
#include "libavutil/x86/asm.h" |
30 |
|
|
#include "config.h" |
31 |
|
|
|
32 |
|
|
/*
 * BROKEN_COMPILER is 1 for toolchains on which the inline-asm functions
 * guarded by !BROKEN_COMPILER further down are not compiled:
 *   - clang older than 2.10 when __i386 is defined,
 *   - a non-clang __llvm__ compiler reporting GCC 4.2.0 or 4.2.1,
 *   - a compiler defining both __INTEL_COMPILER and _MSC_VER.
 * It is 0 everywhere else, so it can be used directly in #if expressions.
 */
#if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\ |
33 |
|
|
|| ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\ |
34 |
|
|
|| (defined(__INTEL_COMPILER) && defined(_MSC_VER)) |
35 |
|
|
# define BROKEN_COMPILER 1 |
36 |
|
|
#else |
37 |
|
|
# define BROKEN_COMPILER 0 |
38 |
|
|
#endif |
39 |
|
|
|
40 |
|
|
#if HAVE_INLINE_ASM |
41 |
|
|
|
42 |
|
|
/*
 * Default for UNCHECKED_BITSTREAM_READER when the including file has not
 * defined it already: the logical negation of CONFIG_SAFE_BITSTREAM_READER.
 * The macro is only evaluated in #if expressions in this header.
 */
#ifndef UNCHECKED_BITSTREAM_READER |
43 |
|
|
#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER |
44 |
|
|
#endif |
45 |
|
|
|
46 |
|
|
/*
 * END_CHECK(end): asm fragment inserted by BRANCHLESS_GET_CABAC right after
 * the bytestream pointer has been loaded into FF_REG_c and right before the
 * stored pointer is advanced by 2.
 *
 * With UNCHECKED_BITSTREAM_READER it expands to an empty string, so the
 * pointer is always advanced. Otherwise it compares FF_REG_c with the memory
 * operand "end" and jumps to local label 1 (skipping the advance) when the
 * pointer is greater than or equal to "end".
 *
 * NOTE(review): jge is a signed comparison although both operands are
 * addresses; an unsigned jae would be the usual choice -- confirm this is
 * intentional.
 */
#if UNCHECKED_BITSTREAM_READER |
47 |
|
|
#define END_CHECK(end) "" |
48 |
|
|
#else |
49 |
|
|
#define END_CHECK(end) \ |
50 |
|
|
"cmp "end" , %%"FF_REG_c" \n\t"\ |
51 |
|
|
"jge 1f \n\t" |
52 |
|
|
#endif |
53 |
|
|
|
54 |
|
|
#ifdef BROKEN_RELOCATIONS |
55 |
|
|
/*
 * With BROKEN_RELOCATIONS the lookup-table base is handed to the asm as an
 * extra register input; TABLES_ARG is appended verbatim to the input operand
 * list of get_cabac_inline_x86(), hence the leading comma.
 */
#define TABLES_ARG , "r"(tables) |
56 |
|
|
|
57 |
|
|
/*
 * BRANCHLESS_GET_CABAC_UPDATE, variant used when BROKEN_RELOCATIONS is
 * defined (it uses the 64-bit names %rcx and retq).
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC just before this
 * macro is expanded:
 *   range = byte loaded from the LPS range table,
 *   %ecx  = old range minus that byte,
 *   tmp   = %ecx << 17.
 * On exit range and low hold their updated values, and ret has been XORed
 * with an all-ones mask when tmp was below low and left unchanged otherwise.
 *
 * HAVE_FAST_CMOV form: cmp sets the flags for tmp - low; cmova replaces
 * range with %ecx when tmp > low (unsigned); sbb turns the borrow into a
 * 0 / -1 mask in %rcx, which gates the value subtracted from low and is
 * XORed into retq.
 *
 * NOTE(review): when tmp == low neither cmova nor the borrow fires, so range
 * keeps the table byte while the mask is 0, whereas the form without cmov
 * selects %ecx in that case. Presumably that input cannot occur -- confirm.
 */
#if HAVE_FAST_CMOV |
58 |
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ |
59 |
|
|
"cmp "low" , "tmp" \n\t"\ |
60 |
|
|
"cmova %%ecx , "range" \n\t"\ |
61 |
|
|
"sbb %%rcx , %%rcx \n\t"\ |
62 |
|
|
"and %%ecx , "tmp" \n\t"\ |
63 |
|
|
"xor %%rcx , "retq" \n\t"\ |
64 |
|
|
"sub "tmp" , "low" \n\t" |
65 |
|
|
#else /* HAVE_FAST_CMOV */ |
66 |
|
|
/*
 * Form without cmov/sbb: tmp becomes the sign mask of (tmp - low); range is
 * blended as %ecx + ((range - %ecx) & mask); (%ecx << 17) & mask is
 * subtracted from low; ret is XORed with the mask and then sign-extended
 * into retq so it can be used as a 64-bit index.
 */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ |
67 |
|
|
/* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \ |
68 |
|
|
"sub "low" , "tmp" \n\t"\ |
69 |
|
|
"sar $31 , "tmp" \n\t"\ |
70 |
|
|
"sub %%ecx , "range" \n\t"\ |
71 |
|
|
"and "tmp" , "range" \n\t"\ |
72 |
|
|
"add %%ecx , "range" \n\t"\ |
73 |
|
|
"shl $17 , %%ecx \n\t"\ |
74 |
|
|
"and "tmp" , %%ecx \n\t"\ |
75 |
|
|
"sub %%ecx , "low" \n\t"\ |
76 |
|
|
"xor "tmp" , "ret" \n\t"\ |
77 |
|
|
"movslq "ret" , "retq" \n\t" |
78 |
|
|
#endif /* HAVE_FAST_CMOV */ |
79 |
|
|
|
80 |
|
|
/*
 * BRANCHLESS_GET_CABAC, BROKEN_RELOCATIONS variant: every table access has
 * the form offset(tables, index), relative to the register passed as
 * "tables".
 *
 * All arguments are operand strings spliced into the asm template:
 *   ret / retq      result register (32/64-bit name); the caller in this
 *                   header returns its bit 0
 *   statep          memory operand of the context state byte
 *   low / lowword   register holding low, and its 16-bit name
 *   range / rangeq  register holding range, and its 64-bit name
 *   tmp / tmpbyte   scratch register, and its 8-bit name
 *   byte, end       memory operands holding the bytestream pointer and the
 *                   end pointer
 *   norm_off, lps_off, mlps_off   offsets of the three lookup tables
 *   tables          register holding the table base address
 * FF_REG_c (including %ecx / %cl) is used as scratch throughout.
 *
 * Sequence:
 *   1. ret = state byte; range = table[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = old range - that byte, copied to %ecx, then tmp <<= 17.
 *   2. BRANCHLESS_GET_CABAC_UPDATE selects the new range/low and flips ret.
 *   3. %cl = table[norm_off + range]; range and low are shifted left by it;
 *      table[mlps_off + 128 + ret] is stored back to statep.
 *   4. If the low 16 bits of low are nonzero, jump to label 2 (done).
 *      Otherwise 16 bits are read through the bytestream pointer, the stored
 *      pointer is advanced by 2 unless END_CHECK branched to label 1, and
 *        ((bswap(word) >> 15) - 0xFFFF)
 *            << (7 - table[norm_off + (((low - 1) ^ low) >> 15)])
 *      is added to low.
 * The 16-bit read at label 1 happens even when END_CHECK skipped the advance.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \ |
81 |
|
|
"movzbl "statep" , "ret" \n\t"\ |
82 |
|
|
"mov "range" , "tmp" \n\t"\ |
83 |
|
|
"and $0xC0 , "range" \n\t"\ |
84 |
|
|
"lea ("ret", "range", 2), %%ecx \n\t"\ |
85 |
|
|
"movzbl "lps_off"("tables", %%rcx), "range" \n\t"\ |
86 |
|
|
"sub "range" , "tmp" \n\t"\ |
87 |
|
|
"mov "tmp" , %%ecx \n\t"\ |
88 |
|
|
"shl $17 , "tmp" \n\t"\ |
89 |
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ |
90 |
|
|
"movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\ |
91 |
|
|
"shl %%cl , "range" \n\t"\ |
92 |
|
|
"movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\ |
93 |
|
|
"shl %%cl , "low" \n\t"\ |
94 |
|
|
"mov "tmpbyte" , "statep" \n\t"\ |
95 |
|
|
"test "lowword" , "lowword" \n\t"\ |
96 |
|
|
"jnz 2f \n\t"\ |
97 |
|
|
"mov "byte" , %%"FF_REG_c" \n\t"\ |
98 |
|
|
END_CHECK(end)\ |
99 |
|
|
"add"FF_OPSIZE" $2 , "byte" \n\t"\ |
100 |
|
|
"1: \n\t"\ |
101 |
|
|
"movzwl (%%"FF_REG_c") , "tmp" \n\t"\ |
102 |
|
|
"lea -1("low") , %%ecx \n\t"\ |
103 |
|
|
"xor "low" , %%ecx \n\t"\ |
104 |
|
|
"shr $15 , %%ecx \n\t"\ |
105 |
|
|
"bswap "tmp" \n\t"\ |
106 |
|
|
"shr $15 , "tmp" \n\t"\ |
107 |
|
|
"movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\ |
108 |
|
|
"sub $0xFFFF , "tmp" \n\t"\ |
109 |
|
|
"neg %%ecx \n\t"\ |
110 |
|
|
"add $7 , %%ecx \n\t"\ |
111 |
|
|
"shl %%cl , "tmp" \n\t"\ |
112 |
|
|
"add "tmp" , "low" \n\t"\ |
113 |
|
|
"2: \n\t" |
114 |
|
|
|
115 |
|
|
#else /* BROKEN_RELOCATIONS */ |
116 |
|
|
/*
 * Without BROKEN_RELOCATIONS the asm templates name ff_h264_cabac_tables
 * directly through MANGLE(), so TABLES_ARG passes the symbol through
 * NAMED_CONSTRAINTS_ARRAY_ADD instead of adding a register operand.
 * RIP_ARG is defined empty; nothing in the visible part of this header
 * references it.
 */
#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables) |
117 |
|
|
#define RIP_ARG |
118 |
|
|
|
119 |
|
|
/*
 * BRANCHLESS_GET_CABAC_UPDATE, variant used when BROKEN_RELOCATIONS is not
 * defined. Unlike the other variant it takes no retq argument and performs
 * the "copy tmp to %ecx, then tmp <<= 17" setup itself.
 *
 * State on entry: range = byte loaded from the LPS range table,
 * tmp = old range minus that byte.
 * On exit range and low hold their updated values and ret has been XORed
 * with a 0 / -1 mask.
 *
 * HAVE_FAST_CMOV form: cmp sets the flags for tmp - low; cmova replaces
 * range with %ecx when tmp > low (unsigned); sbb turns the borrow into the
 * mask in %ecx, which gates the value subtracted from low and flips ret.
 * The other form derives the mask from the sign of (tmp - low) with sar and
 * blends range and low arithmetically, as its inline comments describe.
 *
 * NOTE(review): when tmp == low the cmov form leaves range at the table byte
 * with a zero mask, whereas the arithmetic form selects %ecx. Presumably
 * that input cannot occur -- confirm.
 */
#if HAVE_FAST_CMOV |
120 |
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\ |
121 |
|
|
"mov "tmp" , %%ecx \n\t"\ |
122 |
|
|
"shl $17 , "tmp" \n\t"\ |
123 |
|
|
"cmp "low" , "tmp" \n\t"\ |
124 |
|
|
"cmova %%ecx , "range" \n\t"\ |
125 |
|
|
"sbb %%ecx , %%ecx \n\t"\ |
126 |
|
|
"and %%ecx , "tmp" \n\t"\ |
127 |
|
|
"xor %%ecx , "ret" \n\t"\ |
128 |
|
|
"sub "tmp" , "low" \n\t" |
129 |
|
|
#else /* HAVE_FAST_CMOV */ |
130 |
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\ |
131 |
|
|
"mov "tmp" , %%ecx \n\t"\ |
132 |
|
|
"shl $17 , "tmp" \n\t"\ |
133 |
|
|
"sub "low" , "tmp" \n\t"\ |
134 |
|
|
"sar $31 , "tmp" \n\t" /*lps_mask*/\ |
135 |
|
|
"sub %%ecx , "range" \n\t" /*RangeLPS - range*/\ |
136 |
|
|
"and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\ |
137 |
|
|
"add %%ecx , "range" \n\t" /*new range*/\ |
138 |
|
|
"shl $17 , %%ecx \n\t"\ |
139 |
|
|
"and "tmp" , %%ecx \n\t"\ |
140 |
|
|
"sub %%ecx , "low" \n\t"\ |
141 |
|
|
"xor "tmp" , "ret" \n\t" |
142 |
|
|
#endif /* HAVE_FAST_CMOV */ |
143 |
|
|
|
144 |
|
|
/*
 * BRANCHLESS_GET_CABAC, variant used when BROKEN_RELOCATIONS is not defined:
 * the tables are addressed as MANGLE(ff_h264_cabac_tables)+offset(index), so
 * the retq, rangeq and tables arguments are accepted but never expanded.
 *
 * The remaining arguments are operand strings spliced into the template:
 *   ret             result register; the caller in this header returns bit 0
 *   statep          memory operand of the context state byte
 *   low / lowword   register holding low, and its 16-bit name
 *   range           register holding range
 *   tmp / tmpbyte   scratch register, and its 8-bit name
 *   byte, end       memory operands holding the bytestream pointer and the
 *                   end pointer
 *   norm_off, lps_off, mlps_off   offsets of the three lookup tables
 * FF_REG_c (including %ecx / %cl) is used as scratch throughout.
 *
 * Sequence:
 *   1. ret = state byte; range = table[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = old range - that byte.
 *   2. BRANCHLESS_GET_CABAC_UPDATE selects the new range/low and flips ret.
 *   3. %cl = table[norm_off + range]; range and low are shifted left by it;
 *      table[mlps_off + 128 + ret] is stored back to statep.
 *   4. If the low 16 bits of low are nonzero, jump to label 2 (done).
 *      Otherwise 16 bits are read through the bytestream pointer, the stored
 *      pointer is advanced by 2 unless END_CHECK branched to label 1, and
 *        ((bswap(word) >> 15) - 0xFFFF)
 *            << (7 - table[norm_off + (((low - 1) ^ low) >> 15)])
 *      is added to low.
 * The 16-bit read at label 1 happens even when END_CHECK skipped the advance.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \ |
145 |
|
|
"movzbl "statep" , "ret" \n\t"\ |
146 |
|
|
"mov "range" , "tmp" \n\t"\ |
147 |
|
|
"and $0xC0 , "range" \n\t"\ |
148 |
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\ |
149 |
|
|
"sub "range" , "tmp" \n\t"\ |
150 |
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ |
151 |
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\ |
152 |
|
|
"shl %%cl , "range" \n\t"\ |
153 |
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\ |
154 |
|
|
"shl %%cl , "low" \n\t"\ |
155 |
|
|
"mov "tmpbyte" , "statep" \n\t"\ |
156 |
|
|
"test "lowword" , "lowword" \n\t"\ |
157 |
|
|
" jnz 2f \n\t"\ |
158 |
|
|
"mov "byte" , %%"FF_REG_c" \n\t"\ |
159 |
|
|
END_CHECK(end)\ |
160 |
|
|
"add"FF_OPSIZE" $2 , "byte" \n\t"\ |
161 |
|
|
"1: \n\t"\ |
162 |
|
|
"movzwl (%%"FF_REG_c") , "tmp" \n\t"\ |
163 |
|
|
"lea -1("low") , %%ecx \n\t"\ |
164 |
|
|
"xor "low" , %%ecx \n\t"\ |
165 |
|
|
"shr $15 , %%ecx \n\t"\ |
166 |
|
|
"bswap "tmp" \n\t"\ |
167 |
|
|
"shr $15 , "tmp" \n\t"\ |
168 |
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\ |
169 |
|
|
"sub $0xFFFF , "tmp" \n\t"\ |
170 |
|
|
"neg %%ecx \n\t"\ |
171 |
|
|
"add $7 , %%ecx \n\t"\ |
172 |
|
|
"shl %%cl , "tmp" \n\t"\ |
173 |
|
|
"add "tmp" , "low" \n\t"\ |
174 |
|
|
"2: \n\t" |
175 |
|
|
|
176 |
|
|
#endif /* BROKEN_RELOCATIONS */ |
177 |
|
|
|
178 |
|
|
#if HAVE_7REGS && !BROKEN_COMPILER |
179 |
|
|
/**
 * x86 inline-asm implementation that get_cabac_inline is aliased to.
 *
 * Expands BRANCHLESS_GET_CABAC with c->low and c->range held in registers,
 * the context byte addressed as (state), and the bytestream / bytestream_end
 * fields addressed as constant offsets from c. low and range are written
 * back through the output operands; the state byte and, on refill, the
 * bytestream pointer are updated in memory by the asm itself.
 *
 * @param c     CABAC context; low, range and bytestream may be modified
 * @param state context state byte, read and overwritten
 * @return bit 0 of the value the asm leaves in its result operand
 *
 * Declared av_noinline on ARCH_X86_32 and av_always_inline otherwise.
 */
#define get_cabac_inline get_cabac_inline_x86 |
180 |
|
|
static |
181 |
|
|
#if ARCH_X86_32 |
182 |
|
|
av_noinline |
183 |
|
|
#else |
184 |
|
|
av_always_inline |
185 |
|
|
#endif |
186 |
|
630419954 |
int get_cabac_inline_x86(CABACContext *c, uint8_t *const state) |
187 |
|
|
{ |
188 |
|
|
int bit, tmp; |
189 |
|
|
#ifdef BROKEN_RELOCATIONS |
190 |
|
|
void *tables; |
191 |
|
|
|
192 |
|
630419954 |
/* Materialise the table address in a register; it is passed to the main
 * asm through TABLES_ARG and used there as the base of every table lookup. */
__asm__ volatile( |
193 |
|
|
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" |
194 |
|
|
: "=&r"(tables) |
195 |
|
|
: NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables) |
196 |
|
|
); |
197 |
|
|
#endif |
198 |
|
|
|
199 |
|
630419954 |
/* Operands: %0 bit, %1 c->low, %2 c->range, %3 tmp ("q" so that the 8-bit
 * name %b3 exists), %4 state, %5 c, %6 / %7 offsets of bytestream and
 * bytestream_end inside CABACContext. "%8" is only expanded by the
 * BROKEN_RELOCATIONS macro variant, where it is the tables register.
 * The trailing "1" / "2" inputs preload %1 and %2 with c->low and c->range. */
__asm__ volatile( |
200 |
|
|
BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1", |
201 |
|
|
"%2", "%q2", "%3", "%b3", |
202 |
|
|
"%c6(%5)", "%c7(%5)", |
203 |
|
|
AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), |
204 |
|
|
AV_STRINGIFY(H264_LPS_RANGE_OFFSET), |
205 |
|
|
AV_STRINGIFY(H264_MLPS_STATE_OFFSET), |
206 |
|
|
"%8") |
207 |
|
|
: "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp) |
208 |
|
|
: "r"(state), "r"(c), |
209 |
|
|
"i"(offsetof(CABACContext, bytestream)), |
210 |
|
|
"i"(offsetof(CABACContext, bytestream_end)) |
211 |
|
|
TABLES_ARG |
212 |
|
630419954 |
,"1"(c->low), "2"(c->range) |
213 |
|
|
: "%"FF_REG_c, "memory" |
214 |
|
|
); |
215 |
|
630419954 |
/* Only the lowest bit of the result register is the decoded bin. */
return bit & 1; |
216 |
|
|
} |
217 |
|
|
#endif /* HAVE_7REGS && !BROKEN_COMPILER */ |
218 |
|
|
|
219 |
|
|
#if !BROKEN_COMPILER |
220 |
|
|
/**
 * x86 inline-asm implementation that get_cabac_bypass_sign is aliased to.
 *
 * Doubles c->low, subtracts c->range << 17 and, when the result is negative,
 * adds that value back and negates val. When the low 16 bits of the new low
 * are zero, 16 more bits are read through c->bytestream and merged into low.
 * The new low is always stored back to c->low.
 *
 * @param c   CABAC context; low and bytestream may be modified
 * @param val value whose sign is to be applied
 * @return val, negated when 2 * low - (range << 17) was negative
 *
 * Operands: %0 val (pinned to %ecx by "+c"), %1 tmp, %2 c, %3..%6 offsets of
 * low, bytestream, bytestream_end and range inside CABACContext.
 */
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86 |
221 |
|
176443607 |
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) |
222 |
|
|
{ |
223 |
|
|
x86_reg tmp; |
224 |
|
176443607 |
__asm__ volatile( |
225 |
|
|
/* %k1 = c->range << 17, %eax = 2 * c->low - %k1 */
"movl %c6(%2), %k1 \n\t" |
226 |
|
|
"movl %c3(%2), %%eax \n\t" |
227 |
|
|
"shl $17, %k1 \n\t" |
228 |
|
|
"add %%eax, %%eax \n\t" |
229 |
|
|
"sub %k1, %%eax \n\t" |
230 |
|
|
/* %edx = 0 or -1 from the sign of %eax; undo the subtraction if negative */
"cdq \n\t" |
231 |
|
|
"and %%edx, %k1 \n\t" |
232 |
|
|
"add %k1, %%eax \n\t" |
233 |
|
|
/* val = (val ^ mask) - mask, i.e. negate val when mask is -1 */
"xor %%edx, %%ecx \n\t" |
234 |
|
|
"sub %%edx, %%ecx \n\t" |
235 |
|
|
/* refill only when the low 16 bits of low are zero */
"test %%ax, %%ax \n\t" |
236 |
|
|
"jnz 1f \n\t" |
237 |
|
|
"mov %c4(%2), %1 \n\t" |
238 |
|
|
"subl $0xFFFF, %%eax \n\t" |
239 |
|
|
"movzwl (%1), %%edx \n\t" |
240 |
|
|
"bswap %%edx \n\t" |
241 |
|
|
"shrl $15, %%edx \n\t" |
242 |
|
|
#if UNCHECKED_BITSTREAM_READER |
243 |
|
|
/* unchecked: always advance the stored bytestream pointer by 2 */
"add $2, %1 \n\t" |
244 |
|
|
"addl %%edx, %%eax \n\t" |
245 |
|
|
"mov %1, %c4(%2) \n\t" |
246 |
|
|
#else |
247 |
|
|
/* checked: skip the advance when the pointer is >= bytestream_end */
"addl %%edx, %%eax \n\t" |
248 |
|
|
"cmp %c5(%2), %1 \n\t" |
249 |
|
|
"jge 1f \n\t" |
250 |
|
|
"add"FF_OPSIZE" $2, %c4(%2) \n\t" |
251 |
|
|
#endif |
252 |
|
|
"1: \n\t" |
253 |
|
|
"movl %%eax, %c3(%2) \n\t" |
254 |
|
|
|
255 |
|
|
: "+c"(val), "=&r"(tmp) |
256 |
|
|
: "r"(c), |
257 |
|
|
"i"(offsetof(CABACContext, low)), |
258 |
|
|
"i"(offsetof(CABACContext, bytestream)), |
259 |
|
|
"i"(offsetof(CABACContext, bytestream_end)), |
260 |
|
|
"i"(offsetof(CABACContext, range)) |
261 |
|
|
: "%eax", "%edx", "memory" |
262 |
|
|
); |
263 |
|
176443607 |
return val; |
264 |
|
|
} |
265 |
|
|
|
266 |
|
|
/**
 * x86 inline-asm implementation that get_cabac_bypass is aliased to.
 *
 * Doubles c->low, subtracts c->range << 17 and, when the result is negative,
 * adds that value back. When the low 16 bits of the new low are zero, 16
 * more bits are read through c->bytestream and merged into low. The new low
 * is always stored back to c->low.
 *
 * @param c CABAC context; low and bytestream may be modified
 * @return 1 when 2 * low - (range << 17) was non-negative, 0 otherwise
 *
 * Operands: %0 res (pinned to %edx by "=&d"), %1 tmp, %2 c, %3..%6 offsets
 * of low, bytestream, bytestream_end and range inside CABACContext.
 *
 * NOTE(review): unlike get_cabac_bypass_sign_x86 this function has no
 * UNCHECKED_BITSTREAM_READER branch and always performs the end-of-buffer
 * comparison -- confirm whether that difference is intended.
 */
#define get_cabac_bypass get_cabac_bypass_x86 |
267 |
|
59320675 |
static av_always_inline int get_cabac_bypass_x86(CABACContext *c) |
268 |
|
|
{ |
269 |
|
|
x86_reg tmp; |
270 |
|
|
int res; |
271 |
|
59320675 |
__asm__ volatile( |
272 |
|
|
/* %k1 = c->range << 17, %eax = 2 * c->low - %k1 */
"movl %c6(%2), %k1 \n\t" |
273 |
|
|
"movl %c3(%2), %%eax \n\t" |
274 |
|
|
"shl $17, %k1 \n\t" |
275 |
|
|
"add %%eax, %%eax \n\t" |
276 |
|
|
"sub %k1, %%eax \n\t" |
277 |
|
|
/* %edx = 0 or -1 from the sign of %eax; undo the subtraction if negative */
"cdq \n\t" |
278 |
|
|
"and %%edx, %k1 \n\t" |
279 |
|
|
"add %k1, %%eax \n\t" |
280 |
|
|
/* res = mask + 1: 1 for a non-negative difference, 0 for a negative one */
"inc %%edx \n\t" |
281 |
|
|
/* refill only when the low 16 bits of low are zero */
"test %%ax, %%ax \n\t" |
282 |
|
|
"jnz 1f \n\t" |
283 |
|
|
"mov %c4(%2), %1 \n\t" |
284 |
|
|
"subl $0xFFFF, %%eax \n\t" |
285 |
|
|
"movzwl (%1), %%ecx \n\t" |
286 |
|
|
"bswap %%ecx \n\t" |
287 |
|
|
"shrl $15, %%ecx \n\t" |
288 |
|
|
"addl %%ecx, %%eax \n\t" |
289 |
|
|
/* skip the pointer advance when the pointer is >= bytestream_end */
"cmp %c5(%2), %1 \n\t" |
290 |
|
|
"jge 1f \n\t" |
291 |
|
|
"add"FF_OPSIZE" $2, %c4(%2) \n\t" |
292 |
|
|
"1: \n\t" |
293 |
|
|
"movl %%eax, %c3(%2) \n\t" |
294 |
|
|
|
295 |
|
|
: "=&d"(res), "=&r"(tmp) |
296 |
|
|
: "r"(c), |
297 |
|
|
"i"(offsetof(CABACContext, low)), |
298 |
|
|
"i"(offsetof(CABACContext, bytestream)), |
299 |
|
|
"i"(offsetof(CABACContext, bytestream_end)), |
300 |
|
|
"i"(offsetof(CABACContext, range)) |
301 |
|
|
: "%eax", "%ecx", "memory" |
302 |
|
|
); |
303 |
|
59320675 |
return res; |
304 |
|
|
} |
305 |
|
|
#endif /* !BROKEN_COMPILER */ |
306 |
|
|
|
307 |
|
|
#endif /* HAVE_INLINE_ASM */ |
308 |
|
|
#endif /* AVCODEC_X86_CABAC_H */ |
309 |
|
|
|