/*
 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
 *
 * MMX-optimized DSP functions, based on H.264 optimizations by
 * Michael Niedermayer and Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/cavsdsp.h"
#include "libavcodec/idctdsp.h"
#include "constants.h"
#include "fpel.h"
#include "idctdsp.h"
#include "config.h"


#if HAVE_MMX_EXTERNAL

void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in);
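
/* Wrappers for the asm IDCT: the transform runs on transpose-permuted
 * coefficients (idct_perm is set to FF_IDCT_PERM_TRANSPOSE at init time),
 * writes its result to an aligned scratch block, and the clamped-add
 * helper then merges that block into the destination picture. */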
static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_mmx(b2, block);
    ff_add_pixels_clamped_mmx(b2, dst, stride);
}

void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);

static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_sse2(b2, block);
    ff_add_pixels_clamped_sse2(b2, dst, stride);
}

#endif /* HAVE_MMX_EXTERNAL */

#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)

/*****************************************************************************
 *
 * motion compensation
 *
 ****************************************************************************/

/* vertical filter [-1 -2 96 42 -7 0] */
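/* Scalar equivalent, derived from the asm below: with A..F the six source
 * rows of one column, dst = clip_uint8((-A - 2*B + 96*C + 42*D - 7*E + 64) >> 7).
 * B and E are scaled up in place (paddw/psllw) and restored by the inverse
 * shifts so that no extra registers are needed; F is only loaded and
 * unpacked here, for use by the next invocation in the pipeline. */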
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
        "psllw $3, "#E"             \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $3, "#E"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#E", %%mm6          \n\t"\
        "paddw "#B", "#B"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $1, "#B"             \n\t"\
        "psubw "#A", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -1 5 5 -1 0] */
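/* Scalar equivalent: dst = clip_uint8((5*(C + D) - B - E + 4) >> 3).
 * This is the half-pel case, so a single multiplier suffices and the
 * MUL2 argument is unused. */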
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "paddw "#D", %%mm6          \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $3, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -7 42 96 -2 -1] */
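/* Scalar equivalent: dst = clip_uint8((-7*B + 42*C + 96*D - 2*E - F + 64) >> 7),
 * the mirror image of QPEL_CAVSV1. */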
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
        "psllw $3, "#B"             \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $3, "#B"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#B", %%mm6          \n\t"\
        "paddw "#E", "#E"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $1, "#E"             \n\t"\
        "psubw "#F", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

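/* Runs one of the QPEL_CAVSV* row filters down two 4-pixel-wide columns
 * (w = 2).  Six unpacked source rows live in mm0-mm5; each VOP invocation
 * rotates that assignment by one register, emitting one output row and
 * loading one new source row, so every source row is read only once.
 * The first asm block handles h == 8; for h == 16 a second block filters
 * eight more rows with the registers left by the first. */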
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
      __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movd (%0), %%mm0           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm1           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm2           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm3           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm4           \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpcklbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpcklbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
        : "memory"\
      );\
      if(h==16){\
        __asm__ volatile(\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            \
            : "+a"(src), "+c"(dst)\
            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
        );\
      }\
      src += 4-(h+5)*srcStride;\
      dst += 4-h*dstStride;\
    }
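
/* A scalar sketch of the horizontal filter implemented in asm below
 * ([-1 5 5 -1] with rounding, eight output pixels per loop iteration):
 *
 *     for (i = 0; i < 8; i++)
 *         dst[i] = av_clip_uint8((5 * (src[i] + src[i + 1])
 *                                 - src[i - 1] - src[i + 2] + 4) >> 3);
 */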
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    int h=8;\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
        "1:                         \n\t"\
        "movq    (%0), %%mm0        \n\t"\
        "movq   1(%0), %%mm2        \n\t"\
        "movq %%mm0, %%mm1          \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpckhbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "paddw %%mm2, %%mm0         \n\t"\
        "paddw %%mm3, %%mm1         \n\t"\
        "pmullw %%mm6, %%mm0        \n\t"\
        "pmullw %%mm6, %%mm1        \n\t"\
        "movq  -1(%0), %%mm2        \n\t"\
        "movq   2(%0), %%mm4        \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "movq %%mm4, %%mm5          \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        "punpckhbw %%mm7, %%mm5     \n\t"\
        "paddw %%mm4, %%mm2         \n\t"\
        "paddw %%mm3, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0         \n\t"\
        "psubw %%mm5, %%mm1         \n\t"\
        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
        "paddw %%mm5, %%mm0         \n\t"\
        "paddw %%mm5, %%mm1         \n\t"\
        "psraw $3, %%mm0            \n\t"\
        "psraw $3, %%mm1            \n\t"\
        "packuswb %%mm1, %%mm0      \n\t"\
        OP(%%mm0, (%1), %%mm5, q)        \
        "add %3, %0                 \n\t"\
        "add %4, %1                 \n\t"\
        "decl %2                    \n\t"\
        " jnz 1b                    \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
        : "memory"\
    );\
}\
\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\
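
/* Map the filters onto the _mcXY_ naming used by the function table:
 * X and Y are the horizontal and vertical offsets in quarter-pel units.
 * Only the positions implemented above are generated: mc20 (horizontal
 * half-pel) and mc01/mc02/mc03 (vertical quarter-, half- and
 * three-quarter-pel, i.e. the v1/v2/v3 filters). */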
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}\
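
/* OP abstracts the final store: PUT_OP writes the result, the AVG ops
 * average it with the existing destination first (pavgusb on 3DNow!,
 * pavgb on MMXEXT; both compute (a + b + 1) >> 1).  `size` selects
 * between movd and movq. */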
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp "     \n\t"\
"pavgusb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "        \n\t"
#define AVG_MMXEXT_OP(a, b, temp, size) \
"mov" #size " " #b ", " #temp "     \n\t"\
"pavgb " #temp ", " #a "            \n\t"\
"mov" #size " " #a ", " #b "        \n\t"

#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
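
/* The full-pel (mc00) positions need no filtering; they are plain block
 * copies or averages, so they just forward to the shared fpel helpers. */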
#if HAVE_MMX_EXTERNAL
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_put_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_avg_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels8_mmxext(dst, src, stride, 8);
}

static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_put_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_avg_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                        ptrdiff_t stride)
{
    ff_avg_pixels16_mmxext(dst, src, stride, 16);
}

static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
#endif

static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
                                     AVCodecContext *avctx)
{
#if HAVE_MMX_EXTERNAL
    c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
    c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;

    c->cavs_idct8_add = cavs_idct8_add_mmx;
    c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
#endif /* HAVE_MMX_EXTERNAL */
}
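
/* Fill the sub-pel slots of the qpel table for one block size: IDX 0 is
 * 16x16, IDX 1 is 8x8, and the second index encodes the position as
 * x + 4*y in quarter-pel units, so slots 2, 4, 8 and 12 take mc20, mc01,
 * mc02 and mc03 respectively. */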
#define DSPFUNC(PFX, IDX, NUM, EXT) \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \

#if HAVE_MMXEXT_INLINE
QPEL_CAVS(put_, PUT_OP, mmxext)
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)

CAVS_MC(put_,  8, mmxext)
CAVS_MC(put_, 16, mmxext)
CAVS_MC(avg_,  8, mmxext)
CAVS_MC(avg_, 16, mmxext)
#endif /* HAVE_MMXEXT_INLINE */

#if HAVE_AMD3DNOW_INLINE
QPEL_CAVS(put_, PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)

CAVS_MC(put_,  8, 3dnow)
CAVS_MC(put_, 16, 3dnow)
CAVS_MC(avg_,  8, 3dnow)
CAVS_MC(avg_, 16, 3dnow)

static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
                                       AVCodecContext *avctx)
{
    DSPFUNC(put, 0, 16, 3dnow);
    DSPFUNC(put, 1,  8, 3dnow);
    DSPFUNC(avg, 0, 16, 3dnow);
    DSPFUNC(avg, 1,  8, 3dnow);
}
#endif /* HAVE_AMD3DNOW_INLINE */
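
/* Runtime dispatch.  Later, more capable branches deliberately overwrite
 * pointers installed by earlier ones, e.g. the SSE2 branch replaces the
 * MMX 16x16 copy/average and the IDCT set up by cavsdsp_init_mmx(). */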
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
{
    av_unused int cpu_flags = av_get_cpu_flags();

    if (X86_MMX(cpu_flags))
        cavsdsp_init_mmx(c, avctx);

#if HAVE_AMD3DNOW_INLINE
    if (INLINE_AMD3DNOW(cpu_flags))
        cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_AMD3DNOW_INLINE */
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags)) {
        DSPFUNC(put, 0, 16, mmxext);
        DSPFUNC(put, 1,  8, mmxext);
        DSPFUNC(avg, 0, 16, mmxext);
        DSPFUNC(avg, 1,  8, mmxext);
    }
#endif
#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
        c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
    }
#endif
#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;

        c->cavs_idct8_add = cavs_idct8_add_sse2;
        c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
    }
#endif
}