Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Chinese AVS video (AVS1-P2, JiZhun profile) decoder. | ||
3 | * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de> | ||
4 | * | ||
5 | * MMX-optimized DSP functions, based on H.264 optimizations by | ||
6 | * Michael Niedermayer and Loren Merritt | ||
7 | * | ||
8 | * This file is part of FFmpeg. | ||
9 | * | ||
10 | * FFmpeg is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2.1 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * FFmpeg is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with FFmpeg; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | #include "libavutil/attributes.h" | ||
26 | #include "libavutil/common.h" | ||
27 | #include "libavutil/cpu.h" | ||
28 | #include "libavutil/mem_internal.h" | ||
29 | #include "libavutil/x86/asm.h" | ||
30 | #include "libavutil/x86/cpu.h" | ||
31 | #include "libavcodec/cavsdsp.h" | ||
32 | #include "libavcodec/idctdsp.h" | ||
33 | #include "constants.h" | ||
34 | #include "fpel.h" | ||
35 | #include "idctdsp.h" | ||
36 | #include "config.h" | ||
37 | |||
38 | |||
39 | #if HAVE_SSE2_EXTERNAL | ||
40 | |||
/*
 * Prototype only: the definition is not part of this file. It is called by
 * cavs_idct8_add_sse2() with a 64-entry int16_t output buffer and the
 * coefficient block as read-only input.
 * NOTE(review): presumably the external SSE2 assembly 8x8 inverse transform;
 * confirm against the assembly source.
 */
41 | void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in); | ||
42 | |||
/**
 * Run ff_cavs_idct8_sse2() on block, writing into a 16-byte-aligned scratch
 * buffer of 64 int16_t, then pass that buffer to ff_add_pixels_clamped_sse2()
 * together with dst and stride.
 *
 * block itself is only handed over as the const input of the transform; this
 * function never writes to it. Installed as c->cavs_idct8_add by
 * ff_cavsdsp_init_x86().
 *
 * NOTE(review): ff_add_pixels_clamped_sse2() is declared elsewhere; judging by
 * its name it adds the scratch values to the pixels at dst with clamping --
 * confirm against its definition.
 *
 * @param dst    destination pixels, forwarded unchanged
 * @param block  input coefficients
 * @param stride forwarded unchanged as the third argument
 */
43 | 4921 | static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) | |
44 | { | ||
45 | 4921 | LOCAL_ALIGNED(16, int16_t, b2, [64]); | |
46 | 4921 | ff_cavs_idct8_sse2(b2, block); | |
47 | 4921 | ff_add_pixels_clamped_sse2(b2, dst, stride); | |
48 | 4921 | } | |
49 | |||
50 | #endif /* HAVE_SSE2_EXTERNAL */ | ||
51 | |||
52 | #if HAVE_MMXEXT_INLINE | ||
53 | |||
/*
 * 64-bit constants made of four identical 16-bit words: 0x002A = 42 and
 * 0x0060 = 96. They are passed as the MUL2 and MUL1 arguments of the vertical
 * filter macros below, where pmullw multiplies them word by word with the
 * widened pixel rows.
 */
54 | DECLARE_ASM_CONST(8, uint64_t, pw_42) = 0x002A002A002A002AULL; | ||
55 | DECLARE_ASM_CONST(8, uint64_t, pw_96) = 0x0060006000600060ULL; | ||
56 | |||
57 | /***************************************************************************** | ||
58 | * | ||
59 | * motion compensation | ||
60 | * | ||
61 | ****************************************************************************/ | ||
62 | |||
63 | /* vertical filter [-1 -2 96 42 -7 0] */ | ||
/*
 * One output row of the vertical filter, written as an inline-asm string
 * fragment that QPEL_CAVSVNUM pastes several times into one asm statement.
 *
 * A..F name six MMX registers. A..E hold five consecutive source rows that
 * were already widened to 16-bit words; F is loaded here with the next
 * 4 source bytes from (%0) and widened the same way for use by later steps.
 * In QPEL_CAVSVNUM the operands are %0 = src, %1 = dst, %2 = srcStride and
 * %3 = dstStride.
 *
 * Result per word: (C*MUL1 + D*MUL2 - 7*E - 2*B - A + ADD) >> 7, packed to
 * bytes with unsigned saturation (packuswb) and written through OP with size
 * suffix d, i.e. 4 bytes. The tap list in the comment above corresponds to
 * MUL1 = 96 and MUL2 = 42; F is loaded but does not contribute to this row.
 *
 * -7*E is formed as -(E << 3) + E and -2*B by doubling B in place; both
 * registers are shifted back afterwards so that later steps see the original
 * row values again.
 *
 * mm6 is the accumulator. mm7 is used as scratch for the D product and is
 * cleared again before it serves as the zero operand of punpcklbw. A is handed
 * to OP as its temporary register, so its contents may be overwritten
 * (AVG_MMXEXT_OP loads the destination into it). Afterwards %0 has advanced by
 * %2 and %1 by %3.
 */
64 | #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
65 | "movd (%0), "#F" \n\t"\ | ||
66 | "movq "#C", %%mm6 \n\t"\ | ||
67 | "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ | ||
68 | "movq "#D", %%mm7 \n\t"\ | ||
69 | "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ | ||
70 | "psllw $3, "#E" \n\t"\ | ||
71 | "psubw "#E", %%mm6 \n\t"\ | ||
72 | "psraw $3, "#E" \n\t"\ | ||
73 | "paddw %%mm7, %%mm6 \n\t"\ | ||
74 | "paddw "#E", %%mm6 \n\t"\ | ||
75 | "paddw "#B", "#B" \n\t"\ | ||
76 | "pxor %%mm7, %%mm7 \n\t"\ | ||
77 | "add %2, %0 \n\t"\ | ||
78 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
79 | "psubw "#B", %%mm6 \n\t"\ | ||
80 | "psraw $1, "#B" \n\t"\ | ||
81 | "psubw "#A", %%mm6 \n\t"\ | ||
82 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
83 | "psraw $7, %%mm6 \n\t"\ | ||
84 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
85 | OP(%%mm6, (%1), A, d) \ | ||
86 | "add %3, %1 \n\t" | ||
87 | |||
88 | /* vertical filter [ 0 -1 5 5 -1 0] */ | ||
/*
 * One output row of the symmetric vertical filter, as an inline-asm string
 * fragment with the same parameter list and operand numbering as QPEL_CAVSV1
 * (%0 = src, %1 = dst, %2 = srcStride, %3 = dstStride in QPEL_CAVSVNUM).
 *
 * Result per word: ((C + D)*MUL1 - B - E + ADD) >> 3, packed with unsigned
 * saturation and written through OP as 4 bytes. The tap list above
 * corresponds to MUL1 = 5. A and F do not contribute to the sum, and MUL2 is
 * not referenced by this fragment at all; it is accepted only so that all
 * three filter macros share one signature.
 *
 * F is still loaded with the next source row and widened for later steps.
 * Unlike QPEL_CAVSV1 and QPEL_CAVSV3 this fragment never clears mm7: the
 * punpcklbw relies on mm7 already being zero, which QPEL_CAVSVNUM establishes
 * once before the first step. A is used as the temporary register of OP.
 */
89 | #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
90 | "movd (%0), "#F" \n\t"\ | ||
91 | "movq "#C", %%mm6 \n\t"\ | ||
92 | "paddw "#D", %%mm6 \n\t"\ | ||
93 | "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ | ||
94 | "add %2, %0 \n\t"\ | ||
95 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
96 | "psubw "#B", %%mm6 \n\t"\ | ||
97 | "psubw "#E", %%mm6 \n\t"\ | ||
98 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
99 | "psraw $3, %%mm6 \n\t"\ | ||
100 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
101 | OP(%%mm6, (%1), A, d) \ | ||
102 | "add %3, %1 \n\t" | ||
103 | |||
104 | /* vertical filter [ 0 -7 42 96 -2 -1] */ | ||
/*
 * One output row of the vertical filter that mirrors QPEL_CAVSV1, as an
 * inline-asm string fragment with the same parameter list and operand
 * numbering (%0 = src, %1 = dst, %2 = srcStride, %3 = dstStride in
 * QPEL_CAVSVNUM).
 *
 * Result per word: (C*MUL2 + D*MUL1 - 7*B - 2*E - F + ADD) >> 7, packed with
 * unsigned saturation and written through OP as 4 bytes. Note that the roles
 * of MUL1 and MUL2 are swapped relative to QPEL_CAVSV1, so the same argument
 * order (MUL1 = 96, MUL2 = 42) yields the tap list in the comment above.
 * A does not contribute; F, the row loaded at the top of this fragment, is a
 * tap here and is widened by punpcklbw before it is subtracted.
 *
 * -7*B is formed as -(B << 3) + B and -2*E by doubling E in place; both are
 * shifted back afterwards so later steps see the original row values. mm7 is
 * scratch for the D product and is cleared again before the punpcklbw. A is
 * used as the temporary register of OP.
 */
105 | #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
106 | "movd (%0), "#F" \n\t"\ | ||
107 | "movq "#C", %%mm6 \n\t"\ | ||
108 | "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ | ||
109 | "movq "#D", %%mm7 \n\t"\ | ||
110 | "pmullw "MANGLE(MUL1)", %%mm7\n\t"\ | ||
111 | "psllw $3, "#B" \n\t"\ | ||
112 | "psubw "#B", %%mm6 \n\t"\ | ||
113 | "psraw $3, "#B" \n\t"\ | ||
114 | "paddw %%mm7, %%mm6 \n\t"\ | ||
115 | "paddw "#B", %%mm6 \n\t"\ | ||
116 | "paddw "#E", "#E" \n\t"\ | ||
117 | "pxor %%mm7, %%mm7 \n\t"\ | ||
118 | "add %2, %0 \n\t"\ | ||
119 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
120 | "psubw "#E", %%mm6 \n\t"\ | ||
121 | "psraw $1, "#E" \n\t"\ | ||
122 | "psubw "#F", %%mm6 \n\t"\ | ||
123 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
124 | "psraw $7, %%mm6 \n\t"\ | ||
125 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
126 | OP(%%mm6, (%1), A, d) \ | ||
127 | "add %3, %1 \n\t" | ||
128 | |||
129 | |||
/*
 * Function body shared by the 8-wide vertical filters. It expects dst, src,
 * dstStride, srcStride and h to be in scope; these are the parameters of the
 * cavs_qpel8or16_v* functions generated by QPEL_CAVS.
 *
 *   VOP               one of QPEL_CAVSV1 / QPEL_CAVSV2 / QPEL_CAVSV3
 *   OP                store macro forwarded to VOP
 *   ADD, MUL1, MUL2   constant symbols forwarded to VOP and listed in
 *                     NAMED_CONSTRAINTS_ADD for the asm statements
 *
 * src is moved up two rows once, then the block is handled as two columns of
 * 4 pixels (w = 2, each load and store moves 4 bytes). Per column, five rows
 * are loaded into mm0..mm4 and widened to words against a zeroed mm7. Eight
 * VOP steps then emit eight output rows; the register arguments rotate by one
 * position per step so the filter window slides down one row each time.
 * If h == 16, a second asm statement runs eight more steps that continue the
 * same rotation.
 *
 * NOTE(review): the second asm statement relies on mm0..mm5 (and, for
 * QPEL_CAVSV2, on mm7 still being zero) keeping their values between the two
 * asm statements; neither statement declares MMX registers as clobbered.
 *
 * After a column, src has advanced by h + 5 rows and dst by h rows; the last
 * two statements of the loop undo that and step 4 pixels to the right. This
 * bookkeeping is only correct for h equal to 8 or 16, which are the only
 * values passed in this file.
 */
130 | #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\ | ||
131 | int w= 2;\ | ||
132 | src -= 2*srcStride;\ | ||
133 | \ | ||
134 | while(w--){\ | ||
135 | __asm__ volatile(\ | ||
136 | "pxor %%mm7, %%mm7 \n\t"\ | ||
137 | "movd (%0), %%mm0 \n\t"\ | ||
138 | "add %2, %0 \n\t"\ | ||
139 | "movd (%0), %%mm1 \n\t"\ | ||
140 | "add %2, %0 \n\t"\ | ||
141 | "movd (%0), %%mm2 \n\t"\ | ||
142 | "add %2, %0 \n\t"\ | ||
143 | "movd (%0), %%mm3 \n\t"\ | ||
144 | "add %2, %0 \n\t"\ | ||
145 | "movd (%0), %%mm4 \n\t"\ | ||
146 | "add %2, %0 \n\t"\ | ||
147 | "punpcklbw %%mm7, %%mm0 \n\t"\ | ||
148 | "punpcklbw %%mm7, %%mm1 \n\t"\ | ||
149 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
150 | "punpcklbw %%mm7, %%mm3 \n\t"\ | ||
151 | "punpcklbw %%mm7, %%mm4 \n\t"\ | ||
152 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
153 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
154 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
155 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
156 | VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ | ||
157 | VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ | ||
158 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
159 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
160 | \ | ||
161 | : "+a"(src), "+c"(dst)\ | ||
162 | : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ | ||
163 | NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ | ||
164 | : "memory"\ | ||
165 | );\ | ||
166 | if(h==16){\ | ||
167 | __asm__ volatile(\ | ||
168 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
169 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
170 | VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ | ||
171 | VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ | ||
172 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
173 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
174 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
175 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
176 | \ | ||
177 | : "+a"(src), "+c"(dst)\ | ||
178 | : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ | ||
179 | NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ | ||
180 | : "memory"\ | ||
181 | );\ | ||
182 | }\ | ||
183 | src += 4-(h+5)*srcStride;\ | ||
184 | dst += 4-h*dstStride;\ | ||
185 | } | ||
186 | |||
/*
 * Instantiate the horizontal and vertical filter functions for one store
 * operation.
 *
 *   OPNAME  prefix of every generated function name (put_ or avg_ below)
 *   OP      store macro, PUT_OP or AVG_MMXEXT_OP
 *   MMX     suffix of every generated function name (mmxext below)
 *
 * Generated functions, all taking (dst, src, dstStride, srcStride):
 *   OPNAME cavs_qpel8_h_ MMX             horizontal filter, 8 pixels by 8 rows
 *   OPNAME cavs_qpel16_h_ MMX            16x16, built from four 8x8 calls
 *   OPNAME cavs_qpel8_v{1,2,3}_ MMX      vertical filters, 8 rows
 *   OPNAME cavs_qpel16_v{1,2,3}_ MMX     vertical filters, 16 rows, run on
 *                                        the left and the right 8-pixel half
 * plus the inline helpers OPNAME cavs_qpel8or16_v{1,2,3}_ MMX, which take an
 * extra row count h and consist of QPEL_CAVSVNUM with the matching
 * QPEL_CAVSV1/2/3 step. v1 and v3 pass the same constants (ff_pw_64, pw_96,
 * pw_42); v2 passes ff_pw_4 and ff_pw_5, and its pw_42 argument is unused by
 * QPEL_CAVSV2.
 *
 * Horizontal filter: operands are %0 = src, %1 = dst, %2 = h, %3 = srcStride,
 * %4 = dstStride. Each of the 8 loop iterations loads 8 bytes at offsets
 * -1, 0, 1 and 2 from src (so bytes src[-1] through src[9] of the row are
 * read), widens them to words and computes
 *   ((p[0] + p[1]) * ff_pw_5 - p[-1] - p[2] + ff_pw_4) >> 3
 * per pixel, packs with unsigned saturation and stores 8 bytes through OP.
 * mm5 doubles as the temporary register of OP, which is why ff_pw_4 is
 * reloaded into it on every iteration. The row counter h lives in memory and
 * is decremented with decl.
 * NOTE(review): ff_pw_4, ff_pw_5 and ff_pw_64 are defined elsewhere;
 * presumably each holds the named value in every 16-bit word.
 */
187 | #define QPEL_CAVS(OPNAME, OP, MMX)\ | ||
188 | static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
189 | {\ | ||
190 | int h=8;\ | ||
191 | __asm__ volatile(\ | ||
192 | "pxor %%mm7, %%mm7 \n\t"\ | ||
193 | "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ | ||
194 | "1: \n\t"\ | ||
195 | "movq (%0), %%mm0 \n\t"\ | ||
196 | "movq 1(%0), %%mm2 \n\t"\ | ||
197 | "movq %%mm0, %%mm1 \n\t"\ | ||
198 | "movq %%mm2, %%mm3 \n\t"\ | ||
199 | "punpcklbw %%mm7, %%mm0 \n\t"\ | ||
200 | "punpckhbw %%mm7, %%mm1 \n\t"\ | ||
201 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
202 | "punpckhbw %%mm7, %%mm3 \n\t"\ | ||
203 | "paddw %%mm2, %%mm0 \n\t"\ | ||
204 | "paddw %%mm3, %%mm1 \n\t"\ | ||
205 | "pmullw %%mm6, %%mm0 \n\t"\ | ||
206 | "pmullw %%mm6, %%mm1 \n\t"\ | ||
207 | "movq -1(%0), %%mm2 \n\t"\ | ||
208 | "movq 2(%0), %%mm4 \n\t"\ | ||
209 | "movq %%mm2, %%mm3 \n\t"\ | ||
210 | "movq %%mm4, %%mm5 \n\t"\ | ||
211 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
212 | "punpckhbw %%mm7, %%mm3 \n\t"\ | ||
213 | "punpcklbw %%mm7, %%mm4 \n\t"\ | ||
214 | "punpckhbw %%mm7, %%mm5 \n\t"\ | ||
215 | "paddw %%mm4, %%mm2 \n\t"\ | ||
216 | "paddw %%mm3, %%mm5 \n\t"\ | ||
217 | "psubw %%mm2, %%mm0 \n\t"\ | ||
218 | "psubw %%mm5, %%mm1 \n\t"\ | ||
219 | "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\ | ||
220 | "paddw %%mm5, %%mm0 \n\t"\ | ||
221 | "paddw %%mm5, %%mm1 \n\t"\ | ||
222 | "psraw $3, %%mm0 \n\t"\ | ||
223 | "psraw $3, %%mm1 \n\t"\ | ||
224 | "packuswb %%mm1, %%mm0 \n\t"\ | ||
225 | OP(%%mm0, (%1),%%mm5, q) \ | ||
226 | "add %3, %0 \n\t"\ | ||
227 | "add %4, %1 \n\t"\ | ||
228 | "decl %2 \n\t"\ | ||
229 | " jnz 1b \n\t"\ | ||
230 | : "+a"(src), "+c"(dst), "+m"(h)\ | ||
231 | : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ | ||
232 | NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\ | ||
233 | : "memory"\ | ||
234 | );\ | ||
235 | }\ | ||
236 | \ | ||
237 | static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
238 | { \ | ||
239 | QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,pw_96,pw_42) \ | ||
240 | }\ | ||
241 | \ | ||
242 | static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
243 | { \ | ||
244 | QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,pw_42) \ | ||
245 | }\ | ||
246 | \ | ||
247 | static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
248 | { \ | ||
249 | QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,pw_96,pw_42) \ | ||
250 | }\ | ||
251 | \ | ||
252 | static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
253 | { \ | ||
254 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
255 | }\ | ||
256 | static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
257 | { \ | ||
258 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
259 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
260 | }\ | ||
261 | \ | ||
262 | static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
263 | { \ | ||
264 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
265 | }\ | ||
266 | static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
267 | { \ | ||
268 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
269 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
270 | }\ | ||
271 | \ | ||
272 | static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
273 | { \ | ||
274 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
275 | }\ | ||
276 | static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
277 | { \ | ||
278 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
279 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
280 | }\ | ||
281 | \ | ||
282 | static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
283 | { \ | ||
284 | OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ | ||
285 | OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | ||
286 | src += 8*srcStride;\ | ||
287 | dst += 8*dstStride;\ | ||
288 | OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ | ||
289 | OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | ||
290 | }\ | ||
291 | |||
/*
 * Generate the three-argument (dst, src, stride) entry points that are stored
 * in the qpel function tables by DSPFUNC. Each wrapper forwards to a function
 * produced by QPEL_CAVS and passes stride as both the destination and the
 * source stride:
 *   mc20 calls the _h_  (horizontal) filter
 *   mc01 calls the _v1_ vertical filter
 *   mc02 calls the _v2_ vertical filter
 *   mc03 calls the _v3_ vertical filter
 *
 *   OPNAME  name prefix (put_ or avg_ below)
 *   SIZE    block size used in the name (8 or 16 below)
 *   MMX     name suffix (mmxext below)
 */
292 | #define CAVS_MC(OPNAME, SIZE, MMX) \ | ||
293 | static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
294 | {\ | ||
295 | OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ | ||
296 | }\ | ||
297 | \ | ||
298 | static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
299 | {\ | ||
300 | OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ | ||
301 | }\ | ||
302 | \ | ||
303 | static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
304 | {\ | ||
305 | OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ | ||
306 | }\ | ||
307 | \ | ||
308 | static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
309 | {\ | ||
310 | OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ | ||
311 | }\ | ||
312 | |||
/*
 * Store operation "put": expands to a single mov with the given size suffix
 * (d or q in this file) that writes register a to destination b. The temp
 * argument is unused; it exists so PUT_OP and AVG_MMXEXT_OP are
 * interchangeable as the OP argument of the filter macros.
 */
313 | #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | ||
/*
 * Store operation "avg": loads the current destination b into temp, combines
 * it into a with pavgb (packed byte average), then writes a back to b with
 * the given size suffix. Both a and temp are overwritten.
 */
314 | #define AVG_MMXEXT_OP(a, b, temp, size) \ | ||
315 | "mov" #size " " #b ", " #temp " \n\t"\ | ||
316 | "pavgb " #temp ", " #a " \n\t"\ | ||
317 | "mov" #size " " #a ", " #b " \n\t" | ||
318 | |||
319 | #endif /* HAVE_MMXEXT_INLINE */ | ||
320 | |||
321 | #if HAVE_MMX_EXTERNAL | ||
/*
 * Table entry for position mc00 of the 8-pixel put functions: forwards to
 * ff_put_pixels8_mmx() with a fixed last argument of 8.
 * NOTE(review): ff_put_pixels8_mmx() is declared elsewhere; presumably a
 * plain 8-pixel-wide copy where the last argument is the row count.
 */
322 | ✗ | static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, | |
323 | ptrdiff_t stride) | ||
324 | { | ||
325 | ✗ | ff_put_pixels8_mmx(dst, src, stride, 8); | |
326 | ✗ | } | |
327 | |||
/*
 * Table entry for position mc00 of the 8-pixel avg functions: forwards to
 * ff_avg_pixels8_mmxext() with a fixed last argument of 8.
 * NOTE(review): ff_avg_pixels8_mmxext() is declared elsewhere; presumably it
 * averages src into dst over an 8-pixel-wide block of that many rows.
 */
328 | ✗ | static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, | |
329 | ptrdiff_t stride) | ||
330 | { | ||
331 | ✗ | ff_avg_pixels8_mmxext(dst, src, stride, 8); | |
332 | ✗ | } | |
333 | |||
/*
 * Table entry for position mc00 of the 16-pixel put functions: forwards to
 * ff_put_pixels16_sse2() with a fixed last argument of 16.
 * NOTE(review): ff_put_pixels16_sse2() is declared elsewhere; presumably a
 * plain 16-pixel-wide copy where the last argument is the row count.
 */
334 | ✗ | static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, | |
335 | ptrdiff_t stride) | ||
336 | { | ||
337 | ✗ | ff_put_pixels16_sse2(dst, src, stride, 16); | |
338 | ✗ | } | |
339 | |||
/*
 * Table entry for position mc00 of the 16-pixel avg functions: forwards to
 * ff_avg_pixels16_sse2() with a fixed last argument of 16.
 * NOTE(review): ff_avg_pixels16_sse2() is declared elsewhere; presumably it
 * averages src into dst over a 16-pixel-wide block of that many rows.
 */
340 | ✗ | static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, | |
341 | ptrdiff_t stride) | ||
342 | { | ||
343 | ✗ | ff_avg_pixels16_sse2(dst, src, stride, 16); | |
344 | ✗ | } | |
345 | #endif | ||
346 | |||
/*
 * MMX-level setup: when built with HAVE_MMX_EXTERNAL, install
 * put_cavs_qpel8_mc00_mmx in slot [1][0] of the put table. Without
 * HAVE_MMX_EXTERNAL the function body is empty and c is left untouched.
 * Called from ff_cavsdsp_init_x86() when X86_MMX(cpu_flags) is true.
 */
347 | 2 | static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c) | |
348 | { | ||
349 | #if HAVE_MMX_EXTERNAL | ||
350 | 2 | c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx; | |
351 | #endif /* HAVE_MMX_EXTERNAL */ | ||
352 | 2 | } | |
353 | |||
/*
 * Fill four slots of one qpel function table row. Expects a CAVSDSPContext
 * pointer named c in scope.
 *
 *   PFX  put or avg: selects the table and the function name prefix
 *   IDX  first table index (0 is used with NUM 16 and 1 with NUM 8 below)
 *   NUM  block size in the function name
 *   EXT  function name suffix
 *
 * Slots 2, 4, 8 and 12 receive the mc20, mc01, mc02 and mc03 functions
 * generated by CAVS_MC; all other slots are left as they were.
 */
354 | #define DSPFUNC(PFX, IDX, NUM, EXT) \ | ||
355 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \ | ||
356 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \ | ||
357 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \ | ||
358 | c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \ | ||
359 | |||
360 | #if HAVE_MMXEXT_INLINE | ||
/*
 * Instantiate the filter kernels once with the plain store (put_) and once
 * with the averaging store (avg_), then generate the mc20/mc01/mc02/mc03
 * wrappers for block sizes 8 and 16 of each. All generated names carry the
 * mmxext suffix and are referenced by the DSPFUNC lines in
 * ff_cavsdsp_init_x86().
 */
361 | ✗ | QPEL_CAVS(put_, PUT_OP, mmxext) | |
362 | ✗ | QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext) | |
363 | |||
364 | ✗ | CAVS_MC(put_, 8, mmxext) | |
365 | ✗ | CAVS_MC(put_, 16, mmxext) | |
366 | ✗ | CAVS_MC(avg_, 8, mmxext) | |
367 | ✗ | CAVS_MC(avg_, 16, mmxext) | |
368 | #endif /* HAVE_MMXEXT_INLINE */ | ||
369 | |||
/**
 * Install the x86-optimized functions into c according to the CPU flags
 * returned by av_get_cpu_flags() and the features enabled at build time.
 *
 * In order:
 *  - X86_MMX: cavsdsp_init_mmx() (put table slot [1][0]).
 *  - INLINE_MMXEXT (only if HAVE_MMXEXT_INLINE): slots 2, 4, 8 and 12 of the
 *    put and avg tables, row 0 with the 16-pixel and row 1 with the 8-pixel
 *    functions.
 *  - EXTERNAL_MMXEXT (only if HAVE_MMX_EXTERNAL): avg table slot [1][0].
 *  - EXTERNAL_SSE2 (only if HAVE_SSE2_EXTERNAL): slot [0][0] of both tables,
 *    cavs_idct8_add, and idct_perm = FF_IDCT_PERM_TRANSPOSE.
 *
 * Fields not mentioned above are not written. cpu_flags is marked av_unused
 * because every use of it can be compiled out.
 *
 * NOTE(review): idct_perm is changed together with the SSE2 IDCT, presumably
 * because that routine expects transposed coefficient order -- confirm
 * against the assembly implementation.
 *
 * @param c DSP context to update
 */
370 | 6 | av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c) | |
371 | { | ||
372 | 6 | av_unused int cpu_flags = av_get_cpu_flags(); | |
373 | |||
374 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (X86_MMX(cpu_flags)) |
375 | 2 | cavsdsp_init_mmx(c); | |
376 | |||
377 | #if HAVE_MMXEXT_INLINE | ||
378 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (INLINE_MMXEXT(cpu_flags)) { |
379 | 2 | DSPFUNC(put, 0, 16, mmxext); | |
380 | 2 | DSPFUNC(put, 1, 8, mmxext); | |
381 | 2 | DSPFUNC(avg, 0, 16, mmxext); | |
382 | 2 | DSPFUNC(avg, 1, 8, mmxext); | |
383 | } | ||
384 | #endif | ||
385 | #if HAVE_MMX_EXTERNAL | ||
386 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
387 | 2 | c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext; | |
388 | } | ||
389 | #endif | ||
390 | #if HAVE_SSE2_EXTERNAL | ||
391 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (EXTERNAL_SSE2(cpu_flags)) { |
392 | 2 | c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2; | |
393 | 2 | c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2; | |
394 | |||
395 | 2 | c->cavs_idct8_add = cavs_idct8_add_sse2; | |
396 | 2 | c->idct_perm = FF_IDCT_PERM_TRANSPOSE; | |
397 | } | ||
398 | #endif | ||
399 | 6 | } | |
400 |