Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Chinese AVS video (AVS1-P2, JiZhun profile) decoder. | ||
3 | * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de> | ||
4 | * | ||
5 | * MMX-optimized DSP functions, based on H.264 optimizations by | ||
6 | * Michael Niedermayer and Loren Merritt | ||
7 | * | ||
8 | * This file is part of FFmpeg. | ||
9 | * | ||
10 | * FFmpeg is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2.1 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * FFmpeg is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with FFmpeg; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | #include "libavutil/attributes.h" | ||
26 | #include "libavutil/common.h" | ||
27 | #include "libavutil/cpu.h" | ||
28 | #include "libavutil/mem_internal.h" | ||
29 | #include "libavutil/x86/asm.h" | ||
30 | #include "libavutil/x86/cpu.h" | ||
31 | #include "libavcodec/cavsdsp.h" | ||
32 | #include "libavcodec/idctdsp.h" | ||
33 | #include "constants.h" | ||
34 | #include "fpel.h" | ||
35 | #include "idctdsp.h" | ||
36 | #include "config.h" | ||
37 | |||
38 | |||
39 | #if HAVE_SSE2_EXTERNAL | ||
40 | |||
41 | void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in); | ||
42 | |||
43 | 4921 | static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) | |
44 | { | ||
45 | 4921 | LOCAL_ALIGNED(16, int16_t, b2, [64]); | |
46 | 4921 | ff_cavs_idct8_sse2(b2, block); | |
47 | 4921 | ff_add_pixels_clamped_sse2(b2, dst, stride); | |
48 | 4921 | } | |
49 | |||
50 | #endif /* HAVE_SSE2_EXTERNAL */ | ||
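For reference, the SSE2 path above runs the inverse transform into an aligned temporary buffer and then adds the result to the destination pixels with unsigned saturation. A minimal scalar sketch of that add-and-clamp step (an illustrative stand-in for `ff_add_pixels_clamped_sse2`, assuming the usual 8x8 block and [0,255] clipping; the names below are not part of this file):

```c
#include <stdint.h>
#include <stddef.h>

/* Add an 8x8 block of 16-bit residuals to 8-bit pixels, saturating to [0,255]. */
static void add_pixels_clamped_sketch(const int16_t *block, uint8_t *dst,
                                      ptrdiff_t stride)
{
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int v = dst[x] + block[y * 8 + x];
            dst[x] = v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
        }
        dst += stride;
    }
}
```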
51 | |||
52 | #if HAVE_MMXEXT_INLINE | ||
53 | |||
54 | /***************************************************************************** | ||
55 | * | ||
56 | * motion compensation | ||
57 | * | ||
58 | ****************************************************************************/ | ||
59 | |||
60 | /* vertical filter [-1 -2 96 42 -7 0] */ | ||
61 | #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
62 | "movd (%0), "#F" \n\t"\ | ||
63 | "movq "#C", %%mm6 \n\t"\ | ||
64 | "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ | ||
65 | "movq "#D", %%mm7 \n\t"\ | ||
66 | "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ | ||
67 | "psllw $3, "#E" \n\t"\ | ||
68 | "psubw "#E", %%mm6 \n\t"\ | ||
69 | "psraw $3, "#E" \n\t"\ | ||
70 | "paddw %%mm7, %%mm6 \n\t"\ | ||
71 | "paddw "#E", %%mm6 \n\t"\ | ||
72 | "paddw "#B", "#B" \n\t"\ | ||
73 | "pxor %%mm7, %%mm7 \n\t"\ | ||
74 | "add %2, %0 \n\t"\ | ||
75 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
76 | "psubw "#B", %%mm6 \n\t"\ | ||
77 | "psraw $1, "#B" \n\t"\ | ||
78 | "psubw "#A", %%mm6 \n\t"\ | ||
79 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
80 | "psraw $7, %%mm6 \n\t"\ | ||
81 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
82 | OP(%%mm6, (%1), A, d) \ | ||
83 | "add %3, %1 \n\t" | ||
84 | |||
85 | /* vertical filter [ 0 -1 5 5 -1 0] */ | ||
86 | #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
87 | "movd (%0), "#F" \n\t"\ | ||
88 | "movq "#C", %%mm6 \n\t"\ | ||
89 | "paddw "#D", %%mm6 \n\t"\ | ||
90 | "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ | ||
91 | "add %2, %0 \n\t"\ | ||
92 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
93 | "psubw "#B", %%mm6 \n\t"\ | ||
94 | "psubw "#E", %%mm6 \n\t"\ | ||
95 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
96 | "psraw $3, %%mm6 \n\t"\ | ||
97 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
98 | OP(%%mm6, (%1), A, d) \ | ||
99 | "add %3, %1 \n\t" | ||
100 | |||
101 | /* vertical filter [ 0 -7 42 96 -2 -1] */ | ||
102 | #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ | ||
103 | "movd (%0), "#F" \n\t"\ | ||
104 | "movq "#C", %%mm6 \n\t"\ | ||
105 | "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ | ||
106 | "movq "#D", %%mm7 \n\t"\ | ||
107 | "pmullw "MANGLE(MUL1)", %%mm7\n\t"\ | ||
108 | "psllw $3, "#B" \n\t"\ | ||
109 | "psubw "#B", %%mm6 \n\t"\ | ||
110 | "psraw $3, "#B" \n\t"\ | ||
111 | "paddw %%mm7, %%mm6 \n\t"\ | ||
112 | "paddw "#B", %%mm6 \n\t"\ | ||
113 | "paddw "#E", "#E" \n\t"\ | ||
114 | "pxor %%mm7, %%mm7 \n\t"\ | ||
115 | "add %2, %0 \n\t"\ | ||
116 | "punpcklbw %%mm7, "#F" \n\t"\ | ||
117 | "psubw "#E", %%mm6 \n\t"\ | ||
118 | "psraw $1, "#E" \n\t"\ | ||
119 | "psubw "#F", %%mm6 \n\t"\ | ||
120 | "paddw "MANGLE(ADD)", %%mm6 \n\t"\ | ||
121 | "psraw $7, %%mm6 \n\t"\ | ||
122 | "packuswb %%mm6, %%mm6 \n\t"\ | ||
123 | OP(%%mm6, (%1), A, d) \ | ||
124 | "add %3, %1 \n\t" | ||
125 | |||
126 | |||
127 | #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\ | ||
128 | int w= 2;\ | ||
129 | src -= 2*srcStride;\ | ||
130 | \ | ||
131 | while(w--){\ | ||
132 | __asm__ volatile(\ | ||
133 | "pxor %%mm7, %%mm7 \n\t"\ | ||
134 | "movd (%0), %%mm0 \n\t"\ | ||
135 | "add %2, %0 \n\t"\ | ||
136 | "movd (%0), %%mm1 \n\t"\ | ||
137 | "add %2, %0 \n\t"\ | ||
138 | "movd (%0), %%mm2 \n\t"\ | ||
139 | "add %2, %0 \n\t"\ | ||
140 | "movd (%0), %%mm3 \n\t"\ | ||
141 | "add %2, %0 \n\t"\ | ||
142 | "movd (%0), %%mm4 \n\t"\ | ||
143 | "add %2, %0 \n\t"\ | ||
144 | "punpcklbw %%mm7, %%mm0 \n\t"\ | ||
145 | "punpcklbw %%mm7, %%mm1 \n\t"\ | ||
146 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
147 | "punpcklbw %%mm7, %%mm3 \n\t"\ | ||
148 | "punpcklbw %%mm7, %%mm4 \n\t"\ | ||
149 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
150 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
151 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
152 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
153 | VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ | ||
154 | VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ | ||
155 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
156 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
157 | \ | ||
158 | : "+a"(src), "+c"(dst)\ | ||
159 | : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ | ||
160 | NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ | ||
161 | : "memory"\ | ||
162 | );\ | ||
163 | if(h==16){\ | ||
164 | __asm__ volatile(\ | ||
165 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
166 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
167 | VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ | ||
168 | VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ | ||
169 | VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ | ||
170 | VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ | ||
171 | VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ | ||
172 | VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ | ||
173 | \ | ||
174 | : "+a"(src), "+c"(dst)\ | ||
175 | : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ | ||
176 | NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ | ||
177 | : "memory"\ | ||
178 | );\ | ||
179 | }\ | ||
180 | src += 4-(h+5)*srcStride;\ | ||
181 | dst += 4-h*dstStride;\ | ||
182 | } | ||
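QPEL_CAVSVNUM drives one of the VOP macros down the image: it rewinds src by two rows, preloads five rows into mm0..mm4, and then each VOP step loads one new row, emits one filtered row, and rotates the register roles; a second asm block handles rows 9..16 when h == 16, and the outer while(w--) loop covers the block width four pixels at a time. A scalar sketch of the same column-wise filtering, without the register rotation (the helper names and the six_tap_fn type are illustrative, not part of the file):

```c
#include <stdint.h>
#include <stddef.h>

typedef uint8_t (*six_tap_fn)(const uint8_t p[6]);

/* Vertical filtering sketch: for every output sample, gather the six source
 * rows starting two rows above it and apply one of the tap sets above. */
static void cavs_filt_vertical_sketch(uint8_t *dst, ptrdiff_t dstStride,
                                      const uint8_t *src, ptrdiff_t srcStride,
                                      int width, int h, six_tap_fn tap)
{
    src -= 2 * srcStride;                    /* same rewind as the macro */
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < width; x++) {
            uint8_t p[6];
            for (int i = 0; i < 6; i++)
                p[i] = src[x + (y + i) * srcStride];
            dst[x + y * dstStride] = tap(p);
        }
    }
}
```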
183 | |||
184 | #define QPEL_CAVS(OPNAME, OP, MMX)\ | ||
185 | static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
186 | {\ | ||
187 | int h=8;\ | ||
188 | __asm__ volatile(\ | ||
189 | "pxor %%mm7, %%mm7 \n\t"\ | ||
190 | "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ | ||
191 | "1: \n\t"\ | ||
192 | "movq (%0), %%mm0 \n\t"\ | ||
193 | "movq 1(%0), %%mm2 \n\t"\ | ||
194 | "movq %%mm0, %%mm1 \n\t"\ | ||
195 | "movq %%mm2, %%mm3 \n\t"\ | ||
196 | "punpcklbw %%mm7, %%mm0 \n\t"\ | ||
197 | "punpckhbw %%mm7, %%mm1 \n\t"\ | ||
198 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
199 | "punpckhbw %%mm7, %%mm3 \n\t"\ | ||
200 | "paddw %%mm2, %%mm0 \n\t"\ | ||
201 | "paddw %%mm3, %%mm1 \n\t"\ | ||
202 | "pmullw %%mm6, %%mm0 \n\t"\ | ||
203 | "pmullw %%mm6, %%mm1 \n\t"\ | ||
204 | "movq -1(%0), %%mm2 \n\t"\ | ||
205 | "movq 2(%0), %%mm4 \n\t"\ | ||
206 | "movq %%mm2, %%mm3 \n\t"\ | ||
207 | "movq %%mm4, %%mm5 \n\t"\ | ||
208 | "punpcklbw %%mm7, %%mm2 \n\t"\ | ||
209 | "punpckhbw %%mm7, %%mm3 \n\t"\ | ||
210 | "punpcklbw %%mm7, %%mm4 \n\t"\ | ||
211 | "punpckhbw %%mm7, %%mm5 \n\t"\ | ||
212 | "paddw %%mm4, %%mm2 \n\t"\ | ||
213 | "paddw %%mm3, %%mm5 \n\t"\ | ||
214 | "psubw %%mm2, %%mm0 \n\t"\ | ||
215 | "psubw %%mm5, %%mm1 \n\t"\ | ||
216 | "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\ | ||
217 | "paddw %%mm5, %%mm0 \n\t"\ | ||
218 | "paddw %%mm5, %%mm1 \n\t"\ | ||
219 | "psraw $3, %%mm0 \n\t"\ | ||
220 | "psraw $3, %%mm1 \n\t"\ | ||
221 | "packuswb %%mm1, %%mm0 \n\t"\ | ||
222 | OP(%%mm0, (%1),%%mm5, q) \ | ||
223 | "add %3, %0 \n\t"\ | ||
224 | "add %4, %1 \n\t"\ | ||
225 | "decl %2 \n\t"\ | ||
226 | " jnz 1b \n\t"\ | ||
227 | : "+a"(src), "+c"(dst), "+m"(h)\ | ||
228 | : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ | ||
229 | NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\ | ||
230 | : "memory"\ | ||
231 | );\ | ||
232 | }\ | ||
233 | \ | ||
234 | static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
235 | { \ | ||
236 | QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ | ||
237 | }\ | ||
238 | \ | ||
239 | static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
240 | { \ | ||
241 | QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \ | ||
242 | }\ | ||
243 | \ | ||
244 | static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ | ||
245 | { \ | ||
246 | QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ | ||
247 | }\ | ||
248 | \ | ||
249 | static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
250 | { \ | ||
251 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
252 | }\ | ||
253 | static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
254 | { \ | ||
255 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
256 | OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
257 | }\ | ||
258 | \ | ||
259 | static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
260 | { \ | ||
261 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
262 | }\ | ||
263 | static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
264 | { \ | ||
265 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
266 | OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
267 | }\ | ||
268 | \ | ||
269 | static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
270 | { \ | ||
271 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\ | ||
272 | }\ | ||
273 | static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
274 | { \ | ||
275 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\ | ||
276 | OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | ||
277 | }\ | ||
278 | \ | ||
279 | static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ | ||
280 | { \ | ||
281 | OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ | ||
282 | OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | ||
283 | src += 8*srcStride;\ | ||
284 | dst += 8*dstStride;\ | ||
285 | OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ | ||
286 | OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | ||
287 | }\ | ||
288 | |||
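The horizontal filter in OPNAME##cavs_qpel8_h above works on eight pixels per row: it adds src[x] and src[x+1], multiplies by ff_pw_5, subtracts src[x-1] and src[x+2], then rounds with ff_pw_4 and shifts by 3, i.e. the same [-1 5 5 -1] taps as the v2 vertical case but applied along a row. A hedged scalar sketch (names illustrative):

```c
#include <stdint.h>
#include <stddef.h>

/* Scalar reading of cavs_qpel8_h: taps [-1 5 5 -1] on src[x-1..x+2], +4, >>3. */
static void cavs_qpel8_h_sketch(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t dstStride, ptrdiff_t srcStride)
{
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int v = (5 * (src[x] + src[x + 1]) - src[x - 1] - src[x + 2] + 4) >> 3;
            dst[x] = v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
        }
        src += srcStride;
        dst += dstStride;
    }
}
```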
289 | #define CAVS_MC(OPNAME, SIZE, MMX) \ | ||
290 | static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
291 | {\ | ||
292 | OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ | ||
293 | }\ | ||
294 | \ | ||
295 | static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
296 | {\ | ||
297 | OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ | ||
298 | }\ | ||
299 | \ | ||
300 | static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
301 | {\ | ||
302 | OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ | ||
303 | }\ | ||
304 | \ | ||
305 | static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ | ||
306 | {\ | ||
307 | OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ | ||
308 | }\ | ||
309 | |||
310 | #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | ||
311 | #define AVG_MMXEXT_OP(a, b, temp, size) \ | ||
312 | "mov" #size " " #b ", " #temp " \n\t"\ | ||
313 | "pavgb " #temp ", " #a " \n\t"\ | ||
314 | "mov" #size " " #a ", " #b " \n\t" | ||
315 | |||
316 | #endif /* HAVE_MMXEXT_INLINE */ | ||
317 | |||
318 | #if HAVE_MMX_EXTERNAL | ||
319 | ✗ | static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, | |
320 | ptrdiff_t stride) | ||
321 | { | ||
322 | ✗ | ff_put_pixels8_mmx(dst, src, stride, 8); | |
323 | ✗ | } | |
324 | |||
325 | ✗ | static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, | |
326 | ptrdiff_t stride) | ||
327 | { | ||
328 | ✗ | ff_avg_pixels8_mmxext(dst, src, stride, 8); | |
329 | ✗ | } | |
330 | |||
331 | ✗ | static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, | |
332 | ptrdiff_t stride) | ||
333 | { | ||
334 | ✗ | ff_put_pixels16_sse2(dst, src, stride, 16); | |
335 | ✗ | } | |
336 | |||
337 | ✗ | static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, | |
338 | ptrdiff_t stride) | ||
339 | { | ||
340 | ✗ | ff_avg_pixels16_sse2(dst, src, stride, 16); | |
341 | ✗ | } | |
342 | #endif | ||
343 | |||
344 | 2 | static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c) | |
345 | { | ||
346 | #if HAVE_MMX_EXTERNAL | ||
347 | 2 | c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx; | |
348 | #endif /* HAVE_MMX_EXTERNAL */ | ||
349 | 2 | } | |
350 | |||
351 | #define DSPFUNC(PFX, IDX, NUM, EXT) \ | ||
352 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \ | ||
353 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \ | ||
354 | c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \ | ||
355 | c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \ | ||
356 | |||
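DSPFUNC fills four slots of a qpel pixels table per block size. Going by the mcXY suffixes, the slot numbers look like x + 4*y for quarter-pel offsets x and y (mc20 → 2, mc01 → 4, mc02 → 8, mc03 → 12); that mapping is inferred from the names, not stated in this file. As a sketch:

```c
/* Assumed slot layout for the qpel tables filled by DSPFUNC (inferred from the
 * mcXY names, not confirmed here): index = x + 4*y in quarter-pel units. */
enum {
    CAVS_QPEL_MC20 = 2,   /* x = 2, y = 0: horizontal half-pel */
    CAVS_QPEL_MC01 = 4,   /* x = 0, y = 1 */
    CAVS_QPEL_MC02 = 8,   /* x = 0, y = 2: vertical half-pel */
    CAVS_QPEL_MC03 = 12,  /* x = 0, y = 3 */
};
```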
357 | #if HAVE_MMXEXT_INLINE | ||
358 | ✗ | QPEL_CAVS(put_, PUT_OP, mmxext) | |
359 | ✗ | QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext) | |
360 | |||
361 | ✗ | CAVS_MC(put_, 8, mmxext) | |
362 | ✗ | CAVS_MC(put_, 16, mmxext) | |
363 | ✗ | CAVS_MC(avg_, 8, mmxext) | |
364 | ✗ | CAVS_MC(avg_, 16, mmxext) | |
365 | #endif /* HAVE_MMXEXT_INLINE */ | ||
366 | |||
367 | 6 | av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c) | |
368 | { | ||
369 | 6 | av_unused int cpu_flags = av_get_cpu_flags(); | |
370 | |||
371 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times. | 6 | if (X86_MMX(cpu_flags)) | |
372 | 2 | cavsdsp_init_mmx(c); | |
373 | |||
374 | #if HAVE_MMXEXT_INLINE | ||
375 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times. | 6 | if (INLINE_MMXEXT(cpu_flags)) { | |
376 | 2 | DSPFUNC(put, 0, 16, mmxext); | |
377 | 2 | DSPFUNC(put, 1, 8, mmxext); | |
378 | 2 | DSPFUNC(avg, 0, 16, mmxext); | |
379 | 2 | DSPFUNC(avg, 1, 8, mmxext); | |
380 | } | ||
381 | #endif | ||
382 | #if HAVE_MMX_EXTERNAL | ||
383 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times. | 6 | if (EXTERNAL_MMXEXT(cpu_flags)) { | |
384 | 2 | c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext; | |
385 | } | ||
386 | #endif | ||
387 | #if HAVE_SSE2_EXTERNAL | ||
388 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times. | 6 | if (EXTERNAL_SSE2(cpu_flags)) { | |
389 | 2 | c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2; | |
390 | 2 | c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2; | |
391 | |||
392 | 2 | c->cavs_idct8_add = cavs_idct8_add_sse2; | |
393 | 2 | c->idct_perm = FF_IDCT_PERM_TRANSPOSE; | |
394 | } | ||
395 | #endif | ||
396 | 6 | } | |
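ff_cavsdsp_init_x86 only overrides entries that the detected CPU can accelerate, so it is meant to run after the generic initializer has installed the C fallbacks. A hedged sketch of that hand-off (not the actual libavcodec/cavsdsp.c):

```c
#include "config.h"
#include "libavcodec/cavsdsp.h"

/* Sketch: fill C defaults first, then let the per-arch init override what
 * av_get_cpu_flags() allows. The generic init body is assumed, not shown. */
static void cavsdsp_init_sketch(CAVSDSPContext *c)
{
    /* ... assign C fallbacks for cavs_idct8_add and the qpel tables ... */
#if ARCH_X86
    ff_cavsdsp_init_x86(c);
#endif
}
```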
397 |