FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/qpeldsp_init.c
Date: 2025-10-10 03:51:19
Coverage summary (Exec / Total):
Lines:     20 / 20   (100.0%)
Functions: 91 / 91   (100.0%)
Branches:   4 / 4    (100.0%)

Line Branch Exec Source
1 /*
2 * quarterpel DSP functions
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
32 #include "fpel.h"
33
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
35 const uint8_t *src1, const uint8_t *src2,
36 ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
37 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
38 const uint8_t *src1, const uint8_t *src2,
39 ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
40 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
41 const uint8_t *src1, const uint8_t *src2,
42 ptrdiff_t dstStride, ptrdiff_t src1Stride);
43 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
44 const uint8_t *src1, const uint8_t *src2,
45 ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
46 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
47 const uint8_t *src1, const uint8_t *src2,
48 ptrdiff_t dstStride, ptrdiff_t src1Stride);
49 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
50 const uint8_t *src1, const uint8_t *src2,
51 ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
52 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53 ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
54 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
55 ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
57 const uint8_t *src,
58 ptrdiff_t dstStride, ptrdiff_t srcStride,
59 int h);
60 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61 ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
62 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
63 ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
64 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
65 const uint8_t *src,
66 ptrdiff_t dstStride, ptrdiff_t srcStride,
67 int h);
68 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69 ptrdiff_t dstStride, ptrdiff_t srcStride);
70 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
71 ptrdiff_t dstStride, ptrdiff_t srcStride);
72 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
73 const uint8_t *src,
74 ptrdiff_t dstStride, ptrdiff_t srcStride);
75 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76 ptrdiff_t dstStride, ptrdiff_t srcStride);
77 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
78 ptrdiff_t dstStride, ptrdiff_t srcStride);
79 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
80 const uint8_t *src,
81 ptrdiff_t dstStride, ptrdiff_t srcStride);
82
83 #if HAVE_X86ASM
84
85 #define QPEL_OP(OPNAME, RND, MMX, ARG) \
86 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
87 const uint8_t *src, \
88 ptrdiff_t stride) \
89 { \
90 uint64_t temp[8]; \
91 uint8_t *const half = (uint8_t *) temp; \
92 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
93 stride, 8); \
94 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \
95 stride, stride, 8)); \
96 } \
97 \
98 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
99 const uint8_t *src, \
100 ptrdiff_t stride) \
101 { \
102 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
103 stride, 8); \
104 } \
105 \
106 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
107 const uint8_t *src, \
108 ptrdiff_t stride) \
109 { \
110 uint64_t temp[8]; \
111 uint8_t *const half = (uint8_t *) temp; \
112 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
113 stride, 8); \
114 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + 1, half, \
115 stride, stride, 8)); \
116 } \
117 \
118 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
119 const uint8_t *src, \
120 ptrdiff_t stride) \
121 { \
122 uint64_t temp[8]; \
123 uint8_t *const half = (uint8_t *) temp; \
124 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
125 8, stride); \
126 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \
127 stride, stride, 8)); \
128 } \
129 \
130 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
131 const uint8_t *src, \
132 ptrdiff_t stride) \
133 { \
134 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
135 stride, stride); \
136 } \
137 \
138 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
139 const uint8_t *src, \
140 ptrdiff_t stride) \
141 { \
142 uint64_t temp[8]; \
143 uint8_t *const half = (uint8_t *) temp; \
144 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
145 8, stride); \
146 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + stride, half, \
147 stride, stride, 8)); \
148 } \
149 \
150 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
151 const uint8_t *src, \
152 ptrdiff_t stride) \
153 { \
154 uint64_t half[8 + 9]; \
155 uint8_t *const halfH = (uint8_t *) half + 64; \
156 uint8_t *const halfHV = (uint8_t *) half; \
157 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
158 stride, 9); \
159 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
160 stride, 9); \
161 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
162 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
163 stride, 8, 8)); \
164 } \
165 \
166 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
167 const uint8_t *src, \
168 ptrdiff_t stride) \
169 { \
170 uint64_t half[8 + 9]; \
171 uint8_t *const halfH = (uint8_t *) half + 64; \
172 uint8_t *const halfHV = (uint8_t *) half; \
173 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
174 stride, 9); \
175 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
176 stride, 9); \
177 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
178 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
179 stride, 8, 8)); \
180 } \
181 \
182 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
183 const uint8_t *src, \
184 ptrdiff_t stride) \
185 { \
186 uint64_t half[8 + 9]; \
187 uint8_t *const halfH = (uint8_t *) half + 64; \
188 uint8_t *const halfHV = (uint8_t *) half; \
189 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
190 stride, 9); \
191 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
192 stride, 9); \
193 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
194 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
195 stride, 8, 8)); \
196 } \
197 \
198 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
199 const uint8_t *src, \
200 ptrdiff_t stride) \
201 { \
202 uint64_t half[8 + 9]; \
203 uint8_t *const halfH = (uint8_t *) half + 64; \
204 uint8_t *const halfHV = (uint8_t *) half; \
205 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
206 stride, 9); \
207 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
208 stride, 9); \
209 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
210 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
211 stride, 8, 8)); \
212 } \
213 \
214 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
215 const uint8_t *src, \
216 ptrdiff_t stride) \
217 { \
218 uint64_t half[8 + 9]; \
219 uint8_t *const halfH = (uint8_t *) half + 64; \
220 uint8_t *const halfHV = (uint8_t *) half; \
221 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
222 stride, 9); \
223 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
224 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
225 stride, 8, 8)); \
226 } \
227 \
228 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
229 const uint8_t *src, \
230 ptrdiff_t stride) \
231 { \
232 uint64_t half[8 + 9]; \
233 uint8_t *const halfH = (uint8_t *) half + 64; \
234 uint8_t *const halfHV = (uint8_t *) half; \
235 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
236 stride, 9); \
237 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
238 ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
239 stride, 8, 8)); \
240 } \
241 \
242 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
243 const uint8_t *src, \
244 ptrdiff_t stride) \
245 { \
246 uint64_t half[8 + 9]; \
247 uint8_t *const halfH = (uint8_t *) half; \
248 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
249 stride, 9); \
250 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
251 8, stride, 9); \
252 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
253 stride, 8); \
254 } \
255 \
256 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
257 const uint8_t *src, \
258 ptrdiff_t stride) \
259 { \
260 uint64_t half[8 + 9]; \
261 uint8_t *const halfH = (uint8_t *) half; \
262 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
263 stride, 9); \
264 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
265 stride, 9); \
266 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
267 stride, 8); \
268 } \
269 \
270 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
271 const uint8_t *src, \
272 ptrdiff_t stride) \
273 { \
274 uint64_t half[9]; \
275 uint8_t *const halfH = (uint8_t *) half; \
276 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
277 stride, 9); \
278 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
279 stride, 8); \
280 } \
281 \
282 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
283 const uint8_t *src, \
284 ptrdiff_t stride) \
285 { \
286 uint64_t temp[32]; \
287 uint8_t *const half = (uint8_t *) temp; \
288 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
289 stride, 16); \
290 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \
291 stride, stride, 16)); \
292 } \
293 \
294 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
295 const uint8_t *src, \
296 ptrdiff_t stride) \
297 { \
298 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
299 stride, stride, 16);\
300 } \
301 \
302 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
303 const uint8_t *src, \
304 ptrdiff_t stride) \
305 { \
306 uint64_t temp[32]; \
307 uint8_t *const half = (uint8_t*) temp; \
308 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
309 stride, 16); \
310 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src + 1, half, \
311 stride, stride, 16)); \
312 } \
313 \
314 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
315 const uint8_t *src, \
316 ptrdiff_t stride) \
317 { \
318 uint64_t temp[32]; \
319 uint8_t *const half = (uint8_t *) temp; \
320 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
321 stride); \
322 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \
323 stride, stride, 16)); \
324 } \
325 \
326 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
327 const uint8_t *src, \
328 ptrdiff_t stride) \
329 { \
330 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
331 stride, stride); \
332 } \
333 \
334 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
335 const uint8_t *src, \
336 ptrdiff_t stride) \
337 { \
338 uint64_t temp[32]; \
339 uint8_t *const half = (uint8_t *) temp; \
340 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
341 stride); \
342 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src+stride, half, \
343 stride, stride, 16)); \
344 } \
345 \
346 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
347 const uint8_t *src, \
348 ptrdiff_t stride) \
349 { \
350 uint64_t half[16 * 2 + 17 * 2]; \
351 uint8_t *const halfH = (uint8_t *) half + 256; \
352 uint8_t *const halfHV = (uint8_t *) half; \
353 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
354 stride, 17); \
355 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
356 stride, 17); \
357 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
358 16, 16); \
359 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
360 stride, 16, 16)); \
361 } \
362 \
363 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
364 const uint8_t *src, \
365 ptrdiff_t stride) \
366 { \
367 uint64_t half[16 * 2 + 17 * 2]; \
368 uint8_t *const halfH = (uint8_t *) half + 256; \
369 uint8_t *const halfHV = (uint8_t *) half; \
370 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
371 stride, 17); \
372 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
373 stride, 17); \
374 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
375 16, 16); \
376 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
377 stride, 16, 16)); \
378 } \
379 \
380 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
381 const uint8_t *src, \
382 ptrdiff_t stride) \
383 { \
384 uint64_t half[16 * 2 + 17 * 2]; \
385 uint8_t *const halfH = (uint8_t *) half + 256; \
386 uint8_t *const halfHV = (uint8_t *) half; \
387 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
388 stride, 17); \
389 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
390 stride, 17); \
391 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
392 16, 16); \
393 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
394 stride, 16, 16)); \
395 } \
396 \
397 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
398 const uint8_t *src, \
399 ptrdiff_t stride) \
400 { \
401 uint64_t half[16 * 2 + 17 * 2]; \
402 uint8_t *const halfH = (uint8_t *) half + 256; \
403 uint8_t *const halfHV = (uint8_t *) half; \
404 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
405 stride, 17); \
406 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
407 stride, 17); \
408 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
409 16, 16); \
410 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
411 stride, 16, 16)); \
412 } \
413 \
414 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
415 const uint8_t *src, \
416 ptrdiff_t stride) \
417 { \
418 uint64_t half[16 * 2 + 17 * 2]; \
419 uint8_t *const halfH = (uint8_t *) half + 256; \
420 uint8_t *const halfHV = (uint8_t *) half; \
421 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
422 stride, 17); \
423 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
424 16, 16); \
425 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
426 stride, 16, 16)); \
427 } \
428 \
429 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
430 const uint8_t *src, \
431 ptrdiff_t stride) \
432 { \
433 uint64_t half[16 * 2 + 17 * 2]; \
434 uint8_t *const halfH = (uint8_t *) half + 256; \
435 uint8_t *const halfHV = (uint8_t *) half; \
436 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
437 stride, 17); \
438 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
439 16, 16); \
440 ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
441 stride, 16, 16)); \
442 } \
443 \
444 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
445 const uint8_t *src, \
446 ptrdiff_t stride) \
447 { \
448 uint64_t half[17 * 2]; \
449 uint8_t *const halfH = (uint8_t *) half; \
450 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
451 stride, 17); \
452 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
453 stride, 17); \
454 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
455 stride, 16); \
456 } \
457 \
458 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
459 const uint8_t *src, \
460 ptrdiff_t stride) \
461 { \
462 uint64_t half[17 * 2]; \
463 uint8_t *const halfH = (uint8_t *) half; \
464 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
465 stride, 17); \
466 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
467 stride, 17); \
468 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
469 stride, 16); \
470 } \
471 \
472 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
473 const uint8_t *src, \
474 ptrdiff_t stride) \
475 { \
476 uint64_t half[17 * 2]; \
477 uint8_t *const halfH = (uint8_t *) half; \
478 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
479 stride, 17); \
480 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
481 stride, 16); \
482 }
483
484 #define PASSTHROUGH(...) __VA_ARGS__
485 #define STRIP_HEIGHT(dst, src1, src2, dststride, srcstride, height) \
486 (dst), (src1), (src2), (dststride), (srcstride)
487
488 60 QPEL_OP(put_, _, mmxext, PASSTHROUGH)
489 60 QPEL_OP(avg_, _, mmxext, STRIP_HEIGHT)
490 60 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext, PASSTHROUGH)
491
492 #endif /* HAVE_X86ASM */
493
494 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
495 do { \
496 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
497 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
498 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
499 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
500 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
501 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
502 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
503 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
504 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
505 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
506 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
507 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
508 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
509 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
510 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
511 } while (0)
512
513 295 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
514 {
515 295 int cpu_flags = av_get_cpu_flags();
516
517
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 249 times.
295 if (X86_MMXEXT(cpu_flags)) {
518 #if HAVE_MMXEXT_EXTERNAL
519 46 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
520 46 c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
521 46 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
522
523 46 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
524 46 c->put_no_rnd_qpel_pixels_tab[1][0] =
525 46 c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_mmx;
526 46 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
527 46 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
528 46 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
529 #endif /* HAVE_MMXEXT_EXTERNAL */
530 }
531 #if HAVE_SSE2_EXTERNAL
532
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 251 times.
295 if (EXTERNAL_SSE2(cpu_flags)) {
533 44 c->put_no_rnd_qpel_pixels_tab[0][0] =
534 44 c->put_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
535 44 c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
536 }
537 #endif
538 295 }
539