FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/qpeldsp_init.c
Date: 2026-04-30 13:16:32
            Exec  Total  Coverage
Lines:        24     24    100.0%
Functions:    91     91    100.0%
Branches:      6      6    100.0%

Line Branch Exec Source
1 /*
2 * quarterpel DSP functions
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/attributes_internal.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/x86/cpu.h"
32 #include "libavcodec/qpeldsp.h"
33 #include "fpel.h"
34 #include "qpel.h"
35
36 FF_VISIBILITY_PUSH_HIDDEN
37 void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst,
38 const uint8_t *src1, const uint8_t *src2,
39 ptrdiff_t dstStride, ptrdiff_t src1Stride);
40 void ff_put_no_rnd_pixels16x16_l2_sse2(uint8_t *dst,
41 const uint8_t *src1, const uint8_t *src2,
42 ptrdiff_t dstStride, ptrdiff_t src1Stride);
43
44 #define QPEL_H(OPNAME, RND, SIZE, UNUSED1, XMM, UNUSED2, UNUSED3, L2) \
45 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM (uint8_t *dst, \
46 const uint8_t *src, \
47 ptrdiff_t dstStride, \
48 ptrdiff_t srcStride, \
49 int h); \
50 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(uint8_t *dst, \
51 const uint8_t *src, \
52 ptrdiff_t dstStride, \
53 ptrdiff_t srcStride, \
54 int h, \
55 ptrdiff_t l2_offset);\
56 static void OPNAME ## _qpel ## SIZE ## _mc10_ ## XMM(uint8_t *dst, \
57 const uint8_t *src, \
58 ptrdiff_t stride) \
59 { \
60 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
61 stride, SIZE, 0); \
62 } \
63 \
64 static void OPNAME ## _qpel ## SIZE ## _mc20_ ## XMM(uint8_t *dst, \
65 const uint8_t *src, \
66 ptrdiff_t stride) \
67 { \
68 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM(dst, src, stride, \
69 stride, SIZE); \
70 } \
71 \
72 static void OPNAME ## _qpel ## SIZE ## _mc30_ ## XMM(uint8_t *dst, \
73 const uint8_t *src, \
74 ptrdiff_t stride) \
75 { \
76 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
77 stride, SIZE, 1); \
78 }
79
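Note: the mcXY suffix encodes the quarter-pel phase, X horizontal and Y vertical, so QPEL_H covers the row Y=0. mc20 applies the half-pel lowpass filter alone, while mc10/mc30 use the fused _l2_ variant, whose final argument selects whether the unfiltered pixel averaged in comes from offset 0 or 1. As a sketch (illustration only, not part of the file), QPEL3(QPEL_H, 8, 9, ssse3, sse2, ssse3, mmxext) on line 244 below makes QPEL_H emit, among others:

    static void put_qpel8_mc10_ssse3(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
    {
        /* fused: horizontal half-pel lowpass + average with src at offset 0 */
        ff_mpeg4_put_qpel8_h_lowpass_l2_ssse3(dst, src, stride, stride, 8, 0);
    }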
80 #define QPEL_V(OPNAME, RND, SIZE, UNUSED1, UNUSED2, XMM, UNUSED3, L2) \
81 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM (uint8_t *dst, \
82 const uint8_t *src, \
83 ptrdiff_t dstStride, \
84 ptrdiff_t srcStride); \
85 static void OPNAME ## _qpel ## SIZE ## _mc01_ ## XMM(uint8_t *dst, \
86 const uint8_t *src, \
87 ptrdiff_t stride) \
88 { \
89 DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
90 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
91 SIZE, stride); \
92 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src, half, \
93 stride, stride); \
94 } \
95 \
96 static void OPNAME ## _qpel ## SIZE ## _mc02_ ## XMM(uint8_t *dst, \
97 const uint8_t *src, \
98 ptrdiff_t stride) \
99 { \
100 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM(dst, src, \
101 stride, stride); \
102 } \
103 \
104 static void OPNAME ## _qpel ## SIZE ## _mc03_ ## XMM(uint8_t *dst, \
105 const uint8_t *src, \
106 ptrdiff_t stride) \
107 { \
108 DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
109 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
110 SIZE, stride); \
111 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src + stride, \
112 half, stride, stride); \
113 }
114
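Note: QPEL_V mirrors this for the column X=0. mc01 and mc03 filter vertically into an on-stack half buffer, then average it with the unfiltered source (shifted down one row for mc03) through the size-matched l2 helper. The mc01 case for the 8x8 put variant would expand to roughly this (illustration only):

    static void put_qpel8_mc01_sse2(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
    {
        DECLARE_ALIGNED(8, uint8_t, half)[8*8];
        /* vertical half-pel lowpass into the scratch block ... */
        ff_mpeg4_put_qpel8_v_lowpass_sse2(half, src, 8, stride);
        /* ... then average with the unfiltered source rows */
        ff_put_pixels8x8_l2_mmxext(dst, src, half, stride, stride);
    }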
115 #define QPEL_HV(OPNAME, RND, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
116 static void OPNAME ## _qpel ## SIZE ## _mc11_ ## HVXMM(uint8_t *dst, \
117 const uint8_t *src, \
118 ptrdiff_t stride) \
119 { \
120 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
121 uint8_t *const halfH = half + SIZE*SIZE; \
122 uint8_t *const halfHV = half; \
123 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
124 stride, SIZEP1, 0); \
125 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
126 SIZE, SIZE); \
127 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
128 stride, SIZE); \
129 } \
130 \
131 static void OPNAME ## _qpel ## SIZE ## _mc31_ ## HVXMM(uint8_t *dst, \
132 const uint8_t *src, \
133 ptrdiff_t stride) \
134 { \
135 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
136 uint8_t *const halfH = half + SIZE*SIZE; \
137 uint8_t *const halfHV = half; \
138 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
139 stride, SIZEP1, 1); \
140 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
141 SIZE, SIZE); \
142 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
143 stride, SIZE); \
144 } \
145 \
146 static void OPNAME ## _qpel ## SIZE ## _mc13_ ## HVXMM(uint8_t *dst, \
147 const uint8_t *src, \
148 ptrdiff_t stride) \
149 { \
150 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
151 uint8_t *const halfH = half + SIZE*SIZE; \
152 uint8_t *const halfHV = half; \
153 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
154 stride, SIZEP1, 0); \
155 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
156 SIZE, SIZE); \
157 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
158 halfHV, stride, SIZE); \
159 } \
160 \
161 static void OPNAME ## _qpel ## SIZE ## _mc33_ ## HVXMM(uint8_t *dst, \
162 const uint8_t *src, \
163 ptrdiff_t stride) \
164 { \
165 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
166 uint8_t *const halfH = half + SIZE*SIZE; \
167 uint8_t *const halfHV = half; \
168 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
169 stride, SIZEP1, 1); \
170 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
171 SIZE, SIZE); \
172 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
173 halfHV, stride, SIZE); \
174 } \
175 \
176 static void OPNAME ## _qpel ## SIZE ## _mc21_ ## HVXMM(uint8_t *dst, \
177 const uint8_t *src, \
178 ptrdiff_t stride) \
179 { \
180 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
181 uint8_t *const halfH = half + SIZE*SIZE; \
182 uint8_t *const halfHV = half; \
183 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
184 stride, SIZEP1); \
185 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
186 SIZE, SIZE); \
187 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
188 stride, SIZE); \
189 } \
190 \
191 static void OPNAME ## _qpel ## SIZE ## _mc23_ ## HVXMM(uint8_t *dst, \
192 const uint8_t *src, \
193 ptrdiff_t stride) \
194 { \
195 DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
196 uint8_t *const halfH = half + SIZE*SIZE; \
197 uint8_t *const halfHV = half; \
198 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
199 stride, SIZEP1); \
200 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
201 SIZE, SIZE); \
202 ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
203 halfHV, stride, SIZE); \
204 } \
205 \
206 static void OPNAME ## _qpel ## SIZE ## _mc12_ ## HVXMM(uint8_t *dst, \
207 const uint8_t *src, \
208 ptrdiff_t stride) \
209 { \
210 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
211 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
212 stride, SIZEP1, 0); \
213 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
214 stride, SIZE); \
215 } \
216 \
217 static void OPNAME ## _qpel ## SIZE ## _mc32_ ## HVXMM(uint8_t *dst, \
218 const uint8_t *src, \
219 ptrdiff_t stride) \
220 { \
221 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
222 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
223 stride, SIZEP1, 1); \
224 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
225 stride, SIZE); \
226 } \
227 \
228 static void OPNAME ## _qpel ## SIZE ## _mc22_ ## HVXMM(uint8_t *dst, \
229 const uint8_t *src, \
230 ptrdiff_t stride) \
231 { \
232 DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
233 ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
234 stride, SIZEP1); \
235 ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
236 stride, SIZE); \
237 }
238
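Note: the QPEL_HV cases share one scratch allocation of (SIZE + SIZEP1)*SIZE bytes: halfH (the back SIZEP1*SIZE bytes) receives SIZE+1 rows of horizontally filtered pixels, the extra row giving the vertical filter the context it needs, and halfHV (the front SIZE*SIZE bytes) receives the vertically refiltered result. mc12/mc22/mc32 skip halfHV and run the vertical pass straight into dst; for example, mc22 for the 16x16 put variant expands to roughly this (illustration only):

    static void put_qpel16_mc22_ssse3(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
    {
        DECLARE_ALIGNED(16, uint8_t, halfH)[17*16];
        /* 17 rows of horizontal half-pel filtering ... */
        ff_mpeg4_put_qpel16_h_lowpass_ssse3(halfH, src, 16, stride, 17);
        /* ... then vertical half-pel filtering directly into dst */
        ff_mpeg4_put_qpel16_v_lowpass_sse2(dst, halfH, stride, 16);
    }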
239 #define QPEL3(MACRO, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
240 MACRO(put,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
241 MACRO(avg,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
242 MACRO(put_no_rnd, no_rnd_, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
243
244 18 QPEL3(QPEL_H, 8, 9, ssse3, sse2, ssse3, mmxext)
245 18 QPEL3(QPEL_H, 16, 17, ssse3, sse2, ssse3, sse2)
246 18 QPEL3(QPEL_V, 8, 9, ssse3, sse2, ssse3, mmxext)
247 54 QPEL3(QPEL_HV, 8, 9, ssse3, sse2, ssse3, mmxext)
248 18 QPEL3(QPEL_V, 16, 17, ssse3, sse2, ssse3, sse2)
249 54 QPEL3(QPEL_HV, 16, 17, ssse3, sse2, ssse3, sse2)
250
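Note: these six instantiations appear to be where gcov attributes all of the macro-generated wrappers: each QPEL3(QPEL_H, ...) or QPEL3(QPEL_V, ...) line yields 9 static functions and each QPEL3(QPEL_HV, ...) line 27, i.e. 18 + 18 + 54 = 90, which together with ff_qpeldsp_init_x86 would account for the 91 functions reported in the header despite only 24 executable lines.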
251 #define SET_QPEL_FUNC(OP, X, Y, SIZE, CPU, PREFIX) \
252 c->OP ## _qpel_pixels_tab[SIZE == 8][X+4*Y] = PREFIX ## OP ## _qpel ## SIZE ## _mc ## X ## Y ## _ ## CPU
253
254 #define SET_QPEL_FUNCS3(X, Y, SIZE, CPU, PREFIX) \
255 SET_QPEL_FUNC(avg, X, Y, SIZE, CPU, PREFIX); \
256 SET_QPEL_FUNC(put, X, Y, SIZE, CPU, PREFIX); \
257 SET_QPEL_FUNC(put_no_rnd, X, Y, SIZE, CPU, PREFIX)
258
259 #define SET_H_QPEL_FUNCS(SIZE, CPU, PREFIX) \
260 SET_QPEL_FUNCS3(1, 0, SIZE, CPU, PREFIX); \
261 SET_QPEL_FUNCS3(2, 0, SIZE, CPU, PREFIX); \
262 SET_QPEL_FUNCS3(3, 0, SIZE, CPU, PREFIX)
263
264 #define SET_V_QPEL_FUNCS(SIZE, CPU, PREFIX) \
265 SET_QPEL_FUNCS3(0, 1, SIZE, CPU, PREFIX); \
266 SET_QPEL_FUNCS3(0, 2, SIZE, CPU, PREFIX); \
267 SET_QPEL_FUNCS3(0, 3, SIZE, CPU, PREFIX)
268
269 #define SET_HV_QPEL_FUNCS(SIZE, CPU, PREFIX) \
270 SET_QPEL_FUNCS3(1, 1, SIZE, CPU, PREFIX); \
271 SET_QPEL_FUNCS3(1, 2, SIZE, CPU, PREFIX); \
272 SET_QPEL_FUNCS3(1, 3, SIZE, CPU, PREFIX); \
273 SET_QPEL_FUNCS3(2, 1, SIZE, CPU, PREFIX); \
274 SET_QPEL_FUNCS3(2, 2, SIZE, CPU, PREFIX); \
275 SET_QPEL_FUNCS3(2, 3, SIZE, CPU, PREFIX); \
276 SET_QPEL_FUNCS3(3, 1, SIZE, CPU, PREFIX); \
277 SET_QPEL_FUNCS3(3, 2, SIZE, CPU, PREFIX); \
278 SET_QPEL_FUNCS3(3, 3, SIZE, CPU, PREFIX)
279
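Note: SET_*_QPEL_FUNCS only cover the filtered phases; the mc00 (fullpel) entries are filled separately in the init function below. The tables are indexed as [SIZE == 8][X + 4*Y], so row 0 holds the 16x16 kernels, row 1 the 8x8 kernels, and the 4x4 grid of quarter-pel phases is flattened row-major. A hypothetical caller (copy_block_qpel and its arguments are not part of this file) would dispatch like this:

    /* mx, my: quarter-pel motion vector components; their low two bits
     * are the phase that picks one of the 16 mcXY kernels. */
    static void copy_block_qpel(const QpelDSPContext *c, uint8_t *dst,
                                const uint8_t *src, ptrdiff_t stride,
                                int mx, int my, int is_8x8)
    {
        c->put_qpel_pixels_tab[is_8x8][(mx & 3) + 4 * (my & 3)](dst, src, stride);
    }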
280 298 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
281 {
282 298 int cpu_flags = av_get_cpu_flags();
283
284 2/2 298 if (X86_MMXEXT(cpu_flags)) {
    (✓ Branch 0 taken 48 times; ✓ Branch 1 taken 250 times)
285 #if HAVE_MMXEXT_EXTERNAL
286 48 c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
287 #endif /* HAVE_MMXEXT_EXTERNAL */
288 }
289 #if HAVE_SSE2_EXTERNAL
290 2/2 298 if (EXTERNAL_SSE2(cpu_flags)) {
    (✓ Branch 0 taken 46 times; ✓ Branch 1 taken 252 times)
291 46 c->put_no_rnd_qpel_pixels_tab[0][0] =
292 46 c->put_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
293 46 c->put_no_rnd_qpel_pixels_tab[1][0] =
294 46 c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_sse2;
295 46 c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
296
297 46 SET_V_QPEL_FUNCS (16, sse2,);
298 46 SET_V_QPEL_FUNCS (8, sse2,);
299 }
300 #endif
301 2/2 298 if (EXTERNAL_SSSE3(cpu_flags)) {
    (✓ Branch 0 taken 44 times; ✓ Branch 1 taken 254 times)
302 44 SET_H_QPEL_FUNCS(8, ssse3,);
303 44 SET_HV_QPEL_FUNCS(8, ssse3,);
304 44 SET_H_QPEL_FUNCS(16, ssse3,);
305 44 SET_HV_QPEL_FUNCS(16, ssse3,);
306 }
307 298 }
308 FF_VISIBILITY_POP_HIDDEN
309
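Note: consumers do not call ff_qpeldsp_init_x86 directly; the generic ff_qpeldsp_init in libavcodec/qpeldsp.c installs the C versions first and then lets this function overwrite entries according to av_get_cpu_flags(). A minimal usage sketch, assuming an FFmpeg build tree and hypothetical dst/src buffers:

    #include "libavcodec/qpeldsp.h"

    static void interpolate_8x8_halfpel_h(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t stride)
    {
        QpelDSPContext qdsp;
        ff_qpeldsp_init(&qdsp);           /* picks C or SIMD per CPU */
        /* 8x8 block, phase (2,0): horizontal half-pel lowpass */
        qdsp.put_qpel_pixels_tab[1][2 + 4*0](dst, src, stride);
    }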