Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Half-pel DSP functions. | ||
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | ||
7 | * | ||
8 | * This file is part of FFmpeg. | ||
9 | * | ||
10 | * FFmpeg is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2.1 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * FFmpeg is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with FFmpeg; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * @file | ||
27 | * Half-pel DSP functions. | ||
28 | */ | ||
29 | |||
30 | #include "libavutil/attributes.h" | ||
31 | #include "libavutil/intreadwrite.h" | ||
32 | #include "hpeldsp.h" | ||
33 | |||
34 | #define BIT_DEPTH 8 | ||
35 | #include "hpel_template.c" | ||
36 | #include "pel_template.c" | ||
37 | |||
38 | #define PIXOP2(OPNAME, OP) /* Expands to the OPNAME-prefixed 8-bit half-pel helpers that follow; OP(dst, val) is the store step applied to each result. */ \ | ||
39 | static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \ | ||
40 | const uint8_t *src1, \ | ||
41 | const uint8_t *src2, \ | ||
42 | int dst_stride, \ | ||
43 | int src_stride1, \ | ||
44 | int src_stride2, \ | ||
45 | int h) \ | ||
46 | { /* For each of h rows, combines 8 bytes from src1 with 8 bytes from src2 through no_rnd_avg32() and hands the result to OP, one 32-bit word at a time. */ \ | ||
47 | int i; \ | ||
48 | /* Each buffer advances by its own stride (declared as plain int here). dst is written through a uint32_t lvalue; presumably callers keep it suitably aligned - TODO confirm. */ \ | ||
49 | for (i = 0; i < h; i++) { \ | ||
50 | uint32_t a, b; \ | ||
51 | a = AV_RN32(&src1[i * src_stride1]); /* bytes 0..3 of the row */ \ | ||
52 | b = AV_RN32(&src2[i * src_stride2]); \ | ||
53 | OP(*((uint32_t *) &dst[i * dst_stride]), \ | ||
54 | no_rnd_avg32(a, b)); \ | ||
55 | a = AV_RN32(&src1[i * src_stride1 + 4]); /* bytes 4..7 of the row */ \ | ||
56 | b = AV_RN32(&src2[i * src_stride2 + 4]); \ | ||
57 | OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ | ||
58 | no_rnd_avg32(a, b)); \ | ||
59 | } \ | ||
60 | } \ | ||
61 | /* pixels8 "x2" wrapper without rounding: pairs each source byte with the byte at pixels + 1 by forwarding to the _no_rnd_pixels8_l2_8 helper, passing line_size for all three strides (the helper declares them as int). */ \ | ||
62 | static inline void OPNAME ## _no_rnd_pixels8_x2_8_c(uint8_t *block, \ | ||
63 | const uint8_t *pixels, \ | ||
64 | ptrdiff_t line_size, \ | ||
65 | int h) \ | ||
66 | { \ | ||
67 | OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + 1, \ | ||
68 | line_size, line_size, line_size, h); \ | ||
69 | } \ | ||
70 | /* pixels8 "x2" wrapper: pairs pixels with pixels + 1 and forwards to OPNAME ## _pixels8_l2_8 with line_size for all three strides. That helper is not defined in this part of the file (presumably it comes from an included template - confirm). */ \ | ||
71 | static inline void OPNAME ## _pixels8_x2_8_c(uint8_t *block, \ | ||
72 | const uint8_t *pixels, \ | ||
73 | ptrdiff_t line_size, \ | ||
74 | int h) \ | ||
75 | { \ | ||
76 | OPNAME ## _pixels8_l2_8(block, pixels, pixels + 1, \ | ||
77 | line_size, line_size, line_size, h); \ | ||
78 | } \ | ||
79 | /* pixels8 "y2" wrapper without rounding: pairs each source row with the row at pixels + line_size by forwarding to the _no_rnd_pixels8_l2_8 helper, passing line_size for all three strides. */ \ | ||
80 | static inline void OPNAME ## _no_rnd_pixels8_y2_8_c(uint8_t *block, \ | ||
81 | const uint8_t *pixels, \ | ||
82 | ptrdiff_t line_size, \ | ||
83 | int h) \ | ||
84 | { \ | ||
85 | OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + line_size, \ | ||
86 | line_size, line_size, line_size, h); \ | ||
87 | } \ | ||
88 | /* pixels8 "y2" wrapper: pairs pixels with pixels + line_size (the next row) and forwards to OPNAME ## _pixels8_l2_8, which is not defined in this part of the file (presumably from an included template - confirm). */ \ | ||
89 | static inline void OPNAME ## _pixels8_y2_8_c(uint8_t *block, \ | ||
90 | const uint8_t *pixels, \ | ||
91 | ptrdiff_t line_size, \ | ||
92 | int h) \ | ||
93 | { \ | ||
94 | OPNAME ## _pixels8_l2_8(block, pixels, pixels + line_size, \ | ||
95 | line_size, line_size, line_size, h); \ | ||
96 | } \ | ||
97 | /* pixels4 "x2" wrapper: pairs pixels with pixels + 1 and forwards to OPNAME ## _pixels4_l2_8 with line_size for all three strides. The helper is defined outside this part of the file - confirm its contract there. */ \ | ||
98 | static inline void OPNAME ## _pixels4_x2_8_c(uint8_t *block, \ | ||
99 | const uint8_t *pixels, \ | ||
100 | ptrdiff_t line_size, \ | ||
101 | int h) \ | ||
102 | { \ | ||
103 | OPNAME ## _pixels4_l2_8(block, pixels, pixels + 1, \ | ||
104 | line_size, line_size, line_size, h); \ | ||
105 | } \ | ||
106 | /* pixels4 "y2" wrapper: pairs pixels with pixels + line_size (the next row) and forwards to OPNAME ## _pixels4_l2_8 with line_size for all three strides. The helper is defined outside this part of the file. */ \ | ||
107 | static inline void OPNAME ## _pixels4_y2_8_c(uint8_t *block, \ | ||
108 | const uint8_t *pixels, \ | ||
109 | ptrdiff_t line_size, \ | ||
110 | int h) \ | ||
111 | { \ | ||
112 | OPNAME ## _pixels4_l2_8(block, pixels, pixels + line_size, \ | ||
113 | line_size, line_size, line_size, h); \ | ||
114 | } \ | ||
115 | /* pixels2 "x2" wrapper: pairs pixels with pixels + 1 and forwards to OPNAME ## _pixels2_l2_8 with line_size for all three strides. The helper is defined outside this part of the file. */ \ | ||
116 | static inline void OPNAME ## _pixels2_x2_8_c(uint8_t *block, \ | ||
117 | const uint8_t *pixels, \ | ||
118 | ptrdiff_t line_size, \ | ||
119 | int h) \ | ||
120 | { \ | ||
121 | OPNAME ## _pixels2_l2_8(block, pixels, pixels + 1, \ | ||
122 | line_size, line_size, line_size, h); \ | ||
123 | } \ | ||
124 | /* pixels2 "y2" wrapper: pairs pixels with pixels + line_size (the next row) and forwards to OPNAME ## _pixels2_l2_8 with line_size for all three strides. The helper is defined outside this part of the file. */ \ | ||
125 | static inline void OPNAME ## _pixels2_y2_8_c(uint8_t *block, \ | ||
126 | const uint8_t *pixels, \ | ||
127 | ptrdiff_t line_size, \ | ||
128 | int h) \ | ||
129 | { \ | ||
130 | OPNAME ## _pixels2_l2_8(block, pixels, pixels + line_size, \ | ||
131 | line_size, line_size, line_size, h); \ | ||
132 | } \ | ||
133 | /* pixels2 "xy2" case: each of the two output bytes per row is (sum of its 2x2 source neighbourhood + 2) >> 2. Results are assigned directly, without OP, so every OPNAME instantiation stores the plain value (see the FIXME below). */ \ | ||
134 | static inline void OPNAME ## _pixels2_xy2_8_c(uint8_t *block, \ | ||
135 | const uint8_t *pixels, \ | ||
136 | ptrdiff_t line_size, \ | ||
137 | int h) \ | ||
138 | { \ | ||
139 | int i, a1, b1; \ | ||
140 | int a0 = pixels[0]; \ | ||
141 | int b0 = pixels[1] + 2; /* the +2 rounding term is carried in the a0/b0 row sums only */ \ | ||
142 | \ | ||
143 | a0 += b0; /* a0 = pixels[0] + pixels[1] + 2 */ \ | ||
144 | b0 += pixels[2]; /* b0 = pixels[1] + pixels[2] + 2 */ \ | ||
145 | pixels += line_size; \ | ||
146 | for (i = 0; i < h; i += 2) { /* two output rows per iteration */ \ | ||
147 | a1 = pixels[0]; \ | ||
148 | b1 = pixels[1]; \ | ||
149 | a1 += b1; \ | ||
150 | b1 += pixels[2]; \ | ||
151 | \ | ||
152 | block[0] = (a1 + a0) >> 2; /* FIXME non put */ \ | ||
153 | block[1] = (b1 + b0) >> 2; \ | ||
154 | \ | ||
155 | pixels += line_size; \ | ||
156 | block += line_size; \ | ||
157 | \ | ||
158 | a0 = pixels[0]; /* refresh the a0/b0 sums from the next source row; a1/b1 are reused for the second output row */ \ | ||
159 | b0 = pixels[1] + 2; \ | ||
160 | a0 += b0; \ | ||
161 | b0 += pixels[2]; \ | ||
162 | \ | ||
163 | block[0] = (a1 + a0) >> 2; \ | ||
164 | block[1] = (b1 + b0) >> 2; \ | ||
165 | pixels += line_size; \ | ||
166 | block += line_size; \ | ||
167 | } \ | ||
168 | } \ | ||
169 | /* pixels4 "xy2" case: averages the 2x2 neighbourhood of four output bytes at once inside one 32-bit word. Per byte lane, l* sums the low 2 bits and h* sums the upper 6 bits (pre-shifted by 2), so h0 + h1 + ((l0 + l1) >> 2) equals (sum of four bytes + 2) >> 2; the 0x0F0F0F0F mask drops bits shifted in from the neighbouring lane. */ \ | ||
170 | static inline void OPNAME ## _pixels4_xy2_8_c(uint8_t *block, \ | ||
171 | const uint8_t *pixels, \ | ||
172 | ptrdiff_t line_size, \ | ||
173 | int h) \ | ||
174 | { \ | ||
175 | /* FIXME HIGH BIT DEPTH */ \ | ||
176 | int i; \ | ||
177 | const uint32_t a = AV_RN32(pixels); \ | ||
178 | const uint32_t b = AV_RN32(pixels + 1); \ | ||
179 | uint32_t l0 = (a & 0x03030303UL) + \ | ||
180 | (b & 0x03030303UL) + \ | ||
181 | 0x02020202UL; /* +2 per byte lane: the rounding term */ \ | ||
182 | uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
183 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
184 | uint32_t l1, h1; \ | ||
185 | \ | ||
186 | pixels += line_size; \ | ||
187 | for (i = 0; i < h; i += 2) { /* two output rows per iteration */ \ | ||
188 | uint32_t a = AV_RN32(pixels); /* shadows the const a and b declared before the loop */ \ | ||
189 | uint32_t b = AV_RN32(pixels + 1); \ | ||
190 | l1 = (a & 0x03030303UL) + \ | ||
191 | (b & 0x03030303UL); \ | ||
192 | h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
193 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
194 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
195 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
196 | pixels += line_size; \ | ||
197 | block += line_size; \ | ||
198 | a = AV_RN32(pixels); /* next row refreshes l0/h0; l1/h1 are reused for the second output row */ \ | ||
199 | b = AV_RN32(pixels + 1); \ | ||
200 | l0 = (a & 0x03030303UL) + \ | ||
201 | (b & 0x03030303UL) + \ | ||
202 | 0x02020202UL; \ | ||
203 | h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
204 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
205 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
206 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
207 | pixels += line_size; \ | ||
208 | block += line_size; \ | ||
209 | } \ | ||
210 | } \ | ||
211 | /* pixels8 "xy2" case: processes the block as two 4-byte columns (j loop), each using the word-at-a-time 2x2 average h0 + h1 + ((l0 + l1) >> 2), where l* sums the low 2 bits and h* the pre-shifted upper 6 bits of every byte lane. The rounding term is 0x02020202, i.e. +2 per byte before the >> 2. */ \ | ||
212 | static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \ | ||
213 | const uint8_t *pixels, \ | ||
214 | ptrdiff_t line_size, \ | ||
215 | int h) \ | ||
216 | { \ | ||
217 | /* FIXME HIGH BIT DEPTH */ \ | ||
218 | int j; \ | ||
219 | \ | ||
220 | for (j = 0; j < 2; j++) { /* j = 0: bytes 0..3, j = 1: bytes 4..7 */ \ | ||
221 | int i; \ | ||
222 | const uint32_t a = AV_RN32(pixels); \ | ||
223 | const uint32_t b = AV_RN32(pixels + 1); \ | ||
224 | uint32_t l0 = (a & 0x03030303UL) + \ | ||
225 | (b & 0x03030303UL) + \ | ||
226 | 0x02020202UL; \ | ||
227 | uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
228 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
229 | uint32_t l1, h1; \ | ||
230 | \ | ||
231 | pixels += line_size; \ | ||
232 | for (i = 0; i < h; i += 2) { /* two output rows per iteration */ \ | ||
233 | uint32_t a = AV_RN32(pixels); /* shadows the const a and b of the enclosing scope */ \ | ||
234 | uint32_t b = AV_RN32(pixels + 1); \ | ||
235 | l1 = (a & 0x03030303UL) + \ | ||
236 | (b & 0x03030303UL); \ | ||
237 | h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
238 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
239 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
240 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
241 | pixels += line_size; \ | ||
242 | block += line_size; \ | ||
243 | a = AV_RN32(pixels); \ | ||
244 | b = AV_RN32(pixels + 1); \ | ||
245 | l0 = (a & 0x03030303UL) + \ | ||
246 | (b & 0x03030303UL) + \ | ||
247 | 0x02020202UL; \ | ||
248 | h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
249 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
250 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
251 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
252 | pixels += line_size; \ | ||
253 | block += line_size; \ | ||
254 | } \ | ||
255 | pixels += 4 - line_size * (h + 1); /* rewind to the top row and step 4 bytes right; for even h, h + 1 source rows were consumed */ \ | ||
256 | block += 4 - line_size * h; \ | ||
257 | } \ | ||
258 | } \ | ||
259 | /* pixels8 "xy2" case with the smaller rounding term: same two-column, word-at-a-time 2x2 average as the rounding variant, but 0x01010101 is added instead of 0x02020202, i.e. each output byte is (sum of four bytes + 1) >> 2. */ \ | ||
260 | static inline void OPNAME ## _no_rnd_pixels8_xy2_8_c(uint8_t *block, \ | ||
261 | const uint8_t *pixels, \ | ||
262 | ptrdiff_t line_size, \ | ||
263 | int h) \ | ||
264 | { \ | ||
265 | /* FIXME HIGH BIT DEPTH */ \ | ||
266 | int j; \ | ||
267 | \ | ||
268 | for (j = 0; j < 2; j++) { /* j = 0: bytes 0..3, j = 1: bytes 4..7 */ \ | ||
269 | int i; \ | ||
270 | const uint32_t a = AV_RN32(pixels); \ | ||
271 | const uint32_t b = AV_RN32(pixels + 1); \ | ||
272 | uint32_t l0 = (a & 0x03030303UL) + \ | ||
273 | (b & 0x03030303UL) + \ | ||
274 | 0x01010101UL; /* +1 per byte lane instead of +2 */ \ | ||
275 | uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
276 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
277 | uint32_t l1, h1; \ | ||
278 | \ | ||
279 | pixels += line_size; \ | ||
280 | for (i = 0; i < h; i += 2) { /* two output rows per iteration */ \ | ||
281 | uint32_t a = AV_RN32(pixels); /* shadows the const a and b of the enclosing scope */ \ | ||
282 | uint32_t b = AV_RN32(pixels + 1); \ | ||
283 | l1 = (a & 0x03030303UL) + \ | ||
284 | (b & 0x03030303UL); \ | ||
285 | h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
286 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
287 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
288 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
289 | pixels += line_size; \ | ||
290 | block += line_size; \ | ||
291 | a = AV_RN32(pixels); \ | ||
292 | b = AV_RN32(pixels + 1); \ | ||
293 | l0 = (a & 0x03030303UL) + \ | ||
294 | (b & 0x03030303UL) + \ | ||
295 | 0x01010101UL; \ | ||
296 | h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ | ||
297 | ((b & 0xFCFCFCFCUL) >> 2); \ | ||
298 | OP(*((uint32_t *) block), h0 + h1 + \ | ||
299 | (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ | ||
300 | pixels += line_size; \ | ||
301 | block += line_size; \ | ||
302 | } \ | ||
303 | pixels += 4 - line_size * (h + 1); /* rewind to the top row and step 4 bytes right; for even h, h + 1 source rows were consumed */ \ | ||
304 | block += 4 - line_size * h; \ | ||
305 | } \ | ||
306 | } \ | ||
307 | /* pixels16 entry points: each CALL_2X_PIXELS(name16, name8, 8) pairs a 16-wide name with the matching 8-wide routine. CALL_2X_PIXELS is defined outside this file part; presumably it emits name16 as two calls of name8 offset by 8 bytes - confirm against its definition. */ \ | ||
308 | CALL_2X_PIXELS(OPNAME ## _pixels16_x2_8_c, \ | ||
309 | OPNAME ## _pixels8_x2_8_c, \ | ||
310 | 8) \ | ||
311 | CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \ | ||
312 | OPNAME ## _pixels8_y2_8_c, \ | ||
313 | 8) \ | ||
314 | CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \ | ||
315 | OPNAME ## _pixels8_xy2_8_c, \ | ||
316 | 8) \ | ||
317 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, /* note: built on the plain _pixels8_8_c routine, not on a _no_rnd one */ \ | ||
318 | OPNAME ## _pixels8_8_c, \ | ||
319 | 8) \ | ||
320 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \ | ||
321 | OPNAME ## _no_rnd_pixels8_x2_8_c, \ | ||
322 | 8) \ | ||
323 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_8_c, \ | ||
324 | OPNAME ## _no_rnd_pixels8_y2_8_c, \ | ||
325 | 8) \ | ||
326 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \ | ||
327 | OPNAME ## _no_rnd_pixels8_xy2_8_c, \ | ||
328 | 8) \ | ||
329 | |||
330 | #define op_avg(a, b) a = rnd_avg32(a, b) /* store step for the avg family: replaces a with rnd_avg32() of its current value and b */ | ||
331 | #define op_put(a, b) a = b /* store step for the put family: plain overwrite */ | ||
332 | #define put_no_rnd_pixels8_8_c put_pixels8_8_c /* alias: the put_no_rnd name resolves to put_pixels8_8_c */ | ||
333 |
6/6✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 55180232 times.
✓ Branch 3 taken 8009651 times.
✓ Branch 4 taken 8036292 times.
✓ Branch 5 taken 4006470 times.
|
161051786 | PIXOP2(avg, op_avg) /* instantiates the avg_* routines with op_avg as the store step */ |
334 |
4/4✓ Branch 0 taken 158358844 times.
✓ Branch 1 taken 25978446 times.
✓ Branch 2 taken 65792264 times.
✓ Branch 3 taken 16410420 times.
|
538606056 | PIXOP2(put, op_put) /* instantiates the put_* routines with op_put as the store step */ |
335 | #undef op_avg | ||
336 | #undef op_put | ||
337 | |||
338 | 1136 | av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) | |
339 | { /* Fills the pixel-function tables of *c with the C routines from this file, then calls the init routine of whichever ARCH_* branch is enabled, passing c and flags through unchanged. */ | ||
340 | #define hpel_funcs(prefix, idx, num) /* sets entries [0]..[3] of the prefix _pixels_tab (sub-indexed by idx when given) to the plain, _x2, _y2 and _xy2 routines of size num */ \ | ||
341 | c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \ | ||
342 | c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \ | ||
343 | c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \ | ||
344 | c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c | ||
345 | |||
346 | 1136 | hpel_funcs(put, [0], 16); /* put: sizes 16, 8, 4, 2 in slots [0]..[3] */ | |
347 | 1136 | hpel_funcs(put, [1], 8); | |
348 | 1136 | hpel_funcs(put, [2], 4); | |
349 | 1136 | hpel_funcs(put, [3], 2); | |
350 | 1136 | hpel_funcs(put_no_rnd, [0], 16); /* put_no_rnd: only sizes 16 and 8 are filled here */ | |
351 | 1136 | hpel_funcs(put_no_rnd, [1], 8); | |
352 | 1136 | hpel_funcs(avg, [0], 16); /* avg: sizes 16, 8, 4, 2 in slots [0]..[3] */ | |
353 | 1136 | hpel_funcs(avg, [1], 8); | |
354 | 1136 | hpel_funcs(avg, [2], 4); | |
355 | 1136 | hpel_funcs(avg, [3], 2); | |
356 | 1136 | hpel_funcs(avg_no_rnd,, 16); /* empty idx: this table is indexed directly by variant, with no size slot */ | |
357 | |||
358 | #if ARCH_AARCH64 /* architecture-specific init: at most one branch of this chain is compiled in */ | ||
359 | ff_hpeldsp_init_aarch64(c, flags); | ||
360 | #elif ARCH_ARM | ||
361 | ff_hpeldsp_init_arm(c, flags); | ||
362 | #elif ARCH_PPC | ||
363 | ff_hpeldsp_init_ppc(c, flags); | ||
364 | #elif ARCH_X86 | ||
365 | 1136 | ff_hpeldsp_init_x86(c, flags); | |
366 | #elif ARCH_MIPS | ||
367 | ff_hpeldsp_init_mips(c, flags); | ||
368 | #elif ARCH_LOONGARCH64 | ||
369 | ff_hpeldsp_init_loongarch(c, flags); | ||
370 | #endif | ||
371 | 1136 | } | |
372 |