1 |
|
|
/* |
2 |
|
|
* Half-pel DSP functions. |
3 |
|
|
* Copyright (c) 2000, 2001 Fabrice Bellard |
4 |
|
|
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 |
|
|
* |
6 |
|
|
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 |
|
|
* |
8 |
|
|
* This file is part of FFmpeg. |
9 |
|
|
* |
10 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
11 |
|
|
* modify it under the terms of the GNU Lesser General Public |
12 |
|
|
* License as published by the Free Software Foundation; either |
13 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
14 |
|
|
* |
15 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
16 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 |
|
|
* Lesser General Public License for more details. |
19 |
|
|
* |
20 |
|
|
* You should have received a copy of the GNU Lesser General Public |
21 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
22 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23 |
|
|
*/ |
24 |
|
|
|
25 |
|
|
/** |
26 |
|
|
* @file |
27 |
|
|
* Half-pel DSP functions. |
28 |
|
|
*/ |
29 |
|
|
|
30 |
|
|
#include "libavutil/attributes.h" |
31 |
|
|
#include "libavutil/intreadwrite.h" |
32 |
|
|
#include "hpeldsp.h" |
33 |
|
|
|
34 |
|
|
#define BIT_DEPTH 8 |
35 |
|
|
#include "hpel_template.c" |
36 |
|
|
#include "pel_template.c" |
37 |
|
|
|
38 |
|
|
/*
 * PIXOP2(OPNAME, OP) - emit the 8-bit half-pel helpers for one store mode.
 *
 * OPNAME is pasted in front of every generated function name. OP(dst, val)
 * is the statement macro used to commit a packed 32-bit result to the
 * destination. AV_RN32(), no_rnd_avg32(), CALL_2X_PIXELS() and the
 * OPNAME_pixels{2,4,8}_l2_8 / OPNAME_pixels8_8_c functions are not defined
 * by this macro and must already be visible where it is expanded.
 *
 * All xy2 functions consume and produce rows in pairs: the loop counter
 * advances by two and two destination rows are written per iteration.
 *
 * The packed xy2 variants split every byte into its low 2 bits (0x03 mask)
 * and its high 6 bits (0xFC mask, shifted right by 2), so the four
 * neighbours of each output pixel can be summed inside one 32-bit word
 * without a carry leaking into the adjacent byte lane.
 */
#define PIXOP2(OPNAME, OP) \
/* Combine 8 bytes per row of src1 and src2 through no_rnd_avg32(). */ \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
                                                  const uint8_t *src1, \
                                                  const uint8_t *src2, \
                                                  int dst_stride, \
                                                  int src_stride1, \
                                                  int src_stride2, \
                                                  int h) \
{ \
    int row; \
\
    for (row = 0; row < h; row++) { \
        const uint8_t *in1 = &src1[row * src_stride1]; \
        const uint8_t *in2 = &src2[row * src_stride2]; \
        uint8_t *out       = &dst[row * dst_stride]; \
        uint32_t w1, w2; \
\
        /* bytes 0..3 of the row */ \
        w1 = AV_RN32(in1); \
        w2 = AV_RN32(in2); \
        OP(*((uint32_t *) out), no_rnd_avg32(w1, w2)); \
\
        /* bytes 4..7 of the row */ \
        w1 = AV_RN32(in1 + 4); \
        w2 = AV_RN32(in2 + 4); \
        OP(*((uint32_t *) (out + 4)), no_rnd_avg32(w1, w2)); \
    } \
} \
\
/* 8 wide, no_rnd: each pixel paired with the one to its right. */ \
static inline void OPNAME ## _no_rnd_pixels8_x2_8_c(uint8_t *block, \
                                                    const uint8_t *pixels, \
                                                    ptrdiff_t line_size, \
                                                    int h) \
{ \
    const uint8_t *right = pixels + 1; \
\
    OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, right, \
                                   line_size, line_size, line_size, h); \
} \
\
/* 8 wide: each pixel paired with the one to its right. */ \
static inline void OPNAME ## _pixels8_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *right = pixels + 1; \
\
    OPNAME ## _pixels8_l2_8(block, pixels, right, \
                            line_size, line_size, line_size, h); \
} \
\
/* 8 wide, no_rnd: each pixel paired with the one on the next row. */ \
static inline void OPNAME ## _no_rnd_pixels8_y2_8_c(uint8_t *block, \
                                                    const uint8_t *pixels, \
                                                    ptrdiff_t line_size, \
                                                    int h) \
{ \
    const uint8_t *below = pixels + line_size; \
\
    OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, below, \
                                   line_size, line_size, line_size, h); \
} \
\
/* 8 wide: each pixel paired with the one on the next row. */ \
static inline void OPNAME ## _pixels8_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *below = pixels + line_size; \
\
    OPNAME ## _pixels8_l2_8(block, pixels, below, \
                            line_size, line_size, line_size, h); \
} \
\
/* 4 wide: each pixel paired with the one to its right. */ \
static inline void OPNAME ## _pixels4_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *right = pixels + 1; \
\
    OPNAME ## _pixels4_l2_8(block, pixels, right, \
                            line_size, line_size, line_size, h); \
} \
\
/* 4 wide: each pixel paired with the one on the next row. */ \
static inline void OPNAME ## _pixels4_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *below = pixels + line_size; \
\
    OPNAME ## _pixels4_l2_8(block, pixels, below, \
                            line_size, line_size, line_size, h); \
} \
\
/* 2 wide: each pixel paired with the one to its right. */ \
static inline void OPNAME ## _pixels2_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *right = pixels + 1; \
\
    OPNAME ## _pixels2_l2_8(block, pixels, right, \
                            line_size, line_size, line_size, h); \
} \
\
/* 2 wide: each pixel paired with the one on the next row. */ \
static inline void OPNAME ## _pixels2_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    const uint8_t *below = pixels + line_size; \
\
    OPNAME ## _pixels2_l2_8(block, pixels, below, \
                            line_size, line_size, line_size, h); \
} \
\
/* 2 wide, 2x2 neighbourhood: (sum of four pixels + 2) >> 2, scalar code. */ \
/* The result is stored directly and does not go through OP. */ \
static inline void OPNAME ## _pixels2_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    int row; \
    int odd0, odd1; \
    /* Horizontal pair sums of the even source row, rounding term included. */ \
    int even0 = pixels[0] + pixels[1] + 2; \
    int even1 = pixels[1] + pixels[2] + 2; \
\
    pixels += line_size; \
    for (row = 0; row < h; row += 2) { \
        /* Horizontal pair sums of the odd source row, no rounding term. */ \
        odd0 = pixels[0] + pixels[1]; \
        odd1 = pixels[1] + pixels[2]; \
\
        block[0] = (odd0 + even0) >> 2; /* FIXME non put */ \
        block[1] = (odd1 + even1) >> 2; \
\
        pixels += line_size; \
        block  += line_size; \
\
        even0 = pixels[0] + pixels[1] + 2; \
        even1 = pixels[1] + pixels[2] + 2; \
\
        block[0] = (odd0 + even0) >> 2; \
        block[1] = (odd1 + even1) >> 2; \
\
        pixels += line_size; \
        block  += line_size; \
    } \
} \
\
/* 4 wide, 2x2 neighbourhood, four pixels per 32-bit word; bias 2 per byte. */ \
static inline void OPNAME ## _pixels4_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int row; \
    uint32_t left  = AV_RN32(pixels); \
    uint32_t right = AV_RN32(pixels + 1); \
    /* Even source rows carry the rounding bias in their low part. */ \
    uint32_t lo_even = (left  & 0x03030303UL) + \
                       (right & 0x03030303UL) + \
                       0x02020202UL; \
    uint32_t hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                       ((right & 0xFCFCFCFCUL) >> 2); \
    uint32_t lo_odd, hi_odd; \
\
    pixels += line_size; \
    for (row = 0; row < h; row += 2) { \
        left    = AV_RN32(pixels); \
        right   = AV_RN32(pixels + 1); \
        lo_odd  = (left  & 0x03030303UL) + \
                  (right & 0x03030303UL); \
        hi_odd  = ((left  & 0xFCFCFCFCUL) >> 2) + \
                  ((right & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) block), hi_even + hi_odd + \
           (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
        pixels += line_size; \
        block  += line_size; \
\
        left    = AV_RN32(pixels); \
        right   = AV_RN32(pixels + 1); \
        lo_even = (left  & 0x03030303UL) + \
                  (right & 0x03030303UL) + \
                  0x02020202UL; \
        hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                  ((right & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) block), hi_even + hi_odd + \
           (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
        pixels += line_size; \
        block  += line_size; \
    } \
} \
\
/* 8 wide, 2x2 neighbourhood: two 4-byte columns in turn; bias 2 per byte. */ \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int half; \
\
    for (half = 0; half < 2; half++) { \
        int row; \
        uint32_t left  = AV_RN32(pixels); \
        uint32_t right = AV_RN32(pixels + 1); \
        uint32_t lo_even = (left  & 0x03030303UL) + \
                           (right & 0x03030303UL) + \
                           0x02020202UL; \
        uint32_t hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                           ((right & 0xFCFCFCFCUL) >> 2); \
        uint32_t lo_odd, hi_odd; \
\
        pixels += line_size; \
        for (row = 0; row < h; row += 2) { \
            left    = AV_RN32(pixels); \
            right   = AV_RN32(pixels + 1); \
            lo_odd  = (left  & 0x03030303UL) + \
                      (right & 0x03030303UL); \
            hi_odd  = ((left  & 0xFCFCFCFCUL) >> 2) + \
                      ((right & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), hi_even + hi_odd + \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
\
            left    = AV_RN32(pixels); \
            right   = AV_RN32(pixels + 1); \
            lo_even = (left  & 0x03030303UL) + \
                      (right & 0x03030303UL) + \
                      0x02020202UL; \
            hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                      ((right & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), hi_even + hi_odd + \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
        } \
        /* Step back h + 1 source rows / h output rows, 4 bytes to the right. */ \
        pixels += 4 - line_size * (h + 1); \
        block  += 4 - line_size * h; \
    } \
} \
\
/* Same as OPNAME_pixels8_xy2_8_c but with a bias of 1 per byte instead of 2. */ \
static inline void OPNAME ## _no_rnd_pixels8_xy2_8_c(uint8_t *block, \
                                                     const uint8_t *pixels, \
                                                     ptrdiff_t line_size, \
                                                     int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int half; \
\
    for (half = 0; half < 2; half++) { \
        int row; \
        uint32_t left  = AV_RN32(pixels); \
        uint32_t right = AV_RN32(pixels + 1); \
        uint32_t lo_even = (left  & 0x03030303UL) + \
                           (right & 0x03030303UL) + \
                           0x01010101UL; \
        uint32_t hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                           ((right & 0xFCFCFCFCUL) >> 2); \
        uint32_t lo_odd, hi_odd; \
\
        pixels += line_size; \
        for (row = 0; row < h; row += 2) { \
            left    = AV_RN32(pixels); \
            right   = AV_RN32(pixels + 1); \
            lo_odd  = (left  & 0x03030303UL) + \
                      (right & 0x03030303UL); \
            hi_odd  = ((left  & 0xFCFCFCFCUL) >> 2) + \
                      ((right & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), hi_even + hi_odd + \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
\
            left    = AV_RN32(pixels); \
            right   = AV_RN32(pixels + 1); \
            lo_even = (left  & 0x03030303UL) + \
                      (right & 0x03030303UL) + \
                      0x01010101UL; \
            hi_even = ((left  & 0xFCFCFCFCUL) >> 2) + \
                      ((right & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), hi_even + hi_odd + \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
        } \
        /* Step back h + 1 source rows / h output rows, 4 bytes to the right. */ \
        pixels += 4 - line_size * (h + 1); \
        block  += 4 - line_size * h; \
    } \
} \
\
/* 16-wide entry points, produced by CALL_2X_PIXELS() from the 8-wide ones. */ \
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_8_c, \
               OPNAME ## _pixels8_x2_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \
               OPNAME ## _pixels8_y2_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \
               OPNAME ## _pixels8_xy2_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, \
               OPNAME ## _pixels8_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \
               OPNAME ## _no_rnd_pixels8_x2_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_8_c, \
               OPNAME ## _no_rnd_pixels8_y2_8_c, 8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \
               OPNAME ## _no_rnd_pixels8_xy2_8_c, 8)
329 |
|
|
|
330 |
|
|
#define op_avg(a, b) a = rnd_avg32(a, b) |
331 |
|
|
#define op_put(a, b) a = b |
332 |
|
|
#define put_no_rnd_pixels8_8_c put_pixels8_8_c |
333 |
✓✓✓✓ ✓✓ |
157076208 |
PIXOP2(avg, op_avg) |
334 |
✓✓✓✓
|
255296908 |
PIXOP2(put, op_put) |
335 |
|
|
#undef op_avg |
336 |
|
|
#undef op_put |
337 |
|
|
|
338 |
|
997 |
/**
 * Populate the function tables of an HpelDSPContext with the C
 * implementations from this file, then call the init routine of every
 * architecture whose ARCH_* build constant is non-zero, passing the same
 * context and flags (presumably so it can install optimized replacements;
 * those routines are defined elsewhere).
 *
 * @param c     context whose *_pixels_tab tables are filled in
 * @param flags forwarded unchanged to the architecture-specific routines
 */
av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
{
/*
 * Fill the four entries of one table row: [0] full-pel, [1] x2, [2] y2,
 * [3] xy2. `slot` is the (possibly empty) leading subscript and `width`
 * the block width that is part of the function names.
 */
#define hpel_funcs(kind, slot, width)                                        \
    do {                                                                     \
        c->kind ## _pixels_tab slot [0] = kind ## _pixels ## width ## _8_c;     \
        c->kind ## _pixels_tab slot [1] = kind ## _pixels ## width ## _x2_8_c;  \
        c->kind ## _pixels_tab slot [2] = kind ## _pixels ## width ## _y2_8_c;  \
        c->kind ## _pixels_tab slot [3] = kind ## _pixels ## width ## _xy2_8_c; \
    } while (0)

    /* put: widths 16, 8, 4 and 2 */
    hpel_funcs(put, [0], 16);
    hpel_funcs(put, [1], 8);
    hpel_funcs(put, [2], 4);
    hpel_funcs(put, [3], 2);

    /* put_no_rnd: widths 16 and 8 only */
    hpel_funcs(put_no_rnd, [0], 16);
    hpel_funcs(put_no_rnd, [1], 8);

    /* avg: widths 16, 8, 4 and 2 */
    hpel_funcs(avg, [0], 16);
    hpel_funcs(avg, [1], 8);
    hpel_funcs(avg, [2], 4);
    hpel_funcs(avg, [3], 2);

    /* avg_no_rnd: width 16 only; this table takes a single subscript */
    hpel_funcs(avg_no_rnd,, 16);

    /* Architecture hooks, in the same order as before. */
    if (ARCH_AARCH64) {
        ff_hpeldsp_init_aarch64(c, flags);
    }
    if (ARCH_ALPHA) {
        ff_hpeldsp_init_alpha(c, flags);
    }
    if (ARCH_ARM) {
        ff_hpeldsp_init_arm(c, flags);
    }
    if (ARCH_PPC) {
        ff_hpeldsp_init_ppc(c, flags);
    }
    if (ARCH_X86) {
        ff_hpeldsp_init_x86(c, flags);
    }
    if (ARCH_MIPS) {
        ff_hpeldsp_init_mips(c, flags);
    }
}