1 |
|
|
/* |
2 |
|
|
* thirdpel DSP functions |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
/** |
22 |
|
|
* @file |
23 |
|
|
* thirdpel DSP functions |
24 |
|
|
*/ |
25 |
|
|
|
26 |
|
|
#include <stdint.h> |
27 |
|
|
|
28 |
|
|
#include "libavutil/attributes.h" |
29 |
|
|
#include "tpeldsp.h" |
30 |
|
|
|
31 |
|
|
#define BIT_DEPTH 8 |
32 |
|
|
#include "pel_template.c" |
33 |
|
|
|
34 |
|
24 |
/**
 * mc00 "put" entry: hands the whole block to the fixed-width
 * put_pixelsN_8_c helper whose N equals width.
 *
 * Only widths 2, 4, 8 and 16 are dispatched; for any other width the
 * function returns without calling a helper, so dst is left as it was.
 *
 * NOTE(review): the helpers are not defined in this file's visible text
 * (presumably they come from pel_template.c and perform a plain copy) --
 * confirm there.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride passed through unchanged to the helper
 * @param width  block width selecting the helper
 * @param height passed through unchanged to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
    /* any other width: intentionally nothing to do */
}
52 |
|
|
|
53 |
|
66 |
/**
 * Horizontal blend of each sample with its right-hand neighbour, weighted
 * 2:1 towards the left sample.
 *
 * For every row below height and every column x below width, stores
 * ((2 * src[x] + src[x + 1] + 1) * 683) >> 11 into dst[x]. The factor
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source column past
 * width is read in each row. src and dst both advance by stride per row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + 1] + 1;

            dst[col] = (sum * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
66 |
|
|
|
67 |
|
66 |
/**
 * Horizontal blend of each sample with its right-hand neighbour, weighted
 * 1:2 towards the right sample.
 *
 * For every row below height and every column x below width, stores
 * ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11 into dst[x]. The factor
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source column past
 * width is read in each row. src and dst both advance by stride per row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + 1] + 1;

            dst[col] = (sum * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
80 |
|
|
|
81 |
|
30 |
/**
 * Vertical blend of each sample with the one a stride further on, weighted
 * 2:1 towards the current row.
 *
 * For every row below height and every column x below width, stores
 * ((2 * src[x] + src[x + stride] + 1) * 683) >> 11 into dst[x]. The factor
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source row past height
 * is read. src and dst both advance by stride per row.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + below[col] + 1;

            dst[col] = (sum * 683) >> 11;
        }
        src  = below;
        dst += stride;
    }
}
94 |
|
|
|
95 |
|
57 |
/**
 * Two-dimensional blend of a 2x2 source neighbourhood with weights
 *     4 3
 *     3 2
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, stores
 * ((weighted sum + 6) * 2731) >> 15 into dst[x]. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 4 * src[col]   + 3 * src[col + 1] +
                            3 * below[col] + 2 * below[col + 1] + 6;

            dst[col] = (sum * 2731) >> 15;
        }
        src  = below;
        dst += stride;
    }
}
109 |
|
|
|
110 |
|
75 |
/**
 * Two-dimensional blend of a 2x2 source neighbourhood with weights
 *     3 2
 *     4 3
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, stores
 * ((weighted sum + 6) * 2731) >> 15 into dst[x]. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col]   + 2 * src[col + 1] +
                            4 * below[col] + 3 * below[col + 1] + 6;

            dst[col] = (sum * 2731) >> 15;
        }
        src  = below;
        dst += stride;
    }
}
124 |
|
|
|
125 |
|
45 |
/**
 * Vertical blend of each sample with the one a stride further on, weighted
 * 1:2 towards the following row.
 *
 * For every row below height and every column x below width, stores
 * ((src[x] + 2 * src[x + stride] + 1) * 683) >> 11 into dst[x]. The factor
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source row past height
 * is read. src and dst both advance by stride per row.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * below[col] + 1;

            dst[col] = (sum * 683) >> 11;
        }
        src  = below;
        dst += stride;
    }
}
138 |
|
|
|
139 |
|
72 |
/**
 * Two-dimensional blend of a 2x2 source neighbourhood with weights
 *     3 4
 *     2 3
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, stores
 * ((weighted sum + 6) * 2731) >> 15 into dst[x]. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col]   + 4 * src[col + 1] +
                            2 * below[col] + 3 * below[col + 1] + 6;

            dst[col] = (sum * 2731) >> 15;
        }
        src  = below;
        dst += stride;
    }
}
153 |
|
|
|
154 |
|
72 |
/**
 * Two-dimensional blend of a 2x2 source neighbourhood with weights
 *     2 3
 *     3 4
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, stores
 * ((weighted sum + 6) * 2731) >> 15 into dst[x]. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col]   + 3 * src[col + 1] +
                            3 * below[col] + 4 * below[col + 1] + 6;

            dst[col] = (sum * 2731) >> 15;
        }
        src  = below;
        dst += stride;
    }
}
168 |
|
|
|
169 |
|
3 |
/**
 * mc00 "avg" entry: hands the whole block to the fixed-width
 * avg_pixelsN_8_c helper whose N equals width.
 *
 * Only widths 2, 4, 8 and 16 are dispatched; for any other width the
 * function returns without calling a helper, so dst is left as it was.
 *
 * NOTE(review): the helpers are not defined in this file's visible text
 * (presumably they come from pel_template.c and average src into dst) --
 * confirm there.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride passed through unchanged to the helper
 * @param width  block width selecting the helper
 * @param height passed through unchanged to the helper
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
    /* any other width: intentionally nothing to do */
}
187 |
|
|
|
188 |
|
6 |
/**
 * Averaging variant of the 2:1 horizontal blend.
 *
 * For every row below height and every column x below width, computes
 * pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11 and replaces dst[x]
 * with (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination
 * value and the prediction with halves rounded up. 683 / 2048 is a
 * fixed-point stand-in for 1/3. One source column past width is read in
 * each row. src and dst both advance by stride per row.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = ((2 * src[col] + src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
202 |
|
|
|
203 |
|
3 |
/**
 * Averaging variant of the 1:2 horizontal blend.
 *
 * For every row below height and every column x below width, computes
 * pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11 and replaces dst[x]
 * with (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination
 * value and the prediction with halves rounded up. 683 / 2048 is a
 * fixed-point stand-in for 1/3. One source column past width is read in
 * each row. src and dst both advance by stride per row.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = ((src[col] + 2 * src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
217 |
|
|
|
218 |
|
6 |
/**
 * Averaging variant of the 2:1 vertical blend.
 *
 * For every row below height and every column x below width, computes
 * pred = ((2 * src[x] + src[x + stride] + 1) * 683) >> 11 and replaces
 * dst[x] with (dst[x] + pred + 1) >> 1, i.e. the mean of the old
 * destination value and the prediction with halves rounded up.
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source row past
 * height is read. src and dst both advance by stride per row.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int pred = ((2 * src[col] + below[col] + 1) * 683) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
232 |
|
|
|
233 |
|
3 |
/**
 * Averaging variant of the 2x2 blend with weights
 *     4 3
 *     3 2
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, computes
 * pred = ((weighted sum + 6) * 2731) >> 15 and replaces dst[x] with
 * (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination value and
 * the prediction with halves rounded up. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum  = 4 * src[col]   + 3 * src[col + 1] +
                             3 * below[col] + 2 * below[col + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
248 |
|
|
|
249 |
|
6 |
/**
 * Averaging variant of the 2x2 blend with weights
 *     3 2
 *     4 3
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, computes
 * pred = ((weighted sum + 6) * 2731) >> 15 and replaces dst[x] with
 * (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination value and
 * the prediction with halves rounded up. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum  = 3 * src[col]   + 2 * src[col + 1] +
                             4 * below[col] + 3 * below[col + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
264 |
|
|
|
265 |
|
3 |
/**
 * Averaging variant of the 1:2 vertical blend.
 *
 * For every row below height and every column x below width, computes
 * pred = ((src[x] + 2 * src[x + stride] + 1) * 683) >> 11 and replaces
 * dst[x] with (dst[x] + pred + 1) >> 1, i.e. the mean of the old
 * destination value and the prediction with halves rounded up.
 * 683 / 2048 is a fixed-point stand-in for 1/3. One source row past
 * height is read. src and dst both advance by stride per row.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int pred = ((src[col] + 2 * below[col] + 1) * 683) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
279 |
|
|
|
280 |
|
9 |
/**
 * Averaging variant of the 2x2 blend with weights
 *     3 4
 *     2 3
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, computes
 * pred = ((weighted sum + 6) * 2731) >> 15 and replaces dst[x] with
 * (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination value and
 * the prediction with halves rounded up. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum  = 3 * src[col]   + 4 * src[col + 1] +
                             2 * below[col] + 3 * below[col + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
295 |
|
|
|
296 |
|
|
/**
 * Averaging variant of the 2x2 blend with weights
 *     2 3
 *     3 4
 * (current sample, right neighbour / sample one stride on, its right
 * neighbour).
 *
 * For every row below height and every column x below width, computes
 * pred = ((weighted sum + 6) * 2731) >> 15 and replaces dst[x] with
 * (dst[x] + pred + 1) >> 1, i.e. the mean of the old destination value and
 * the prediction with halves rounded up. The weights total 12 and
 * 2731 / 32768 is a fixed-point stand-in for 1/12. One extra source column
 * and one extra source row are read. src and dst advance by stride per row.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum  = 2 * src[col]   + 3 * src[col + 1] +
                             3 * below[col] + 4 * below[col + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
311 |
|
|
|
312 |
|
8 |
/**
 * Fill both function tables of a TpelDSPContext with the C implementations
 * defined in this file.
 *
 * The mcXY function is stored at index X + 4 * Y in each table, so the
 * slots written are 0-2, 4-6 and 8-10. Indices 3 and 7 are not assigned
 * here and keep whatever value they had on entry.
 *
 * @param c context whose put_tpel_pixels_tab and avg_tpel_pixels_tab
 *          entries are set
 */
av_cold void ff_tpeldsp_init(TpelDSPContext *c)
{
    /* Y = 0 */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;

    /* Y = 1 */
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;

    /* Y = 2 */
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
}