Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * VC-1 and WMV3 decoder - DSP functions | ||
3 | * Copyright (c) 2006 Konstantin Shishkov | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | /** | ||
23 | * @file | ||
24 | * VC-1 and WMV3 decoder | ||
25 | */ | ||
26 | |||
27 | #include "config_components.h" | ||
28 | |||
29 | #include "libavutil/avassert.h" | ||
30 | #include "libavutil/common.h" | ||
31 | #include "libavutil/intreadwrite.h" | ||
32 | #include "h264chroma.h" | ||
33 | #include "qpeldsp.h" | ||
34 | #include "rnd_avg.h" | ||
35 | #include "vc1dsp.h" | ||
36 | #include "startcode.h" | ||
37 | #include "vc1_common.h" | ||
38 | |||
39 | /* Apply overlap transform to horizontal edge */ | ||
40 | ✗ | static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride) | |
41 | { | ||
42 | int i; | ||
43 | int a, b, c, d; | ||
44 | int d1, d2; | ||
45 | ✗ | int rnd = 1; | |
46 | ✗ | for (i = 0; i < 8; i++) { | |
47 | ✗ | a = src[-2 * stride]; | |
48 | ✗ | b = src[-stride]; | |
49 | ✗ | c = src[0]; | |
50 | ✗ | d = src[stride]; | |
51 | ✗ | d1 = (a - d + 3 + rnd) >> 3; | |
52 | ✗ | d2 = (a - d + b - c + 4 - rnd) >> 3; | |
53 | |||
54 | ✗ | src[-2 * stride] = a - d1; | |
55 | ✗ | src[-stride] = av_clip_uint8(b - d2); | |
56 | ✗ | src[0] = av_clip_uint8(c + d2); | |
57 | ✗ | src[stride] = d + d1; | |
58 | ✗ | src++; | |
59 | ✗ | rnd = !rnd; | |
60 | } | ||
61 | ✗ | } | |
62 | |||
63 | /* Apply overlap transform to vertical edge */ | ||
64 | ✗ | static void vc1_h_overlap_c(uint8_t *src, ptrdiff_t stride) | |
65 | { | ||
66 | int i; | ||
67 | int a, b, c, d; | ||
68 | int d1, d2; | ||
69 | ✗ | int rnd = 1; | |
70 | ✗ | for (i = 0; i < 8; i++) { | |
71 | ✗ | a = src[-2]; | |
72 | ✗ | b = src[-1]; | |
73 | ✗ | c = src[0]; | |
74 | ✗ | d = src[1]; | |
75 | ✗ | d1 = (a - d + 3 + rnd) >> 3; | |
76 | ✗ | d2 = (a - d + b - c + 4 - rnd) >> 3; | |
77 | |||
78 | ✗ | src[-2] = a - d1; | |
79 | ✗ | src[-1] = av_clip_uint8(b - d2); | |
80 | ✗ | src[0] = av_clip_uint8(c + d2); | |
81 | ✗ | src[1] = d + d1; | |
82 | ✗ | src += stride; | |
83 | ✗ | rnd = !rnd; | |
84 | } | ||
85 | ✗ | } | |
86 | |||
87 | 2456 | static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom) | |
88 | { | ||
89 | int i; | ||
90 | int a, b, c, d; | ||
91 | int d1, d2; | ||
92 | 2456 | int rnd1 = 4, rnd2 = 3; | |
93 |
2/2✓ Branch 0 taken 19648 times.
✓ Branch 1 taken 2456 times.
|
22104 | for (i = 0; i < 8; i++) { |
94 | 19648 | a = top[48]; | |
95 | 19648 | b = top[56]; | |
96 | 19648 | c = bottom[0]; | |
97 | 19648 | d = bottom[8]; | |
98 | 19648 | d1 = a - d; | |
99 | 19648 | d2 = a - d + b - c; | |
100 | |||
101 | 19648 | top[48] = ((a * 8) - d1 + rnd1) >> 3; | |
102 | 19648 | top[56] = ((b * 8) - d2 + rnd2) >> 3; | |
103 | 19648 | bottom[0] = ((c * 8) + d2 + rnd1) >> 3; | |
104 | 19648 | bottom[8] = ((d * 8) + d1 + rnd2) >> 3; | |
105 | |||
106 | 19648 | bottom++; | |
107 | 19648 | top++; | |
108 | 19648 | rnd2 = 7 - rnd2; | |
109 | 19648 | rnd1 = 7 - rnd1; | |
110 | } | ||
111 | 2456 | } | |
112 | |||
113 | 2688 | static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags) | |
114 | { | ||
115 | int i; | ||
116 | int a, b, c, d; | ||
117 | int d1, d2; | ||
118 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2688 times.
|
2688 | int rnd1 = flags & 2 ? 3 : 4; |
119 | 2688 | int rnd2 = 7 - rnd1; | |
120 |
2/2✓ Branch 0 taken 21504 times.
✓ Branch 1 taken 2688 times.
|
24192 | for (i = 0; i < 8; i++) { |
121 | 21504 | a = left[6]; | |
122 | 21504 | b = left[7]; | |
123 | 21504 | c = right[0]; | |
124 | 21504 | d = right[1]; | |
125 | 21504 | d1 = a - d; | |
126 | 21504 | d2 = a - d + b - c; | |
127 | |||
128 | 21504 | left[6] = ((a * 8) - d1 + rnd1) >> 3; | |
129 | 21504 | left[7] = ((b * 8) - d2 + rnd2) >> 3; | |
130 | 21504 | right[0] = ((c * 8) + d2 + rnd1) >> 3; | |
131 | 21504 | right[1] = ((d * 8) + d1 + rnd2) >> 3; | |
132 | |||
133 | 21504 | right += right_stride; | |
134 | 21504 | left += left_stride; | |
135 |
1/2✓ Branch 0 taken 21504 times.
✗ Branch 1 not taken.
|
21504 | if (flags & 1) { |
136 | 21504 | rnd2 = 7 - rnd2; | |
137 | 21504 | rnd1 = 7 - rnd1; | |
138 | } | ||
139 | } | ||
140 | 2688 | } | |
141 | |||
142 | /** | ||
143 | * VC-1 in-loop deblocking filter for one line | ||
144 | * @param src source block type | ||
145 | * @param stride block stride | ||
146 | * @param pq block quantizer | ||
147 | * @return whether other 3 pairs should be filtered or not | ||
148 | * @see 8.6 | ||
149 | */ | ||
150 | 13363859 | static av_always_inline int vc1_filter_line(uint8_t *src, ptrdiff_t stride, int pq) | |
151 | { | ||
152 | 13363859 | int a0 = (2 * (src[-2 * stride] - src[1 * stride]) - | |
153 | 13363859 | 5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3; | |
154 | 13363859 | int a0_sign = a0 >> 31; /* Store sign */ | |
155 | |||
156 | 13363859 | a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ | |
157 |
2/2✓ Branch 0 taken 11868222 times.
✓ Branch 1 taken 1495637 times.
|
13363859 | if (a0 < pq) { |
158 | 11868222 | int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) - | |
159 | 5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3); | ||
160 | 11868222 | int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) - | |
161 | 5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3); | ||
162 |
4/4✓ Branch 0 taken 6370264 times.
✓ Branch 1 taken 5497958 times.
✓ Branch 2 taken 1394079 times.
✓ Branch 3 taken 4976185 times.
|
11868222 | if (a1 < a0 || a2 < a0) { |
163 | 6892037 | int clip = src[-1 * stride] - src[0 * stride]; | |
164 | 6892037 | int clip_sign = clip >> 31; | |
165 | |||
166 | 6892037 | clip = ((clip ^ clip_sign) - clip_sign) >> 1; | |
167 |
2/2✓ Branch 0 taken 5569558 times.
✓ Branch 1 taken 1322479 times.
|
6892037 | if (clip) { |
168 | 5569558 | int a3 = FFMIN(a1, a2); | |
169 | 5569558 | int d = 5 * (a3 - a0); | |
170 | 5569558 | int d_sign = (d >> 31); | |
171 | |||
172 | 5569558 | d = ((d ^ d_sign) - d_sign) >> 3; | |
173 | 5569558 | d_sign ^= a0_sign; | |
174 | |||
175 |
2/2✓ Branch 0 taken 334206 times.
✓ Branch 1 taken 5235352 times.
|
5569558 | if (d_sign ^ clip_sign) |
176 | 334206 | d = 0; | |
177 | else { | ||
178 | 5235352 | d = FFMIN(d, clip); | |
179 | 5235352 | d = (d ^ d_sign) - d_sign; /* Restore sign */ | |
180 | 5235352 | src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d); | |
181 | 5235352 | src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d); | |
182 | } | ||
183 | 5569558 | return 1; | |
184 | } | ||
185 | } | ||
186 | } | ||
187 | 7794301 | return 0; | |
188 | } | ||
189 | |||
190 | /** | ||
191 | * VC-1 in-loop deblocking filter | ||
192 | * @param src source block type | ||
193 | * @param step distance between horizontally adjacent elements | ||
194 | * @param stride distance between vertically adjacent elements | ||
195 | * @param len edge length to filter (4 or 8 pixels) | ||
196 | * @param pq block quantizer | ||
197 | * @see 8.6 | ||
198 | */ | ||
199 | 4018195 | static inline void vc1_loop_filter(uint8_t *src, int step, ptrdiff_t stride, | |
200 | int len, int pq) | ||
201 | { | ||
202 | int i; | ||
203 | int filt3; | ||
204 | |||
205 |
2/2✓ Branch 0 taken 6934310 times.
✓ Branch 1 taken 4018195 times.
|
10952505 | for (i = 0; i < len; i += 4) { |
206 | 6934310 | filt3 = vc1_filter_line(src + 2 * step, stride, pq); | |
207 |
2/2✓ Branch 0 taken 2143183 times.
✓ Branch 1 taken 4791127 times.
|
6934310 | if (filt3) { |
208 | 2143183 | vc1_filter_line(src + 0 * step, stride, pq); | |
209 | 2143183 | vc1_filter_line(src + 1 * step, stride, pq); | |
210 | 2143183 | vc1_filter_line(src + 3 * step, stride, pq); | |
211 | } | ||
212 | 6934310 | src += step * 4; | |
213 | } | ||
214 | 4018195 | } | |
215 | |||
216 | 430495 | static void vc1_v_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq) | |
217 | { | ||
218 | 430495 | vc1_loop_filter(src, 1, stride, 4, pq); | |
219 | 430495 | } | |
220 | |||
221 | 995837 | static void vc1_h_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq) | |
222 | { | ||
223 | 995837 | vc1_loop_filter(src, stride, 1, 4, pq); | |
224 | 995837 | } | |
225 | |||
226 | 1402497 | static void vc1_v_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq) | |
227 | { | ||
228 | 1402497 | vc1_loop_filter(src, 1, stride, 8, pq); | |
229 | 1402497 | } | |
230 | |||
231 | 1027240 | static void vc1_h_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq) | |
232 | { | ||
233 | 1027240 | vc1_loop_filter(src, stride, 1, 8, pq); | |
234 | 1027240 | } | |
235 | |||
236 | 107494 | static void vc1_v_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq) | |
237 | { | ||
238 | 107494 | vc1_loop_filter(src, 1, stride, 16, pq); | |
239 | 107494 | } | |
240 | |||
241 | 54632 | static void vc1_h_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq) | |
242 | { | ||
243 | 54632 | vc1_loop_filter(src, stride, 1, 16, pq); | |
244 | 54632 | } | |
245 | |||
246 | /* Do inverse transform on 8x8 block */ | ||
247 | 48604 | static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
248 | { | ||
249 | int i; | ||
250 | 48604 | int dc = block[0]; | |
251 | |||
252 | 48604 | dc = (3 * dc + 1) >> 1; | |
253 | 48604 | dc = (3 * dc + 16) >> 5; | |
254 | |||
255 |
2/2✓ Branch 0 taken 388832 times.
✓ Branch 1 taken 48604 times.
|
437436 | for (i = 0; i < 8; i++) { |
256 | 388832 | dest[0] = av_clip_uint8(dest[0] + dc); | |
257 | 388832 | dest[1] = av_clip_uint8(dest[1] + dc); | |
258 | 388832 | dest[2] = av_clip_uint8(dest[2] + dc); | |
259 | 388832 | dest[3] = av_clip_uint8(dest[3] + dc); | |
260 | 388832 | dest[4] = av_clip_uint8(dest[4] + dc); | |
261 | 388832 | dest[5] = av_clip_uint8(dest[5] + dc); | |
262 | 388832 | dest[6] = av_clip_uint8(dest[6] + dc); | |
263 | 388832 | dest[7] = av_clip_uint8(dest[7] + dc); | |
264 | 388832 | dest += stride; | |
265 | } | ||
266 | 48604 | } | |
267 | |||
268 | 800677 | static void vc1_inv_trans_8x8_c(int16_t block[64]) | |
269 | { | ||
270 | int i; | ||
271 | register int t1, t2, t3, t4, t5, t6, t7, t8; | ||
272 | int16_t *src, *dst, temp[64]; | ||
273 | |||
274 | 800677 | src = block; | |
275 | 800677 | dst = temp; | |
276 |
2/2✓ Branch 0 taken 6405416 times.
✓ Branch 1 taken 800677 times.
|
7206093 | for (i = 0; i < 8; i++) { |
277 | 6405416 | t1 = 12 * (src[ 0] + src[32]) + 4; | |
278 | 6405416 | t2 = 12 * (src[ 0] - src[32]) + 4; | |
279 | 6405416 | t3 = 16 * src[16] + 6 * src[48]; | |
280 | 6405416 | t4 = 6 * src[16] - 16 * src[48]; | |
281 | |||
282 | 6405416 | t5 = t1 + t3; | |
283 | 6405416 | t6 = t2 + t4; | |
284 | 6405416 | t7 = t2 - t4; | |
285 | 6405416 | t8 = t1 - t3; | |
286 | |||
287 | 6405416 | t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
288 | 6405416 | t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
289 | 6405416 | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
290 | 6405416 | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
291 | |||
292 | 6405416 | dst[0] = (t5 + t1) >> 3; | |
293 | 6405416 | dst[1] = (t6 + t2) >> 3; | |
294 | 6405416 | dst[2] = (t7 + t3) >> 3; | |
295 | 6405416 | dst[3] = (t8 + t4) >> 3; | |
296 | 6405416 | dst[4] = (t8 - t4) >> 3; | |
297 | 6405416 | dst[5] = (t7 - t3) >> 3; | |
298 | 6405416 | dst[6] = (t6 - t2) >> 3; | |
299 | 6405416 | dst[7] = (t5 - t1) >> 3; | |
300 | |||
301 | 6405416 | src += 1; | |
302 | 6405416 | dst += 8; | |
303 | } | ||
304 | |||
305 | 800677 | src = temp; | |
306 | 800677 | dst = block; | |
307 |
2/2✓ Branch 0 taken 6405416 times.
✓ Branch 1 taken 800677 times.
|
7206093 | for (i = 0; i < 8; i++) { |
308 | 6405416 | t1 = 12 * (src[ 0] + src[32]) + 64; | |
309 | 6405416 | t2 = 12 * (src[ 0] - src[32]) + 64; | |
310 | 6405416 | t3 = 16 * src[16] + 6 * src[48]; | |
311 | 6405416 | t4 = 6 * src[16] - 16 * src[48]; | |
312 | |||
313 | 6405416 | t5 = t1 + t3; | |
314 | 6405416 | t6 = t2 + t4; | |
315 | 6405416 | t7 = t2 - t4; | |
316 | 6405416 | t8 = t1 - t3; | |
317 | |||
318 | 6405416 | t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
319 | 6405416 | t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
320 | 6405416 | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
321 | 6405416 | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
322 | |||
323 | 6405416 | dst[ 0] = (t5 + t1) >> 7; | |
324 | 6405416 | dst[ 8] = (t6 + t2) >> 7; | |
325 | 6405416 | dst[16] = (t7 + t3) >> 7; | |
326 | 6405416 | dst[24] = (t8 + t4) >> 7; | |
327 | 6405416 | dst[32] = (t8 - t4 + 1) >> 7; | |
328 | 6405416 | dst[40] = (t7 - t3 + 1) >> 7; | |
329 | 6405416 | dst[48] = (t6 - t2 + 1) >> 7; | |
330 | 6405416 | dst[56] = (t5 - t1 + 1) >> 7; | |
331 | |||
332 | 6405416 | src++; | |
333 | 6405416 | dst++; | |
334 | } | ||
335 | 800677 | } | |
336 | |||
337 | /* Do inverse transform on 8x4 part of block */ | ||
338 | 15850 | static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
339 | { | ||
340 | int i; | ||
341 | 15850 | int dc = block[0]; | |
342 | |||
343 | 15850 | dc = (3 * dc + 1) >> 1; | |
344 | 15850 | dc = (17 * dc + 64) >> 7; | |
345 | |||
346 |
2/2✓ Branch 0 taken 63400 times.
✓ Branch 1 taken 15850 times.
|
79250 | for (i = 0; i < 4; i++) { |
347 | 63400 | dest[0] = av_clip_uint8(dest[0] + dc); | |
348 | 63400 | dest[1] = av_clip_uint8(dest[1] + dc); | |
349 | 63400 | dest[2] = av_clip_uint8(dest[2] + dc); | |
350 | 63400 | dest[3] = av_clip_uint8(dest[3] + dc); | |
351 | 63400 | dest[4] = av_clip_uint8(dest[4] + dc); | |
352 | 63400 | dest[5] = av_clip_uint8(dest[5] + dc); | |
353 | 63400 | dest[6] = av_clip_uint8(dest[6] + dc); | |
354 | 63400 | dest[7] = av_clip_uint8(dest[7] + dc); | |
355 | 63400 | dest += stride; | |
356 | } | ||
357 | 15850 | } | |
358 | |||
359 | 207387 | static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
360 | { | ||
361 | int i; | ||
362 | register int t1, t2, t3, t4, t5, t6, t7, t8; | ||
363 | int16_t *src, *dst; | ||
364 | |||
365 | 207387 | src = block; | |
366 | 207387 | dst = block; | |
367 | |||
368 |
2/2✓ Branch 0 taken 829548 times.
✓ Branch 1 taken 207387 times.
|
1036935 | for (i = 0; i < 4; i++) { |
369 | 829548 | t1 = 12 * (src[0] + src[4]) + 4; | |
370 | 829548 | t2 = 12 * (src[0] - src[4]) + 4; | |
371 | 829548 | t3 = 16 * src[2] + 6 * src[6]; | |
372 | 829548 | t4 = 6 * src[2] - 16 * src[6]; | |
373 | |||
374 | 829548 | t5 = t1 + t3; | |
375 | 829548 | t6 = t2 + t4; | |
376 | 829548 | t7 = t2 - t4; | |
377 | 829548 | t8 = t1 - t3; | |
378 | |||
379 | 829548 | t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
380 | 829548 | t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
381 | 829548 | t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
382 | 829548 | t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
383 | |||
384 | 829548 | dst[0] = (t5 + t1) >> 3; | |
385 | 829548 | dst[1] = (t6 + t2) >> 3; | |
386 | 829548 | dst[2] = (t7 + t3) >> 3; | |
387 | 829548 | dst[3] = (t8 + t4) >> 3; | |
388 | 829548 | dst[4] = (t8 - t4) >> 3; | |
389 | 829548 | dst[5] = (t7 - t3) >> 3; | |
390 | 829548 | dst[6] = (t6 - t2) >> 3; | |
391 | 829548 | dst[7] = (t5 - t1) >> 3; | |
392 | |||
393 | 829548 | src += 8; | |
394 | 829548 | dst += 8; | |
395 | } | ||
396 | |||
397 | 207387 | src = block; | |
398 |
2/2✓ Branch 0 taken 1659096 times.
✓ Branch 1 taken 207387 times.
|
1866483 | for (i = 0; i < 8; i++) { |
399 | 1659096 | t1 = 17 * (src[ 0] + src[16]) + 64; | |
400 | 1659096 | t2 = 17 * (src[ 0] - src[16]) + 64; | |
401 | 1659096 | t3 = 22 * src[ 8] + 10 * src[24]; | |
402 | 1659096 | t4 = 22 * src[24] - 10 * src[ 8]; | |
403 | |||
404 | 1659096 | dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((t1 + t3) >> 7)); | |
405 | 1659096 | dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((t2 - t4) >> 7)); | |
406 | 1659096 | dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((t2 + t4) >> 7)); | |
407 | 1659096 | dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((t1 - t3) >> 7)); | |
408 | |||
409 | 1659096 | src++; | |
410 | 1659096 | dest++; | |
411 | } | ||
412 | 207387 | } | |
413 | |||
414 | /* Do inverse transform on 4x8 parts of block */ | ||
415 | 17937 | static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
416 | { | ||
417 | int i; | ||
418 | 17937 | int dc = block[0]; | |
419 | |||
420 | 17937 | dc = (17 * dc + 4) >> 3; | |
421 | 17937 | dc = (12 * dc + 64) >> 7; | |
422 | |||
423 |
2/2✓ Branch 0 taken 143496 times.
✓ Branch 1 taken 17937 times.
|
161433 | for (i = 0; i < 8; i++) { |
424 | 143496 | dest[0] = av_clip_uint8(dest[0] + dc); | |
425 | 143496 | dest[1] = av_clip_uint8(dest[1] + dc); | |
426 | 143496 | dest[2] = av_clip_uint8(dest[2] + dc); | |
427 | 143496 | dest[3] = av_clip_uint8(dest[3] + dc); | |
428 | 143496 | dest += stride; | |
429 | } | ||
430 | 17937 | } | |
431 | |||
432 | 209107 | static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
433 | { | ||
434 | int i; | ||
435 | register int t1, t2, t3, t4, t5, t6, t7, t8; | ||
436 | int16_t *src, *dst; | ||
437 | |||
438 | 209107 | src = block; | |
439 | 209107 | dst = block; | |
440 | |||
441 |
2/2✓ Branch 0 taken 1672856 times.
✓ Branch 1 taken 209107 times.
|
1881963 | for (i = 0; i < 8; i++) { |
442 | 1672856 | t1 = 17 * (src[0] + src[2]) + 4; | |
443 | 1672856 | t2 = 17 * (src[0] - src[2]) + 4; | |
444 | 1672856 | t3 = 22 * src[1] + 10 * src[3]; | |
445 | 1672856 | t4 = 22 * src[3] - 10 * src[1]; | |
446 | |||
447 | 1672856 | dst[0] = (t1 + t3) >> 3; | |
448 | 1672856 | dst[1] = (t2 - t4) >> 3; | |
449 | 1672856 | dst[2] = (t2 + t4) >> 3; | |
450 | 1672856 | dst[3] = (t1 - t3) >> 3; | |
451 | |||
452 | 1672856 | src += 8; | |
453 | 1672856 | dst += 8; | |
454 | } | ||
455 | |||
456 | 209107 | src = block; | |
457 |
2/2✓ Branch 0 taken 836428 times.
✓ Branch 1 taken 209107 times.
|
1045535 | for (i = 0; i < 4; i++) { |
458 | 836428 | t1 = 12 * (src[ 0] + src[32]) + 64; | |
459 | 836428 | t2 = 12 * (src[ 0] - src[32]) + 64; | |
460 | 836428 | t3 = 16 * src[16] + 6 * src[48]; | |
461 | 836428 | t4 = 6 * src[16] - 16 * src[48]; | |
462 | |||
463 | 836428 | t5 = t1 + t3; | |
464 | 836428 | t6 = t2 + t4; | |
465 | 836428 | t7 = t2 - t4; | |
466 | 836428 | t8 = t1 - t3; | |
467 | |||
468 | 836428 | t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
469 | 836428 | t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
470 | 836428 | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
471 | 836428 | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
472 | |||
473 | 836428 | dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((t5 + t1) >> 7)); | |
474 | 836428 | dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((t6 + t2) >> 7)); | |
475 | 836428 | dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((t7 + t3) >> 7)); | |
476 | 836428 | dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((t8 + t4) >> 7)); | |
477 | 836428 | dest[4 * stride] = av_clip_uint8(dest[4 * stride] + ((t8 - t4 + 1) >> 7)); | |
478 | 836428 | dest[5 * stride] = av_clip_uint8(dest[5 * stride] + ((t7 - t3 + 1) >> 7)); | |
479 | 836428 | dest[6 * stride] = av_clip_uint8(dest[6 * stride] + ((t6 - t2 + 1) >> 7)); | |
480 | 836428 | dest[7 * stride] = av_clip_uint8(dest[7 * stride] + ((t5 - t1 + 1) >> 7)); | |
481 | |||
482 | 836428 | src++; | |
483 | 836428 | dest++; | |
484 | } | ||
485 | 209107 | } | |
486 | |||
487 | /* Do inverse transform on 4x4 part of block */ | ||
488 | 21252 | static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
489 | { | ||
490 | int i; | ||
491 | 21252 | int dc = block[0]; | |
492 | |||
493 | 21252 | dc = (17 * dc + 4) >> 3; | |
494 | 21252 | dc = (17 * dc + 64) >> 7; | |
495 | |||
496 |
2/2✓ Branch 0 taken 85008 times.
✓ Branch 1 taken 21252 times.
|
106260 | for (i = 0; i < 4; i++) { |
497 | 85008 | dest[0] = av_clip_uint8(dest[0] + dc); | |
498 | 85008 | dest[1] = av_clip_uint8(dest[1] + dc); | |
499 | 85008 | dest[2] = av_clip_uint8(dest[2] + dc); | |
500 | 85008 | dest[3] = av_clip_uint8(dest[3] + dc); | |
501 | 85008 | dest += stride; | |
502 | } | ||
503 | 21252 | } | |
504 | |||
505 | 170154 | static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
506 | { | ||
507 | int i; | ||
508 | register int t1, t2, t3, t4; | ||
509 | int16_t *src, *dst; | ||
510 | |||
511 | 170154 | src = block; | |
512 | 170154 | dst = block; | |
513 |
2/2✓ Branch 0 taken 680616 times.
✓ Branch 1 taken 170154 times.
|
850770 | for (i = 0; i < 4; i++) { |
514 | 680616 | t1 = 17 * (src[0] + src[2]) + 4; | |
515 | 680616 | t2 = 17 * (src[0] - src[2]) + 4; | |
516 | 680616 | t3 = 22 * src[1] + 10 * src[3]; | |
517 | 680616 | t4 = 22 * src[3] - 10 * src[1]; | |
518 | |||
519 | 680616 | dst[0] = (t1 + t3) >> 3; | |
520 | 680616 | dst[1] = (t2 - t4) >> 3; | |
521 | 680616 | dst[2] = (t2 + t4) >> 3; | |
522 | 680616 | dst[3] = (t1 - t3) >> 3; | |
523 | |||
524 | 680616 | src += 8; | |
525 | 680616 | dst += 8; | |
526 | } | ||
527 | |||
528 | 170154 | src = block; | |
529 |
2/2✓ Branch 0 taken 680616 times.
✓ Branch 1 taken 170154 times.
|
850770 | for (i = 0; i < 4; i++) { |
530 | 680616 | t1 = 17 * (src[0] + src[16]) + 64; | |
531 | 680616 | t2 = 17 * (src[0] - src[16]) + 64; | |
532 | 680616 | t3 = 22 * src[8] + 10 * src[24]; | |
533 | 680616 | t4 = 22 * src[24] - 10 * src[8]; | |
534 | |||
535 | 680616 | dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((t1 + t3) >> 7)); | |
536 | 680616 | dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((t2 - t4) >> 7)); | |
537 | 680616 | dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((t2 + t4) >> 7)); | |
538 | 680616 | dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((t1 - t3) >> 7)); | |
539 | |||
540 | 680616 | src++; | |
541 | 680616 | dest++; | |
542 | } | ||
543 | 170154 | } | |
544 | |||
545 | /* motion compensation functions */ | ||
546 | |||
547 | /* Filter in case of 2 filters */ | ||
548 | #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \ | ||
549 | static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \ | ||
550 | int stride, \ | ||
551 | int mode) \ | ||
552 | { \ | ||
553 | switch(mode) { \ | ||
554 | case 0: /* no shift - should not occur */ \ | ||
555 | return 0; \ | ||
556 | case 1: /* 1/4 shift */ \ | ||
557 | return -4 * src[-stride] + 53 * src[0] + \ | ||
558 | 18 * src[stride] - 3 * src[stride * 2]; \ | ||
559 | case 2: /* 1/2 shift */ \ | ||
560 | return -1 * src[-stride] + 9 * src[0] + \ | ||
561 | 9 * src[stride] - 1 * src[stride * 2]; \ | ||
562 | case 3: /* 3/4 shift */ \ | ||
563 | return -3 * src[-stride] + 18 * src[0] + \ | ||
564 | 53 * src[stride] - 4 * src[stride * 2]; \ | ||
565 | } \ | ||
566 | return 0; /* should not occur */ \ | ||
567 | } | ||
568 | |||
569 |
3/5✗ Branch 0 not taken.
✓ Branch 1 taken 9633480 times.
✓ Branch 2 taken 15663624 times.
✓ Branch 3 taken 9146632 times.
✗ Branch 4 not taken.
|
34443736 | VC1_MSPEL_FILTER_16B(ver, uint8_t) |
570 |
3/5✗ Branch 0 not taken.
✓ Branch 1 taken 7453760 times.
✓ Branch 2 taken 12923840 times.
✓ Branch 3 taken 7283520 times.
✗ Branch 4 not taken.
|
27661120 | VC1_MSPEL_FILTER_16B(hor, int16_t) |
571 | |||
572 | /* Filter used to interpolate fractional pel values */ | ||
573 | 18729536 | static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, | |
574 | int mode, int r) | ||
575 | { | ||
576 |
3/5✗ Branch 0 not taken.
✓ Branch 1 taken 5365632 times.
✓ Branch 2 taken 8441856 times.
✓ Branch 3 taken 4922048 times.
✗ Branch 4 not taken.
|
18729536 | switch (mode) { |
577 | ✗ | case 0: // no shift | |
578 | ✗ | return src[0]; | |
579 | 5365632 | case 1: // 1/4 shift | |
580 | 5365632 | return (-4 * src[-stride] + 53 * src[0] + | |
581 | 5365632 | 18 * src[stride] - 3 * src[stride * 2] + 32 - r) >> 6; | |
582 | 8441856 | case 2: // 1/2 shift | |
583 | 8441856 | return (-1 * src[-stride] + 9 * src[0] + | |
584 | 8441856 | 9 * src[stride] - 1 * src[stride * 2] + 8 - r) >> 4; | |
585 | 4922048 | case 3: // 3/4 shift | |
586 | 4922048 | return (-3 * src[-stride] + 18 * src[0] + | |
587 | 4922048 | 53 * src[stride] - 4 * src[stride * 2] + 32 - r) >> 6; | |
588 | } | ||
589 | ✗ | return 0; // should not occur | |
590 | } | ||
591 | |||
592 | /* Function used to do motion compensation with bicubic interpolation */ | ||
593 | #define VC1_MSPEL_MC(OP, OP4, OPNAME) \ | ||
594 | static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \ | ||
595 | const uint8_t *src, \ | ||
596 | ptrdiff_t stride, \ | ||
597 | int hmode, \ | ||
598 | int vmode, \ | ||
599 | int rnd) \ | ||
600 | { \ | ||
601 | int i, j; \ | ||
602 | \ | ||
603 | if (vmode) { /* Horizontal filter to apply */ \ | ||
604 | int r; \ | ||
605 | \ | ||
606 | if (hmode) { /* Vertical filter to apply, output to tmp */ \ | ||
607 | static const int shift_value[] = { 0, 5, 1, 5 }; \ | ||
608 | int shift = (shift_value[hmode] + shift_value[vmode]) >> 1; \ | ||
609 | int16_t tmp[11 * 8], *tptr = tmp; \ | ||
610 | \ | ||
611 | r = (1 << (shift - 1)) + rnd - 1; \ | ||
612 | \ | ||
613 | src -= 1; \ | ||
614 | for (j = 0; j < 8; j++) { \ | ||
615 | for (i = 0; i < 11; i++) \ | ||
616 | tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \ | ||
617 | src += stride; \ | ||
618 | tptr += 11; \ | ||
619 | } \ | ||
620 | \ | ||
621 | r = 64 - rnd; \ | ||
622 | tptr = tmp + 1; \ | ||
623 | for (j = 0; j < 8; j++) { \ | ||
624 | for (i = 0; i < 8; i++) \ | ||
625 | OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \ | ||
626 | dst += stride; \ | ||
627 | tptr += 11; \ | ||
628 | } \ | ||
629 | \ | ||
630 | return; \ | ||
631 | } else { /* No horizontal filter, output 8 lines to dst */ \ | ||
632 | r = 1 - rnd; \ | ||
633 | \ | ||
634 | for (j = 0; j < 8; j++) { \ | ||
635 | for (i = 0; i < 8; i++) \ | ||
636 | OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r)); \ | ||
637 | src += stride; \ | ||
638 | dst += stride; \ | ||
639 | } \ | ||
640 | return; \ | ||
641 | } \ | ||
642 | } \ | ||
643 | \ | ||
644 | /* Horizontal mode with no vertical mode */ \ | ||
645 | for (j = 0; j < 8; j++) { \ | ||
646 | for (i = 0; i < 8; i++) \ | ||
647 | OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd)); \ | ||
648 | dst += stride; \ | ||
649 | src += stride; \ | ||
650 | } \ | ||
651 | }\ | ||
652 | static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst, \ | ||
653 | const uint8_t *src, \ | ||
654 | ptrdiff_t stride, \ | ||
655 | int hmode, \ | ||
656 | int vmode, \ | ||
657 | int rnd) \ | ||
658 | { \ | ||
659 | int i, j; \ | ||
660 | \ | ||
661 | if (vmode) { /* Horizontal filter to apply */ \ | ||
662 | int r; \ | ||
663 | \ | ||
664 | if (hmode) { /* Vertical filter to apply, output to tmp */ \ | ||
665 | static const int shift_value[] = { 0, 5, 1, 5 }; \ | ||
666 | int shift = (shift_value[hmode] + shift_value[vmode]) >> 1; \ | ||
667 | int16_t tmp[19 * 16], *tptr = tmp; \ | ||
668 | \ | ||
669 | r = (1 << (shift - 1)) + rnd - 1; \ | ||
670 | \ | ||
671 | src -= 1; \ | ||
672 | for (j = 0; j < 16; j++) { \ | ||
673 | for (i = 0; i < 19; i++) \ | ||
674 | tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \ | ||
675 | src += stride; \ | ||
676 | tptr += 19; \ | ||
677 | } \ | ||
678 | \ | ||
679 | r = 64 - rnd; \ | ||
680 | tptr = tmp + 1; \ | ||
681 | for (j = 0; j < 16; j++) { \ | ||
682 | for (i = 0; i < 16; i++) \ | ||
683 | OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \ | ||
684 | dst += stride; \ | ||
685 | tptr += 19; \ | ||
686 | } \ | ||
687 | \ | ||
688 | return; \ | ||
689 | } else { /* No horizontal filter, output 8 lines to dst */ \ | ||
690 | r = 1 - rnd; \ | ||
691 | \ | ||
692 | for (j = 0; j < 16; j++) { \ | ||
693 | for (i = 0; i < 16; i++) \ | ||
694 | OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r)); \ | ||
695 | src += stride; \ | ||
696 | dst += stride; \ | ||
697 | } \ | ||
698 | return; \ | ||
699 | } \ | ||
700 | } \ | ||
701 | \ | ||
702 | /* Horizontal mode with no vertical mode */ \ | ||
703 | for (j = 0; j < 16; j++) { \ | ||
704 | for (i = 0; i < 16; i++) \ | ||
705 | OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd)); \ | ||
706 | dst += stride; \ | ||
707 | src += stride; \ | ||
708 | } \ | ||
709 | }\ | ||
710 | static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ | ||
711 | int i;\ | ||
712 | for(i=0; i<8; i++){\ | ||
713 | OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\ | ||
714 | OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\ | ||
715 | pixels+=line_size;\ | ||
716 | block +=line_size;\ | ||
717 | }\ | ||
718 | }\ | ||
719 | static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ | ||
720 | int i;\ | ||
721 | for(i=0; i<16; i++){\ | ||
722 | OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\ | ||
723 | OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\ | ||
724 | OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\ | ||
725 | OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\ | ||
726 | pixels+=line_size;\ | ||
727 | block +=line_size;\ | ||
728 | }\ | ||
729 | } | ||
730 | |||
731 | #define op_put(a, b) (a) = av_clip_uint8(b) | ||
732 | #define op_avg(a, b) (a) = ((a) + av_clip_uint8(b) + 1) >> 1 | ||
733 | #define op4_avg(a, b) (a) = rnd_avg32(a, b) | ||
734 | #define op4_put(a, b) (a) = (b) | ||
735 | |||
736 |
20/20✓ Branch 0 taken 926561 times.
✓ Branch 1 taken 118437 times.
✓ Branch 2 taken 185195 times.
✓ Branch 3 taken 78398 times.
✓ Branch 5 taken 30334136 times.
✓ Branch 6 taken 2001448 times.
✓ Branch 7 taken 2001448 times.
✓ Branch 8 taken 185195 times.
✓ Branch 10 taken 24329792 times.
✓ Branch 11 taken 2001448 times.
✓ Branch 12 taken 2001448 times.
✓ Branch 13 taken 185195 times.
✓ Branch 15 taken 9509696 times.
✓ Branch 16 taken 814360 times.
✓ Branch 17 taken 814360 times.
✓ Branch 18 taken 78398 times.
✓ Branch 20 taken 6925440 times.
✓ Branch 21 taken 571536 times.
✓ Branch 22 taken 571536 times.
✓ Branch 23 taken 53058 times.
|
155065708 | VC1_MSPEL_MC(op_put, op4_put, put_) |
737 |
21/21✓ Branch 0 taken 30395 times.
✓ Branch 1 taken 6015 times.
✓ Branch 2 taken 31296 times.
✓ Branch 3 taken 8864 times.
✓ Branch 4 taken 71296 times.
✓ Branch 5 taken 4114056 times.
✓ Branch 6 taken 259424 times.
✓ Branch 7 taken 259424 times.
✓ Branch 8 taken 22616 times.
✓ Branch 10 taken 3331328 times.
✓ Branch 11 taken 259424 times.
✓ Branch 12 taken 259424 times.
✓ Branch 13 taken 22616 times.
✓ Branch 15 taken 1322880 times.
✓ Branch 16 taken 96608 times.
✓ Branch 17 taken 96608 times.
✓ Branch 18 taken 7779 times.
✓ Branch 20 taken 971520 times.
✓ Branch 21 taken 72560 times.
✓ Branch 22 taken 72560 times.
✓ Branch 23 taken 6015 times.
|
21090542 | VC1_MSPEL_MC(op_avg, op4_avg, avg_) |
738 | |||
/*
 * Pixel functions: thin entry points into the vc1_mspel_mc helpers.
 *
 * PUT_VC1_MSPEL(X, Y) emits four static wrappers with the pair (X, Y)
 * baked in as the two mode arguments:
 *   put_vc1_mspel_mcXY_c     -> put_vc1_mspel_mc
 *   put_vc1_mspel_mcXY_16_c  -> put_vc1_mspel_mc_16
 *   avg_vc1_mspel_mcXY_c     -> avg_vc1_mspel_mc
 *   avg_vc1_mspel_mcXY_16_c  -> avg_vc1_mspel_mc_16
 * dst, src, stride and rnd are forwarded unchanged.
 */
#define PUT_VC1_MSPEL(X, Y)                                                   \
static void put_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    put_vc1_mspel_mc(dst, src, stride, X, Y, rnd);                            \
}                                                                             \
static void put_vc1_mspel_mc ## X ## Y ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    put_vc1_mspel_mc_16(dst, src, stride, X, Y, rnd);                         \
}                                                                             \
static void avg_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    avg_vc1_mspel_mc(dst, src, stride, X, Y, rnd);                            \
}                                                                             \
static void avg_vc1_mspel_mc ## X ## Y ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    avg_vc1_mspel_mc_16(dst, src, stride, X, Y, rnd);                         \
}

/* Every (X, Y) pair except (0, 0), which gets no wrapper here. */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(0, 3)

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(1, 3)

PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(2, 3)

PUT_VC1_MSPEL(3, 0)
PUT_VC1_MSPEL(3, 1)
PUT_VC1_MSPEL(3, 2)
PUT_VC1_MSPEL(3, 3)
785 | |||
/*
 * Bilinear chroma sample for column n of the current row.
 * Must expand in a scope that provides the weights A, B, C, D as well as
 * `src` and `stride`. The weighted sum of the 2x2 neighbourhood gets a bias
 * of 32 - 4 and is then shifted right by 6.
 */
#define chroma_mc(n)                                                       \
    ((A * src[n]          + B * src[n + 1] +                               \
      C * src[stride + n] + D * src[stride + n + 1] + 32 - 4) >> 6)

/**
 * Write an 8-pixel-wide block of h rows to dst, each pixel interpolated
 * from src with chroma_mc().
 *
 * @param dst    destination, advanced by stride after each row
 * @param src    source; each row reads columns 0..8 of this row and the next
 * @param stride byte distance between rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction, asserted to be in 0..7
 * @param y      vertical fraction, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int inv_x = 8 - x;
    const int inv_y = 8 - y;
    /* The four weights always add up to 64, hence the shift by 6. */
    const int A = inv_x * inv_y;
    const int B = x     * inv_y;
    const int C = inv_x * y;
    const int D = x     * y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = chroma_mc(col);
}
814 | |||
/**
 * Write a 4-pixel-wide block of h rows to dst, each pixel interpolated from
 * src with chroma_mc().
 *
 * @param dst    destination, advanced by stride after each row
 * @param src    source; each row reads columns 0..4 of this row and the next
 * @param stride byte distance between rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction, asserted to be in 0..7
 * @param y      vertical fraction, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, const uint8_t *src,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int inv_x = 8 - x;
    const int inv_y = 8 - y;
    /* Weight names are fixed by chroma_mc(), which reads A..D directly. */
    const int A = inv_x * inv_y;
    const int B = x     * inv_y;
    const int C = inv_x * y;
    const int D = x     * y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 4; col++)
            dst[col] = chroma_mc(col);
}
835 | |||
/* Average of two values, rounding halves up. */
#define avg2(p, q) (((p) + (q) + 1) >> 1)

/**
 * Average an 8-pixel-wide block of h rows into dst: every destination pixel
 * becomes avg2() of its current value and the chroma_mc() sample from src.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; each row reads columns 0..8 of this row and the next
 * @param stride byte distance between rows, shared by dst and src
 * @param h      number of rows to process
 * @param x      horizontal fraction, asserted to be in 0..7
 * @param y      vertical fraction, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int inv_x = 8 - x;
    const int inv_y = 8 - y;
    /* Weight names are fixed by chroma_mc(), which reads A..D directly. */
    const int A = inv_x * inv_y;
    const int B = x     * inv_y;
    const int C = inv_x * y;
    const int D = x     * y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
862 | |||
/**
 * Average a 4-pixel-wide block of h rows into dst: every destination pixel
 * becomes avg2() of its current value and the chroma_mc() sample from src.
 *
 * NOTE(review): the "align 8" annotation on dst is carried over unchanged;
 * confirm it is intended for this 4-wide variant.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; each row reads columns 0..4 of this row and the next
 * @param stride byte distance between rows, shared by dst and src
 * @param h      number of rows to process
 * @param x      horizontal fraction, asserted to be in 0..7
 * @param y      vertical fraction, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int inv_x = 8 - x;
    const int inv_y = 8 - y;
    /* Weight names are fixed by chroma_mc(), which reads A..D directly. */
    const int A = inv_x * inv_y;
    const int B = x     * inv_y;
    const int C = inv_x * y;
    const int D = x     * y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 4; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
884 | |||
885 | #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER | ||
886 | |||
/**
 * Resample one row horizontally with linear interpolation.
 *
 * The position is a 16.16 fixed-point value: the upper bits pick the source
 * pixel, the low 16 bits blend it with its right-hand neighbour.
 *
 * @param dst     output row, count bytes are written
 * @param src     input row; src[pos >> 16] and the following byte are read
 * @param offset  starting position in 16.16 fixed point
 * @param advance amount added to the position after every output pixel
 * @param count   number of output pixels
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    for (; count != 0; count--, offset += advance) {
        const uint8_t *tap   = src + (offset >> 16);
        const int      frac  = offset & 0xFFFF;
        const int      left  = tap[0];
        const int      right = tap[1];

        *dst++ = left + ((right - left) * frac >> 16);
    }
}
897 | |||
898 | ✗ | static av_always_inline void sprite_v_template(uint8_t *dst, | |
899 | const uint8_t *src1a, | ||
900 | const uint8_t *src1b, | ||
901 | int offset1, | ||
902 | int two_sprites, | ||
903 | const uint8_t *src2a, | ||
904 | const uint8_t *src2b, | ||
905 | int offset2, | ||
906 | int alpha, int scaled, | ||
907 | int width) | ||
908 | { | ||
909 | int a1, b1, a2, b2; | ||
910 | ✗ | while (width--) { | |
911 | ✗ | a1 = *src1a++; | |
912 | ✗ | if (scaled) { | |
913 | ✗ | b1 = *src1b++; | |
914 | ✗ | a1 = a1 + ((b1 - a1) * offset1 >> 16); | |
915 | } | ||
916 | ✗ | if (two_sprites) { | |
917 | ✗ | a2 = *src2a++; | |
918 | ✗ | if (scaled > 1) { | |
919 | ✗ | b2 = *src2b++; | |
920 | ✗ | a2 = a2 + ((b2 - a2) * offset2 >> 16); | |
921 | } | ||
922 | ✗ | a1 = a1 + ((a2 - a1) * alpha >> 16); | |
923 | } | ||
924 | ✗ | *dst++ = a1; | |
925 | } | ||
926 | ✗ | } | |
927 | |||
/**
 * Single sprite, vertically interpolated: each output byte blends src1a
 * towards src1b with weight offset. No second sprite is involved.
 */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    const int two_sprites = 0; /* second-sprite inputs are never read */
    const int scaled      = 1; /* interpolate the first sprite only */

    sprite_v_template(dst, src1a, src1b, offset,
                      two_sprites, NULL, NULL, 0, 0,
                      scaled, width);
}
934 | |||
/**
 * Two sprites, neither vertically interpolated: each output byte blends
 * src1a towards src2a with weight alpha.
 */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    const int two_sprites = 1; /* mix in the second sprite */
    const int scaled      = 0; /* src1b/src2b and their offsets are unused */

    sprite_v_template(dst, src1a, NULL, 0,
                      two_sprites, src2a, NULL, 0, alpha,
                      scaled, width);
}
941 | |||
/**
 * Two sprites, only the first vertically interpolated: src1a is blended
 * towards src1b with weight offset1, and the result is then blended towards
 * src2a with weight alpha.
 */
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    const int two_sprites = 1; /* mix in the second sprite */
    const int scaled      = 1; /* interpolate sprite 1, leave sprite 2 as is */

    sprite_v_template(dst, src1a, src1b, offset1,
                      two_sprites, src2a, NULL, 0, alpha,
                      scaled, width);
}
952 | |||
/**
 * Two sprites, both vertically interpolated: src1a/src1b are blended with
 * weight offset1, src2a/src2b with weight offset2, and the two results are
 * then blended with weight alpha.
 */
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha,
                                       int width)
{
    const int two_sprites = 1; /* mix in the second sprite */
    const int scaled      = 2; /* > 1 enables interpolation of sprite 2 too */

    sprite_v_template(dst, src1a, src1b, offset1,
                      two_sprites, src2a, src2b, offset2, alpha,
                      scaled, width);
}
966 | |||
967 | #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ | ||
968 | #define FN_ASSIGN(X, Y) \ | ||
969 | dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = put_vc1_mspel_mc##X##Y##_c; \ | ||
970 | dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = put_vc1_mspel_mc##X##Y##_16_c; \ | ||
971 | dsp->avg_vc1_mspel_pixels_tab[1][X+4*Y] = avg_vc1_mspel_mc##X##Y##_c; \ | ||
972 | dsp->avg_vc1_mspel_pixels_tab[0][X+4*Y] = avg_vc1_mspel_mc##X##Y##_16_c | ||
973 | |||
974 | 107 | av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) | |
975 | { | ||
976 | 107 | dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; | |
977 | 107 | dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | |
978 | 107 | dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | |
979 | 107 | dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | |
980 | 107 | dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c; | |
981 | 107 | dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c; | |
982 | 107 | dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c; | |
983 | 107 | dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c; | |
984 | |||
985 | 107 | dsp->vc1_h_overlap = vc1_h_overlap_c; | |
986 | 107 | dsp->vc1_v_overlap = vc1_v_overlap_c; | |
987 | 107 | dsp->vc1_h_s_overlap = vc1_h_s_overlap_c; | |
988 | 107 | dsp->vc1_v_s_overlap = vc1_v_s_overlap_c; | |
989 | |||
990 | 107 | dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; | |
991 | 107 | dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; | |
992 | 107 | dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; | |
993 | 107 | dsp->vc1_h_loop_filter8 = vc1_h_loop_filter8_c; | |
994 | 107 | dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c; | |
995 | 107 | dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c; | |
996 | |||
997 | 107 | dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c; | |
998 | 107 | dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c; | |
999 | 107 | dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c; | |
1000 | 107 | dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c; | |
1001 | 107 | FN_ASSIGN(0, 1); | |
1002 | 107 | FN_ASSIGN(0, 2); | |
1003 | 107 | FN_ASSIGN(0, 3); | |
1004 | |||
1005 | 107 | FN_ASSIGN(1, 0); | |
1006 | 107 | FN_ASSIGN(1, 1); | |
1007 | 107 | FN_ASSIGN(1, 2); | |
1008 | 107 | FN_ASSIGN(1, 3); | |
1009 | |||
1010 | 107 | FN_ASSIGN(2, 0); | |
1011 | 107 | FN_ASSIGN(2, 1); | |
1012 | 107 | FN_ASSIGN(2, 2); | |
1013 | 107 | FN_ASSIGN(2, 3); | |
1014 | |||
1015 | 107 | FN_ASSIGN(3, 0); | |
1016 | 107 | FN_ASSIGN(3, 1); | |
1017 | 107 | FN_ASSIGN(3, 2); | |
1018 | 107 | FN_ASSIGN(3, 3); | |
1019 | |||
1020 | 107 | dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c; | |
1021 | 107 | dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c; | |
1022 | 107 | dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c; | |
1023 | 107 | dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = avg_no_rnd_vc1_chroma_mc4_c; | |
1024 | |||
1025 | #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER | ||
1026 | 107 | dsp->sprite_h = sprite_h_c; | |
1027 | 107 | dsp->sprite_v_single = sprite_v_single_c; | |
1028 | 107 | dsp->sprite_v_double_noscale = sprite_v_double_noscale_c; | |
1029 | 107 | dsp->sprite_v_double_onescale = sprite_v_double_onescale_c; | |
1030 | 107 | dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c; | |
1031 | #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ | ||
1032 | |||
1033 | 107 | dsp->startcode_find_candidate = ff_startcode_find_candidate_c; | |
1034 | 107 | dsp->vc1_unescape_buffer = vc1_unescape_buffer; | |
1035 | |||
1036 | #if ARCH_AARCH64 | ||
1037 | ff_vc1dsp_init_aarch64(dsp); | ||
1038 | #elif ARCH_ARM | ||
1039 | ff_vc1dsp_init_arm(dsp); | ||
1040 | #elif ARCH_PPC | ||
1041 | ff_vc1dsp_init_ppc(dsp); | ||
1042 | #elif ARCH_RISCV | ||
1043 | ff_vc1dsp_init_riscv(dsp); | ||
1044 | #elif ARCH_X86 | ||
1045 | 107 | ff_vc1dsp_init_x86(dsp); | |
1046 | #elif ARCH_MIPS | ||
1047 | ff_vc1dsp_init_mips(dsp); | ||
1048 | #elif ARCH_LOONGARCH | ||
1049 | ff_vc1dsp_init_loongarch(dsp); | ||
1050 | #endif | ||
1051 | 107 | } | |
1052 |