Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2004 The FFmpeg project | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file | ||
23 | * Standard C DSP-oriented functions cribbed from the original VP3 | ||
24 | * source code. | ||
25 | */ | ||
26 | |||
27 | #include <string.h> | ||
28 | |||
29 | #include "config.h" | ||
30 | #include "libavutil/attributes.h" | ||
31 | #include "libavutil/common.h" | ||
32 | #include "libavutil/internal.h" | ||
33 | #include "libavutil/intreadwrite.h" | ||
34 | #include "libavutil/avassert.h" | ||
35 | |||
36 | #include "rnd_avg.h" | ||
37 | #include "vp3dsp.h" | ||
38 | |||
/*
 * Fixed-point constants for the inverse DCT.
 *
 * Each xCkSj value equals round(65536 * cos(k * pi / 16)), i.e. a cosine
 * expressed with 16 fractional bits. IdctAdjustBeforeShift is the rounding
 * term used by the DC-only shortcut in idct().
 */
enum {
    IdctAdjustBeforeShift = 8,
    xC1S7                 = 64277,
    xC2S6                 = 60547,
    xC3S5                 = 54491,
    xC4S4                 = 46341,
    xC5S3                 = 36410,
    xC6S2                 = 25080,
    xC7S1                 = 12785
};

/*
 * Multiply a by b in the SUINT type (declared outside this file), convert the
 * product to int and drop the low 16 bits. With the constants above as a,
 * this scales b by a 16-bit fraction.
 */
#define M(a, b) ((int)((SUINT)(a) * (b)) >> 16)
49 | |||
50 | 1496709 | static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, | |
51 | int16_t *input, int type) | ||
52 | { | ||
53 | 1496709 | int16_t *ip = input; | |
54 | |||
55 | int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | ||
56 | int Ed, Gd, Add, Bdd, Fd, Hd; | ||
57 | |||
58 | int i; | ||
59 | |||
60 | /* Inverse DCT on the rows now */ | ||
61 |
2/2✓ Branch 0 taken 11973672 times.
✓ Branch 1 taken 1496709 times.
|
13470381 | for (i = 0; i < 8; i++) { |
62 | /* Check for non-zero values */ | ||
63 | 11973672 | if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | | |
64 |
2/2✓ Branch 0 taken 2241308 times.
✓ Branch 1 taken 9732364 times.
|
11973672 | ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) { |
65 | 2241308 | A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]); | |
66 | 2241308 | B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]); | |
67 | 2241308 | C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]); | |
68 | 2241308 | D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]); | |
69 | |||
70 | 2241308 | Ad = M(xC4S4, (A - C)); | |
71 | 2241308 | Bd = M(xC4S4, (B - D)); | |
72 | |||
73 | 2241308 | Cd = A + C; | |
74 | 2241308 | Dd = B + D; | |
75 | |||
76 | 2241308 | E = M(xC4S4, (ip[0 * 8] + ip[4 * 8])); | |
77 | 2241308 | F = M(xC4S4, (ip[0 * 8] - ip[4 * 8])); | |
78 | |||
79 | 2241308 | G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]); | |
80 | 2241308 | H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]); | |
81 | |||
82 | 2241308 | Ed = E - G; | |
83 | 2241308 | Gd = E + G; | |
84 | |||
85 | 2241308 | Add = F + Ad; | |
86 | 2241308 | Bdd = Bd - H; | |
87 | |||
88 | 2241308 | Fd = F - Ad; | |
89 | 2241308 | Hd = Bd + H; | |
90 | |||
91 | /* Final sequence of operations over-write original inputs. */ | ||
92 | 2241308 | ip[0 * 8] = Gd + Cd; | |
93 | 2241308 | ip[7 * 8] = Gd - Cd; | |
94 | |||
95 | 2241308 | ip[1 * 8] = Add + Hd; | |
96 | 2241308 | ip[2 * 8] = Add - Hd; | |
97 | |||
98 | 2241308 | ip[3 * 8] = Ed + Dd; | |
99 | 2241308 | ip[4 * 8] = Ed - Dd; | |
100 | |||
101 | 2241308 | ip[5 * 8] = Fd + Bdd; | |
102 | 2241308 | ip[6 * 8] = Fd - Bdd; | |
103 | } | ||
104 | |||
105 | 11973672 | ip += 1; /* next row */ | |
106 | } | ||
107 | |||
108 | 1496709 | ip = input; | |
109 | |||
110 |
2/2✓ Branch 0 taken 11973672 times.
✓ Branch 1 taken 1496709 times.
|
13470381 | for (i = 0; i < 8; i++) { |
111 | /* Check for non-zero values (bitwise or faster than ||) */ | ||
112 | 11973672 | if (ip[1] | ip[2] | ip[3] | | |
113 |
2/2✓ Branch 0 taken 3886627 times.
✓ Branch 1 taken 8087045 times.
|
11973672 | ip[4] | ip[5] | ip[6] | ip[7]) { |
114 | 3886627 | A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); | |
115 | 3886627 | B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); | |
116 | 3886627 | C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); | |
117 | 3886627 | D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); | |
118 | |||
119 | 3886627 | Ad = M(xC4S4, (A - C)); | |
120 | 3886627 | Bd = M(xC4S4, (B - D)); | |
121 | |||
122 | 3886627 | Cd = A + C; | |
123 | 3886627 | Dd = B + D; | |
124 | |||
125 | 3886627 | E = M(xC4S4, (ip[0] + ip[4])) + 8; | |
126 | 3886627 | F = M(xC4S4, (ip[0] - ip[4])) + 8; | |
127 | |||
128 |
2/2✓ Branch 0 taken 741480 times.
✓ Branch 1 taken 3145147 times.
|
3886627 | if (type == 1) { // HACK |
129 | 741480 | E += 16 * 128; | |
130 | 741480 | F += 16 * 128; | |
131 | } | ||
132 | |||
133 | 3886627 | G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); | |
134 | 3886627 | H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); | |
135 | |||
136 | 3886627 | Ed = E - G; | |
137 | 3886627 | Gd = E + G; | |
138 | |||
139 | 3886627 | Add = F + Ad; | |
140 | 3886627 | Bdd = Bd - H; | |
141 | |||
142 | 3886627 | Fd = F - Ad; | |
143 | 3886627 | Hd = Bd + H; | |
144 | |||
145 | /* Final sequence of operations over-write original inputs. */ | ||
146 |
2/2✓ Branch 0 taken 741480 times.
✓ Branch 1 taken 3145147 times.
|
3886627 | if (type == 1) { |
147 | 741480 | dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); | |
148 | 741480 | dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); | |
149 | |||
150 | 741480 | dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); | |
151 | 741480 | dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); | |
152 | |||
153 | 741480 | dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); | |
154 | 741480 | dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); | |
155 | |||
156 | 741480 | dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); | |
157 | 741480 | dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); | |
158 | } else { | ||
159 | 3145147 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); | |
160 | 3145147 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); | |
161 | |||
162 | 3145147 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); | |
163 | 3145147 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); | |
164 | |||
165 | 3145147 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); | |
166 | 3145147 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); | |
167 | |||
168 | 3145147 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); | |
169 | 3145147 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); | |
170 | } | ||
171 | } else { | ||
172 |
2/2✓ Branch 0 taken 2034952 times.
✓ Branch 1 taken 6052093 times.
|
8087045 | if (type == 1) { |
173 | 2034952 | dst[0*stride] = | |
174 | 2034952 | dst[1*stride] = | |
175 | 2034952 | dst[2*stride] = | |
176 | 2034952 | dst[3*stride] = | |
177 | 2034952 | dst[4*stride] = | |
178 | 2034952 | dst[5*stride] = | |
179 | 2034952 | dst[6*stride] = | |
180 | 2034952 | dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20)); | |
181 | } else { | ||
182 |
2/2✓ Branch 0 taken 1482043 times.
✓ Branch 1 taken 4570050 times.
|
6052093 | if (ip[0]) { |
183 | 1482043 | int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20; | |
184 | 1482043 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v); | |
185 | 1482043 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v); | |
186 | 1482043 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v); | |
187 | 1482043 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v); | |
188 | 1482043 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v); | |
189 | 1482043 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v); | |
190 | 1482043 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v); | |
191 | 1482043 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v); | |
192 | } | ||
193 | } | ||
194 | } | ||
195 | |||
196 | 11973672 | ip += 8; /* next column */ | |
197 | 11973672 | dst++; | |
198 | } | ||
199 | 1496709 | } | |
200 | |||
201 | 404243 | static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride, | |
202 | int16_t *input, int type) | ||
203 | { | ||
204 | 404243 | int16_t *ip = input; | |
205 | |||
206 | int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | ||
207 | int Ed, Gd, Add, Bdd, Fd, Hd; | ||
208 | |||
209 | int i; | ||
210 | |||
211 | /* Inverse DCT on the rows now */ | ||
212 |
2/2✓ Branch 0 taken 1616972 times.
✓ Branch 1 taken 404243 times.
|
2021215 | for (i = 0; i < 4; i++) { |
213 | /* Check for non-zero values */ | ||
214 |
2/2✓ Branch 0 taken 110601 times.
✓ Branch 1 taken 1506371 times.
|
1616972 | if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) { |
215 | 110601 | A = M(xC1S7, ip[1 * 8]); | |
216 | 110601 | B = M(xC7S1, ip[1 * 8]); | |
217 | 110601 | C = M(xC3S5, ip[3 * 8]); | |
218 | 110601 | D = -M(xC5S3, ip[3 * 8]); | |
219 | |||
220 | 110601 | Ad = M(xC4S4, (A - C)); | |
221 | 110601 | Bd = M(xC4S4, (B - D)); | |
222 | |||
223 | 110601 | Cd = A + C; | |
224 | 110601 | Dd = B + D; | |
225 | |||
226 | 110601 | E = M(xC4S4, ip[0 * 8]); | |
227 | 110601 | F = E; | |
228 | |||
229 | 110601 | G = M(xC2S6, ip[2 * 8]); | |
230 | 110601 | H = M(xC6S2, ip[2 * 8]); | |
231 | |||
232 | 110601 | Ed = E - G; | |
233 | 110601 | Gd = E + G; | |
234 | |||
235 | 110601 | Add = F + Ad; | |
236 | 110601 | Bdd = Bd - H; | |
237 | |||
238 | 110601 | Fd = F - Ad; | |
239 | 110601 | Hd = Bd + H; | |
240 | |||
241 | /* Final sequence of operations over-write original inputs */ | ||
242 | 110601 | ip[0 * 8] = Gd + Cd; | |
243 | 110601 | ip[7 * 8] = Gd - Cd; | |
244 | |||
245 | 110601 | ip[1 * 8] = Add + Hd; | |
246 | 110601 | ip[2 * 8] = Add - Hd; | |
247 | |||
248 | 110601 | ip[3 * 8] = Ed + Dd; | |
249 | 110601 | ip[4 * 8] = Ed - Dd; | |
250 | |||
251 | 110601 | ip[5 * 8] = Fd + Bdd; | |
252 | 110601 | ip[6 * 8] = Fd - Bdd; | |
253 | |||
254 | } | ||
255 | |||
256 | 1616972 | ip += 1; | |
257 | } | ||
258 | |||
259 | 404243 | ip = input; | |
260 | |||
261 |
2/2✓ Branch 0 taken 3233944 times.
✓ Branch 1 taken 404243 times.
|
3638187 | for (i = 0; i < 8; i++) { |
262 | /* Check for non-zero values (bitwise or faster than ||) */ | ||
263 |
2/2✓ Branch 0 taken 696508 times.
✓ Branch 1 taken 2537436 times.
|
3233944 | if (ip[0] | ip[1] | ip[2] | ip[3]) { |
264 | 696508 | A = M(xC1S7, ip[1]); | |
265 | 696508 | B = M(xC7S1, ip[1]); | |
266 | 696508 | C = M(xC3S5, ip[3]); | |
267 | 696508 | D = -M(xC5S3, ip[3]); | |
268 | |||
269 | 696508 | Ad = M(xC4S4, (A - C)); | |
270 | 696508 | Bd = M(xC4S4, (B - D)); | |
271 | |||
272 | 696508 | Cd = A + C; | |
273 | 696508 | Dd = B + D; | |
274 | |||
275 | 696508 | E = M(xC4S4, ip[0]); | |
276 |
2/2✓ Branch 0 taken 227082 times.
✓ Branch 1 taken 469426 times.
|
696508 | if (type == 1) |
277 | 227082 | E += 16 * 128; | |
278 | 696508 | F = E; | |
279 | |||
280 | 696508 | G = M(xC2S6, ip[2]); | |
281 | 696508 | H = M(xC6S2, ip[2]); | |
282 | |||
283 | 696508 | Ed = E - G; | |
284 | 696508 | Gd = E + G; | |
285 | |||
286 | 696508 | Add = F + Ad; | |
287 | 696508 | Bdd = Bd - H; | |
288 | |||
289 | 696508 | Fd = F - Ad; | |
290 | 696508 | Hd = Bd + H; | |
291 | |||
292 | 696508 | Gd += 8; | |
293 | 696508 | Add += 8; | |
294 | 696508 | Ed += 8; | |
295 | 696508 | Fd += 8; | |
296 | |||
297 | /* Final sequence of operations over-write original inputs. */ | ||
298 |
2/2✓ Branch 0 taken 227082 times.
✓ Branch 1 taken 469426 times.
|
696508 | if (type == 1) { |
299 | 227082 | dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); | |
300 | 227082 | dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); | |
301 | |||
302 | 227082 | dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); | |
303 | 227082 | dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); | |
304 | |||
305 | 227082 | dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); | |
306 | 227082 | dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); | |
307 | |||
308 | 227082 | dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); | |
309 | 227082 | dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); | |
310 | } else { | ||
311 | 469426 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); | |
312 | 469426 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); | |
313 | |||
314 | 469426 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); | |
315 | 469426 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); | |
316 | |||
317 | 469426 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); | |
318 | 469426 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); | |
319 | |||
320 | 469426 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); | |
321 | 469426 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); | |
322 | } | ||
323 | } else { | ||
324 |
2/2✓ Branch 0 taken 52470 times.
✓ Branch 1 taken 2484966 times.
|
2537436 | if (type == 1) { |
325 | 52470 | dst[0*stride] = | |
326 | 52470 | dst[1*stride] = | |
327 | 52470 | dst[2*stride] = | |
328 | 52470 | dst[3*stride] = | |
329 | 52470 | dst[4*stride] = | |
330 | 52470 | dst[5*stride] = | |
331 | 52470 | dst[6*stride] = | |
332 | 52470 | dst[7*stride] = 128; | |
333 | } | ||
334 | } | ||
335 | |||
336 | 3233944 | ip += 8; | |
337 | 3233944 | dst++; | |
338 | } | ||
339 | 404243 | } | |
340 | |||
/*
 * Run idct10() in store mode (type 1), so dest is overwritten with the
 * transformed block, then zero all 64 coefficients of block.
 */
void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int store_mode = 1;

    idct10(dest, stride, block, store_mode);
    memset(block, 0, 64 * sizeof(block[0]));
}
346 | |||
/*
 * Run idct10() in add mode (type 2), so the transformed block is added to
 * the bytes already in dest, then zero all 64 coefficients of block.
 */
void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int add_mode = 2;

    idct10(dest, stride, block, add_mode);
    memset(block, 0, 64 * sizeof(block[0]));
}
352 | |||
/*
 * Run idct() in store mode (type 1), so dest is overwritten with the
 * transformed block, then zero all 64 coefficients of block.
 */
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    const int store_mode = 1;

    idct(dest, stride, block, store_mode);
    memset(block, 0, 64 * sizeof(block[0]));
}
359 | |||
/*
 * Run idct() in add mode (type 2), so the transformed block is added to the
 * bytes already in dest, then zero all 64 coefficients of block.
 */
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    const int add_mode = 2;

    idct(dest, stride, block, add_mode);
    memset(block, 0, 64 * sizeof(block[0]));
}
366 | |||
/*
 * Add a single value derived from block[0] to an 8x8 area of dest.
 *
 * The value is (block[0] + 15) >> 5. Each of the 8 rows starts stride bytes
 * after the previous one, and every sum is clipped with av_clip_uint8().
 * Only block[0] is read, and it is reset to 0 before returning.
 */
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                              int16_t *block /* align 16 */)
{
    const int dc = (block[0] + 15) >> 5;
    int row, col;

    for (row = 0; row < 8; row++, dest += stride)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);

    block[0] = 0;
}
385 | |||
386 | 465940 | static av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |
387 | int *bounding_values, int count) | ||
388 | { | ||
389 | unsigned char *end; | ||
390 | int filter_value; | ||
391 | 465940 | const ptrdiff_t nstride = -stride; | |
392 | |||
393 |
2/2✓ Branch 0 taken 4048776 times.
✓ Branch 1 taken 465940 times.
|
4514716 | for (end = first_pixel + count; first_pixel < end; first_pixel++) { |
394 | 4048776 | filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + | |
395 | 4048776 | (first_pixel[0] - first_pixel[nstride]) * 3; | |
396 | 4048776 | filter_value = bounding_values[(filter_value + 4) >> 3]; | |
397 | |||
398 | 4048776 | first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value); | |
399 | 4048776 | first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value); | |
400 | } | ||
401 | 465940 | } | |
402 | |||
403 | 492808 | static av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |
404 | int *bounding_values, int count) | ||
405 | { | ||
406 | unsigned char *end; | ||
407 | int filter_value; | ||
408 | |||
409 |
2/2✓ Branch 0 taken 4307536 times.
✓ Branch 1 taken 492808 times.
|
4800344 | for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) { |
410 | 4307536 | filter_value = (first_pixel[-2] - first_pixel[1]) + | |
411 | 4307536 | (first_pixel[ 0] - first_pixel[-1]) * 3; | |
412 | 4307536 | filter_value = bounding_values[(filter_value + 4) >> 3]; | |
413 | |||
414 | 4307536 | first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value); | |
415 | 4307536 | first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); | |
416 | } | ||
417 | 492808 | } | |
418 | |||
/*
 * Define a loop-filter entry point with a fixed length.
 *
 * The generated function is named <pfx>_<dir>_loop_filter_<len><sfx> and
 * forwards to vp3_<dir>_loop_filter_c() with `len` as the count argument.
 * Any storage class (e.g. static) is written in front of the macro use.
 */
#define LOOP_FILTER(pfx, sfx, dir, len)                                     \
void pfx##_##dir##_loop_filter_##len##sfx(uint8_t *first_pixel,             \
                                          ptrdiff_t stride,                 \
                                          int *bounding_values)             \
{                                                                           \
    vp3_##dir##_loop_filter_c(first_pixel, stride, bounding_values, len);   \
}

/* File-local 8-long variants: vp3_v_loop_filter_8_c, vp3_h_loop_filter_8_c. */
static LOOP_FILTER(vp3,_c, v, 8)
static LOOP_FILTER(vp3,_c, h, 8)
/* Exported 12-long variants: ff_vp3dsp_v_loop_filter_12, ff_vp3dsp_h_loop_filter_12. */
LOOP_FILTER(ff_vp3dsp, , v, 12)
LOOP_FILTER(ff_vp3dsp, , h, 12)
430 | |||
/*
 * Combine two sources into dst, 8 bytes per row for h rows.
 *
 * Row i of every buffer starts at offset i * stride. Each row is handled as
 * two 32-bit words: both sources are read with AV_RN32, merged with
 * no_rnd_avg32(), and the result is stored with AV_WN32A.
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * stride];
        const uint8_t *in2 = &src2[row * stride];
        uint8_t *out       = &dst[row * stride];

        /* Two 4-byte halves per row, each read fully before it is written. */
        for (off = 0; off < 8; off += 4) {
            const uint32_t a = AV_RN32(in1 + off);
            const uint32_t b = AV_RN32(in2 + off);

            AV_WN32A(out + off, no_rnd_avg32(a, b));
        }
    }
}
447 | |||
448 | 55 | av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) | |
449 | { | ||
450 | 55 | c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2; | |
451 | |||
452 | 55 | c->idct_put = vp3_idct_put_c; | |
453 | 55 | c->idct_add = vp3_idct_add_c; | |
454 | 55 | c->idct_dc_add = vp3_idct_dc_add_c; | |
455 | 55 | c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c; | |
456 | 55 | c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c; | |
457 | |||
458 | #if ARCH_ARM | ||
459 | ff_vp3dsp_init_arm(c, flags); | ||
460 | #elif ARCH_PPC | ||
461 | ff_vp3dsp_init_ppc(c, flags); | ||
462 | #elif ARCH_X86 | ||
463 | 55 | ff_vp3dsp_init_x86(c, flags); | |
464 | #elif ARCH_MIPS | ||
465 | ff_vp3dsp_init_mips(c, flags); | ||
466 | #endif | ||
467 | 55 | } | |
468 | |||
/*
 * Build the loop-filter lookup table for a given filter limit.
 *
 * bounding_values_array - destination table. It must have room for at least
 *                         258 ints: elements 0..255 are cleared and then
 *                         filled, and elements 256 and 257 are written
 *                         unconditionally at the end.
 * filter_limit          - must be in 0..127; anything else trips
 *                         av_assert0().
 *
 * The table is addressed relative to element 127 ("center"), so the loop
 * filters can index it with values from -127 to 128:
 *   - center[+/-x] = +/-x                    for 0 <= x < filter_limit
 *   - center[+/-x] = +/-(2 * filter_limit - x)
 *                                            for filter_limit <= x, until the
 *                                            value reaches 0 or x reaches 128
 *   - center[128] receives the remaining positive value, if any
 *   - everything else stays 0
 * center[129] and center[130] both hold filter_limit * 0x02020202U, i.e.
 * 2 * filter_limit replicated into each byte of a 32-bit word.
 */
void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit)
{
    int *const center = bounding_values_array + 127;
    int pos;
    int ramp;

    /* The unsigned comparison also rejects negative limits. */
    av_assert0(filter_limit < 128U);

    /* Only the 256 indexable entries are cleared; the two extra words are
     * always overwritten below. */
    memset(bounding_values_array, 0, 256 * sizeof(int));

    /* Identity section: corrections below the limit pass through unchanged. */
    for (pos = 0; pos < filter_limit; pos++) {
        center[-pos] = -pos;
        center[ pos] =  pos;
    }

    /* Falling section: from the limit back down towards zero. */
    ramp = filter_limit;
    for (pos = filter_limit; pos < 128 && ramp; pos++, ramp--) {
        center[ pos] =  ramp;
        center[-pos] = -ramp;
    }

    /* The ramp did not reach zero before the positive end of the table. */
    if (ramp)
        center[128] = ramp;

    center[129] = center[130] = filter_limit * 0x02020202U;
}
499 |