| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (C) 2004 The FFmpeg project | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | /** | ||
| 22 | * @file | ||
| 23 | * Standard C DSP-oriented functions cribbed from the original VP3 | ||
| 24 | * source code. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <string.h> | ||
| 28 | |||
| 29 | #include "config.h" | ||
| 30 | #include "libavutil/attributes.h" | ||
| 31 | #include "libavutil/common.h" | ||
| 32 | #include "libavutil/internal.h" | ||
| 33 | #include "libavutil/intreadwrite.h" | ||
| 34 | #include "libavutil/avassert.h" | ||
| 35 | |||
| 36 | #include "rnd_avg.h" | ||
| 37 | #include "vp3dsp.h" | ||
| 38 | |||
| 39 | #define IdctAdjustBeforeShift 8 /* rounding term: one half at the final >> 4 scale; used shifted left by 16 in the ip[0]-only path of idct() */ | ||
| 40 | #define xC1S7 64277 /* cos(1 * pi / 16) * 65536, rounded */ | ||
| 41 | #define xC2S6 60547 /* cos(2 * pi / 16) * 65536, rounded */ | ||
| 42 | #define xC3S5 54491 /* cos(3 * pi / 16) * 65536, rounded */ | ||
| 43 | #define xC4S4 46341 /* cos(4 * pi / 16) * 65536, rounded */ | ||
| 44 | #define xC5S3 36410 /* cos(5 * pi / 16) * 65536, rounded */ | ||
| 45 | #define xC6S2 25080 /* cos(6 * pi / 16) * 65536, rounded */ | ||
| 46 | #define xC7S1 12785 /* cos(7 * pi / 16) * 65536, rounded */ | ||
| 47 | |||
| 48 | #define M(a, b) ((int)((SUINT)(a) * (b)) >> 16) /* fixed-point multiply: the product is formed in SUINT (declared outside this file), converted to int, then shifted right by 16 to drop the 2^16 scale of the xC* constants */ | ||
| 49 | |||
| 50 | 1505764 | /* Full 8x8 inverse DCT done as two 1-D passes. The result is stored to dst when type == 1 and added to the existing dst pixels for any other type; both paths clip to 0..255 with av_clip_uint8(). */ static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, | |
| 51 | int16_t *input, int type) /* input: 64 coefficients, overwritten in place by pass 1; callers in this file pass type 1 (put) or 2 (add) */ | ||
| 52 | { | ||
| 53 | 1505764 | int16_t *ip = input; | |
| 54 | |||
| 55 | int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | ||
| 56 | int Ed, Gd, Add, Bdd, Fd, Hd; | ||
| 57 | |||
| 58 | int i; | ||
| 59 | |||
| 60 | /* Pass 1: 1-D inverse DCT, in place, over the eight coefficients ip[0 * 8] .. ip[7 * 8]; one iteration per start offset 0..7 (these groups are what the comments below call rows) */ | ||
| 61 |
2/2✓ Branch 0 taken 12046112 times.
✓ Branch 1 taken 1505764 times.
|
13551876 | for (i = 0; i < 8; i++) { |
| 62 | /* Check for non-zero values: when all eight are zero every output would be zero as well, so the group is left untouched */ | ||
| 63 | 12046112 | if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | | |
| 64 |
2/2✓ Branch 0 taken 2294188 times.
✓ Branch 1 taken 9751924 times.
|
12046112 | ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) { |
| 65 | 2294188 | A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]); /* A..D combine the odd-indexed inputs 1, 7, 3 and 5 */ | |
| 66 | 2294188 | B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]); | |
| 67 | 2294188 | C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]); | |
| 68 | 2294188 | D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]); | |
| 69 | |||
| 70 | 2294188 | Ad = M(xC4S4, (A - C)); | |
| 71 | 2294188 | Bd = M(xC4S4, (B - D)); | |
| 72 | |||
| 73 | 2294188 | Cd = A + C; | |
| 74 | 2294188 | Dd = B + D; | |
| 75 | |||
| 76 | 2294188 | E = M(xC4S4, (ip[0 * 8] + ip[4 * 8])); /* E, F combine inputs 0 and 4 */ | |
| 77 | 2294188 | F = M(xC4S4, (ip[0 * 8] - ip[4 * 8])); | |
| 78 | |||
| 79 | 2294188 | G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]); /* G, H combine inputs 2 and 6 */ | |
| 80 | 2294188 | H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]); | |
| 81 | |||
| 82 | 2294188 | Ed = E - G; | |
| 83 | 2294188 | Gd = E + G; | |
| 84 | |||
| 85 | 2294188 | Add = F + Ad; | |
| 86 | 2294188 | Bdd = Bd - H; | |
| 87 | |||
| 88 | 2294188 | Fd = F - Ad; | |
| 89 | 2294188 | Hd = Bd + H; | |
| 90 | |||
| 91 | /* Final sequence of operations over-write original inputs. */ | ||
| 92 | 2294188 | ip[0 * 8] = Gd + Cd; | |
| 93 | 2294188 | ip[7 * 8] = Gd - Cd; | |
| 94 | |||
| 95 | 2294188 | ip[1 * 8] = Add + Hd; | |
| 96 | 2294188 | ip[2 * 8] = Add - Hd; | |
| 97 | |||
| 98 | 2294188 | ip[3 * 8] = Ed + Dd; | |
| 99 | 2294188 | ip[4 * 8] = Ed - Dd; | |
| 100 | |||
| 101 | 2294188 | ip[5 * 8] = Fd + Bdd; | |
| 102 | 2294188 | ip[6 * 8] = Fd - Bdd; | |
| 103 | } | ||
| 104 | |||
| 105 | 12046112 | ip += 1; /* next row */ | |
| 106 | } | ||
| 107 | |||
| 108 | 1505764 | ip = input; /* rewind to the first coefficient for pass 2 */ | |
| 109 | |||
| 110 |
2/2✓ Branch 0 taken 12046112 times.
✓ Branch 1 taken 1505764 times.
|
13551876 | for (i = 0; i < 8; i++) { /* Pass 2: 1-D inverse DCT over ip[0] .. ip[7]; each iteration produces the eight pixels dst[0 * stride] .. dst[7 * stride] */ |
| 111 | /* Check for non-zero values (bitwise or faster than ||); ip[0] is not tested because the else branch below handles the case where only ip[0] may be set */ | ||
| 112 | 12046112 | if (ip[1] | ip[2] | ip[3] | | |
| 113 |
2/2✓ Branch 0 taken 3954126 times.
✓ Branch 1 taken 8091986 times.
|
12046112 | ip[4] | ip[5] | ip[6] | ip[7]) { |
| 114 | 3954126 | A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); | |
| 115 | 3954126 | B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); | |
| 116 | 3954126 | C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); | |
| 117 | 3954126 | D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); | |
| 118 | |||
| 119 | 3954126 | Ad = M(xC4S4, (A - C)); | |
| 120 | 3954126 | Bd = M(xC4S4, (B - D)); | |
| 121 | |||
| 122 | 3954126 | Cd = A + C; | |
| 123 | 3954126 | Dd = B + D; | |
| 124 | |||
| 125 | 3954126 | E = M(xC4S4, (ip[0] + ip[4])) + 8; /* + 8 rounds the final >> 4; every output includes exactly one of E and F */ | |
| 126 | 3954126 | F = M(xC4S4, (ip[0] - ip[4])) + 8; | |
| 127 | |||
| 128 |
2/2✓ Branch 0 taken 742472 times.
✓ Branch 1 taken 3211654 times.
|
3954126 | if (type == 1) { // HACK |
| 129 | 742472 | E += 16 * 128; /* becomes a + 128 bias after the final >> 4 */ | |
| 130 | 742472 | F += 16 * 128; | |
| 131 | } | ||
| 132 | |||
| 133 | 3954126 | G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); | |
| 134 | 3954126 | H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); | |
| 135 | |||
| 136 | 3954126 | Ed = E - G; | |
| 137 | 3954126 | Gd = E + G; | |
| 138 | |||
| 139 | 3954126 | Add = F + Ad; | |
| 140 | 3954126 | Bdd = Bd - H; | |
| 141 | |||
| 142 | 3954126 | Fd = F - Ad; | |
| 143 | 3954126 | Hd = Bd + H; | |
| 144 | |||
| 145 | /* Final sequence of operations: results go to dst (stored for type == 1, otherwise added to the existing pixel); the coefficients are not written in this pass. */ | ||
| 146 |
2/2✓ Branch 0 taken 742472 times.
✓ Branch 1 taken 3211654 times.
|
3954126 | if (type == 1) { |
| 147 | 742472 | dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); | |
| 148 | 742472 | dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); | |
| 149 | |||
| 150 | 742472 | dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); | |
| 151 | 742472 | dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); | |
| 152 | |||
| 153 | 742472 | dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); | |
| 154 | 742472 | dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); | |
| 155 | |||
| 156 | 742472 | dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); | |
| 157 | 742472 | dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); | |
| 158 | } else { | ||
| 159 | 3211654 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); | |
| 160 | 3211654 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); | |
| 161 | |||
| 162 | 3211654 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); | |
| 163 | 3211654 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); | |
| 164 | |||
| 165 | 3211654 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); | |
| 166 | 3211654 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); | |
| 167 | |||
| 168 | 3211654 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); | |
| 169 | 3211654 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); | |
| 170 | } | ||
| 171 | } else { /* ip[1] .. ip[7] are all zero: the eight outputs are equal and depend on ip[0] only */ | ||
| 172 |
2/2✓ Branch 0 taken 2034960 times.
✓ Branch 1 taken 6057026 times.
|
8091986 | if (type == 1) { |
| 173 | 2034960 | dst[0*stride] = | |
| 174 | 2034960 | dst[1*stride] = | |
| 175 | 2034960 | dst[2*stride] = | |
| 176 | 2034960 | dst[3*stride] = | |
| 177 | 2034960 | dst[4*stride] = | |
| 178 | 2034960 | dst[5*stride] = | |
| 179 | 2034960 | dst[6*stride] = | |
| 180 | 2034960 | dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20)); /* >> 20 merges the >> 16 of M() with the final >> 4 */ | |
| 181 | } else { | ||
| 182 |
2/2✓ Branch 0 taken 1484827 times.
✓ Branch 1 taken 4572199 times.
|
6057026 | if (ip[0]) { /* with ip[0] == 0 the value v below would be 0, so nothing needs adding */ |
| 183 | 1484827 | int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20; | |
| 184 | 1484827 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v); | |
| 185 | 1484827 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v); | |
| 186 | 1484827 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v); | |
| 187 | 1484827 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v); | |
| 188 | 1484827 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v); | |
| 189 | 1484827 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v); | |
| 190 | 1484827 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v); | |
| 191 | 1484827 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v); | |
| 192 | } | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | 12046112 | ip += 8; /* next column */ | |
| 197 | 12046112 | dst++; | |
| 198 | } | ||
| 199 | 1505764 | } | |
| 200 | |||
| 201 | 405935 | /* Reduced form of idct(): each 1-D pass reads only inputs 0..3, and pass 1 runs for start offsets 0..3 only. The other coefficients are never read, so callers presumably guarantee they are zero (TODO confirm against the callers). type has the same meaning as in idct(). */ static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride, | |
| 202 | int16_t *input, int type) /* input: 64 coefficients, partly overwritten by pass 1; callers in this file pass type 1 (put) or 2 (add) */ | ||
| 203 | { | ||
| 204 | 405935 | int16_t *ip = input; | |
| 205 | |||
| 206 | int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | ||
| 207 | int Ed, Gd, Add, Bdd, Fd, Hd; | ||
| 208 | |||
| 209 | int i; | ||
| 210 | |||
| 211 | /* Pass 1: 1-D inverse DCT over ip[0 * 8] .. ip[3 * 8], writing all eight outputs ip[0 * 8] .. ip[7 * 8]; start offsets 0..3 only */ | ||
| 212 |
2/2✓ Branch 0 taken 1623740 times.
✓ Branch 1 taken 405935 times.
|
2029675 | for (i = 0; i < 4; i++) { |
| 213 | /* Check for non-zero values */ | ||
| 214 |
2/2✓ Branch 0 taken 111729 times.
✓ Branch 1 taken 1512011 times.
|
1623740 | if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) { |
| 215 | 111729 | A = M(xC1S7, ip[1 * 8]); /* same butterfly as idct() with the terms for inputs 4..7 dropped */ | |
| 216 | 111729 | B = M(xC7S1, ip[1 * 8]); | |
| 217 | 111729 | C = M(xC3S5, ip[3 * 8]); | |
| 218 | 111729 | D = -M(xC5S3, ip[3 * 8]); | |
| 219 | |||
| 220 | 111729 | Ad = M(xC4S4, (A - C)); | |
| 221 | 111729 | Bd = M(xC4S4, (B - D)); | |
| 222 | |||
| 223 | 111729 | Cd = A + C; | |
| 224 | 111729 | Dd = B + D; | |
| 225 | |||
| 226 | 111729 | E = M(xC4S4, ip[0 * 8]); | |
| 227 | 111729 | F = E; /* input 4 is not used here, so F equals E */ | |
| 228 | |||
| 229 | 111729 | G = M(xC2S6, ip[2 * 8]); | |
| 230 | 111729 | H = M(xC6S2, ip[2 * 8]); | |
| 231 | |||
| 232 | 111729 | Ed = E - G; | |
| 233 | 111729 | Gd = E + G; | |
| 234 | |||
| 235 | 111729 | Add = F + Ad; | |
| 236 | 111729 | Bdd = Bd - H; | |
| 237 | |||
| 238 | 111729 | Fd = F - Ad; | |
| 239 | 111729 | Hd = Bd + H; | |
| 240 | |||
| 241 | /* Final sequence of operations over-write original inputs */ | ||
| 242 | 111729 | ip[0 * 8] = Gd + Cd; | |
| 243 | 111729 | ip[7 * 8] = Gd - Cd; | |
| 244 | |||
| 245 | 111729 | ip[1 * 8] = Add + Hd; | |
| 246 | 111729 | ip[2 * 8] = Add - Hd; | |
| 247 | |||
| 248 | 111729 | ip[3 * 8] = Ed + Dd; | |
| 249 | 111729 | ip[4 * 8] = Ed - Dd; | |
| 250 | |||
| 251 | 111729 | ip[5 * 8] = Fd + Bdd; | |
| 252 | 111729 | ip[6 * 8] = Fd - Bdd; | |
| 253 | |||
| 254 | } | ||
| 255 | |||
| 256 | 1623740 | ip += 1; | |
| 257 | } | ||
| 258 | |||
| 259 | 405935 | ip = input; /* rewind to the first coefficient for pass 2 */ | |
| 260 | |||
| 261 |
2/2✓ Branch 0 taken 3247480 times.
✓ Branch 1 taken 405935 times.
|
3653415 | for (i = 0; i < 8; i++) { /* Pass 2: 1-D inverse DCT over ip[0] .. ip[3]; each iteration produces the eight pixels dst[0 * stride] .. dst[7 * stride] */ |
| 262 | /* Check for non-zero values (bitwise or faster than ||) */ | ||
| 263 |
2/2✓ Branch 0 taken 703548 times.
✓ Branch 1 taken 2543932 times.
|
3247480 | if (ip[0] | ip[1] | ip[2] | ip[3]) { |
| 264 | 703548 | A = M(xC1S7, ip[1]); | |
| 265 | 703548 | B = M(xC7S1, ip[1]); | |
| 266 | 703548 | C = M(xC3S5, ip[3]); | |
| 267 | 703548 | D = -M(xC5S3, ip[3]); | |
| 268 | |||
| 269 | 703548 | Ad = M(xC4S4, (A - C)); | |
| 270 | 703548 | Bd = M(xC4S4, (B - D)); | |
| 271 | |||
| 272 | 703548 | Cd = A + C; | |
| 273 | 703548 | Dd = B + D; | |
| 274 | |||
| 275 | 703548 | E = M(xC4S4, ip[0]); | |
| 276 |
2/2✓ Branch 0 taken 227418 times.
✓ Branch 1 taken 476130 times.
|
703548 | if (type == 1) |
| 277 | 227418 | E += 16 * 128; /* becomes a + 128 bias after the final >> 4 */ | |
| 278 | 703548 | F = E; | |
| 279 | |||
| 280 | 703548 | G = M(xC2S6, ip[2]); | |
| 281 | 703548 | H = M(xC6S2, ip[2]); | |
| 282 | |||
| 283 | 703548 | Ed = E - G; | |
| 284 | 703548 | Gd = E + G; | |
| 285 | |||
| 286 | 703548 | Add = F + Ad; | |
| 287 | 703548 | Bdd = Bd - H; | |
| 288 | |||
| 289 | 703548 | Fd = F - Ad; | |
| 290 | 703548 | Hd = Bd + H; | |
| 291 | |||
| 292 | 703548 | Gd += 8; /* rounding for the final >> 4; every output below includes exactly one of Gd, Add, Ed, Fd */ | |
| 293 | 703548 | Add += 8; | |
| 294 | 703548 | Ed += 8; | |
| 295 | 703548 | Fd += 8; | |
| 296 | |||
| 297 | /* Final sequence of operations: results go to dst (stored for type == 1, otherwise added to the existing pixel); the coefficients are not written in this pass. */ | ||
| 298 |
2/2✓ Branch 0 taken 227418 times.
✓ Branch 1 taken 476130 times.
|
703548 | if (type == 1) { |
| 299 | 227418 | dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); | |
| 300 | 227418 | dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); | |
| 301 | |||
| 302 | 227418 | dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); | |
| 303 | 227418 | dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); | |
| 304 | |||
| 305 | 227418 | dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); | |
| 306 | 227418 | dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); | |
| 307 | |||
| 308 | 227418 | dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); | |
| 309 | 227418 | dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); | |
| 310 | } else { | ||
| 311 | 476130 | dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); | |
| 312 | 476130 | dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); | |
| 313 | |||
| 314 | 476130 | dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); | |
| 315 | 476130 | dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); | |
| 316 | |||
| 317 | 476130 | dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); | |
| 318 | 476130 | dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); | |
| 319 | |||
| 320 | 476130 | dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); | |
| 321 | 476130 | dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); | |
| 322 | } | ||
| 323 | } else { /* ip[0] .. ip[3] are all zero: put mode stores the bare 128 bias, any other mode leaves dst unchanged */ | ||
| 324 |
2/2✓ Branch 0 taken 52526 times.
✓ Branch 1 taken 2491406 times.
|
2543932 | if (type == 1) { |
| 325 | 52526 | dst[0*stride] = | |
| 326 | 52526 | dst[1*stride] = | |
| 327 | 52526 | dst[2*stride] = | |
| 328 | 52526 | dst[3*stride] = | |
| 329 | 52526 | dst[4*stride] = | |
| 330 | 52526 | dst[5*stride] = | |
| 331 | 52526 | dst[6*stride] = | |
| 332 | 52526 | dst[7*stride] = 128; | |
| 333 | } | ||
| 334 | } | ||
| 335 | |||
| 336 | 3247480 | ip += 8; | |
| 337 | 3247480 | dst++; | |
| 338 | } | ||
| 339 | 405935 | } | |
| 340 | |||
| 341 | 34993 | /* Put variant of the reduced IDCT: runs idct10() with type 1 (result stored to dest), then zeroes all 64 coefficients of block. */ void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
| 342 | { | ||
| 343 | 34993 | idct10(dest, stride, block, 1); | |
| 344 | 34993 | memset(block, 0, sizeof(*block) * 64); /* leave the coefficient block cleared on return */ | |
| 345 | 34993 | } | |
| 346 | |||
| 347 | 370942 | /* Add variant of the reduced IDCT: runs idct10() with type 2 (result added to the pixels already in dest), then zeroes all 64 coefficients of block. */ void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block) | |
| 348 | { | ||
| 349 | 370942 | idct10(dest, stride, block, 2); | |
| 350 | 370942 | memset(block, 0, sizeof(*block) * 64); /* leave the coefficient block cleared on return */ | |
| 351 | 370942 | } | |
| 352 | |||
| 353 | 347179 | /* Put variant of the full IDCT: runs idct() with type 1 (result stored to dest), then zeroes all 64 coefficients of block. Installed as idct_put by ff_vp3dsp_init(). */ static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |
| 354 | int16_t *block /* align 16 */) | ||
| 355 | { | ||
| 356 | 347179 | idct(dest, stride, block, 1); | |
| 357 | 347179 | memset(block, 0, sizeof(*block) * 64); /* leave the coefficient block cleared on return */ | |
| 358 | 347179 | } | |
| 359 | |||
| 360 | 1158585 | /* Add variant of the full IDCT: runs idct() with type 2 (result added to the pixels already in dest), then zeroes all 64 coefficients of block. Installed as idct_add by ff_vp3dsp_init(). */ static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |
| 361 | int16_t *block /* align 16 */) | ||
| 362 | { | ||
| 363 | 1158585 | idct(dest, stride, block, 2); | |
| 364 | 1158585 | memset(block, 0, sizeof(*block) * 64); /* leave the coefficient block cleared on return */ | |
| 365 | 1158585 | } | |
| 366 | |||
| 367 | 127567 | /* DC-only add: derives one value from block[0], adds it to each pixel of the 8x8 area at dest with clipping to 0..255, then clears block[0]. Only block[0] is read or written. Installed as idct_dc_add by ff_vp3dsp_init(). */ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |
| 368 | int16_t *block /* align 16 */) | ||
| 369 | { | ||
| 370 | 127567 | int i, dc = (block[0] + 15) >> 5; /* the same value is added to all 64 pixels */ | |
| 371 | |||
| 372 |
2/2✓ Branch 0 taken 1020536 times.
✓ Branch 1 taken 127567 times.
|
1148103 | for (i = 0; i < 8; i++) { |
| 373 | 1020536 | dest[0] = av_clip_uint8(dest[0] + dc); | |
| 374 | 1020536 | dest[1] = av_clip_uint8(dest[1] + dc); | |
| 375 | 1020536 | dest[2] = av_clip_uint8(dest[2] + dc); | |
| 376 | 1020536 | dest[3] = av_clip_uint8(dest[3] + dc); | |
| 377 | 1020536 | dest[4] = av_clip_uint8(dest[4] + dc); | |
| 378 | 1020536 | dest[5] = av_clip_uint8(dest[5] + dc); | |
| 379 | 1020536 | dest[6] = av_clip_uint8(dest[6] + dc); | |
| 380 | 1020536 | dest[7] = av_clip_uint8(dest[7] + dc); | |
| 381 | 1020536 | dest += stride; /* next pixel row */ | |
| 382 | } | ||
| 383 | 127567 | block[0] = 0; | |
| 384 | 127567 | } | |
| 385 | |||
| 386 | 465946 | /* Loop filter across the edge between the pixel row above first_pixel and the row of first_pixel, for count horizontally adjacent pixels. Each step reads the four vertically aligned pixels at offsets -2 * stride, -stride, 0 and +stride and modifies the two at -stride and 0. */ static av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |
| 387 | int *bounding_values, int count) /* bounding_values: lookup table indexed with negative and positive values, see below */ | ||
| 388 | { | ||
| 389 | unsigned char *end; | ||
| 390 | int filter_value; | ||
| 391 | 465946 | const ptrdiff_t nstride = -stride; | |
| 392 | |||
| 393 |
2/2✓ Branch 0 taken 4048824 times.
✓ Branch 1 taken 465946 times.
|
4514770 | for (end = first_pixel + count; first_pixel < end; first_pixel++) { |
| 394 | 4048824 | filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + | |
| 395 | 4048824 | (first_pixel[0] - first_pixel[nstride]) * 3; | |
| 396 | 4048824 | filter_value = bounding_values[(filter_value + 4) >> 3]; /* for 8-bit pixels the index spans -127..128, so bounding_values must point into the interior of its table */ | |
| 397 | |||
| 398 | 4048824 | first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value); /* the two pixels next to the edge move in opposite directions */ | |
| 399 | 4048824 | first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value); | |
| 400 | } | ||
| 401 | 465946 | } | |
| 402 | |||
| 403 | 492814 | /* Loop filter across the edge between the pixel column left of first_pixel and the column of first_pixel, for count vertically adjacent pixels (one per stride step). Each step reads the four pixels at offsets -2, -1, 0 and +1 and modifies the two at -1 and 0. */ static av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |
| 404 | int *bounding_values, int count) /* bounding_values: lookup table indexed with negative and positive values, see below */ | ||
| 405 | { | ||
| 406 | unsigned char *end; | ||
| 407 | int filter_value; | ||
| 408 | |||
| 409 |
2/2✓ Branch 0 taken 4307584 times.
✓ Branch 1 taken 492814 times.
|
4800398 | for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) { |
| 410 | 4307584 | filter_value = (first_pixel[-2] - first_pixel[1]) + | |
| 411 | 4307584 | (first_pixel[ 0] - first_pixel[-1]) * 3; | |
| 412 | 4307584 | filter_value = bounding_values[(filter_value + 4) >> 3]; /* for 8-bit pixels the index spans -127..128, so bounding_values must point into the interior of its table */ | |
| 413 | |||
| 414 | 4307584 | first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value); /* the two pixels next to the edge move in opposite directions */ | |
| 415 | 4307584 | first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); | |
| 416 | } | ||
| 417 | 492814 | } | |
| 418 | |||
| 419 | /* Defines a function named prefix_dim_loop_filter_count followed by suffix, which forwards to vp3_dim_loop_filter_c() with the pixel count fixed to count. */ #define LOOP_FILTER(prefix, suffix, dim, count) \ | ||
| 420 | void prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \ | ||
| 421 | int *bounding_values) \ | ||
| 422 | { \ | ||
| 423 | vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \ | ||
| 424 | } | ||
| 425 | |||
| 426 | 385632 | static LOOP_FILTER(vp3,_c, v, 8) /* defines static vp3_v_loop_filter_8_c() */ | |
| 427 | 401546 | static LOOP_FILTER(vp3,_c, h, 8) /* defines static vp3_h_loop_filter_8_c() */ | |
| 428 | 80314 | LOOP_FILTER(ff_vp3dsp, , v, 12) /* defines non-static ff_vp3dsp_v_loop_filter_12() */ | |
| 429 | 91268 | LOOP_FILTER(ff_vp3dsp, , h, 12) /* defines non-static ff_vp3dsp_h_loop_filter_12() */ | |
| 430 | |||
| 431 | 333697 | /* For each of h rows, combines 8 bytes of src1 with 8 bytes of src2 through no_rnd_avg32() and writes the 8 result bytes to dst; all three buffers use the same stride. no_rnd_avg32() is declared outside this file; going by its name it is a per-byte average without rounding up (TODO confirm). */ static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1, | |
| 432 | const uint8_t *src2, ptrdiff_t stride, int h) | ||
| 433 | { | ||
| 434 | int i; | ||
| 435 | |||
| 436 |
2/2✓ Branch 0 taken 2669576 times.
✓ Branch 1 taken 333697 times.
|
3003273 | for (i = 0; i < h; i++) { |
| 437 | uint32_t a, b; | ||
| 438 | |||
| 439 | 2669576 | a = AV_RN32(&src1[i * stride]); /* bytes 0..3 of the row */ | |
| 440 | 2669576 | b = AV_RN32(&src2[i * stride]); | |
| 441 | 2669576 | AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b)); /* NOTE(review): the A-suffixed write macro presumably requires an aligned dst, unlike the AV_RN32 reads - confirm */ | |
| 442 | 2669576 | a = AV_RN32(&src1[i * stride + 4]); /* bytes 4..7 of the row */ | |
| 443 | 2669576 | b = AV_RN32(&src2[i * stride + 4]); | |
| 444 | 2669576 | AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b)); | |
| 445 | } | ||
| 446 | 333697 | } | |
| 447 | |||
| 448 | 73 | /* Fills the function pointers of c with the C implementations from this file, then calls the init function of at most one architecture, selected at compile time by the ARCH_* macros. What those functions change in c is not visible here; presumably they replace entries with optimized versions (TODO confirm). */ av_cold void ff_vp3dsp_init(VP3DSPContext *c) | |
| 449 | { | ||
| 450 | 73 | c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2; | |
| 451 | |||
| 452 | 73 | c->idct_put = vp3_idct_put_c; | |
| 453 | 73 | c->idct_add = vp3_idct_add_c; | |
| 454 | 73 | c->idct_dc_add = vp3_idct_dc_add_c; | |
| 455 | 73 | c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c; /* the C version serves both the regular and the unaligned entry */ | |
| 456 | 73 | c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c; | |
| 457 | |||
| 458 | #if ARCH_ARM | ||
| 459 | ff_vp3dsp_init_arm(c); | ||
| 460 | #elif ARCH_PPC | ||
| 461 | ff_vp3dsp_init_ppc(c); | ||
| 462 | #elif ARCH_X86 | ||
| 463 | 73 | ff_vp3dsp_init_x86(c); | |
| 464 | #elif ARCH_MIPS | ||
| 465 | ff_vp3dsp_init_mips(c); | ||
| 466 | #endif | ||
| 467 | 73 | } | |
| 468 | |||
| 469 | /* | ||
| 470 | * This function fills the loop filter bounding value table for filter_limit. Per the original note it is meant to be | ||
| 471 | * called when the frame's quality index is different from the previous frame's (the caller is not visible here). | ||
| 472 | * | ||
| 473 | * bounding_values_array must hold at least 256 + 4 ints: the first 256 are cleared and then filled around index 127, and array indices 256..259 are written as well (256..257 when not ARCH_X86). | ||
| 474 | * | ||
| 475 | * filter_limit must be in the range 0..127; this is enforced with av_assert0(). | ||
| 476 | */ | ||
| 477 | 485 | void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit) | |
| 478 | { | ||
| 479 | 485 | int *bounding_values = bounding_values_array + 127; /* centre of the table, so that indices -127..128 land on array entries 0..255 */ | |
| 480 | int x; | ||
| 481 | int value; | ||
| 482 | |||
| 483 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 485 times.
|
485 | av_assert0(filter_limit < 128U); /* the unsigned comparison also rejects a negative filter_limit */ |
| 484 | |||
| 485 | /* set up the bounding values */ | ||
| 486 | 485 | memset(bounding_values_array, 0, 256 * sizeof(int)); | |
| 487 |
2/2✓ Branch 0 taken 3453 times.
✓ Branch 1 taken 485 times.
|
3938 | for (x = 0; x < filter_limit; x++) { /* rising part: entries with magnitude below filter_limit map to themselves */ |
| 488 | 3453 | bounding_values[-x] = -x; | |
| 489 | 3453 | bounding_values[x] = x; | |
| 490 | } | ||
| 491 |
4/4✓ Branch 0 taken 3529 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 3049 times.
✓ Branch 3 taken 480 times.
|
3534 | for (x = value = filter_limit; x < 128 && value; x++, value--) { /* falling part: from x = filter_limit the magnitude counts down from filter_limit towards 1, stopping early at x = 128 */ |
| 492 | 3049 | bounding_values[ x] = value; | |
| 493 | 3049 | bounding_values[-x] = -value; | |
| 494 | } | ||
| 495 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 480 times.
|
485 | if (value) /* the falling part was cut off at x = 128: index 128 exists only on the positive side (array entry 255) */ |
| 496 | 5 | bounding_values[128] = value; | |
| 497 | #if ARCH_X86 | ||
| 498 | 485 | bounding_values[129] = bounding_values[130] = /* array entries 256..259, beyond the 256 cleared above; each int holds 2 * filter_limit in both 16-bit halves. NOTE(review): presumably read by the x86 SIMD loop filters - confirm */ | |
| 499 | 485 | bounding_values[131] = bounding_values[132] = filter_limit * 0x00020002U; | |
| 500 | #else | ||
| 501 | bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202U; /* array entries 256..257; each int holds 2 * filter_limit in all four bytes */ | ||
| 502 | #endif | ||
| 503 | 485 | } | |
| 504 |