| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * HEVC video decoder | ||
| 3 | * | ||
| 4 | * Copyright (C) 2012 - 2013 Guillaume Martres | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU Lesser General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2.1 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * Lesser General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU Lesser General Public | ||
| 19 | * License along with FFmpeg; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include "get_bits.h" | ||
| 24 | #include "hevcdec.h" | ||
| 25 | |||
| 26 | #include "bit_depth_template.c" | ||
| 27 | #include "dsp.h" | ||
| 28 | #include "h26x/h2656_sao_template.c" | ||
| 29 | #include "h26x/h2656_inter_template.c" | ||
| 30 | |||
| 31 | 74598 | static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height, | |
| 32 | GetBitContext *gb, int pcm_bit_depth) | ||
| 33 | { | ||
| 34 | int x, y; | ||
| 35 | 74598 | pixel *dst = (pixel *)_dst; | |
| 36 | |||
| 37 | 74598 | stride /= sizeof(pixel); | |
| 38 | |||
| 39 |
2/2✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
|
729446 | for (y = 0; y < height; y++) { |
| 40 |
2/2✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
|
9419008 | for (x = 0; x < width; x++) |
| 41 | 8764160 | dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth); | |
| 42 | 654848 | dst += stride; | |
| 43 | } | ||
| 44 | 74598 | } | |
| 45 | |||
| 46 | 32505218 | static av_always_inline void FUNC(add_residual)(uint8_t *restrict dst8, const int16_t *restrict res, | |
| 47 | ptrdiff_t stride, int size) | ||
| 48 | { | ||
| 49 | int x, y; | ||
| 50 | |||
| 51 |
2/2✓ Branch 0 taken 132390696 times.
✓ Branch 1 taken 16252609 times.
|
297286610 | for (y = 0; y < size; y++) { |
| 52 | 264781392 | pixel *restrict dst = (pixel *)dst8; | |
| 53 |
2/2✓ Branch 0 taken 1841127232 times.
✓ Branch 1 taken 132390696 times.
|
3947035856 | for (x = 0; x < size; x++) { |
| 54 | 3682254464 | dst[x] = av_clip_pixel(dst[x] + *res); | |
| 55 | 3682254464 | res++; | |
| 56 | } | ||
| 57 | 264781392 | dst8 += stride; | |
| 58 | } | ||
| 59 | 32505218 | } | |
| 60 | |||
| 61 | 17708288 | static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res, | |
| 62 | ptrdiff_t stride) | ||
| 63 | { | ||
| 64 | 17708288 | FUNC(add_residual)(_dst, res, stride, 4); | |
| 65 | 17708288 | } | |
| 66 | |||
| 67 | 8674138 | static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res, | |
| 68 | ptrdiff_t stride) | ||
| 69 | { | ||
| 70 | 8674138 | FUNC(add_residual)(_dst, res, stride, 8); | |
| 71 | 8674138 | } | |
| 72 | |||
| 73 | 4460888 | static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res, | |
| 74 | ptrdiff_t stride) | ||
| 75 | { | ||
| 76 | 4460888 | FUNC(add_residual)(_dst, res, stride, 16); | |
| 77 | 4460888 | } | |
| 78 | |||
| 79 | 1661904 | static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res, | |
| 80 | ptrdiff_t stride) | ||
| 81 | { | ||
| 82 | 1661904 | FUNC(add_residual)(_dst, res, stride, 32); | |
| 83 | 1661904 | } | |
| 84 | |||
| 85 | 54126 | static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode) | |
| 86 | { | ||
| 87 | 54126 | int16_t *coeffs = (int16_t *) _coeffs; | |
| 88 | int x, y; | ||
| 89 | 54126 | int size = 1 << log2_size; | |
| 90 | |||
| 91 |
2/2✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
|
54126 | if (mode) { |
| 92 | 19978 | coeffs += size; | |
| 93 |
2/2✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
|
132608 | for (y = 0; y < size - 1; y++) { |
| 94 |
2/2✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
|
1158966 | for (x = 0; x < size; x++) |
| 95 | 1046336 | coeffs[x] += coeffs[x - size]; | |
| 96 | 112630 | coeffs += size; | |
| 97 | } | ||
| 98 | } else { | ||
| 99 |
2/2✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
|
282852 | for (y = 0; y < size; y++) { |
| 100 |
2/2✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
|
2347328 | for (x = 1; x < size; x++) |
| 101 | 2098624 | coeffs[x] += coeffs[x - 1]; | |
| 102 | 248704 | coeffs += size; | |
| 103 | } | ||
| 104 | } | ||
| 105 | 54126 | } | |
| 106 | |||
| 107 | /** | ||
| 108 | * HEVC transform dequantization (ITU-T H.265 8.6.3) | ||
| 109 | * | ||
| 110 | * @param coeffs transform coefficient buffer (in-place) | ||
| 111 | * @param log2_size log2 of transform block size, range: 2..5 (4x4 to 32x32) | ||
| 112 | * This value comes from recursive split_transform_flag parsing | ||
| 113 | * in the bitstream, bounded by log2_min_tb_size (min 2) and | ||
| 114 | * log2_max_trafo_size (max 5) from SPS. | ||
| 115 | * | ||
| 116 | * Formula: shift = 15 - BIT_DEPTH - log2_size | ||
| 117 | * | ||
| 118 | * bit_depth | 4x4 (2) | 8x8 (3) | 16x16 (4) | 32x32 (5) | ||
| 119 | * ----------+---------+---------+-----------+---------- | ||
| 120 | * 8-bit | 5 | 4 | 3 | 2 (shift right) | ||
| 121 | * 10-bit | 3 | 2 | 1 | 0 (shift right / no-op) | ||
| 122 | * 12-bit | 1 | 0 | -1 | -2 (shift right / no-op / shift left) | ||
| 123 | * | ||
| 124 | * When shift == 0, output equals input (identity transform), so we skip | ||
| 125 | * the loop entirely for better performance. | ||
| 126 | */ | ||
| 127 | 888660 | static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) | |
| 128 | { | ||
| 129 | 888660 | int shift = 15 - BIT_DEPTH - log2_size; | |
| 130 | int x, y; | ||
| 131 | 888660 | int size = 1 << log2_size; | |
| 132 | |||
| 133 |
2/2✓ Branch 0 taken 145123 times.
✓ Branch 1 taken 9294 times.
|
308834 | if (BIT_DEPTH <= 9 || shift > 0) { |
| 134 | 870072 | int offset = 1 << (shift - 1); | |
| 135 |
2/2✓ Branch 0 taken 1814020 times.
✓ Branch 1 taken 435036 times.
|
4498112 | for (y = 0; y < size; y++) { |
| 136 |
2/2✓ Branch 0 taken 8282832 times.
✓ Branch 1 taken 1814020 times.
|
20193704 | for (x = 0; x < size; x++) { |
| 137 | 16565664 | *coeffs = (*coeffs + offset) >> shift; | |
| 138 | 16565664 | coeffs++; | |
| 139 | } | ||
| 140 | } | ||
| 141 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 9212 times.
|
18584 | } else if (BIT_DEPTH > 10 && shift < 0) { |
| 142 |
2/2✓ Branch 0 taken 1408 times.
✓ Branch 1 taken 80 times.
|
2976 | for (y = 0; y < size; y++) { |
| 143 |
2/2✓ Branch 0 taken 26624 times.
✓ Branch 1 taken 1408 times.
|
56064 | for (x = 0; x < size; x++) { |
| 144 | 53248 | *coeffs = *(uint16_t*)coeffs << -shift; | |
| 145 | 53248 | coeffs++; | |
| 146 | } | ||
| 147 | } | ||
| 148 | } | ||
| 149 | /* shift == 0: no operation needed (identity transform) */ | ||
| 150 | 888660 | } | |
| 151 | |||
| 152 | #define SET(dst, x) (dst) = (x) | ||
| 153 | #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) | ||
| 154 | |||
| 155 | #define TR_4x4_LUMA(dst, src, step, assign) \ | ||
| 156 | do { \ | ||
| 157 | int c0 = src[0 * step] + src[2 * step]; \ | ||
| 158 | int c1 = src[2 * step] + src[3 * step]; \ | ||
| 159 | int c2 = src[0 * step] - src[3 * step]; \ | ||
| 160 | int c3 = 74 * src[1 * step]; \ | ||
| 161 | \ | ||
| 162 | assign(dst[2 * step], 74 * (src[0 * step] - \ | ||
| 163 | src[2 * step] + \ | ||
| 164 | src[3 * step])); \ | ||
| 165 | assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \ | ||
| 166 | assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \ | ||
| 167 | assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \ | ||
| 168 | } while (0) | ||
| 169 | |||
| 170 | 8023490 | static void FUNC(transform_4x4_luma)(int16_t *coeffs) | |
| 171 | { | ||
| 172 | int i; | ||
| 173 | 8023490 | int shift = 7; | |
| 174 | 8023490 | int add = 1 << (shift - 1); | |
| 175 | 8023490 | int16_t *src = coeffs; | |
| 176 | |||
| 177 |
2/2✓ Branch 0 taken 16046980 times.
✓ Branch 1 taken 4011745 times.
|
40117450 | for (i = 0; i < 4; i++) { |
| 178 | 32093960 | TR_4x4_LUMA(src, src, 4, SCALE); | |
| 179 | 32093960 | src++; | |
| 180 | } | ||
| 181 | |||
| 182 | 8023490 | shift = 20 - BIT_DEPTH; | |
| 183 | 8023490 | add = 1 << (shift - 1); | |
| 184 |
2/2✓ Branch 0 taken 16046980 times.
✓ Branch 1 taken 4011745 times.
|
40117450 | for (i = 0; i < 4; i++) { |
| 185 | 32093960 | TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); | |
| 186 | 32093960 | coeffs += 4; | |
| 187 | } | ||
| 188 | 8023490 | } | |
| 189 | |||
| 190 | #undef TR_4x4_LUMA | ||
| 191 | |||
| 192 | #define TR_4(dst, src, dstep, sstep, assign, end) \ | ||
| 193 | do { \ | ||
| 194 | const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ | ||
| 195 | const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ | ||
| 196 | const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ | ||
| 197 | const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ | ||
| 198 | \ | ||
| 199 | assign(dst[0 * dstep], e0 + o0); \ | ||
| 200 | assign(dst[1 * dstep], e1 + o1); \ | ||
| 201 | assign(dst[2 * dstep], e1 - o1); \ | ||
| 202 | assign(dst[3 * dstep], e0 - o0); \ | ||
| 203 | } while (0) | ||
| 204 | |||
| 205 | #define TR_8(dst, src, dstep, sstep, assign, end) \ | ||
| 206 | do { \ | ||
| 207 | int i, j; \ | ||
| 208 | int e_8[4]; \ | ||
| 209 | int o_8[4] = { 0 }; \ | ||
| 210 | for (i = 0; i < 4; i++) \ | ||
| 211 | for (j = 1; j < end; j += 2) \ | ||
| 212 | o_8[i] += transform[4 * j][i] * src[j * sstep]; \ | ||
| 213 | TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ | ||
| 214 | \ | ||
| 215 | for (i = 0; i < 4; i++) { \ | ||
| 216 | assign(dst[i * dstep], e_8[i] + o_8[i]); \ | ||
| 217 | assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ | ||
| 218 | } \ | ||
| 219 | } while (0) | ||
| 220 | |||
| 221 | #define TR_16(dst, src, dstep, sstep, assign, end) \ | ||
| 222 | do { \ | ||
| 223 | int i, j; \ | ||
| 224 | int e_16[8]; \ | ||
| 225 | int o_16[8] = { 0 }; \ | ||
| 226 | for (i = 0; i < 8; i++) \ | ||
| 227 | for (j = 1; j < end; j += 2) \ | ||
| 228 | o_16[i] += transform[2 * j][i] * src[j * sstep]; \ | ||
| 229 | TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ | ||
| 230 | \ | ||
| 231 | for (i = 0; i < 8; i++) { \ | ||
| 232 | assign(dst[i * dstep], e_16[i] + o_16[i]); \ | ||
| 233 | assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ | ||
| 234 | } \ | ||
| 235 | } while (0) | ||
| 236 | |||
| 237 | #define TR_32(dst, src, dstep, sstep, assign, end) \ | ||
| 238 | do { \ | ||
| 239 | int i, j; \ | ||
| 240 | int e_32[16]; \ | ||
| 241 | int o_32[16] = { 0 }; \ | ||
| 242 | for (i = 0; i < 16; i++) \ | ||
| 243 | for (j = 1; j < end; j += 2) \ | ||
| 244 | o_32[i] += transform[j][i] * src[j * sstep]; \ | ||
| 245 | TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \ | ||
| 246 | \ | ||
| 247 | for (i = 0; i < 16; i++) { \ | ||
| 248 | assign(dst[i * dstep], e_32[i] + o_32[i]); \ | ||
| 249 | assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ | ||
| 250 | } \ | ||
| 251 | } while (0) | ||
| 252 | |||
| 253 | #define IDCT_VAR4(H) \ | ||
| 254 | int limit2 = FFMIN(col_limit + 4, H) | ||
| 255 | #define IDCT_VAR8(H) \ | ||
| 256 | int limit = FFMIN(col_limit, H); \ | ||
| 257 | int limit2 = FFMIN(col_limit + 4, H) | ||
| 258 | #define IDCT_VAR16(H) IDCT_VAR8(H) | ||
| 259 | #define IDCT_VAR32(H) IDCT_VAR8(H) | ||
| 260 | |||
| 261 | #define IDCT(H) \ | ||
| 262 | static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ | ||
| 263 | int col_limit) \ | ||
| 264 | { \ | ||
| 265 | int i; \ | ||
| 266 | int shift = 7; \ | ||
| 267 | int add = 1 << (shift - 1); \ | ||
| 268 | int16_t *src = coeffs; \ | ||
| 269 | IDCT_VAR ## H(H); \ | ||
| 270 | \ | ||
| 271 | for (i = 0; i < H; i++) { \ | ||
| 272 | TR_ ## H(src, src, H, H, SCALE, limit2); \ | ||
| 273 | if (limit2 < H && i%4 == 0 && !!i) \ | ||
| 274 | limit2 -= 4; \ | ||
| 275 | src++; \ | ||
| 276 | } \ | ||
| 277 | \ | ||
| 278 | shift = 20 - BIT_DEPTH; \ | ||
| 279 | add = 1 << (shift - 1); \ | ||
| 280 | for (i = 0; i < H; i++) { \ | ||
| 281 | TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ | ||
| 282 | coeffs += H; \ | ||
| 283 | } \ | ||
| 284 | } | ||
| 285 | |||
| 286 | #define IDCT_DC(H) \ | ||
| 287 | static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \ | ||
| 288 | { \ | ||
| 289 | int i, j; \ | ||
| 290 | int shift = 14 - BIT_DEPTH; \ | ||
| 291 | int add = 1 << (shift - 1); \ | ||
| 292 | int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ | ||
| 293 | \ | ||
| 294 | for (j = 0; j < H; j++) { \ | ||
| 295 | for (i = 0; i < H; i++) { \ | ||
| 296 | coeffs[i + j * H] = coeff; \ | ||
| 297 | } \ | ||
| 298 | } \ | ||
| 299 | } | ||
| 300 | |||
| 301 |
5/10✗ Branch 0 not taken.
✓ Branch 1 taken 11060940 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 11060940 times.
✓ Branch 7 taken 2765235 times.
✓ Branch 8 taken 11060940 times.
✓ Branch 9 taken 2765235 times.
|
49774230 | IDCT( 4) |
| 302 |
17/22✓ Branch 0 taken 423574784 times.
✓ Branch 1 taken 105893696 times.
✓ Branch 2 taken 105893696 times.
✓ Branch 3 taken 26473424 times.
✓ Branch 4 taken 105893696 times.
✓ Branch 5 taken 26473424 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26473424 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 26473424 times.
✓ Branch 13 taken 3309178 times.
✓ Branch 14 taken 297800192 times.
✓ Branch 15 taken 105893696 times.
✓ Branch 16 taken 105893696 times.
✓ Branch 17 taken 26473424 times.
✓ Branch 18 taken 105893696 times.
✓ Branch 19 taken 26473424 times.
✓ Branch 20 taken 26473424 times.
✓ Branch 21 taken 3309178 times.
|
2402411572 | IDCT( 8) |
| 303 |
34/34✓ Branch 0 taken 856155104 times.
✓ Branch 1 taken 226427520 times.
✓ Branch 2 taken 226427520 times.
✓ Branch 3 taken 28303440 times.
✓ Branch 4 taken 452855040 times.
✓ Branch 5 taken 113213760 times.
✓ Branch 6 taken 113213760 times.
✓ Branch 7 taken 28303440 times.
✓ Branch 8 taken 113213760 times.
✓ Branch 9 taken 28303440 times.
✓ Branch 10 taken 226427520 times.
✓ Branch 11 taken 28303440 times.
✓ Branch 12 taken 21047264 times.
✓ Branch 13 taken 7256176 times.
✓ Branch 14 taken 5261816 times.
✓ Branch 15 taken 15785448 times.
✓ Branch 16 taken 3946362 times.
✓ Branch 17 taken 1315454 times.
✓ Branch 18 taken 28303440 times.
✓ Branch 19 taken 1768965 times.
✓ Branch 20 taken 899885568 times.
✓ Branch 21 taken 226427520 times.
✓ Branch 22 taken 226427520 times.
✓ Branch 23 taken 28303440 times.
✓ Branch 24 taken 452855040 times.
✓ Branch 25 taken 113213760 times.
✓ Branch 26 taken 113213760 times.
✓ Branch 27 taken 28303440 times.
✓ Branch 28 taken 113213760 times.
✓ Branch 29 taken 28303440 times.
✓ Branch 30 taken 226427520 times.
✓ Branch 31 taken 28303440 times.
✓ Branch 32 taken 28303440 times.
✓ Branch 33 taken 1768965 times.
|
8157383434 | IDCT(16) |
| 304 |
46/46✓ Branch 0 taken 1339889984 times.
✓ Branch 1 taken 350994432 times.
✓ Branch 2 taken 350994432 times.
✓ Branch 3 taken 21937152 times.
✓ Branch 4 taken 325889408 times.
✓ Branch 5 taken 175497216 times.
✓ Branch 6 taken 175497216 times.
✓ Branch 7 taken 21937152 times.
✓ Branch 8 taken 350994432 times.
✓ Branch 9 taken 87748608 times.
✓ Branch 10 taken 87748608 times.
✓ Branch 11 taken 21937152 times.
✓ Branch 12 taken 87748608 times.
✓ Branch 13 taken 21937152 times.
✓ Branch 14 taken 175497216 times.
✓ Branch 15 taken 21937152 times.
✓ Branch 16 taken 350994432 times.
✓ Branch 17 taken 21937152 times.
✓ Branch 18 taken 19500576 times.
✓ Branch 19 taken 2436576 times.
✓ Branch 20 taken 4875144 times.
✓ Branch 21 taken 14625432 times.
✓ Branch 22 taken 4265751 times.
✓ Branch 23 taken 609393 times.
✓ Branch 24 taken 21937152 times.
✓ Branch 25 taken 685536 times.
✓ Branch 26 taken 2028439552 times.
✓ Branch 27 taken 350994432 times.
✓ Branch 28 taken 350994432 times.
✓ Branch 29 taken 21937152 times.
✓ Branch 30 taken 494815232 times.
✓ Branch 31 taken 175497216 times.
✓ Branch 32 taken 175497216 times.
✓ Branch 33 taken 21937152 times.
✓ Branch 34 taken 350994432 times.
✓ Branch 35 taken 87748608 times.
✓ Branch 36 taken 87748608 times.
✓ Branch 37 taken 21937152 times.
✓ Branch 38 taken 87748608 times.
✓ Branch 39 taken 21937152 times.
✓ Branch 40 taken 175497216 times.
✓ Branch 41 taken 21937152 times.
✓ Branch 42 taken 350994432 times.
✓ Branch 43 taken 21937152 times.
✓ Branch 44 taken 21937152 times.
✓ Branch 45 taken 685536 times.
|
14785087808 | IDCT(32) |
| 305 | |||
| 306 |
4/4✓ Branch 0 taken 19465600 times.
✓ Branch 1 taken 4866400 times.
✓ Branch 2 taken 4866400 times.
✓ Branch 3 taken 1216600 times.
|
51097200 | IDCT_DC( 4) |
| 307 |
4/4✓ Branch 0 taken 60966656 times.
✓ Branch 1 taken 7620832 times.
✓ Branch 2 taken 7620832 times.
✓ Branch 3 taken 952604 times.
|
139080184 | IDCT_DC( 8) |
| 308 |
4/4✓ Branch 0 taken 114058752 times.
✓ Branch 1 taken 7128672 times.
✓ Branch 2 taken 7128672 times.
✓ Branch 3 taken 445542 times.
|
243265932 | IDCT_DC(16) |
| 309 |
4/4✓ Branch 0 taken 146119680 times.
✓ Branch 1 taken 4566240 times.
✓ Branch 2 taken 4566240 times.
✓ Branch 3 taken 142695 times.
|
301657230 | IDCT_DC(32) |
| 310 | |||
| 311 | #undef TR_4 | ||
| 312 | #undef TR_8 | ||
| 313 | #undef TR_16 | ||
| 314 | #undef TR_32 | ||
| 315 | |||
| 316 | #undef SET | ||
| 317 | #undef SCALE | ||
| 318 | |||
| 319 | //////////////////////////////////////////////////////////////////////////////// | ||
| 320 | // | ||
| 321 | //////////////////////////////////////////////////////////////////////////////// | ||
| 322 | #define ff_hevc_pel_filters ff_hevc_qpel_filters | ||
| 323 | #define DECL_HV_FILTER(f) \ | ||
| 324 | const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ | ||
| 325 | const int8_t *vf = ff_hevc_ ## f ## _filters[my]; | ||
| 326 | |||
| 327 | #define FW_PUT(p, f, t) \ | ||
| 328 | static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \ | ||
| 329 | intptr_t mx, intptr_t my, int width) \ | ||
| 330 | { \ | ||
| 331 | DECL_HV_FILTER(p) \ | ||
| 332 | FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \ | ||
| 333 | } | ||
| 334 | |||
| 335 | #define FW_PUT_UNI(p, f, t) \ | ||
| 336 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 337 | ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \ | ||
| 338 | { \ | ||
| 339 | DECL_HV_FILTER(p) \ | ||
| 340 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \ | ||
| 341 | } | ||
| 342 | |||
| 343 | #define FW_PUT_UNI_W(p, f, t) \ | ||
| 344 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 345 | ptrdiff_t srcstride,int height, int denom, int wx, int ox, \ | ||
| 346 | intptr_t mx, intptr_t my, int width) \ | ||
| 347 | { \ | ||
| 348 | DECL_HV_FILTER(p) \ | ||
| 349 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \ | ||
| 350 | } | ||
| 351 | |||
| 352 | #define FW_PUT_FUNCS(f, t, dir) \ | ||
| 353 | FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \ | ||
| 354 | FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \ | ||
| 355 | FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir) | ||
| 356 | |||
| 357 | 4997298 | FW_PUT(pel, pel_pixels, pixels) | |
| 358 | 6980298 | FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels) | |
| 359 | 146434 | FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels) | |
| 360 | |||
| 361 | 3444674 | FW_PUT_FUNCS(qpel, luma, h ) | |
| 362 | 2977700 | FW_PUT_FUNCS(qpel, luma, v ) | |
| 363 | 9269802 | FW_PUT_FUNCS(qpel, luma, hv ) | |
| 364 | 5499376 | FW_PUT_FUNCS(epel, chroma, h ) | |
| 365 | 4387812 | FW_PUT_FUNCS(epel, chroma, v ) | |
| 366 | 24023776 | FW_PUT_FUNCS(epel, chroma, hv ) | |
| 367 | |||
| 368 | 5462242 | static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 369 | const int16_t *src2, | ||
| 370 | int height, intptr_t mx, intptr_t my, int width) | ||
| 371 | { | ||
| 372 | int x, y; | ||
| 373 | 5462242 | const pixel *src = (const pixel *)_src; | |
| 374 | 5462242 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 375 | 5462242 | pixel *dst = (pixel *)_dst; | |
| 376 | 5462242 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 377 | |||
| 378 | 5462242 | int shift = 14 + 1 - BIT_DEPTH; | |
| 379 | #if BIT_DEPTH < 14 | ||
| 380 | 5462242 | int offset = 1 << (shift - 1); | |
| 381 | #else | ||
| 382 | int offset = 0; | ||
| 383 | #endif | ||
| 384 | |||
| 385 |
2/2✓ Branch 0 taken 41305794 times.
✓ Branch 1 taken 2731121 times.
|
88073830 | for (y = 0; y < height; y++) { |
| 386 |
2/2✓ Branch 0 taken 1067725940 times.
✓ Branch 1 taken 41305794 times.
|
2218063468 | for (x = 0; x < width; x++) |
| 387 | 2135451880 | dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); | |
| 388 | 82611588 | src += srcstride; | |
| 389 | 82611588 | dst += dststride; | |
| 390 | 82611588 | src2 += MAX_PB_SIZE; | |
| 391 | } | ||
| 392 | 5462242 | } | |
| 393 | |||
| 394 | 64154 | static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 395 | const int16_t *src2, | ||
| 396 | int height, int denom, int wx0, int wx1, | ||
| 397 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 398 | { | ||
| 399 | int x, y; | ||
| 400 | 64154 | const pixel *src = (const pixel *)_src; | |
| 401 | 64154 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 402 | 64154 | pixel *dst = (pixel *)_dst; | |
| 403 | 64154 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 404 | |||
| 405 | 64154 | int shift = 14 + 1 - BIT_DEPTH; | |
| 406 | 64154 | int log2Wd = denom + shift - 1; | |
| 407 | |||
| 408 | 64154 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 409 | 64154 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 410 |
2/2✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
|
1406154 | for (y = 0; y < height; y++) { |
| 411 |
2/2✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
|
45123456 | for (x = 0; x < width; x++) { |
| 412 | 43781456 | dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1)); | |
| 413 | } | ||
| 414 | 1342000 | src += srcstride; | |
| 415 | 1342000 | dst += dststride; | |
| 416 | 1342000 | src2 += MAX_PB_SIZE; | |
| 417 | } | ||
| 418 | 64154 | } | |
| 419 | |||
| 420 | //////////////////////////////////////////////////////////////////////////////// | ||
| 421 | // | ||
| 422 | //////////////////////////////////////////////////////////////////////////////// | ||
| 423 | #define QPEL_FILTER(src, stride) \ | ||
| 424 | (filter[0] * src[x - 3 * stride] + \ | ||
| 425 | filter[1] * src[x - 2 * stride] + \ | ||
| 426 | filter[2] * src[x - stride] + \ | ||
| 427 | filter[3] * src[x ] + \ | ||
| 428 | filter[4] * src[x + stride] + \ | ||
| 429 | filter[5] * src[x + 2 * stride] + \ | ||
| 430 | filter[6] * src[x + 3 * stride] + \ | ||
| 431 | filter[7] * src[x + 4 * stride]) | ||
| 432 | |||
| 433 | 1362336 | static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 434 | const int16_t *src2, | ||
| 435 | int height, intptr_t mx, intptr_t my, int width) | ||
| 436 | { | ||
| 437 | int x, y; | ||
| 438 | 1362336 | const pixel *src = (const pixel*)_src; | |
| 439 | 1362336 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 440 | 1362336 | pixel *dst = (pixel *)_dst; | |
| 441 | 1362336 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 442 | |||
| 443 | 1362336 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
| 444 | |||
| 445 | 1362336 | int shift = 14 + 1 - BIT_DEPTH; | |
| 446 | #if BIT_DEPTH < 14 | ||
| 447 | 1362336 | int offset = 1 << (shift - 1); | |
| 448 | #else | ||
| 449 | int offset = 0; | ||
| 450 | #endif | ||
| 451 | |||
| 452 |
2/2✓ Branch 0 taken 15144496 times.
✓ Branch 1 taken 681168 times.
|
31651328 | for (y = 0; y < height; y++) { |
| 453 |
2/2✓ Branch 0 taken 513210376 times.
✓ Branch 1 taken 15144496 times.
|
1056709744 | for (x = 0; x < width; x++) |
| 454 | 1026420752 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 455 | 30288992 | src += srcstride; | |
| 456 | 30288992 | dst += dststride; | |
| 457 | 30288992 | src2 += MAX_PB_SIZE; | |
| 458 | } | ||
| 459 | 1362336 | } | |
| 460 | |||
| 461 | 1073322 | static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 462 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 463 | int height, intptr_t mx, intptr_t my, int width) | ||
| 464 | { | ||
| 465 | int x, y; | ||
| 466 | 1073322 | const pixel *src = (const pixel*)_src; | |
| 467 | 1073322 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 468 | 1073322 | pixel *dst = (pixel *)_dst; | |
| 469 | 1073322 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 470 | |||
| 471 | 1073322 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
| 472 | |||
| 473 | 1073322 | int shift = 14 + 1 - BIT_DEPTH; | |
| 474 | #if BIT_DEPTH < 14 | ||
| 475 | 1073322 | int offset = 1 << (shift - 1); | |
| 476 | #else | ||
| 477 | int offset = 0; | ||
| 478 | #endif | ||
| 479 | |||
| 480 |
2/2✓ Branch 0 taken 11564808 times.
✓ Branch 1 taken 536661 times.
|
24202938 | for (y = 0; y < height; y++) { |
| 481 |
2/2✓ Branch 0 taken 377783176 times.
✓ Branch 1 taken 11564808 times.
|
778695968 | for (x = 0; x < width; x++) |
| 482 | 755566352 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 483 | 23129616 | src += srcstride; | |
| 484 | 23129616 | dst += dststride; | |
| 485 | 23129616 | src2 += MAX_PB_SIZE; | |
| 486 | } | ||
| 487 | 1073322 | } | |
| 488 | |||
| 489 | 3364606 | static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 490 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 491 | int height, intptr_t mx, intptr_t my, int width) | ||
| 492 | { | ||
| 493 | int x, y; | ||
| 494 | const int8_t *filter; | ||
| 495 | 3364606 | const pixel *src = (const pixel*)_src; | |
| 496 | 3364606 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 497 | 3364606 | pixel *dst = (pixel *)_dst; | |
| 498 | 3364606 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 499 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 500 | 3364606 | int16_t *tmp = tmp_array; | |
| 501 | 3364606 | int shift = 14 + 1 - BIT_DEPTH; | |
| 502 | #if BIT_DEPTH < 14 | ||
| 503 | 3364606 | int offset = 1 << (shift - 1); | |
| 504 | #else | ||
| 505 | int offset = 0; | ||
| 506 | #endif | ||
| 507 | |||
| 508 | 3364606 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
| 509 | 3364606 | filter = ff_hevc_qpel_filters[mx]; | |
| 510 |
2/2✓ Branch 0 taken 48024893 times.
✓ Branch 1 taken 1682303 times.
|
99414392 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
| 511 |
2/2✓ Branch 0 taken 1424943792 times.
✓ Branch 1 taken 48024893 times.
|
2945937370 | for (x = 0; x < width; x++) |
| 512 | 2849887584 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 513 | 96049786 | src += srcstride; | |
| 514 | 96049786 | tmp += MAX_PB_SIZE; | |
| 515 | } | ||
| 516 | |||
| 517 | 3364606 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 518 | 3364606 | filter = ff_hevc_qpel_filters[my]; | |
| 519 | |||
| 520 |
2/2✓ Branch 0 taken 36248772 times.
✓ Branch 1 taken 1682303 times.
|
75862150 | for (y = 0; y < height; y++) { |
| 521 |
2/2✓ Branch 0 taken 1174254024 times.
✓ Branch 1 taken 36248772 times.
|
2421005592 | for (x = 0; x < width; x++) |
| 522 | 2348508048 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
| 523 | 72497544 | tmp += MAX_PB_SIZE; | |
| 524 | 72497544 | dst += dststride; | |
| 525 | 72497544 | src2 += MAX_PB_SIZE; | |
| 526 | } | ||
| 527 | 3364606 | } | |
| 528 | |||
| 529 | 22810 | static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 530 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 531 | int height, int denom, int wx0, int wx1, | ||
| 532 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 533 | { | ||
| 534 | int x, y; | ||
| 535 | 22810 | const pixel *src = (const pixel*)_src; | |
| 536 | 22810 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 537 | 22810 | pixel *dst = (pixel *)_dst; | |
| 538 | 22810 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 539 | |||
| 540 | 22810 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
| 541 | |||
| 542 | 22810 | int shift = 14 + 1 - BIT_DEPTH; | |
| 543 | 22810 | int log2Wd = denom + shift - 1; | |
| 544 | |||
| 545 | 22810 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 546 | 22810 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 547 |
2/2✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
|
580010 | for (y = 0; y < height; y++) { |
| 548 |
2/2✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
|
20295600 | for (x = 0; x < width; x++) |
| 549 | 19738400 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 550 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 551 | 557200 | src += srcstride; | |
| 552 | 557200 | dst += dststride; | |
| 553 | 557200 | src2 += MAX_PB_SIZE; | |
| 554 | } | ||
| 555 | 22810 | } | |
| 556 | |||
| 557 | 20448 | static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 558 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 559 | int height, int denom, int wx0, int wx1, | ||
| 560 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 561 | { | ||
| 562 | int x, y; | ||
| 563 | 20448 | const pixel *src = (const pixel*)_src; | |
| 564 | 20448 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 565 | 20448 | pixel *dst = (pixel *)_dst; | |
| 566 | 20448 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 567 | |||
| 568 | 20448 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
| 569 | |||
| 570 | 20448 | int shift = 14 + 1 - BIT_DEPTH; | |
| 571 | 20448 | int log2Wd = denom + shift - 1; | |
| 572 | |||
| 573 | 20448 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 574 | 20448 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 575 |
2/2✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
|
493096 | for (y = 0; y < height; y++) { |
| 576 |
2/2✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
|
16356712 | for (x = 0; x < width; x++) |
| 577 | 15884064 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 578 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 579 | 472648 | src += srcstride; | |
| 580 | 472648 | dst += dststride; | |
| 581 | 472648 | src2 += MAX_PB_SIZE; | |
| 582 | } | ||
| 583 | 20448 | } | |
| 584 | |||
| 585 | 71042 | static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 586 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 587 | int height, int denom, int wx0, int wx1, | ||
| 588 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 589 | { | ||
| 590 | int x, y; | ||
| 591 | const int8_t *filter; | ||
| 592 | 71042 | const pixel *src = (const pixel*)_src; | |
| 593 | 71042 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 594 | 71042 | pixel *dst = (pixel *)_dst; | |
| 595 | 71042 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 596 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 597 | 71042 | int16_t *tmp = tmp_array; | |
| 598 | 71042 | int shift = 14 + 1 - BIT_DEPTH; | |
| 599 | 71042 | int log2Wd = denom + shift - 1; | |
| 600 | |||
| 601 | 71042 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
| 602 | 71042 | filter = ff_hevc_qpel_filters[mx]; | |
| 603 |
2/2✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
|
2333456 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
| 604 |
2/2✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
|
76174558 | for (x = 0; x < width; x++) |
| 605 | 73912144 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 606 | 2262414 | src += srcstride; | |
| 607 | 2262414 | tmp += MAX_PB_SIZE; | |
| 608 | } | ||
| 609 | |||
| 610 | 71042 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 611 | 71042 | filter = ff_hevc_qpel_filters[my]; | |
| 612 | |||
| 613 | 71042 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 614 | 71042 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 615 |
2/2✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
|
1836162 | for (y = 0; y < height; y++) { |
| 616 |
2/2✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
|
63734816 | for (x = 0; x < width; x++) |
| 617 | 61969696 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
| 618 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 619 | 1765120 | tmp += MAX_PB_SIZE; | |
| 620 | 1765120 | dst += dststride; | |
| 621 | 1765120 | src2 += MAX_PB_SIZE; | |
| 622 | } | ||
| 623 | 71042 | } | |
| 624 | |||
| 625 | //////////////////////////////////////////////////////////////////////////////// | ||
| 626 | // | ||
| 627 | //////////////////////////////////////////////////////////////////////////////// | ||
/*
 * 4-tap filter around position x: samples are read from src at x - stride,
 * x, x + stride and x + 2 * stride and multiplied by filter[0..3].
 * Expands to the identifiers `filter` and `x`, which must be in scope at the
 * point of use.
 * Both arguments are parenthesised so that an expression argument (for
 * example a computed stride) expands with the intended precedence.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - (stride)] +                                         \
     filter[1] * (src)[x] +                                                    \
     filter[2] * (src)[x + (stride)] +                                         \
     filter[3] * (src)[x + 2 * (stride)])
| 633 | |||
| 634 | 2165962 | static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 635 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 636 | int height, intptr_t mx, intptr_t my, int width) | ||
| 637 | { | ||
| 638 | int x, y; | ||
| 639 | 2165962 | const pixel *src = (const pixel *)_src; | |
| 640 | 2165962 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 641 | 2165962 | pixel *dst = (pixel *)_dst; | |
| 642 | 2165962 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 643 | 2165962 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 644 | 2165962 | int shift = 14 + 1 - BIT_DEPTH; | |
| 645 | #if BIT_DEPTH < 14 | ||
| 646 | 2165962 | int offset = 1 << (shift - 1); | |
| 647 | #else | ||
| 648 | int offset = 0; | ||
| 649 | #endif | ||
| 650 | |||
| 651 |
2/2✓ Branch 0 taken 12792482 times.
✓ Branch 1 taken 1082981 times.
|
27750926 | for (y = 0; y < height; y++) { |
| 652 |
2/2✓ Branch 0 taken 217326132 times.
✓ Branch 1 taken 12792482 times.
|
460237228 | for (x = 0; x < width; x++) { |
| 653 | 434652264 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 654 | } | ||
| 655 | 25584964 | dst += dststride; | |
| 656 | 25584964 | src += srcstride; | |
| 657 | 25584964 | src2 += MAX_PB_SIZE; | |
| 658 | } | ||
| 659 | 2165962 | } | |
| 660 | |||
| 661 | 1506398 | static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 662 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 663 | int height, intptr_t mx, intptr_t my, int width) | ||
| 664 | { | ||
| 665 | int x, y; | ||
| 666 | 1506398 | const pixel *src = (const pixel *)_src; | |
| 667 | 1506398 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 668 | 1506398 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
| 669 | 1506398 | pixel *dst = (pixel *)_dst; | |
| 670 | 1506398 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 671 | 1506398 | int shift = 14 + 1 - BIT_DEPTH; | |
| 672 | #if BIT_DEPTH < 14 | ||
| 673 | 1506398 | int offset = 1 << (shift - 1); | |
| 674 | #else | ||
| 675 | int offset = 0; | ||
| 676 | #endif | ||
| 677 | |||
| 678 |
2/2✓ Branch 0 taken 9087706 times.
✓ Branch 1 taken 753199 times.
|
19681810 | for (y = 0; y < height; y++) { |
| 679 |
2/2✓ Branch 0 taken 149357300 times.
✓ Branch 1 taken 9087706 times.
|
316890012 | for (x = 0; x < width; x++) |
| 680 | 298714600 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 681 | 18175412 | dst += dststride; | |
| 682 | 18175412 | src += srcstride; | |
| 683 | 18175412 | src2 += MAX_PB_SIZE; | |
| 684 | } | ||
| 685 | 1506398 | } | |
| 686 | |||
| 687 | 8559622 | static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 688 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 689 | int height, intptr_t mx, intptr_t my, int width) | ||
| 690 | { | ||
| 691 | int x, y; | ||
| 692 | 8559622 | const pixel *src = (const pixel *)_src; | |
| 693 | 8559622 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 694 | 8559622 | pixel *dst = (pixel *)_dst; | |
| 695 | 8559622 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 696 | 8559622 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 697 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 698 | 8559622 | int16_t *tmp = tmp_array; | |
| 699 | 8559622 | int shift = 14 + 1 - BIT_DEPTH; | |
| 700 | #if BIT_DEPTH < 14 | ||
| 701 | 8559622 | int offset = 1 << (shift - 1); | |
| 702 | #else | ||
| 703 | int offset = 0; | ||
| 704 | #endif | ||
| 705 | |||
| 706 | 8559622 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
| 707 | |||
| 708 |
2/2✓ Branch 0 taken 61896855 times.
✓ Branch 1 taken 4279811 times.
|
132353332 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
| 709 |
2/2✓ Branch 0 taken 920742850 times.
✓ Branch 1 taken 61896855 times.
|
1965279410 | for (x = 0; x < width; x++) |
| 710 | 1841485700 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 711 | 123793710 | src += srcstride; | |
| 712 | 123793710 | tmp += MAX_PB_SIZE; | |
| 713 | } | ||
| 714 | |||
| 715 | 8559622 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 716 | 8559622 | filter = ff_hevc_epel_filters[my]; | |
| 717 | |||
| 718 |
2/2✓ Branch 0 taken 49057422 times.
✓ Branch 1 taken 4279811 times.
|
106674466 | for (y = 0; y < height; y++) { |
| 719 |
2/2✓ Branch 0 taken 783203092 times.
✓ Branch 1 taken 49057422 times.
|
1664521028 | for (x = 0; x < width; x++) |
| 720 | 1566406184 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
| 721 | 98114844 | tmp += MAX_PB_SIZE; | |
| 722 | 98114844 | dst += dststride; | |
| 723 | 98114844 | src2 += MAX_PB_SIZE; | |
| 724 | } | ||
| 725 | 8559622 | } | |
| 726 | |||
| 727 | 33080 | static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 728 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 729 | int height, int denom, int wx0, int wx1, | ||
| 730 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 731 | { | ||
| 732 | int x, y; | ||
| 733 | 33080 | const pixel *src = (const pixel *)_src; | |
| 734 | 33080 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 735 | 33080 | pixel *dst = (pixel *)_dst; | |
| 736 | 33080 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 737 | 33080 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 738 | 33080 | int shift = 14 + 1 - BIT_DEPTH; | |
| 739 | 33080 | int log2Wd = denom + shift - 1; | |
| 740 | |||
| 741 | 33080 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 742 | 33080 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 743 |
2/2✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
|
500664 | for (y = 0; y < height; y++) { |
| 744 |
2/2✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
|
11079888 | for (x = 0; x < width; x++) |
| 745 | 10612304 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 746 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 747 | 467584 | src += srcstride; | |
| 748 | 467584 | dst += dststride; | |
| 749 | 467584 | src2 += MAX_PB_SIZE; | |
| 750 | } | ||
| 751 | 33080 | } | |
| 752 | |||
| 753 | 24552 | static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 754 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 755 | int height, int denom, int wx0, int wx1, | ||
| 756 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 757 | { | ||
| 758 | int x, y; | ||
| 759 | 24552 | const pixel *src = (const pixel *)_src; | |
| 760 | 24552 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 761 | 24552 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
| 762 | 24552 | pixel *dst = (pixel *)_dst; | |
| 763 | 24552 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 764 | 24552 | int shift = 14 + 1 - BIT_DEPTH; | |
| 765 | 24552 | int log2Wd = denom + shift - 1; | |
| 766 | |||
| 767 | 24552 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 768 | 24552 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 769 |
2/2✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
|
360464 | for (y = 0; y < height; y++) { |
| 770 |
2/2✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
|
8181304 | for (x = 0; x < width; x++) |
| 771 | 7845392 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 772 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 773 | 335912 | src += srcstride; | |
| 774 | 335912 | dst += dststride; | |
| 775 | 335912 | src2 += MAX_PB_SIZE; | |
| 776 | } | ||
| 777 | 24552 | } | |
| 778 | |||
| 779 | 166916 | static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 780 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 781 | int height, int denom, int wx0, int wx1, | ||
| 782 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 783 | { | ||
| 784 | int x, y; | ||
| 785 | 166916 | const pixel *src = (const pixel *)_src; | |
| 786 | 166916 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 787 | 166916 | pixel *dst = (pixel *)_dst; | |
| 788 | 166916 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 789 | 166916 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 790 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 791 | 166916 | int16_t *tmp = tmp_array; | |
| 792 | 166916 | int shift = 14 + 1 - BIT_DEPTH; | |
| 793 | 166916 | int log2Wd = denom + shift - 1; | |
| 794 | |||
| 795 | 166916 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
| 796 | |||
| 797 |
2/2✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
|
2744736 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
| 798 |
2/2✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
|
46142260 | for (x = 0; x < width; x++) |
| 799 | 43564440 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 800 | 2577820 | src += srcstride; | |
| 801 | 2577820 | tmp += MAX_PB_SIZE; | |
| 802 | } | ||
| 803 | |||
| 804 | 166916 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 805 | 166916 | filter = ff_hevc_epel_filters[my]; | |
| 806 | |||
| 807 | 166916 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 808 | 166916 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 809 |
2/2✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
|
2243988 | for (y = 0; y < height; y++) { |
| 810 |
2/2✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
|
39638176 | for (x = 0; x < width; x++) |
| 811 | 37561104 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
| 812 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 813 | 2077072 | tmp += MAX_PB_SIZE; | |
| 814 | 2077072 | dst += dststride; | |
| 815 | 2077072 | src2 += MAX_PB_SIZE; | |
| 816 | } | ||
| 817 | 166916 | } | |
| 818 | |||
/*
 * Shorthands for the samples around pix. They expand to the identifiers
 * `pix`, `xstride` and `ystride`, which must be in scope where they are used.
 * Pn sits (n + 1) * xstride before pix, Qn sits n * xstride after it.
 */

// line zero
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three (same samples, 3 * ystride further on). used only for deblocking decision
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-xstride + 3 * ystride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
| 838 | |||
| 839 | #include "h26x/h2656_deblock_template.c" | ||
| 840 | |||
| 841 | 85719872 | static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, | |
| 842 | ptrdiff_t _xstride, ptrdiff_t _ystride, | ||
| 843 | int beta, const int *_tc, | ||
| 844 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
| 845 | { | ||
| 846 | 85719872 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
| 847 | 85719872 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
| 848 | |||
| 849 | 85719872 | beta <<= BIT_DEPTH - 8; | |
| 850 | |||
| 851 |
2/2✓ Branch 0 taken 85719872 times.
✓ Branch 1 taken 42859936 times.
|
257159616 | for (int j = 0; j < 2; j++) { |
| 852 | 171439744 | pixel* pix = (pixel*)_pix + j * 4 * ystride; | |
| 853 | 171439744 | const int dp0 = abs(P2 - 2 * P1 + P0); | |
| 854 | 171439744 | const int dq0 = abs(Q2 - 2 * Q1 + Q0); | |
| 855 | 171439744 | const int dp3 = abs(TP2 - 2 * TP1 + TP0); | |
| 856 | 171439744 | const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0); | |
| 857 | 171439744 | const int d0 = dp0 + dq0; | |
| 858 | 171439744 | const int d3 = dp3 + dq3; | |
| 859 | 171439744 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
| 860 | 171439744 | const int no_p = _no_p[j]; | |
| 861 | 171439744 | const int no_q = _no_q[j]; | |
| 862 | |||
| 863 |
2/2✓ Branch 0 taken 64560496 times.
✓ Branch 1 taken 21159376 times.
|
171439744 | if (d0 + d3 < beta) { |
| 864 | 129120992 | const int beta_3 = beta >> 3; | |
| 865 | 129120992 | const int beta_2 = beta >> 2; | |
| 866 | 129120992 | const int tc25 = ((tc * 5 + 1) >> 1); | |
| 867 | |||
| 868 |
4/4✓ Branch 0 taken 19435217 times.
✓ Branch 1 taken 45125279 times.
✓ Branch 2 taken 18939239 times.
✓ Branch 3 taken 495978 times.
|
129120992 | if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && |
| 869 |
4/4✓ Branch 0 taken 14278793 times.
✓ Branch 1 taken 4660446 times.
✓ Branch 2 taken 14174787 times.
✓ Branch 3 taken 104006 times.
|
37878478 | abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && |
| 870 |
4/4✓ Branch 0 taken 13677442 times.
✓ Branch 1 taken 497345 times.
✓ Branch 2 taken 13365700 times.
✓ Branch 3 taken 311742 times.
|
55080974 | (d0 << 1) < beta_2 && (d3 << 1) < beta_2) { |
| 871 | 26731400 | const int tc2 = tc << 1; | |
| 872 | 26731400 | FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q); | |
| 873 | } else { | ||
| 874 | 102389592 | int nd_p = 1; | |
| 875 | 102389592 | int nd_q = 1; | |
| 876 |
2/2✓ Branch 0 taken 35739751 times.
✓ Branch 1 taken 15455045 times.
|
102389592 | if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3)) |
| 877 | 71479502 | nd_p = 2; | |
| 878 |
2/2✓ Branch 0 taken 34866989 times.
✓ Branch 1 taken 16327807 times.
|
102389592 | if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3)) |
| 879 | 69733978 | nd_q = 2; | |
| 880 | 102389592 | FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q); | |
| 881 | } | ||
| 882 | } | ||
| 883 | } | ||
| 884 | 85719872 | } | |
| 885 | |||
| 886 | 26156308 | static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride, | |
| 887 | ptrdiff_t _ystride, const int *_tc, | ||
| 888 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
| 889 | { | ||
| 890 | int no_p, no_q; | ||
| 891 | 26156308 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
| 892 | 26156308 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
| 893 | 26156308 | const int size = 4; | |
| 894 | |||
| 895 |
2/2✓ Branch 0 taken 26156308 times.
✓ Branch 1 taken 13078154 times.
|
78468924 | for (int j = 0; j < 2; j++) { |
| 896 | 52312616 | pixel *pix = (pixel *)_pix + j * size * ystride; | |
| 897 | 52312616 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
| 898 |
2/2✓ Branch 0 taken 23535267 times.
✓ Branch 1 taken 2621041 times.
|
52312616 | if (tc > 0) { |
| 899 | 47070534 | no_p = _no_p[j]; | |
| 900 | 47070534 | no_q = _no_q[j]; | |
| 901 | |||
| 902 | 47070534 | FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q); | |
| 903 | } | ||
| 904 | } | ||
| 905 | 26156308 | } | |
| 906 | |||
| 907 | 12782610 | static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
| 908 | const int32_t *tc, const uint8_t *no_p, | ||
| 909 | const uint8_t *no_q) | ||
| 910 | { | ||
| 911 | 12782610 | FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); | |
| 912 | 12782610 | } | |
| 913 | |||
| 914 | 13373698 | static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
| 915 | const int32_t *tc, const uint8_t *no_p, | ||
| 916 | const uint8_t *no_q) | ||
| 917 | { | ||
| 918 | 13373698 | FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); | |
| 919 | 13373698 | } | |
| 920 | |||
| 921 | 43213298 | static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
| 922 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
| 923 | const uint8_t *no_q) | ||
| 924 | { | ||
| 925 | 43213298 | FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), | |
| 926 | beta, tc, no_p, no_q); | ||
| 927 | 43213298 | } | |
| 928 | |||
| 929 | 42506574 | static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
| 930 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
| 931 | const uint8_t *no_q) | ||
| 932 | { | ||
| 933 | 42506574 | FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, | |
| 934 | beta, tc, no_p, no_q); | ||
| 935 | 42506574 | } | |
| 936 | |||
/* Remove the line-zero sample shorthands (P side, then Q side). */
#undef P0
#undef P1
#undef P2
#undef P3
#undef Q0
#undef Q1
#undef Q2
#undef Q3

/* Remove the line-three sample shorthands (P side, then Q side). */
#undef TP0
#undef TP1
#undef TP2
#undef TP3
#undef TQ0
#undef TQ1
#undef TQ2
#undef TQ3
| 954 |