| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * HEVC video decoder | ||
| 3 | * | ||
| 4 | * Copyright (C) 2012 - 2013 Guillaume Martres | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU Lesser General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2.1 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * Lesser General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU Lesser General Public | ||
| 19 | * License along with FFmpeg; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include "get_bits.h" | ||
| 24 | #include "hevcdec.h" | ||
| 25 | |||
| 26 | #include "bit_depth_template.c" | ||
| 27 | #include "dsp.h" | ||
| 28 | #include "h26x/h2656_sao_template.c" | ||
| 29 | #include "h26x/h2656_inter_template.c" | ||
| 30 | |||
| 31 | 74598 | static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height, | |
| 32 | GetBitContext *gb, int pcm_bit_depth) | ||
| 33 | { | ||
| 34 | int x, y; | ||
| 35 | 74598 | pixel *dst = (pixel *)_dst; | |
| 36 | |||
| 37 | 74598 | stride /= sizeof(pixel); | |
| 38 | |||
| 39 |
2/2✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
|
729446 | for (y = 0; y < height; y++) { |
| 40 |
2/2✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
|
9419008 | for (x = 0; x < width; x++) |
| 41 | 8764160 | dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth); | |
| 42 | 654848 | dst += stride; | |
| 43 | } | ||
| 44 | 74598 | } | |
| 45 | |||
| 46 | 32495946 | static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res, | |
| 47 | ptrdiff_t stride, int size) | ||
| 48 | { | ||
| 49 | int x, y; | ||
| 50 | 32495946 | pixel *dst = (pixel *)_dst; | |
| 51 | |||
| 52 | 32495946 | stride /= sizeof(pixel); | |
| 53 | |||
| 54 |
2/2✓ Branch 0 taken 132367568 times.
✓ Branch 1 taken 16247973 times.
|
297231082 | for (y = 0; y < size; y++) { |
| 55 |
2/2✓ Branch 0 taken 1840979424 times.
✓ Branch 1 taken 132367568 times.
|
3946693984 | for (x = 0; x < size; x++) { |
| 56 | 3681958848 | dst[x] = av_clip_pixel(dst[x] + *res); | |
| 57 | 3681958848 | res++; | |
| 58 | } | ||
| 59 | 264735136 | dst += stride; | |
| 60 | } | ||
| 61 | 32495946 | } | |
| 62 | |||
| 63 | 17700692 | static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res, | |
| 64 | ptrdiff_t stride) | ||
| 65 | { | ||
| 66 | 17700692 | FUNC(add_residual)(_dst, res, stride, 4); | |
| 67 | 17700692 | } | |
| 68 | |||
| 69 | 8672730 | static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res, | |
| 70 | ptrdiff_t stride) | ||
| 71 | { | ||
| 72 | 8672730 | FUNC(add_residual)(_dst, res, stride, 8); | |
| 73 | 8672730 | } | |
| 74 | |||
| 75 | 4460640 | static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res, | |
| 76 | ptrdiff_t stride) | ||
| 77 | { | ||
| 78 | 4460640 | FUNC(add_residual)(_dst, res, stride, 16); | |
| 79 | 4460640 | } | |
| 80 | |||
| 81 | 1661884 | static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res, | |
| 82 | ptrdiff_t stride) | ||
| 83 | { | ||
| 84 | 1661884 | FUNC(add_residual)(_dst, res, stride, 32); | |
| 85 | 1661884 | } | |
| 86 | |||
| 87 | 54126 | static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode) | |
| 88 | { | ||
| 89 | 54126 | int16_t *coeffs = (int16_t *) _coeffs; | |
| 90 | int x, y; | ||
| 91 | 54126 | int size = 1 << log2_size; | |
| 92 | |||
| 93 |
2/2✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
|
54126 | if (mode) { |
| 94 | 19978 | coeffs += size; | |
| 95 |
2/2✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
|
132608 | for (y = 0; y < size - 1; y++) { |
| 96 |
2/2✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
|
1158966 | for (x = 0; x < size; x++) |
| 97 | 1046336 | coeffs[x] += coeffs[x - size]; | |
| 98 | 112630 | coeffs += size; | |
| 99 | } | ||
| 100 | } else { | ||
| 101 |
2/2✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
|
282852 | for (y = 0; y < size; y++) { |
| 102 |
2/2✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
|
2347328 | for (x = 1; x < size; x++) |
| 103 | 2098624 | coeffs[x] += coeffs[x - 1]; | |
| 104 | 248704 | coeffs += size; | |
| 105 | } | ||
| 106 | } | ||
| 107 | 54126 | } | |
| 108 | |||
| 109 | 888604 | static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) | |
| 110 | { | ||
| 111 | 888604 | int shift = 15 - BIT_DEPTH - log2_size; | |
| 112 | int x, y; | ||
| 113 | 888604 | int size = 1 << log2_size; | |
| 114 | |||
| 115 |
2/2✓ Branch 0 taken 435016 times.
✓ Branch 1 taken 9286 times.
|
888604 | if (shift > 0) { |
| 116 | 870032 | int offset = 1 << (shift - 1); | |
| 117 |
2/2✓ Branch 0 taken 1813776 times.
✓ Branch 1 taken 435016 times.
|
4497584 | for (y = 0; y < size; y++) { |
| 118 |
2/2✓ Branch 0 taken 8278048 times.
✓ Branch 1 taken 1813776 times.
|
20183648 | for (x = 0; x < size; x++) { |
| 119 | 16556096 | *coeffs = (*coeffs + offset) >> shift; | |
| 120 | 16556096 | coeffs++; | |
| 121 | } | ||
| 122 | } | ||
| 123 | } else { | ||
| 124 |
2/2✓ Branch 0 taken 74992 times.
✓ Branch 1 taken 9286 times.
|
168556 | for (y = 0; y < size; y++) { |
| 125 |
2/2✓ Branch 0 taken 613504 times.
✓ Branch 1 taken 74992 times.
|
1376992 | for (x = 0; x < size; x++) { |
| 126 | 1227008 | *coeffs = *(uint16_t*)coeffs << -shift; | |
| 127 | 1227008 | coeffs++; | |
| 128 | } | ||
| 129 | } | ||
| 130 | } | ||
| 131 | 888604 | } | |
| 132 | |||
| 133 | #define SET(dst, x) (dst) = (x) | ||
| 134 | #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) | ||
| 135 | |||
| 136 | #define TR_4x4_LUMA(dst, src, step, assign) \ | ||
| 137 | do { \ | ||
| 138 | int c0 = src[0 * step] + src[2 * step]; \ | ||
| 139 | int c1 = src[2 * step] + src[3 * step]; \ | ||
| 140 | int c2 = src[0 * step] - src[3 * step]; \ | ||
| 141 | int c3 = 74 * src[1 * step]; \ | ||
| 142 | \ | ||
| 143 | assign(dst[2 * step], 74 * (src[0 * step] - \ | ||
| 144 | src[2 * step] + \ | ||
| 145 | src[3 * step])); \ | ||
| 146 | assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \ | ||
| 147 | assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \ | ||
| 148 | assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \ | ||
| 149 | } while (0) | ||
| 150 | |||
| 151 | 8011170 | static void FUNC(transform_4x4_luma)(int16_t *coeffs) | |
| 152 | { | ||
| 153 | int i; | ||
| 154 | 8011170 | int shift = 7; | |
| 155 | 8011170 | int add = 1 << (shift - 1); | |
| 156 | 8011170 | int16_t *src = coeffs; | |
| 157 | |||
| 158 |
2/2✓ Branch 0 taken 16022340 times.
✓ Branch 1 taken 4005585 times.
|
40055850 | for (i = 0; i < 4; i++) { |
| 159 | 32044680 | TR_4x4_LUMA(src, src, 4, SCALE); | |
| 160 | 32044680 | src++; | |
| 161 | } | ||
| 162 | |||
| 163 | 8011170 | shift = 20 - BIT_DEPTH; | |
| 164 | 8011170 | add = 1 << (shift - 1); | |
| 165 |
2/2✓ Branch 0 taken 16022340 times.
✓ Branch 1 taken 4005585 times.
|
40055850 | for (i = 0; i < 4; i++) { |
| 166 | 32044680 | TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); | |
| 167 | 32044680 | coeffs += 4; | |
| 168 | } | ||
| 169 | 8011170 | } | |
| 170 | |||
| 171 | #undef TR_4x4_LUMA | ||
| 172 | |||
| 173 | #define TR_4(dst, src, dstep, sstep, assign, end) \ | ||
| 174 | do { \ | ||
| 175 | const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ | ||
| 176 | const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ | ||
| 177 | const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ | ||
| 178 | const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ | ||
| 179 | \ | ||
| 180 | assign(dst[0 * dstep], e0 + o0); \ | ||
| 181 | assign(dst[1 * dstep], e1 + o1); \ | ||
| 182 | assign(dst[2 * dstep], e1 - o1); \ | ||
| 183 | assign(dst[3 * dstep], e0 - o0); \ | ||
| 184 | } while (0) | ||
| 185 | |||
| 186 | #define TR_8(dst, src, dstep, sstep, assign, end) \ | ||
| 187 | do { \ | ||
| 188 | int i, j; \ | ||
| 189 | int e_8[4]; \ | ||
| 190 | int o_8[4] = { 0 }; \ | ||
| 191 | for (i = 0; i < 4; i++) \ | ||
| 192 | for (j = 1; j < end; j += 2) \ | ||
| 193 | o_8[i] += transform[4 * j][i] * src[j * sstep]; \ | ||
| 194 | TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ | ||
| 195 | \ | ||
| 196 | for (i = 0; i < 4; i++) { \ | ||
| 197 | assign(dst[i * dstep], e_8[i] + o_8[i]); \ | ||
| 198 | assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ | ||
| 199 | } \ | ||
| 200 | } while (0) | ||
| 201 | |||
| 202 | #define TR_16(dst, src, dstep, sstep, assign, end) \ | ||
| 203 | do { \ | ||
| 204 | int i, j; \ | ||
| 205 | int e_16[8]; \ | ||
| 206 | int o_16[8] = { 0 }; \ | ||
| 207 | for (i = 0; i < 8; i++) \ | ||
| 208 | for (j = 1; j < end; j += 2) \ | ||
| 209 | o_16[i] += transform[2 * j][i] * src[j * sstep]; \ | ||
| 210 | TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ | ||
| 211 | \ | ||
| 212 | for (i = 0; i < 8; i++) { \ | ||
| 213 | assign(dst[i * dstep], e_16[i] + o_16[i]); \ | ||
| 214 | assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ | ||
| 215 | } \ | ||
| 216 | } while (0) | ||
| 217 | |||
| 218 | #define TR_32(dst, src, dstep, sstep, assign, end) \ | ||
| 219 | do { \ | ||
| 220 | int i, j; \ | ||
| 221 | int e_32[16]; \ | ||
| 222 | int o_32[16] = { 0 }; \ | ||
| 223 | for (i = 0; i < 16; i++) \ | ||
| 224 | for (j = 1; j < end; j += 2) \ | ||
| 225 | o_32[i] += transform[j][i] * src[j * sstep]; \ | ||
| 226 | TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \ | ||
| 227 | \ | ||
| 228 | for (i = 0; i < 16; i++) { \ | ||
| 229 | assign(dst[i * dstep], e_32[i] + o_32[i]); \ | ||
| 230 | assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ | ||
| 231 | } \ | ||
| 232 | } while (0) | ||
| 233 | |||
| 234 | #define IDCT_VAR4(H) \ | ||
| 235 | int limit2 = FFMIN(col_limit + 4, H) | ||
| 236 | #define IDCT_VAR8(H) \ | ||
| 237 | int limit = FFMIN(col_limit, H); \ | ||
| 238 | int limit2 = FFMIN(col_limit + 4, H) | ||
| 239 | #define IDCT_VAR16(H) IDCT_VAR8(H) | ||
| 240 | #define IDCT_VAR32(H) IDCT_VAR8(H) | ||
| 241 | |||
| 242 | #define IDCT(H) \ | ||
| 243 | static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ | ||
| 244 | int col_limit) \ | ||
| 245 | { \ | ||
| 246 | int i; \ | ||
| 247 | int shift = 7; \ | ||
| 248 | int add = 1 << (shift - 1); \ | ||
| 249 | int16_t *src = coeffs; \ | ||
| 250 | IDCT_VAR ## H(H); \ | ||
| 251 | \ | ||
| 252 | for (i = 0; i < H; i++) { \ | ||
| 253 | TR_ ## H(src, src, H, H, SCALE, limit2); \ | ||
| 254 | if (limit2 < H && i%4 == 0 && !!i) \ | ||
| 255 | limit2 -= 4; \ | ||
| 256 | src++; \ | ||
| 257 | } \ | ||
| 258 | \ | ||
| 259 | shift = 20 - BIT_DEPTH; \ | ||
| 260 | add = 1 << (shift - 1); \ | ||
| 261 | for (i = 0; i < H; i++) { \ | ||
| 262 | TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ | ||
| 263 | coeffs += H; \ | ||
| 264 | } \ | ||
| 265 | } | ||
| 266 | |||
| 267 | #define IDCT_DC(H) \ | ||
| 268 | static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \ | ||
| 269 | { \ | ||
| 270 | int i, j; \ | ||
| 271 | int shift = 14 - BIT_DEPTH; \ | ||
| 272 | int add = 1 << (shift - 1); \ | ||
| 273 | int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ | ||
| 274 | \ | ||
| 275 | for (j = 0; j < H; j++) { \ | ||
| 276 | for (i = 0; i < H; i++) { \ | ||
| 277 | coeffs[i + j * H] = coeff; \ | ||
| 278 | } \ | ||
| 279 | } \ | ||
| 280 | } | ||
| 281 | |||
| 282 |
5/10✗ Branch 0 not taken.
✓ Branch 1 taken 11057660 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 11057660 times.
✓ Branch 7 taken 2764415 times.
✓ Branch 8 taken 11057660 times.
✓ Branch 9 taken 2764415 times.
|
49759470 | IDCT( 4) |
| 283 |
17/22✓ Branch 0 taken 423490816 times.
✓ Branch 1 taken 105872704 times.
✓ Branch 2 taken 105872704 times.
✓ Branch 3 taken 26468176 times.
✓ Branch 4 taken 105872704 times.
✓ Branch 5 taken 26468176 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26468176 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 26468176 times.
✓ Branch 13 taken 3308522 times.
✓ Branch 14 taken 297725184 times.
✓ Branch 15 taken 105872704 times.
✓ Branch 16 taken 105872704 times.
✓ Branch 17 taken 26468176 times.
✓ Branch 18 taken 105872704 times.
✓ Branch 19 taken 26468176 times.
✓ Branch 20 taken 26468176 times.
✓ Branch 21 taken 3308522 times.
|
2401903380 | IDCT( 8) |
| 284 |
34/34✓ Branch 0 taken 856046432 times.
✓ Branch 1 taken 226411648 times.
✓ Branch 2 taken 226411648 times.
✓ Branch 3 taken 28301456 times.
✓ Branch 4 taken 452823296 times.
✓ Branch 5 taken 113205824 times.
✓ Branch 6 taken 113205824 times.
✓ Branch 7 taken 28301456 times.
✓ Branch 8 taken 113205824 times.
✓ Branch 9 taken 28301456 times.
✓ Branch 10 taken 226411648 times.
✓ Branch 11 taken 28301456 times.
✓ Branch 12 taken 21046848 times.
✓ Branch 13 taken 7254608 times.
✓ Branch 14 taken 5261712 times.
✓ Branch 15 taken 15785136 times.
✓ Branch 16 taken 3946284 times.
✓ Branch 17 taken 1315428 times.
✓ Branch 18 taken 28301456 times.
✓ Branch 19 taken 1768841 times.
✓ Branch 20 taken 899776768 times.
✓ Branch 21 taken 226411648 times.
✓ Branch 22 taken 226411648 times.
✓ Branch 23 taken 28301456 times.
✓ Branch 24 taken 452823296 times.
✓ Branch 25 taken 113205824 times.
✓ Branch 26 taken 113205824 times.
✓ Branch 27 taken 28301456 times.
✓ Branch 28 taken 113205824 times.
✓ Branch 29 taken 28301456 times.
✓ Branch 30 taken 226411648 times.
✓ Branch 31 taken 28301456 times.
✓ Branch 32 taken 28301456 times.
✓ Branch 33 taken 1768841 times.
|
8156622866 | IDCT(16) |
| 285 |
46/46✓ Branch 0 taken 1339815808 times.
✓ Branch 1 taken 350989312 times.
✓ Branch 2 taken 350989312 times.
✓ Branch 3 taken 21936832 times.
✓ Branch 4 taken 325871120 times.
✓ Branch 5 taken 175494656 times.
✓ Branch 6 taken 175494656 times.
✓ Branch 7 taken 21936832 times.
✓ Branch 8 taken 350989312 times.
✓ Branch 9 taken 87747328 times.
✓ Branch 10 taken 87747328 times.
✓ Branch 11 taken 21936832 times.
✓ Branch 12 taken 87747328 times.
✓ Branch 13 taken 21936832 times.
✓ Branch 14 taken 175494656 times.
✓ Branch 15 taken 21936832 times.
✓ Branch 16 taken 350989312 times.
✓ Branch 17 taken 21936832 times.
✓ Branch 18 taken 19500512 times.
✓ Branch 19 taken 2436320 times.
✓ Branch 20 taken 4875128 times.
✓ Branch 21 taken 14625384 times.
✓ Branch 22 taken 4265737 times.
✓ Branch 23 taken 609391 times.
✓ Branch 24 taken 21936832 times.
✓ Branch 25 taken 685526 times.
✓ Branch 26 taken 2028360704 times.
✓ Branch 27 taken 350989312 times.
✓ Branch 28 taken 350989312 times.
✓ Branch 29 taken 21936832 times.
✓ Branch 30 taken 494795776 times.
✓ Branch 31 taken 175494656 times.
✓ Branch 32 taken 175494656 times.
✓ Branch 33 taken 21936832 times.
✓ Branch 34 taken 350989312 times.
✓ Branch 35 taken 87747328 times.
✓ Branch 36 taken 87747328 times.
✓ Branch 37 taken 21936832 times.
✓ Branch 38 taken 87747328 times.
✓ Branch 39 taken 21936832 times.
✓ Branch 40 taken 175494656 times.
✓ Branch 41 taken 21936832 times.
✓ Branch 42 taken 350989312 times.
✓ Branch 43 taken 21936832 times.
✓ Branch 44 taken 21936832 times.
✓ Branch 45 taken 685526 times.
|
14784612812 | IDCT(32) |
| 286 | |||
| 287 |
4/4✓ Branch 0 taken 19457376 times.
✓ Branch 1 taken 4864344 times.
✓ Branch 2 taken 4864344 times.
✓ Branch 3 taken 1216086 times.
|
51075612 | IDCT_DC( 4) |
| 288 |
4/4✓ Branch 0 taken 60963584 times.
✓ Branch 1 taken 7620448 times.
✓ Branch 2 taken 7620448 times.
✓ Branch 3 taken 952556 times.
|
139073176 | IDCT_DC( 8) |
| 289 |
4/4✓ Branch 0 taken 114058752 times.
✓ Branch 1 taken 7128672 times.
✓ Branch 2 taken 7128672 times.
✓ Branch 3 taken 445542 times.
|
243265932 | IDCT_DC(16) |
| 290 |
4/4✓ Branch 0 taken 146119680 times.
✓ Branch 1 taken 4566240 times.
✓ Branch 2 taken 4566240 times.
✓ Branch 3 taken 142695 times.
|
301657230 | IDCT_DC(32) |
| 291 | |||
| 292 | #undef TR_4 | ||
| 293 | #undef TR_8 | ||
| 294 | #undef TR_16 | ||
| 295 | #undef TR_32 | ||
| 296 | |||
| 297 | #undef SET | ||
| 298 | #undef SCALE | ||
| 299 | |||
| 300 | //////////////////////////////////////////////////////////////////////////////// | ||
| 301 | // | ||
| 302 | //////////////////////////////////////////////////////////////////////////////// | ||
| 303 | #define ff_hevc_pel_filters ff_hevc_qpel_filters | ||
| 304 | #define DECL_HV_FILTER(f) \ | ||
| 305 | const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ | ||
| 306 | const int8_t *vf = ff_hevc_ ## f ## _filters[my]; | ||
| 307 | |||
| 308 | #define FW_PUT(p, f, t) \ | ||
| 309 | static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \ | ||
| 310 | intptr_t mx, intptr_t my, int width) \ | ||
| 311 | { \ | ||
| 312 | DECL_HV_FILTER(p) \ | ||
| 313 | FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \ | ||
| 314 | } | ||
| 315 | |||
| 316 | #define FW_PUT_UNI(p, f, t) \ | ||
| 317 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 318 | ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \ | ||
| 319 | { \ | ||
| 320 | DECL_HV_FILTER(p) \ | ||
| 321 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \ | ||
| 322 | } | ||
| 323 | |||
| 324 | #define FW_PUT_UNI_W(p, f, t) \ | ||
| 325 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 326 | ptrdiff_t srcstride,int height, int denom, int wx, int ox, \ | ||
| 327 | intptr_t mx, intptr_t my, int width) \ | ||
| 328 | { \ | ||
| 329 | DECL_HV_FILTER(p) \ | ||
| 330 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \ | ||
| 331 | } | ||
| 332 | |||
| 333 | #define FW_PUT_FUNCS(f, t, dir) \ | ||
| 334 | FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \ | ||
| 335 | FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \ | ||
| 336 | FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir) | ||
| 337 | |||
| 338 | 4997298 | FW_PUT(pel, pel_pixels, pixels) | |
| 339 | 6980162 | FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels) | |
| 340 | 145958 | FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels) | |
| 341 | |||
| 342 | 3444674 | FW_PUT_FUNCS(qpel, luma, h ) | |
| 343 | 2977700 | FW_PUT_FUNCS(qpel, luma, v ) | |
| 344 | 9269802 | FW_PUT_FUNCS(qpel, luma, hv ) | |
| 345 | 5498932 | FW_PUT_FUNCS(epel, chroma, h ) | |
| 346 | 4387376 | FW_PUT_FUNCS(epel, chroma, v ) | |
| 347 | 24022572 | FW_PUT_FUNCS(epel, chroma, hv ) | |
| 348 | |||
| 349 | 5462242 | static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 350 | const int16_t *src2, | ||
| 351 | int height, intptr_t mx, intptr_t my, int width) | ||
| 352 | { | ||
| 353 | int x, y; | ||
| 354 | 5462242 | const pixel *src = (const pixel *)_src; | |
| 355 | 5462242 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 356 | 5462242 | pixel *dst = (pixel *)_dst; | |
| 357 | 5462242 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 358 | |||
| 359 | 5462242 | int shift = 14 + 1 - BIT_DEPTH; | |
| 360 | #if BIT_DEPTH < 14 | ||
| 361 | 5462242 | int offset = 1 << (shift - 1); | |
| 362 | #else | ||
| 363 | int offset = 0; | ||
| 364 | #endif | ||
| 365 | |||
| 366 |
2/2✓ Branch 0 taken 41305794 times.
✓ Branch 1 taken 2731121 times.
|
88073830 | for (y = 0; y < height; y++) { |
| 367 |
2/2✓ Branch 0 taken 1067725940 times.
✓ Branch 1 taken 41305794 times.
|
2218063468 | for (x = 0; x < width; x++) |
| 368 | 2135451880 | dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); | |
| 369 | 82611588 | src += srcstride; | |
| 370 | 82611588 | dst += dststride; | |
| 371 | 82611588 | src2 += MAX_PB_SIZE; | |
| 372 | } | ||
| 373 | 5462242 | } | |
| 374 | |||
| 375 | 64154 | static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 376 | const int16_t *src2, | ||
| 377 | int height, int denom, int wx0, int wx1, | ||
| 378 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 379 | { | ||
| 380 | int x, y; | ||
| 381 | 64154 | const pixel *src = (const pixel *)_src; | |
| 382 | 64154 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 383 | 64154 | pixel *dst = (pixel *)_dst; | |
| 384 | 64154 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 385 | |||
| 386 | 64154 | int shift = 14 + 1 - BIT_DEPTH; | |
| 387 | 64154 | int log2Wd = denom + shift - 1; | |
| 388 | |||
| 389 | 64154 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 390 | 64154 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 391 |
2/2✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
|
1406154 | for (y = 0; y < height; y++) { |
| 392 |
2/2✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
|
45123456 | for (x = 0; x < width; x++) { |
| 393 | 43781456 | dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1)); | |
| 394 | } | ||
| 395 | 1342000 | src += srcstride; | |
| 396 | 1342000 | dst += dststride; | |
| 397 | 1342000 | src2 += MAX_PB_SIZE; | |
| 398 | } | ||
| 399 | 64154 | } | |
| 400 | |||
| 401 | //////////////////////////////////////////////////////////////////////////////// | ||
| 402 | // | ||
| 403 | //////////////////////////////////////////////////////////////////////////////// | ||
| 404 | #define QPEL_FILTER(src, stride) \ | ||
| 405 | (filter[0] * src[x - 3 * stride] + \ | ||
| 406 | filter[1] * src[x - 2 * stride] + \ | ||
| 407 | filter[2] * src[x - stride] + \ | ||
| 408 | filter[3] * src[x ] + \ | ||
| 409 | filter[4] * src[x + stride] + \ | ||
| 410 | filter[5] * src[x + 2 * stride] + \ | ||
| 411 | filter[6] * src[x + 3 * stride] + \ | ||
| 412 | filter[7] * src[x + 4 * stride]) | ||
| 413 | |||
| 414 | 1362336 | static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
| 415 | const int16_t *src2, | ||
| 416 | int height, intptr_t mx, intptr_t my, int width) | ||
| 417 | { | ||
| 418 | int x, y; | ||
| 419 | 1362336 | const pixel *src = (const pixel*)_src; | |
| 420 | 1362336 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 421 | 1362336 | pixel *dst = (pixel *)_dst; | |
| 422 | 1362336 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 423 | |||
| 424 | 1362336 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
| 425 | |||
| 426 | 1362336 | int shift = 14 + 1 - BIT_DEPTH; | |
| 427 | #if BIT_DEPTH < 14 | ||
| 428 | 1362336 | int offset = 1 << (shift - 1); | |
| 429 | #else | ||
| 430 | int offset = 0; | ||
| 431 | #endif | ||
| 432 | |||
| 433 |
2/2✓ Branch 0 taken 15144496 times.
✓ Branch 1 taken 681168 times.
|
31651328 | for (y = 0; y < height; y++) { |
| 434 |
2/2✓ Branch 0 taken 513210376 times.
✓ Branch 1 taken 15144496 times.
|
1056709744 | for (x = 0; x < width; x++) |
| 435 | 1026420752 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 436 | 30288992 | src += srcstride; | |
| 437 | 30288992 | dst += dststride; | |
| 438 | 30288992 | src2 += MAX_PB_SIZE; | |
| 439 | } | ||
| 440 | 1362336 | } | |
| 441 | |||
| 442 | 1073322 | static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 443 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 444 | int height, intptr_t mx, intptr_t my, int width) | ||
| 445 | { | ||
| 446 | int x, y; | ||
| 447 | 1073322 | const pixel *src = (const pixel*)_src; | |
| 448 | 1073322 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 449 | 1073322 | pixel *dst = (pixel *)_dst; | |
| 450 | 1073322 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 451 | |||
| 452 | 1073322 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
| 453 | |||
| 454 | 1073322 | int shift = 14 + 1 - BIT_DEPTH; | |
| 455 | #if BIT_DEPTH < 14 | ||
| 456 | 1073322 | int offset = 1 << (shift - 1); | |
| 457 | #else | ||
| 458 | int offset = 0; | ||
| 459 | #endif | ||
| 460 | |||
| 461 |
2/2✓ Branch 0 taken 11564808 times.
✓ Branch 1 taken 536661 times.
|
24202938 | for (y = 0; y < height; y++) { |
| 462 |
2/2✓ Branch 0 taken 377783176 times.
✓ Branch 1 taken 11564808 times.
|
778695968 | for (x = 0; x < width; x++) |
| 463 | 755566352 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 464 | 23129616 | src += srcstride; | |
| 465 | 23129616 | dst += dststride; | |
| 466 | 23129616 | src2 += MAX_PB_SIZE; | |
| 467 | } | ||
| 468 | 1073322 | } | |
| 469 | |||
| 470 | 3364606 | static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 471 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 472 | int height, intptr_t mx, intptr_t my, int width) | ||
| 473 | { | ||
| 474 | int x, y; | ||
| 475 | const int8_t *filter; | ||
| 476 | 3364606 | const pixel *src = (const pixel*)_src; | |
| 477 | 3364606 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 478 | 3364606 | pixel *dst = (pixel *)_dst; | |
| 479 | 3364606 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 480 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 481 | 3364606 | int16_t *tmp = tmp_array; | |
| 482 | 3364606 | int shift = 14 + 1 - BIT_DEPTH; | |
| 483 | #if BIT_DEPTH < 14 | ||
| 484 | 3364606 | int offset = 1 << (shift - 1); | |
| 485 | #else | ||
| 486 | int offset = 0; | ||
| 487 | #endif | ||
| 488 | |||
| 489 | 3364606 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
| 490 | 3364606 | filter = ff_hevc_qpel_filters[mx]; | |
| 491 |
2/2✓ Branch 0 taken 48024893 times.
✓ Branch 1 taken 1682303 times.
|
99414392 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
| 492 |
2/2✓ Branch 0 taken 1424943792 times.
✓ Branch 1 taken 48024893 times.
|
2945937370 | for (x = 0; x < width; x++) |
| 493 | 2849887584 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 494 | 96049786 | src += srcstride; | |
| 495 | 96049786 | tmp += MAX_PB_SIZE; | |
| 496 | } | ||
| 497 | |||
| 498 | 3364606 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 499 | 3364606 | filter = ff_hevc_qpel_filters[my]; | |
| 500 | |||
| 501 |
2/2✓ Branch 0 taken 36248772 times.
✓ Branch 1 taken 1682303 times.
|
75862150 | for (y = 0; y < height; y++) { |
| 502 |
2/2✓ Branch 0 taken 1174254024 times.
✓ Branch 1 taken 36248772 times.
|
2421005592 | for (x = 0; x < width; x++) |
| 503 | 2348508048 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
| 504 | 72497544 | tmp += MAX_PB_SIZE; | |
| 505 | 72497544 | dst += dststride; | |
| 506 | 72497544 | src2 += MAX_PB_SIZE; | |
| 507 | } | ||
| 508 | 3364606 | } | |
| 509 | |||
| 510 | 22810 | static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 511 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 512 | int height, int denom, int wx0, int wx1, | ||
| 513 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 514 | { | ||
| 515 | int x, y; | ||
| 516 | 22810 | const pixel *src = (const pixel*)_src; | |
| 517 | 22810 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 518 | 22810 | pixel *dst = (pixel *)_dst; | |
| 519 | 22810 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 520 | |||
| 521 | 22810 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
| 522 | |||
| 523 | 22810 | int shift = 14 + 1 - BIT_DEPTH; | |
| 524 | 22810 | int log2Wd = denom + shift - 1; | |
| 525 | |||
| 526 | 22810 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 527 | 22810 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 528 |
2/2✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
|
580010 | for (y = 0; y < height; y++) { |
| 529 |
2/2✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
|
20295600 | for (x = 0; x < width; x++) |
| 530 | 19738400 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 531 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 532 | 557200 | src += srcstride; | |
| 533 | 557200 | dst += dststride; | |
| 534 | 557200 | src2 += MAX_PB_SIZE; | |
| 535 | } | ||
| 536 | 22810 | } | |
| 537 | |||
| 538 | 20448 | static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 539 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 540 | int height, int denom, int wx0, int wx1, | ||
| 541 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 542 | { | ||
| 543 | int x, y; | ||
| 544 | 20448 | const pixel *src = (const pixel*)_src; | |
| 545 | 20448 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 546 | 20448 | pixel *dst = (pixel *)_dst; | |
| 547 | 20448 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 548 | |||
| 549 | 20448 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
| 550 | |||
| 551 | 20448 | int shift = 14 + 1 - BIT_DEPTH; | |
| 552 | 20448 | int log2Wd = denom + shift - 1; | |
| 553 | |||
| 554 | 20448 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 555 | 20448 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 556 |
2/2✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
|
493096 | for (y = 0; y < height; y++) { |
| 557 |
2/2✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
|
16356712 | for (x = 0; x < width; x++) |
| 558 | 15884064 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 559 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 560 | 472648 | src += srcstride; | |
| 561 | 472648 | dst += dststride; | |
| 562 | 472648 | src2 += MAX_PB_SIZE; | |
| 563 | } | ||
| 564 | 20448 | } | |
| 565 | |||
| 566 | 71042 | static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 567 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 568 | int height, int denom, int wx0, int wx1, | ||
| 569 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 570 | { | ||
| 571 | int x, y; | ||
| 572 | const int8_t *filter; | ||
| 573 | 71042 | const pixel *src = (const pixel*)_src; | |
| 574 | 71042 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 575 | 71042 | pixel *dst = (pixel *)_dst; | |
| 576 | 71042 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 577 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 578 | 71042 | int16_t *tmp = tmp_array; | |
| 579 | 71042 | int shift = 14 + 1 - BIT_DEPTH; | |
| 580 | 71042 | int log2Wd = denom + shift - 1; | |
| 581 | |||
| 582 | 71042 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
| 583 | 71042 | filter = ff_hevc_qpel_filters[mx]; | |
| 584 |
2/2✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
|
2333456 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
| 585 |
2/2✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
|
76174558 | for (x = 0; x < width; x++) |
| 586 | 73912144 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 587 | 2262414 | src += srcstride; | |
| 588 | 2262414 | tmp += MAX_PB_SIZE; | |
| 589 | } | ||
| 590 | |||
| 591 | 71042 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 592 | 71042 | filter = ff_hevc_qpel_filters[my]; | |
| 593 | |||
| 594 | 71042 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 595 | 71042 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 596 |
2/2✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
|
1836162 | for (y = 0; y < height; y++) { |
| 597 |
2/2✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
|
63734816 | for (x = 0; x < width; x++) |
| 598 | 61969696 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
| 599 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 600 | 1765120 | tmp += MAX_PB_SIZE; | |
| 601 | 1765120 | dst += dststride; | |
| 602 | 1765120 | src2 += MAX_PB_SIZE; | |
| 603 | } | ||
| 604 | 71042 | } | |
| 605 | |||
| 606 | //////////////////////////////////////////////////////////////////////////////// | ||
| 607 | // | ||
| 608 | //////////////////////////////////////////////////////////////////////////////// | ||
| 609 | #define EPEL_FILTER(src, stride) \ | ||
| 610 | (filter[0] * src[x - stride] + \ | ||
| 611 | filter[1] * src[x] + \ | ||
| 612 | filter[2] * src[x + stride] + \ | ||
| 613 | filter[3] * src[x + 2 * stride]) | ||
| 614 | |||
| 615 | 2165962 | static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 616 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 617 | int height, intptr_t mx, intptr_t my, int width) | ||
| 618 | { | ||
| 619 | int x, y; | ||
| 620 | 2165962 | const pixel *src = (const pixel *)_src; | |
| 621 | 2165962 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 622 | 2165962 | pixel *dst = (pixel *)_dst; | |
| 623 | 2165962 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 624 | 2165962 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 625 | 2165962 | int shift = 14 + 1 - BIT_DEPTH; | |
| 626 | #if BIT_DEPTH < 14 | ||
| 627 | 2165962 | int offset = 1 << (shift - 1); | |
| 628 | #else | ||
| 629 | int offset = 0; | ||
| 630 | #endif | ||
| 631 | |||
| 632 |
2/2✓ Branch 0 taken 12792482 times.
✓ Branch 1 taken 1082981 times.
|
27750926 | for (y = 0; y < height; y++) { |
| 633 |
2/2✓ Branch 0 taken 217326132 times.
✓ Branch 1 taken 12792482 times.
|
460237228 | for (x = 0; x < width; x++) { |
| 634 | 434652264 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 635 | } | ||
| 636 | 25584964 | dst += dststride; | |
| 637 | 25584964 | src += srcstride; | |
| 638 | 25584964 | src2 += MAX_PB_SIZE; | |
| 639 | } | ||
| 640 | 2165962 | } | |
| 641 | |||
| 642 | 1506398 | static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 643 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 644 | int height, intptr_t mx, intptr_t my, int width) | ||
| 645 | { | ||
| 646 | int x, y; | ||
| 647 | 1506398 | const pixel *src = (const pixel *)_src; | |
| 648 | 1506398 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 649 | 1506398 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
| 650 | 1506398 | pixel *dst = (pixel *)_dst; | |
| 651 | 1506398 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 652 | 1506398 | int shift = 14 + 1 - BIT_DEPTH; | |
| 653 | #if BIT_DEPTH < 14 | ||
| 654 | 1506398 | int offset = 1 << (shift - 1); | |
| 655 | #else | ||
| 656 | int offset = 0; | ||
| 657 | #endif | ||
| 658 | |||
| 659 |
2/2✓ Branch 0 taken 9087706 times.
✓ Branch 1 taken 753199 times.
|
19681810 | for (y = 0; y < height; y++) { |
| 660 |
2/2✓ Branch 0 taken 149357300 times.
✓ Branch 1 taken 9087706 times.
|
316890012 | for (x = 0; x < width; x++) |
| 661 | 298714600 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
| 662 | 18175412 | dst += dststride; | |
| 663 | 18175412 | src += srcstride; | |
| 664 | 18175412 | src2 += MAX_PB_SIZE; | |
| 665 | } | ||
| 666 | 1506398 | } | |
| 667 | |||
| 668 | 8559622 | static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 669 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 670 | int height, intptr_t mx, intptr_t my, int width) | ||
| 671 | { | ||
| 672 | int x, y; | ||
| 673 | 8559622 | const pixel *src = (const pixel *)_src; | |
| 674 | 8559622 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 675 | 8559622 | pixel *dst = (pixel *)_dst; | |
| 676 | 8559622 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 677 | 8559622 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 678 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 679 | 8559622 | int16_t *tmp = tmp_array; | |
| 680 | 8559622 | int shift = 14 + 1 - BIT_DEPTH; | |
| 681 | #if BIT_DEPTH < 14 | ||
| 682 | 8559622 | int offset = 1 << (shift - 1); | |
| 683 | #else | ||
| 684 | int offset = 0; | ||
| 685 | #endif | ||
| 686 | |||
| 687 | 8559622 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
| 688 | |||
| 689 |
2/2✓ Branch 0 taken 61896855 times.
✓ Branch 1 taken 4279811 times.
|
132353332 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
| 690 |
2/2✓ Branch 0 taken 920742850 times.
✓ Branch 1 taken 61896855 times.
|
1965279410 | for (x = 0; x < width; x++) |
| 691 | 1841485700 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 692 | 123793710 | src += srcstride; | |
| 693 | 123793710 | tmp += MAX_PB_SIZE; | |
| 694 | } | ||
| 695 | |||
| 696 | 8559622 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 697 | 8559622 | filter = ff_hevc_epel_filters[my]; | |
| 698 | |||
| 699 |
2/2✓ Branch 0 taken 49057422 times.
✓ Branch 1 taken 4279811 times.
|
106674466 | for (y = 0; y < height; y++) { |
| 700 |
2/2✓ Branch 0 taken 783203092 times.
✓ Branch 1 taken 49057422 times.
|
1664521028 | for (x = 0; x < width; x++) |
| 701 | 1566406184 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
| 702 | 98114844 | tmp += MAX_PB_SIZE; | |
| 703 | 98114844 | dst += dststride; | |
| 704 | 98114844 | src2 += MAX_PB_SIZE; | |
| 705 | } | ||
| 706 | 8559622 | } | |
| 707 | |||
| 708 | 33080 | static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 709 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 710 | int height, int denom, int wx0, int wx1, | ||
| 711 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 712 | { | ||
| 713 | int x, y; | ||
| 714 | 33080 | const pixel *src = (const pixel *)_src; | |
| 715 | 33080 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 716 | 33080 | pixel *dst = (pixel *)_dst; | |
| 717 | 33080 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 718 | 33080 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 719 | 33080 | int shift = 14 + 1 - BIT_DEPTH; | |
| 720 | 33080 | int log2Wd = denom + shift - 1; | |
| 721 | |||
| 722 | 33080 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 723 | 33080 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 724 |
2/2✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
|
500664 | for (y = 0; y < height; y++) { |
| 725 |
2/2✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
|
11079888 | for (x = 0; x < width; x++) |
| 726 | 10612304 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 727 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 728 | 467584 | src += srcstride; | |
| 729 | 467584 | dst += dststride; | |
| 730 | 467584 | src2 += MAX_PB_SIZE; | |
| 731 | } | ||
| 732 | 33080 | } | |
| 733 | |||
| 734 | 24552 | static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 735 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 736 | int height, int denom, int wx0, int wx1, | ||
| 737 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 738 | { | ||
| 739 | int x, y; | ||
| 740 | 24552 | const pixel *src = (const pixel *)_src; | |
| 741 | 24552 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 742 | 24552 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
| 743 | 24552 | pixel *dst = (pixel *)_dst; | |
| 744 | 24552 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 745 | 24552 | int shift = 14 + 1 - BIT_DEPTH; | |
| 746 | 24552 | int log2Wd = denom + shift - 1; | |
| 747 | |||
| 748 | 24552 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 749 | 24552 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 750 |
2/2✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
|
360464 | for (y = 0; y < height; y++) { |
| 751 |
2/2✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
|
8181304 | for (x = 0; x < width; x++) |
| 752 | 7845392 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
| 753 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 754 | 335912 | src += srcstride; | |
| 755 | 335912 | dst += dststride; | |
| 756 | 335912 | src2 += MAX_PB_SIZE; | |
| 757 | } | ||
| 758 | 24552 | } | |
| 759 | |||
| 760 | 166916 | static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
| 761 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
| 762 | int height, int denom, int wx0, int wx1, | ||
| 763 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
| 764 | { | ||
| 765 | int x, y; | ||
| 766 | 166916 | const pixel *src = (const pixel *)_src; | |
| 767 | 166916 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
| 768 | 166916 | pixel *dst = (pixel *)_dst; | |
| 769 | 166916 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
| 770 | 166916 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
| 771 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
| 772 | 166916 | int16_t *tmp = tmp_array; | |
| 773 | 166916 | int shift = 14 + 1 - BIT_DEPTH; | |
| 774 | 166916 | int log2Wd = denom + shift - 1; | |
| 775 | |||
| 776 | 166916 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
| 777 | |||
| 778 |
2/2✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
|
2744736 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
| 779 |
2/2✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
|
46142260 | for (x = 0; x < width; x++) |
| 780 | 43564440 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 781 | 2577820 | src += srcstride; | |
| 782 | 2577820 | tmp += MAX_PB_SIZE; | |
| 783 | } | ||
| 784 | |||
| 785 | 166916 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 786 | 166916 | filter = ff_hevc_epel_filters[my]; | |
| 787 | |||
| 788 | 166916 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
| 789 | 166916 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
| 790 |
2/2✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
|
2243988 | for (y = 0; y < height; y++) { |
| 791 |
2/2✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
|
39638176 | for (x = 0; x < width; x++) |
| 792 | 37561104 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
| 793 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
| 794 | 2077072 | tmp += MAX_PB_SIZE; | |
| 795 | 2077072 | dst += dststride; | |
| 796 | 2077072 | src2 += MAX_PB_SIZE; | |
| 797 | } | ||
| 798 | 166916 | } | |
| 799 | |||
| 800 | // line zero | ||
| 801 | #define P3 pix[-4 * xstride] | ||
| 802 | #define P2 pix[-3 * xstride] | ||
| 803 | #define P1 pix[-2 * xstride] | ||
| 804 | #define P0 pix[-1 * xstride] | ||
| 805 | #define Q0 pix[0 * xstride] | ||
| 806 | #define Q1 pix[1 * xstride] | ||
| 807 | #define Q2 pix[2 * xstride] | ||
| 808 | #define Q3 pix[3 * xstride] | ||
| 809 | |||
| 810 | // line three. used only for deblocking decision | ||
| 811 | #define TP3 pix[-4 * xstride + 3 * ystride] | ||
| 812 | #define TP2 pix[-3 * xstride + 3 * ystride] | ||
| 813 | #define TP1 pix[-2 * xstride + 3 * ystride] | ||
| 814 | #define TP0 pix[-1 * xstride + 3 * ystride] | ||
| 815 | #define TQ0 pix[0 * xstride + 3 * ystride] | ||
| 816 | #define TQ1 pix[1 * xstride + 3 * ystride] | ||
| 817 | #define TQ2 pix[2 * xstride + 3 * ystride] | ||
| 818 | #define TQ3 pix[3 * xstride + 3 * ystride] | ||
| 819 | |||
| 820 | #include "h26x/h2656_deblock_template.c" | ||
| 821 | |||
| 822 | 85714092 | static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, | |
| 823 | ptrdiff_t _xstride, ptrdiff_t _ystride, | ||
| 824 | int beta, const int *_tc, | ||
| 825 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
| 826 | { | ||
| 827 | 85714092 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
| 828 | 85714092 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
| 829 | |||
| 830 | 85714092 | beta <<= BIT_DEPTH - 8; | |
| 831 | |||
| 832 |
2/2✓ Branch 0 taken 85714092 times.
✓ Branch 1 taken 42857046 times.
|
257142276 | for (int j = 0; j < 2; j++) { |
| 833 | 171428184 | pixel* pix = (pixel*)_pix + j * 4 * ystride; | |
| 834 | 171428184 | const int dp0 = abs(P2 - 2 * P1 + P0); | |
| 835 | 171428184 | const int dq0 = abs(Q2 - 2 * Q1 + Q0); | |
| 836 | 171428184 | const int dp3 = abs(TP2 - 2 * TP1 + TP0); | |
| 837 | 171428184 | const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0); | |
| 838 | 171428184 | const int d0 = dp0 + dq0; | |
| 839 | 171428184 | const int d3 = dp3 + dq3; | |
| 840 | 171428184 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
| 841 | 171428184 | const int no_p = _no_p[j]; | |
| 842 | 171428184 | const int no_q = _no_q[j]; | |
| 843 | |||
| 844 |
2/2✓ Branch 0 taken 64559993 times.
✓ Branch 1 taken 21154099 times.
|
171428184 | if (d0 + d3 < beta) { |
| 845 | 129119986 | const int beta_3 = beta >> 3; | |
| 846 | 129119986 | const int beta_2 = beta >> 2; | |
| 847 | 129119986 | const int tc25 = ((tc * 5 + 1) >> 1); | |
| 848 | |||
| 849 |
4/4✓ Branch 0 taken 19435367 times.
✓ Branch 1 taken 45124626 times.
✓ Branch 2 taken 18939378 times.
✓ Branch 3 taken 495989 times.
|
129119986 | if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && |
| 850 |
4/4✓ Branch 0 taken 14278899 times.
✓ Branch 1 taken 4660479 times.
✓ Branch 2 taken 14174894 times.
✓ Branch 3 taken 104005 times.
|
37878756 | abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && |
| 851 |
4/4✓ Branch 0 taken 13677569 times.
✓ Branch 1 taken 497325 times.
✓ Branch 2 taken 13365843 times.
✓ Branch 3 taken 311726 times.
|
55081474 | (d0 << 1) < beta_2 && (d3 << 1) < beta_2) { |
| 852 | 26731686 | const int tc2 = tc << 1; | |
| 853 | 26731686 | FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q); | |
| 854 | } else { | ||
| 855 | 102388300 | int nd_p = 1; | |
| 856 | 102388300 | int nd_q = 1; | |
| 857 |
2/2✓ Branch 0 taken 35739515 times.
✓ Branch 1 taken 15454635 times.
|
102388300 | if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3)) |
| 858 | 71479030 | nd_p = 2; | |
| 859 |
2/2✓ Branch 0 taken 34866688 times.
✓ Branch 1 taken 16327462 times.
|
102388300 | if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3)) |
| 860 | 69733376 | nd_q = 2; | |
| 861 | 102388300 | FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q); | |
| 862 | } | ||
| 863 | } | ||
| 864 | } | ||
| 865 | 85714092 | } | |
| 866 | |||
| 867 | 26152884 | static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride, | |
| 868 | ptrdiff_t _ystride, const int *_tc, | ||
| 869 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
| 870 | { | ||
| 871 | int no_p, no_q; | ||
| 872 | 26152884 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
| 873 | 26152884 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
| 874 | 26152884 | const int size = 4; | |
| 875 | |||
| 876 |
2/2✓ Branch 0 taken 26152884 times.
✓ Branch 1 taken 13076442 times.
|
78458652 | for (int j = 0; j < 2; j++) { |
| 877 | 52305768 | pixel *pix = (pixel *)_pix + j * size * ystride; | |
| 878 | 52305768 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
| 879 |
2/2✓ Branch 0 taken 23531915 times.
✓ Branch 1 taken 2620969 times.
|
52305768 | if (tc > 0) { |
| 880 | 47063830 | no_p = _no_p[j]; | |
| 881 | 47063830 | no_q = _no_q[j]; | |
| 882 | |||
| 883 | 47063830 | FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q); | |
| 884 | } | ||
| 885 | } | ||
| 886 | 26152884 | } | |
| 887 | |||
| 888 | 12780930 | static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
| 889 | const int32_t *tc, const uint8_t *no_p, | ||
| 890 | const uint8_t *no_q) | ||
| 891 | { | ||
| 892 | 12780930 | FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); | |
| 893 | 12780930 | } | |
| 894 | |||
| 895 | 13371954 | static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
| 896 | const int32_t *tc, const uint8_t *no_p, | ||
| 897 | const uint8_t *no_q) | ||
| 898 | { | ||
| 899 | 13371954 | FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); | |
| 900 | 13371954 | } | |
| 901 | |||
| 902 | 43210442 | static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
| 903 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
| 904 | const uint8_t *no_q) | ||
| 905 | { | ||
| 906 | 43210442 | FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), | |
| 907 | beta, tc, no_p, no_q); | ||
| 908 | 43210442 | } | |
| 909 | |||
| 910 | 42503650 | static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
| 911 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
| 912 | const uint8_t *no_q) | ||
| 913 | { | ||
| 914 | 42503650 | FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, | |
| 915 | beta, tc, no_p, no_q); | ||
| 916 | 42503650 | } | |
| 917 | |||
| 918 | #undef P3 | ||
| 919 | #undef P2 | ||
| 920 | #undef P1 | ||
| 921 | #undef P0 | ||
| 922 | #undef Q0 | ||
| 923 | #undef Q1 | ||
| 924 | #undef Q2 | ||
| 925 | #undef Q3 | ||
| 926 | |||
| 927 | #undef TP3 | ||
| 928 | #undef TP2 | ||
| 929 | #undef TP1 | ||
| 930 | #undef TP0 | ||
| 931 | #undef TQ0 | ||
| 932 | #undef TQ1 | ||
| 933 | #undef TQ2 | ||
| 934 | #undef TQ3 | ||
| 935 |