| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * inter prediction template for HEVC/VVC | ||
| 3 | * | ||
| 4 | * Copyright (C) 2022 Nuo Mi | ||
| 5 | * Copyright (C) 2024 Wu Jianhua | ||
| 6 | * | ||
| 7 | * This file is part of FFmpeg. | ||
| 8 | * | ||
| 9 | * FFmpeg is free software; you can redistribute it and/or | ||
| 10 | * modify it under the terms of the GNU Lesser General Public | ||
| 11 | * License as published by the Free Software Foundation; either | ||
| 12 | * version 2.1 of the License, or (at your option) any later version. | ||
| 13 | * | ||
| 14 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * Lesser General Public License for more details. | ||
| 18 | * | ||
| 19 | * You should have received a copy of the GNU Lesser General Public | ||
| 20 | * License along with FFmpeg; if not, write to the Free Software | ||
| 21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 22 | */ | ||
| 23 | |||
/*
 * Extra source rows consumed by the two-pass (hv) filters in this file:
 * the first pass starts *_EXTRA_BEFORE rows above the block and processes
 * height + *_EXTRA rows in total.
 */
#define CHROMA_EXTRA_BEFORE 1
#define CHROMA_EXTRA        3
#define LUMA_EXTRA_BEFORE   3
#define LUMA_EXTRA          7
| 28 | |||
| 29 | 12935184 | static void FUNC(put_pixels)(int16_t *dst, | |
| 30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 32 | { | ||
| 33 | 12935184 | const pixel *src = (const pixel *)_src; | |
| 34 | 12935184 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 70189518 times.
✓ Branch 1 taken 6467592 times.
|
153314220 | for (int y = 0; y < height; y++) { |
| 37 |
2/2✓ Branch 0 taken 1047343956 times.
✓ Branch 1 taken 70189518 times.
|
2235066948 | for (int x = 0; x < width; x++) |
| 38 | 2094687912 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
| 39 | 140379036 | src += src_stride; | |
| 40 | 140379036 | dst += MAX_PB_SIZE; | |
| 41 | } | ||
| 42 | 12935184 | } | |
| 43 | |||
| 44 | 3539120 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
| 46 | const int8_t *hf, const int8_t *vf, const int width) | ||
| 47 | { | ||
| 48 | 3539120 | const pixel *src = (const pixel *)_src; | |
| 49 | 3539120 | pixel *dst = (pixel *)_dst; | |
| 50 | 3539120 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 51 | 3539120 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 52 | |||
| 53 |
2/2✓ Branch 0 taken 13296388 times.
✓ Branch 1 taken 1769560 times.
|
30131896 | for (int y = 0; y < height; y++) { |
| 54 | 26592776 | memcpy(dst, src, width * sizeof(pixel)); | |
| 55 | 26592776 | src += src_stride; | |
| 56 | 26592776 | dst += dst_stride; | |
| 57 | } | ||
| 58 | 3539120 | } | |
| 59 | |||
| 60 | 256604 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
| 62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
| 63 | const int width) | ||
| 64 | { | ||
| 65 | 256604 | const pixel *src = (const pixel *)_src; | |
| 66 | 256604 | pixel *dst = (pixel *)_dst; | |
| 67 | 256604 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 68 | 256604 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 69 | 256604 | const int shift = denom + 14 - BIT_DEPTH; | |
| 70 | #if BIT_DEPTH < 14 | ||
| 71 | 256604 | const int offset = 1 << (shift - 1); | |
| 72 | #else | ||
| 73 | const int offset = 0; | ||
| 74 | #endif | ||
| 75 | 256604 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
| 76 | |||
| 77 |
2/2✓ Branch 0 taken 637352 times.
✓ Branch 1 taken 128302 times.
|
1531308 | for (int y = 0; y < height; y++) { |
| 78 |
2/2✓ Branch 0 taken 5833840 times.
✓ Branch 1 taken 637352 times.
|
12942384 | for (int x = 0; x < width; x++) { |
| 79 | 11667680 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
| 80 | 11667680 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
| 81 | } | ||
| 82 | 1274704 | src += src_stride; | |
| 83 | 1274704 | dst += dst_stride; | |
| 84 | } | ||
| 85 | 256604 | } | |
| 86 | |||
/*
 * 8-tap filter centred between taps 3 and 4: reads src at offsets
 * -3 * stride .. +4 * stride around index x.
 *
 * Relies on two names from the expanding scope: `filter` (array of at least
 * 8 coefficients) and `x` (current sample index).  Both macro parameters are
 * parenthesized so that compound expressions can be passed safely.
 */
#define LUMA_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x               ] +                                     \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
| 96 | |||
| 97 | 1177710 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 99 | { | ||
| 100 | 1177710 | const pixel *src = (const pixel*)_src; | |
| 101 | 1177710 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 102 | 1177710 | const int8_t *filter = hf; | |
| 103 | |||
| 104 |
2/2✓ Branch 0 taken 5205668 times.
✓ Branch 1 taken 588855 times.
|
11589046 | for (int y = 0; y < height; y++) { |
| 105 |
2/2✓ Branch 0 taken 79999344 times.
✓ Branch 1 taken 5205668 times.
|
170410024 | for (int x = 0; x < width; x++) |
| 106 | 159998688 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 107 | 10411336 | src += src_stride; | |
| 108 | 10411336 | dst += MAX_PB_SIZE; | |
| 109 | } | ||
| 110 | 1177710 | } | |
| 111 | |||
| 112 | 1423026 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 114 | { | ||
| 115 | 1423026 | const pixel *src = (pixel*)_src; | |
| 116 | 1423026 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 117 | 1423026 | const int8_t *filter = vf; | |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 6501620 times.
✓ Branch 1 taken 711513 times.
|
14426266 | for (int y = 0; y < height; y++) { |
| 120 |
2/2✓ Branch 0 taken 98222256 times.
✓ Branch 1 taken 6501620 times.
|
209447752 | for (int x = 0; x < width; x++) |
| 121 | 196444512 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
| 122 | 13003240 | src += src_stride; | |
| 123 | 13003240 | dst += MAX_PB_SIZE; | |
| 124 | } | ||
| 125 | 1423026 | } | |
| 126 | |||
| 127 | 8400618 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 129 | { | ||
| 130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
| 131 | 8400618 | int16_t *tmp = tmp_array; | |
| 132 | 8400618 | const pixel *src = (const pixel*)_src; | |
| 133 | 8400618 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 134 | 8400618 | const int8_t *filter = hf; | |
| 135 | |||
| 136 | 8400618 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
| 137 |
2/2✓ Branch 0 taken 58013095 times.
✓ Branch 1 taken 4200309 times.
|
124426808 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
| 138 |
2/2✓ Branch 0 taken 516991332 times.
✓ Branch 1 taken 58013095 times.
|
1150008854 | for (int x = 0; x < width; x++) |
| 139 | 1033982664 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 140 | 116026190 | src += src_stride; | |
| 141 | 116026190 | tmp += MAX_PB_SIZE; | |
| 142 | } | ||
| 143 | |||
| 144 | 8400618 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 145 | 8400618 | filter = vf; | |
| 146 |
2/2✓ Branch 0 taken 28610932 times.
✓ Branch 1 taken 4200309 times.
|
65622482 | for (int y = 0; y < height; y++) { |
| 147 |
2/2✓ Branch 0 taken 317457872 times.
✓ Branch 1 taken 28610932 times.
|
692137608 | for (int x = 0; x < width; x++) |
| 148 | 634915744 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
| 149 | 57221864 | tmp += MAX_PB_SIZE; | |
| 150 | 57221864 | dst += MAX_PB_SIZE; | |
| 151 | } | ||
| 152 | 8400618 | } | |
| 153 | |||
| 154 | 101512 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 157 | { | ||
| 158 | 101512 | const pixel *src = (const pixel*)_src; | |
| 159 | 101512 | pixel *dst = (pixel *)_dst; | |
| 160 | 101512 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 161 | 101512 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 162 | 101512 | const int8_t *filter = hf; | |
| 163 | 101512 | const int shift = 14 - BIT_DEPTH; | |
| 164 | #if BIT_DEPTH < 14 | ||
| 165 | 101512 | const int offset = 1 << (shift - 1); | |
| 166 | #else | ||
| 167 | const int offset = 0; | ||
| 168 | #endif | ||
| 169 | |||
| 170 |
2/2✓ Branch 0 taken 587508 times.
✓ Branch 1 taken 50756 times.
|
1276528 | for (int y = 0; y < height; y++) { |
| 171 |
2/2✓ Branch 0 taken 12644560 times.
✓ Branch 1 taken 587508 times.
|
26464136 | for (int x = 0; x < width; x++) { |
| 172 | 25289120 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 173 | 25289120 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
| 174 | } | ||
| 175 | 1175016 | src += src_stride; | |
| 176 | 1175016 | dst += dst_stride; | |
| 177 | } | ||
| 178 | 101512 | } | |
| 179 | |||
| 180 | 119706 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 183 | { | ||
| 184 | |||
| 185 | 119706 | const pixel *src = (const pixel*)_src; | |
| 186 | 119706 | pixel *dst = (pixel *)_dst; | |
| 187 | 119706 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 188 | 119706 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 189 | 119706 | const int8_t *filter = vf; | |
| 190 | 119706 | const int shift = 14 - BIT_DEPTH; | |
| 191 | #if BIT_DEPTH < 14 | ||
| 192 | 119706 | const int offset = 1 << (shift - 1); | |
| 193 | #else | ||
| 194 | const int offset = 0; | ||
| 195 | #endif | ||
| 196 | |||
| 197 |
2/2✓ Branch 0 taken 662140 times.
✓ Branch 1 taken 59853 times.
|
1443986 | for (int y = 0; y < height; y++) { |
| 198 |
2/2✓ Branch 0 taken 13909440 times.
✓ Branch 1 taken 662140 times.
|
29143160 | for (int x = 0; x < width; x++) { |
| 199 | 27818880 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
| 200 | 27818880 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
| 201 | } | ||
| 202 | 1324280 | src += src_stride; | |
| 203 | 1324280 | dst += dst_stride; | |
| 204 | } | ||
| 205 | 119706 | } | |
| 206 | |||
| 207 | 371810 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 210 | { | ||
| 211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
| 212 | 371810 | int16_t *tmp = tmp_array; | |
| 213 | 371810 | const pixel *src = (const pixel*)_src; | |
| 214 | 371810 | pixel *dst = (pixel *)_dst; | |
| 215 | 371810 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 216 | 371810 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 217 | 371810 | const int8_t *filter = hf; | |
| 218 | 371810 | const int shift = 14 - BIT_DEPTH; | |
| 219 | #if BIT_DEPTH < 14 | ||
| 220 | 371810 | const int offset = 1 << (shift - 1); | |
| 221 | #else | ||
| 222 | const int offset = 0; | ||
| 223 | #endif | ||
| 224 | |||
| 225 | 371810 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
| 226 |
2/2✓ Branch 0 taken 3337167 times.
✓ Branch 1 taken 185905 times.
|
7046144 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
| 227 |
2/2✓ Branch 0 taken 49445632 times.
✓ Branch 1 taken 3337167 times.
|
105565598 | for (int x = 0; x < width; x++) |
| 228 | 98891264 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 229 | 6674334 | src += src_stride; | |
| 230 | 6674334 | tmp += MAX_PB_SIZE; | |
| 231 | } | ||
| 232 | |||
| 233 | 371810 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 234 | 371810 | filter = vf; | |
| 235 | |||
| 236 |
2/2✓ Branch 0 taken 2035832 times.
✓ Branch 1 taken 185905 times.
|
4443474 | for (int y = 0; y < height; y++) { |
| 237 |
2/2✓ Branch 0 taken 35767184 times.
✓ Branch 1 taken 2035832 times.
|
75606032 | for (int x = 0; x < width; x++) { |
| 238 | 71534368 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
| 239 | 71534368 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
| 240 | } | ||
| 241 | 4071664 | tmp += MAX_PB_SIZE; | |
| 242 | 4071664 | dst += dst_stride; | |
| 243 | } | ||
| 244 | |||
| 245 | 371810 | } | |
| 246 | |||
| 247 | 22844 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
| 249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
| 250 | const int width) | ||
| 251 | { | ||
| 252 | 22844 | const pixel *src = (const pixel*)_src; | |
| 253 | 22844 | pixel *dst = (pixel *)_dst; | |
| 254 | 22844 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 255 | 22844 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 256 | 22844 | const int8_t *filter = hf; | |
| 257 | 22844 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
| 258 | 22844 | const int shift = denom + 14 - BIT_DEPTH; | |
| 259 | #if BIT_DEPTH < 14 | ||
| 260 | 22844 | const int offset = 1 << (shift - 1); | |
| 261 | #else | ||
| 262 | const int offset = 0; | ||
| 263 | #endif | ||
| 264 | |||
| 265 |
2/2✓ Branch 0 taken 58128 times.
✓ Branch 1 taken 11422 times.
|
139100 | for (int y = 0; y < height; y++) { |
| 266 |
2/2✓ Branch 0 taken 527680 times.
✓ Branch 1 taken 58128 times.
|
1171616 | for (int x = 0; x < width; x++) |
| 267 | 1055360 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
| 268 | 116256 | src += src_stride; | |
| 269 | 116256 | dst += dst_stride; | |
| 270 | } | ||
| 271 | 22844 | } | |
| 272 | |||
| 273 | 22448 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
| 275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
| 276 | const int width) | ||
| 277 | { | ||
| 278 | 22448 | const pixel *src = (const pixel*)_src; | |
| 279 | 22448 | pixel *dst = (pixel *)_dst; | |
| 280 | 22448 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 281 | 22448 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 282 | 22448 | const int8_t *filter = vf; | |
| 283 | 22448 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
| 284 | 22448 | const int shift = denom + 14 - BIT_DEPTH; | |
| 285 | #if BIT_DEPTH < 14 | ||
| 286 | 22448 | const int offset = 1 << (shift - 1); | |
| 287 | #else | ||
| 288 | const int offset = 0; | ||
| 289 | #endif | ||
| 290 | |||
| 291 |
2/2✓ Branch 0 taken 54840 times.
✓ Branch 1 taken 11224 times.
|
132128 | for (int y = 0; y < height; y++) { |
| 292 |
2/2✓ Branch 0 taken 440416 times.
✓ Branch 1 taken 54840 times.
|
990512 | for (int x = 0; x < width; x++) |
| 293 | 880832 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
| 294 | 109680 | src += src_stride; | |
| 295 | 109680 | dst += dst_stride; | |
| 296 | } | ||
| 297 | 22448 | } | |
| 298 | |||
| 299 | 76152 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
| 301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
| 302 | { | ||
| 303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
| 304 | 76152 | int16_t *tmp = tmp_array; | |
| 305 | 76152 | const pixel *src = (const pixel*)_src; | |
| 306 | 76152 | pixel *dst = (pixel *)_dst; | |
| 307 | 76152 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 308 | 76152 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 309 | 76152 | const int8_t *filter = hf; | |
| 310 | 76152 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
| 311 | 76152 | const int shift = denom + 14 - BIT_DEPTH; | |
| 312 | #if BIT_DEPTH < 14 | ||
| 313 | 76152 | const int offset = 1 << (shift - 1); | |
| 314 | #else | ||
| 315 | const int offset = 0; | ||
| 316 | #endif | ||
| 317 | |||
| 318 | 76152 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
| 319 |
2/2✓ Branch 0 taken 456544 times.
✓ Branch 1 taken 38076 times.
|
989240 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
| 320 |
2/2✓ Branch 0 taken 3176504 times.
✓ Branch 1 taken 456544 times.
|
7266096 | for (int x = 0; x < width; x++) |
| 321 | 6353008 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 322 | 913088 | src += src_stride; | |
| 323 | 913088 | tmp += MAX_PB_SIZE; | |
| 324 | } | ||
| 325 | |||
| 326 | 76152 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 327 | 76152 | filter = vf; | |
| 328 |
2/2✓ Branch 0 taken 190012 times.
✓ Branch 1 taken 38076 times.
|
456176 | for (int y = 0; y < height; y++) { |
| 329 |
2/2✓ Branch 0 taken 1834912 times.
✓ Branch 1 taken 190012 times.
|
4049848 | for (int x = 0; x < width; x++) |
| 330 | 3669824 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
| 331 | 380024 | tmp += MAX_PB_SIZE; | |
| 332 | 380024 | dst += dst_stride; | |
| 333 | } | ||
| 334 | 76152 | } | |
| 335 | |||
/*
 * 4-tap filter: reads src at offsets -1 * stride .. +2 * stride around
 * index x.
 *
 * Relies on two names from the expanding scope: `filter` (array of at least
 * 4 coefficients) and `x` (current sample index).  Both macro parameters are
 * parenthesized so that compound expressions can be passed safely.
 */
#define CHROMA_FILTER(src, stride)                                             \
    (filter[0] * (src)[x -     (stride)] +                                     \
     filter[1] * (src)[x               ] +                                     \
     filter[2] * (src)[x +     (stride)] +                                     \
     filter[3] * (src)[x + 2 * (stride)])
| 341 | |||
| 342 | 1240798 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 344 | { | ||
| 345 | 1240798 | const pixel *src = (const pixel *)_src; | |
| 346 | 1240798 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 347 | 1240798 | const int8_t *filter = hf; | |
| 348 | |||
| 349 |
2/2✓ Branch 0 taken 5212054 times.
✓ Branch 1 taken 620399 times.
|
11664906 | for (int y = 0; y < height; y++) { |
| 350 |
2/2✓ Branch 0 taken 82765252 times.
✓ Branch 1 taken 5212054 times.
|
175954612 | for (int x = 0; x < width; x++) |
| 351 | 165530504 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 352 | 10424108 | src += src_stride; | |
| 353 | 10424108 | dst += MAX_PB_SIZE; | |
| 354 | } | ||
| 355 | 1240798 | } | |
| 356 | |||
| 357 | 1768550 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 359 | { | ||
| 360 | 1768550 | const pixel *src = (const pixel *)_src; | |
| 361 | 1768550 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 362 | 1768550 | const int8_t *filter = vf; | |
| 363 | |||
| 364 |
2/2✓ Branch 0 taken 7105114 times.
✓ Branch 1 taken 884275 times.
|
15978778 | for (int y = 0; y < height; y++) { |
| 365 |
2/2✓ Branch 0 taken 106375780 times.
✓ Branch 1 taken 7105114 times.
|
226961788 | for (int x = 0; x < width; x++) |
| 366 | 212751560 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
| 367 | 14210228 | src += src_stride; | |
| 368 | 14210228 | dst += MAX_PB_SIZE; | |
| 369 | } | ||
| 370 | 1768550 | } | |
| 371 | |||
| 372 | 7493522 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
| 373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 374 | { | ||
| 375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
| 376 | 7493522 | int16_t *tmp = tmp_array; | |
| 377 | 7493522 | const pixel *src = (const pixel *)_src; | |
| 378 | 7493522 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 379 | 7493522 | const int8_t *filter = hf; | |
| 380 | |||
| 381 | 7493522 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
| 382 | |||
| 383 |
2/2✓ Branch 0 taken 37333409 times.
✓ Branch 1 taken 3746761 times.
|
82160340 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
| 384 |
2/2✓ Branch 0 taken 349575426 times.
✓ Branch 1 taken 37333409 times.
|
773817670 | for (int x = 0; x < width; x++) |
| 385 | 699150852 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 386 | 74666818 | src += src_stride; | |
| 387 | 74666818 | tmp += MAX_PB_SIZE; | |
| 388 | } | ||
| 389 | |||
| 390 | 7493522 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 391 | 7493522 | filter = vf; | |
| 392 | |||
| 393 |
2/2✓ Branch 0 taken 26093126 times.
✓ Branch 1 taken 3746761 times.
|
59679774 | for (int y = 0; y < height; y++) { |
| 394 |
2/2✓ Branch 0 taken 271521828 times.
✓ Branch 1 taken 26093126 times.
|
595229908 | for (int x = 0; x < width; x++) |
| 395 | 543043656 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
| 396 | 52186252 | tmp += MAX_PB_SIZE; | |
| 397 | 52186252 | dst += MAX_PB_SIZE; | |
| 398 | } | ||
| 399 | 7493522 | } | |
| 400 | |||
| 401 | 449092 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 404 | { | ||
| 405 | 449092 | const pixel *src = (const pixel *)_src; | |
| 406 | 449092 | pixel *dst = (pixel *)_dst; | |
| 407 | 449092 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 408 | 449092 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 409 | 449092 | const int8_t *filter = hf; | |
| 410 | 449092 | const int shift = 14 - BIT_DEPTH; | |
| 411 | #if BIT_DEPTH < 14 | ||
| 412 | 449092 | const int offset = 1 << (shift - 1); | |
| 413 | #else | ||
| 414 | const int offset = 0; | ||
| 415 | #endif | ||
| 416 | |||
| 417 |
2/2✓ Branch 0 taken 1239892 times.
✓ Branch 1 taken 224546 times.
|
2928876 | for (int y = 0; y < height; y++) { |
| 418 |
2/2✓ Branch 0 taken 14474256 times.
✓ Branch 1 taken 1239892 times.
|
31428296 | for (int x = 0; x < width; x++) |
| 419 | 28948512 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
| 420 | 2479784 | src += src_stride; | |
| 421 | 2479784 | dst += dst_stride; | |
| 422 | } | ||
| 423 | 449092 | } | |
| 424 | |||
| 425 | 413460 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 428 | { | ||
| 429 | 413460 | const pixel *src = (const pixel *)_src; | |
| 430 | 413460 | pixel *dst = (pixel *)_dst; | |
| 431 | 413460 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 432 | 413460 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 433 | 413460 | const int8_t *filter = vf; | |
| 434 | 413460 | const int shift = 14 - BIT_DEPTH; | |
| 435 | #if BIT_DEPTH < 14 | ||
| 436 | 413460 | const int offset = 1 << (shift - 1); | |
| 437 | #else | ||
| 438 | const int offset = 0; | ||
| 439 | #endif | ||
| 440 | |||
| 441 |
2/2✓ Branch 0 taken 1126000 times.
✓ Branch 1 taken 206730 times.
|
2665460 | for (int y = 0; y < height; y++) { |
| 442 |
2/2✓ Branch 0 taken 14234000 times.
✓ Branch 1 taken 1126000 times.
|
30720000 | for (int x = 0; x < width; x++) |
| 443 | 28468000 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
| 444 | 2252000 | src += src_stride; | |
| 445 | 2252000 | dst += dst_stride; | |
| 446 | } | ||
| 447 | 413460 | } | |
| 448 | |||
| 449 | 3062180 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
| 451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
| 452 | { | ||
| 453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
| 454 | 3062180 | int16_t *tmp = tmp_array; | |
| 455 | 3062180 | const pixel *src = (const pixel *)_src; | |
| 456 | 3062180 | pixel *dst = (pixel *)_dst; | |
| 457 | 3062180 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 458 | 3062180 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 459 | 3062180 | const int8_t *filter = hf; | |
| 460 | 3062180 | const int shift = 14 - BIT_DEPTH; | |
| 461 | #if BIT_DEPTH < 14 | ||
| 462 | 3062180 | const int offset = 1 << (shift - 1); | |
| 463 | #else | ||
| 464 | const int offset = 0; | ||
| 465 | #endif | ||
| 466 | |||
| 467 | 3062180 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
| 468 | |||
| 469 |
2/2✓ Branch 0 taken 11642698 times.
✓ Branch 1 taken 1531090 times.
|
26347576 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
| 470 |
2/2✓ Branch 0 taken 71707208 times.
✓ Branch 1 taken 11642698 times.
|
166699812 | for (int x = 0; x < width; x++) |
| 471 | 143414416 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 472 | 23285396 | src += src_stride; | |
| 473 | 23285396 | tmp += MAX_PB_SIZE; | |
| 474 | } | ||
| 475 | |||
| 476 | 3062180 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 477 | 3062180 | filter = vf; | |
| 478 | |||
| 479 |
2/2✓ Branch 0 taken 7049428 times.
✓ Branch 1 taken 1531090 times.
|
17161036 | for (int y = 0; y < height; y++) { |
| 480 |
2/2✓ Branch 0 taken 50780720 times.
✓ Branch 1 taken 7049428 times.
|
115660296 | for (int x = 0; x < width; x++) |
| 481 | 101561440 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
| 482 | 14098856 | tmp += MAX_PB_SIZE; | |
| 483 | 14098856 | dst += dst_stride; | |
| 484 | } | ||
| 485 | 3062180 | } | |
| 486 | |||
| 487 | 19480 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
| 488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
| 489 | const int8_t *hf, const int8_t *vf, int width) | ||
| 490 | { | ||
| 491 | 19480 | const pixel *src = (const pixel *)_src; | |
| 492 | 19480 | pixel *dst = (pixel *)_dst; | |
| 493 | 19480 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 494 | 19480 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 495 | 19480 | const int8_t *filter = hf; | |
| 496 | 19480 | const int shift = denom + 14 - BIT_DEPTH; | |
| 497 | #if BIT_DEPTH < 14 | ||
| 498 | 19480 | const int offset = 1 << (shift - 1); | |
| 499 | #else | ||
| 500 | const int offset = 0; | ||
| 501 | #endif | ||
| 502 | |||
| 503 | 19480 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
| 504 |
2/2✓ Branch 0 taken 46728 times.
✓ Branch 1 taken 9740 times.
|
112936 | for (int y = 0; y < height; y++) { |
| 505 |
2/2✓ Branch 0 taken 292128 times.
✓ Branch 1 taken 46728 times.
|
677712 | for (int x = 0; x < width; x++) { |
| 506 | 584256 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
| 507 | } | ||
| 508 | 93456 | dst += dst_stride; | |
| 509 | 93456 | src += src_stride; | |
| 510 | } | ||
| 511 | 19480 | } | |
| 512 | |||
| 513 | 19836 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
| 514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
| 515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
| 516 | const int width) | ||
| 517 | { | ||
| 518 | 19836 | const pixel *src = (const pixel *)_src; | |
| 519 | 19836 | pixel *dst = (pixel *)_dst; | |
| 520 | 19836 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 521 | 19836 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 522 | 19836 | const int8_t *filter = vf; | |
| 523 | 19836 | const int shift = denom + 14 - BIT_DEPTH; | |
| 524 | 19836 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
| 525 | #if BIT_DEPTH < 14 | ||
| 526 | 19836 | int offset = 1 << (shift - 1); | |
| 527 | #else | ||
| 528 | int offset = 0; | ||
| 529 | #endif | ||
| 530 | |||
| 531 |
2/2✓ Branch 0 taken 46288 times.
✓ Branch 1 taken 9918 times.
|
112412 | for (int y = 0; y < height; y++) { |
| 532 |
2/2✓ Branch 0 taken 301520 times.
✓ Branch 1 taken 46288 times.
|
695616 | for (int x = 0; x < width; x++) { |
| 533 | 603040 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
| 534 | } | ||
| 535 | 92576 | dst += dst_stride; | |
| 536 | 92576 | src += src_stride; | |
| 537 | } | ||
| 538 | 19836 | } | |
| 539 | |||
| 540 | 110112 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
| 541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
| 542 | const int8_t *hf, const int8_t *vf, int width) | ||
| 543 | { | ||
| 544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
| 545 | 110112 | int16_t *tmp = tmp_array; | |
| 546 | 110112 | const pixel *src = (const pixel *)_src; | |
| 547 | 110112 | pixel *dst = (pixel *)_dst; | |
| 548 | 110112 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
| 549 | 110112 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
| 550 | 110112 | const int8_t *filter = hf; | |
| 551 | 110112 | const int shift = denom + 14 - BIT_DEPTH; | |
| 552 | #if BIT_DEPTH < 14 | ||
| 553 | 110112 | const int offset = 1 << (shift - 1); | |
| 554 | #else | ||
| 555 | const int offset = 0; | ||
| 556 | #endif | ||
| 557 | |||
| 558 | 110112 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
| 559 | |||
| 560 |
2/2✓ Branch 0 taken 417916 times.
✓ Branch 1 taken 55056 times.
|
945944 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
| 561 |
2/2✓ Branch 0 taken 2399396 times.
✓ Branch 1 taken 417916 times.
|
5634624 | for (int x = 0; x < width; x++) |
| 562 | 4798792 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
| 563 | 835832 | src += src_stride; | |
| 564 | 835832 | tmp += MAX_PB_SIZE; | |
| 565 | } | ||
| 566 | |||
| 567 | 110112 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
| 568 | 110112 | filter = vf; | |
| 569 | |||
| 570 | 110112 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
| 571 |
2/2✓ Branch 0 taken 252748 times.
✓ Branch 1 taken 55056 times.
|
615608 | for (int y = 0; y < height; y++) { |
| 572 |
2/2✓ Branch 0 taken 1626656 times.
✓ Branch 1 taken 252748 times.
|
3758808 | for (int x = 0; x < width; x++) |
| 573 | 3253312 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
| 574 | 505496 | tmp += MAX_PB_SIZE; | |
| 575 | 505496 | dst += dst_stride; | |
| 576 | } | ||
| 577 | 110112 | } | |
| 578 |