Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * VVC inter prediction DSP | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "libavcodec/h26x/h2656_inter_template.c" | ||
24 | |||
25 | 10504168 | static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
26 | const int16_t *src0, const int16_t *src1, const int width, const int height) | ||
27 | { | ||
28 | 10504168 | pixel *dst = (pixel*)_dst; | |
29 | 10504168 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
30 | 10504168 | const int shift = FFMAX(3, 15 - BIT_DEPTH); | |
31 | 10504168 | const int offset = 1 << (shift - 1); | |
32 | |||
33 |
2/2✓ Branch 0 taken 47434030 times.
✓ Branch 1 taken 5252084 times.
|
105372228 | for (int y = 0; y < height; y++) { |
34 |
2/2✓ Branch 0 taken 621086660 times.
✓ Branch 1 taken 47434030 times.
|
1337041380 | for (int x = 0; x < width; x++) |
35 | 1242173320 | dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift); | |
36 | 94868060 | src0 += MAX_PB_SIZE; | |
37 | 94868060 | src1 += MAX_PB_SIZE; | |
38 | 94868060 | dst += dst_stride; | |
39 | } | ||
40 | 10504168 | } | |
41 | |||
42 | 1026822 | static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
43 | const int16_t *src0, const int16_t *src1, const int width, const int height, | ||
44 | const int denom, const int w0, const int w1, const int o0, const int o1) | ||
45 | { | ||
46 | 1026822 | pixel *dst = (pixel*)_dst; | |
47 | 1026822 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
48 | 1026822 | const int shift = denom + FFMAX(3, 15 - BIT_DEPTH); | |
49 | 1026822 | const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1)); | |
50 | |||
51 |
2/2✓ Branch 0 taken 3844918 times.
✓ Branch 1 taken 513411 times.
|
8716658 | for (int y = 0; y < height; y++) { |
52 |
2/2✓ Branch 0 taken 115466980 times.
✓ Branch 1 taken 3844918 times.
|
238623796 | for (int x = 0; x < width; x++) |
53 | 230933960 | dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift); | |
54 | 7689836 | src0 += MAX_PB_SIZE; | |
55 | 7689836 | src1 += MAX_PB_SIZE; | |
56 | 7689836 | dst += dst_stride; | |
57 | } | ||
58 | 1026822 | } | |
59 | |||
60 | 82326 | static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const int width, const int height, | ||
62 | const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight) | ||
63 | { | ||
64 | 82326 | pixel *dst = (pixel *)_dst; | |
65 | 82326 | pixel *inter = (pixel *)_inter; | |
66 | 82326 | const size_t dst_stride = _dst_stride / sizeof(pixel); | |
67 | 82326 | const size_t inter_stride = _inter_stride / sizeof(pixel); | |
68 | 82326 | const int inter_weight = 4 - intra_weight; | |
69 | |||
70 |
2/2✓ Branch 0 taken 496072 times.
✓ Branch 1 taken 41163 times.
|
1074470 | for (int y = 0; y < height; y++) { |
71 |
2/2✓ Branch 0 taken 6568384 times.
✓ Branch 1 taken 496072 times.
|
14128912 | for (int x = 0; x < width; x++) |
72 | 13136768 | dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2; | |
73 | 992144 | dst += dst_stride; | |
74 | 992144 | inter += inter_stride; | |
75 | } | ||
76 | 82326 | } | |
77 | |||
78 | 137264 | static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride, | |
79 | const int width, const int height, | ||
80 | const int16_t *src0, const int16_t *src1, | ||
81 | const uint8_t *weights, const int step_x, const int step_y) | ||
82 | { | ||
83 | 137264 | const int shift = FFMAX(5, 17 - BIT_DEPTH); | |
84 | 137264 | const int offset = 1 << (shift - 1); | |
85 | 137264 | pixel *dst = (pixel *)_dst; | |
86 | |||
87 | 137264 | dst_stride /= sizeof(pixel); | |
88 |
2/2✓ Branch 0 taken 841568 times.
✓ Branch 1 taken 68632 times.
|
1820400 | for (int y = 0; y < height; y++) { |
89 |
2/2✓ Branch 0 taken 12243616 times.
✓ Branch 1 taken 841568 times.
|
26170368 | for (int x = 0; x < width; x++) { |
90 | 24487232 | const uint8_t w = weights[x * step_x]; | |
91 | 24487232 | dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift); | |
92 | } | ||
93 | 1683136 | dst += dst_stride; | |
94 | 1683136 | src0 += MAX_PB_SIZE; | |
95 | 1683136 | src1 += MAX_PB_SIZE; | |
96 | 1683136 | weights += step_y; | |
97 | } | ||
98 | 137264 | } | |
99 | |||
100 | //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line | ||
101 | 8336572 | static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
102 | const int x_frac, const int y_frac, const int width, const int height) | ||
103 | { | ||
104 | 8336572 | const int x_off = (x_frac >> 3) - 1; | |
105 | 8336572 | const int y_off = (y_frac >> 3) - 1; | |
106 | 8336572 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
107 | 8336572 | const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride; | |
108 | 8336572 | int16_t *dst = _dst - 1 - MAX_PB_SIZE; | |
109 | 8336572 | const int shift = 14 - BIT_DEPTH; | |
110 | 8336572 | const int bdof_width = width + 2 * BDOF_BORDER_EXT; | |
111 | |||
112 | // top | ||
113 |
2/2✓ Branch 0 taken 33103164 times.
✓ Branch 1 taken 4168286 times.
|
74542900 | for (int i = 0; i < bdof_width; i++) |
114 | 66206328 | dst[i] = src[i] << shift; | |
115 | |||
116 | 8336572 | dst += MAX_PB_SIZE; | |
117 | 8336572 | src += src_stride; | |
118 | |||
119 |
2/2✓ Branch 0 taken 24845920 times.
✓ Branch 1 taken 4168286 times.
|
58028412 | for (int i = 0; i < height; i++) { |
120 | 49691840 | dst[0] = src[0] << shift; | |
121 | 49691840 | dst[1 + width] = src[1 + width] << shift; | |
122 | 49691840 | dst += MAX_PB_SIZE; | |
123 | 49691840 | src += src_stride; | |
124 | } | ||
125 |
2/2✓ Branch 0 taken 33103164 times.
✓ Branch 1 taken 4168286 times.
|
74542900 | for (int i = 0; i < bdof_width; i++) |
126 | 66206328 | dst[i] = src[i] << shift; | |
127 | 8336572 | } | |
128 | |||
129 | //8.5.6.3.3 Luma integer sample fetching process | ||
130 | 6935984 | static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac) | |
131 | { | ||
132 | 6935984 | FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); | |
133 | 6935984 | } | |
134 | |||
135 | 8336572 | static void FUNC(prof_grad_filter)(int16_t *_gradient_h, int16_t *_gradient_v, const ptrdiff_t gradient_stride, | |
136 | const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height, const int pad) | ||
137 | { | ||
138 | 8336572 | const int shift = 6; | |
139 | 8336572 | const int16_t *src = _src; | |
140 | 8336572 | int16_t *gradient_h = _gradient_h + pad * (1 + gradient_stride); | |
141 | 8336572 | int16_t *gradient_v = _gradient_v + pad * (1 + gradient_stride); | |
142 | |||
143 |
2/2✓ Branch 0 taken 24845920 times.
✓ Branch 1 taken 4168286 times.
|
58028412 | for (int y = 0; y < height; y++) { |
144 | 49691840 | const int16_t *p = src; | |
145 |
2/2✓ Branch 0 taken 226109824 times.
✓ Branch 1 taken 24845920 times.
|
501911488 | for (int x = 0; x < width; x++) { |
146 | 452219648 | gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift); | |
147 | 452219648 | gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift); | |
148 | 452219648 | p++; | |
149 | } | ||
150 | 49691840 | gradient_h += gradient_stride; | |
151 | 49691840 | gradient_v += gradient_stride; | |
152 | 49691840 | src += src_stride; | |
153 | } | ||
154 |
2/2✓ Branch 0 taken 700294 times.
✓ Branch 1 taken 3467992 times.
|
8336572 | if (pad) { |
155 | 1400588 | pad_int16(_gradient_h + 1 + gradient_stride, gradient_stride, width, height); | |
156 | 1400588 | pad_int16(_gradient_v + 1 + gradient_stride, gradient_stride, width, height); | |
157 | } | ||
158 | 8336572 | } | |
159 | |||
160 | 2726032 | static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) | |
161 | { | ||
162 | 2726032 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit | |
163 | |||
164 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
165 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
166 | 2726032 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0); | |
167 | |||
168 |
2/2✓ Branch 0 taken 5452064 times.
✓ Branch 1 taken 1363016 times.
|
13630160 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
169 |
2/2✓ Branch 0 taken 21808256 times.
✓ Branch 1 taken 5452064 times.
|
54520640 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
170 | 43616512 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; | |
171 | 43616512 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; | |
172 | 43616512 | const int val = src[x] + av_clip(di, -limit, limit - 1); | |
173 | 43616512 | dst[x] = val; | |
174 | |||
175 | } | ||
176 | 10904128 | src += MAX_PB_SIZE; | |
177 | 10904128 | dst += MAX_PB_SIZE; | |
178 | } | ||
179 | 2726032 | } | |
180 | |||
181 | 4199384 | static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) | |
182 | { | ||
183 | 4199384 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit | |
184 | 4199384 | pixel *dst = (pixel*)_dst; | |
185 | 4199384 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
186 | 4199384 | const int shift = 14 - BIT_DEPTH; | |
187 | #if BIT_DEPTH < 14 | ||
188 | 4199384 | const int offset = 1 << (shift - 1); | |
189 | #else | ||
190 | const int offset = 0; | ||
191 | #endif | ||
192 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
193 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
194 | |||
195 | 4199384 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0); | |
196 | |||
197 |
2/2✓ Branch 0 taken 8398768 times.
✓ Branch 1 taken 2099692 times.
|
20996920 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
198 |
2/2✓ Branch 0 taken 33595072 times.
✓ Branch 1 taken 8398768 times.
|
83987680 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
199 | 67190144 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; | |
200 | 67190144 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; | |
201 | 67190144 | const int val = src[x] + av_clip(di, -limit, limit - 1); | |
202 | 67190144 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
203 | |||
204 | } | ||
205 | 16797536 | src += MAX_PB_SIZE; | |
206 | 16797536 | dst += dst_stride; | |
207 | } | ||
208 | 4199384 | } | |
209 | |||
210 | 10568 | static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
211 | const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y, | ||
212 | const int denom, const int wx, const int _ox) | ||
213 | { | ||
214 | 10568 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit | |
215 | 10568 | pixel *dst = (pixel*)_dst; | |
216 | 10568 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
217 | 10568 | const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); | |
218 | 10568 | const int offset = 1 << (shift - 1); | |
219 | 10568 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
220 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
221 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; | ||
222 | |||
223 | 10568 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0); | |
224 | |||
225 |
2/2✓ Branch 0 taken 21136 times.
✓ Branch 1 taken 5284 times.
|
52840 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
226 |
2/2✓ Branch 0 taken 84544 times.
✓ Branch 1 taken 21136 times.
|
211360 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
227 | 169088 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; | |
228 | 169088 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; | |
229 | 169088 | const int val = src[x] + av_clip(di, -limit, limit - 1); | |
230 | 169088 | dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox); | |
231 | } | ||
232 | 42272 | src += MAX_PB_SIZE; | |
233 | 42272 | dst += dst_stride; | |
234 | } | ||
235 | 10568 | } | |
236 | |||
237 | 10663872 | static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1, | |
238 | const int16_t **gradient_h, const int16_t **gradient_v, ptrdiff_t gradient_stride, | ||
239 | int* vx, int* vy) | ||
240 | { | ||
241 | 10663872 | const int shift2 = 4; | |
242 | 10663872 | const int shift3 = 1; | |
243 | 10663872 | const int thres = 1 << 4; | |
244 | 10663872 | int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0; | |
245 | 10663872 | const int16_t *src0 = _src0 - 1 - MAX_PB_SIZE; | |
246 | 10663872 | const int16_t *src1 = _src1 - 1 - MAX_PB_SIZE; | |
247 | |||
248 |
2/2✓ Branch 0 taken 31991616 times.
✓ Branch 1 taken 5331936 times.
|
74647104 | for (int y = 0; y < BDOF_GRADIENT_SIZE; y++) { |
249 |
2/2✓ Branch 0 taken 191949696 times.
✓ Branch 1 taken 31991616 times.
|
447882624 | for (int x = 0; x < BDOF_GRADIENT_SIZE; x++) { |
250 | 383899392 | const int diff = (src0[x] >> shift2) - (src1[x] >> shift2); | |
251 | 383899392 | const int idx = gradient_stride * y + x; | |
252 | 383899392 | const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3; | |
253 | 383899392 | const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3; | |
254 | 383899392 | sgx2 += FFABS(temph); | |
255 | 383899392 | sgy2 += FFABS(tempv); | |
256 |
2/2✓ Branch 0 taken 104033460 times.
✓ Branch 1 taken 87916236 times.
|
383899392 | sgxgy += VVC_SIGN(tempv) * temph; |
257 |
2/2✓ Branch 0 taken 109571111 times.
✓ Branch 1 taken 82378585 times.
|
383899392 | sgxdi += -VVC_SIGN(temph) * diff; |
258 |
2/2✓ Branch 0 taken 104033460 times.
✓ Branch 1 taken 87916236 times.
|
383899392 | sgydi += -VVC_SIGN(tempv) * diff; |
259 | } | ||
260 | 63983232 | src0 += MAX_PB_SIZE; | |
261 | 63983232 | src1 += MAX_PB_SIZE; | |
262 | } | ||
263 |
2/2✓ Branch 0 taken 5223650 times.
✓ Branch 1 taken 108286 times.
|
10663872 | *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0; |
264 |
2/2✓ Branch 0 taken 5224555 times.
✓ Branch 1 taken 107381 times.
|
10663872 | *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0; |
265 | 10663872 | } | |
266 | |||
267 | 10663872 | static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, | |
268 | const int16_t **gradient_h, const int16_t **gradient_v, const int vx, const int vy) | ||
269 | { | ||
270 | 10663872 | const int shift4 = 15 - BIT_DEPTH; | |
271 | 10663872 | const int offset4 = 1 << (shift4 - 1); | |
272 | |||
273 | 10663872 | const int16_t* gh[] = { gradient_h[0] + 1 + BDOF_PADDED_SIZE, gradient_h[1] + 1 + BDOF_PADDED_SIZE }; | |
274 | 10663872 | const int16_t* gv[] = { gradient_v[0] + 1 + BDOF_PADDED_SIZE, gradient_v[1] + 1 + BDOF_PADDED_SIZE }; | |
275 | |||
276 |
2/2✓ Branch 0 taken 21327744 times.
✓ Branch 1 taken 5331936 times.
|
53319360 | for (int y = 0; y < BDOF_BLOCK_SIZE; y++) { |
277 |
2/2✓ Branch 0 taken 85310976 times.
✓ Branch 1 taken 21327744 times.
|
213277440 | for (int x = 0; x < BDOF_BLOCK_SIZE; x++) { |
278 | 170621952 | const int idx = y * BDOF_PADDED_SIZE + x; | |
279 | 170621952 | const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]); | |
280 | 170621952 | dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4); | |
281 | } | ||
282 | 42655488 | dst += dst_stride; | |
283 | 42655488 | src0 += MAX_PB_SIZE; | |
284 | 42655488 | src1 += MAX_PB_SIZE; | |
285 | } | ||
286 | 10663872 | } | |
287 | |||
288 | 700294 | static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, int16_t *_src0, int16_t *_src1, | |
289 | const int block_w, const int block_h) | ||
290 | { | ||
291 | int16_t gradient_h[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE]; | ||
292 | int16_t gradient_v[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE]; | ||
293 | int vx, vy; | ||
294 | 700294 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
295 | 700294 | pixel* dst = (pixel*)_dst; | |
296 | |||
297 | 700294 | FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_PADDED_SIZE, | |
298 | _src0, MAX_PB_SIZE, block_w, block_h, 1); | ||
299 | 700294 | pad_int16(_src0, MAX_PB_SIZE, block_w, block_h); | |
300 | 700294 | FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_PADDED_SIZE, | |
301 | _src1, MAX_PB_SIZE, block_w, block_h, 1); | ||
302 | 700294 | pad_int16(_src1, MAX_PB_SIZE, block_w, block_h); | |
303 | |||
304 |
2/2✓ Branch 0 taken 1371744 times.
✓ Branch 1 taken 350147 times.
|
3443782 | for (int y = 0; y < block_h; y += BDOF_BLOCK_SIZE) { |
305 |
2/2✓ Branch 0 taken 5331936 times.
✓ Branch 1 taken 1371744 times.
|
13407360 | for (int x = 0; x < block_w; x += BDOF_BLOCK_SIZE) { |
306 | 10663872 | const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x; | |
307 | 10663872 | const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x; | |
308 | 10663872 | pixel *d = dst + x; | |
309 | 10663872 | const int idx = BDOF_PADDED_SIZE * y + x; | |
310 | 10663872 | const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx }; | |
311 | 10663872 | const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx }; | |
312 | 10663872 | FUNC(derive_bdof_vx_vy)(src0, src1, gh, gv, BDOF_PADDED_SIZE, &vx, &vy); | |
313 | 10663872 | FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy); | |
314 | } | ||
315 | 2743488 | dst += BDOF_BLOCK_SIZE * dst_stride; | |
316 | } | ||
317 | 700294 | } | |
318 | |||
319 | #define DMVR_FILTER(src, stride) \ | ||
320 | (filter[0] * src[x] + \ | ||
321 | filter[1] * src[x + stride]) | ||
322 | |||
323 | //8.5.3.2.2 Luma sample bilinear interpolation process | ||
324 | 2079224 | static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
325 | const int height, const intptr_t mx, const intptr_t my, const int width) | ||
326 | { | ||
327 | 2079224 | const pixel *src = (const pixel *)_src; | |
328 | 2079224 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
329 | #if BIT_DEPTH > 10 | ||
330 | ✗ | const int shift4 = BIT_DEPTH - 10; | |
331 | ✗ | const int offset4 = 1 << (shift4 - 1); | |
332 | #define DMVR_SHIFT(s) (((s) + offset4) >> shift4) | ||
333 | #else | ||
334 | #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH)) | ||
335 | #endif | ||
336 | |||
337 |
2/2✓ Branch 0 taken 20669552 times.
✓ Branch 1 taken 1039612 times.
|
43418328 | for (int y = 0; y < height; y++) { |
338 |
2/2✓ Branch 0 taken 412548960 times.
✓ Branch 1 taken 20669552 times.
|
866437024 | for (int x = 0; x < width; x++) |
339 | 825097920 | dst[x] = DMVR_SHIFT(src[x]); | |
340 | 41339104 | src += src_stride; | |
341 | 41339104 | dst += MAX_PB_SIZE; | |
342 | } | ||
343 | #undef DMVR_SHIFT | ||
344 | 2079224 | } | |
345 | |||
346 | //8.5.3.2.2 Luma sample bilinear interpolation process | ||
347 | 301270 | static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
348 | const int height, const intptr_t mx, const intptr_t my, const int width) | ||
349 | { | ||
350 | 301270 | const pixel *src = (const pixel*)_src; | |
351 | 301270 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
352 | 301270 | const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx]; | |
353 | 301270 | const int shift1 = BIT_DEPTH - 6; | |
354 | 301270 | const int offset1 = 1 << (shift1 - 1); | |
355 | |||
356 |
2/2✓ Branch 0 taken 2981452 times.
✓ Branch 1 taken 150635 times.
|
6264174 | for (int y = 0; y < height; y++) { |
357 |
2/2✓ Branch 0 taken 58947280 times.
✓ Branch 1 taken 2981452 times.
|
123857464 | for (int x = 0; x < width; x++) |
358 | 117894560 | dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1; | |
359 | 5962904 | src += src_stride; | |
360 | 5962904 | dst += MAX_PB_SIZE; | |
361 | } | ||
362 | 301270 | } | |
363 | |||
364 | //8.5.3.2.2 Luma sample bilinear interpolation process | ||
365 | 413184 | static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
366 | const int height, const intptr_t mx, const intptr_t my, const int width) | ||
367 | { | ||
368 | 413184 | const pixel *src = (pixel*)_src; | |
369 | 413184 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
370 | 413184 | const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my]; | |
371 | 413184 | const int shift1 = BIT_DEPTH - 6; | |
372 | 413184 | const int offset1 = 1 << (shift1 - 1); | |
373 | |||
374 |
2/2✓ Branch 0 taken 4091688 times.
✓ Branch 1 taken 206592 times.
|
8596560 | for (int y = 0; y < height; y++) { |
375 |
2/2✓ Branch 0 taken 81195520 times.
✓ Branch 1 taken 4091688 times.
|
170574416 | for (int x = 0; x < width; x++) |
376 | 162391040 | dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1; | |
377 | 8183376 | src += src_stride; | |
378 | 8183376 | dst += MAX_PB_SIZE; | |
379 | } | ||
380 | |||
381 | 413184 | } | |
382 | |||
383 | //8.5.3.2.2 Luma sample bilinear interpolation process | ||
384 | 1151786 | static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
385 | const int height, const intptr_t mx, const intptr_t my, const int width) | ||
386 | { | ||
387 | int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE]; | ||
388 | 1151786 | int16_t *tmp = tmp_array; | |
389 | 1151786 | const pixel *src = (const pixel*)_src; | |
390 | 1151786 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
391 | 1151786 | const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx]; | |
392 | 1151786 | const int shift1 = BIT_DEPTH - 6; | |
393 | 1151786 | const int offset1 = 1 << (shift1 - 1); | |
394 | 1151786 | const int shift2 = 4; | |
395 | 1151786 | const int offset2 = 1 << (shift2 - 1); | |
396 | |||
397 | 1151786 | src -= BILINEAR_EXTRA_BEFORE * src_stride; | |
398 |
2/2✓ Branch 0 taken 11952065 times.
✓ Branch 1 taken 575893 times.
|
25055916 | for (int y = 0; y < height + BILINEAR_EXTRA; y++) { |
399 |
2/2✓ Branch 0 taken 235692556 times.
✓ Branch 1 taken 11952065 times.
|
495289242 | for (int x = 0; x < width; x++) |
400 | 471385112 | tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1; | |
401 | 23904130 | src += src_stride; | |
402 | 23904130 | tmp += MAX_PB_SIZE; | |
403 | } | ||
404 | |||
405 | 1151786 | tmp = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE; | |
406 | 1151786 | filter = ff_vvc_inter_luma_dmvr_filters[my]; | |
407 |
2/2✓ Branch 0 taken 11376172 times.
✓ Branch 1 taken 575893 times.
|
23904130 | for (int y = 0; y < height; y++) { |
408 |
2/2✓ Branch 0 taken 224334160 times.
✓ Branch 1 taken 11376172 times.
|
471420664 | for (int x = 0; x < width; x++) |
409 | 448668320 | dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2; | |
410 | 22752344 | tmp += MAX_PB_SIZE; | |
411 | 22752344 | dst += MAX_PB_SIZE; | |
412 | } | ||
413 | 1151786 | } | |
414 | |||
415 | #define PEL_FUNC(dst, C, idx1, idx2, a) \ | ||
416 | do { \ | ||
417 | for (int w = 0; w < 7; w++) \ | ||
418 | inter->dst[C][w][idx1][idx2] = FUNC(a); \ | ||
419 | } while (0) \ | ||
420 | |||
421 | #define DIR_FUNCS(d, C, c) \ | ||
422 | PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \ | ||
423 | PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \ | ||
424 | PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \ | ||
425 | PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \ | ||
426 | PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \ | ||
427 | PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \ | ||
428 | PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \ | ||
429 | PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv); | ||
430 | |||
431 | #define FUNCS(C, c) \ | ||
432 | PEL_FUNC(put, C, 0, 0, put_pixels); \ | ||
433 | PEL_FUNC(put, C, 0, 1, put_##c##_h); \ | ||
434 | PEL_FUNC(put, C, 1, 0, put_##c##_v); \ | ||
435 | PEL_FUNC(put, C, 1, 1, put_##c##_hv); \ | ||
436 | DIR_FUNCS(uni, C, c); \ | ||
437 | |||
438 | 2134 | static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter) | |
439 | { | ||
440 |
24/24✓ Branch 0 taken 7469 times.
✓ Branch 1 taken 1067 times.
✓ Branch 2 taken 7469 times.
✓ Branch 3 taken 1067 times.
✓ Branch 4 taken 7469 times.
✓ Branch 5 taken 1067 times.
✓ Branch 6 taken 7469 times.
✓ Branch 7 taken 1067 times.
✓ Branch 8 taken 7469 times.
✓ Branch 9 taken 1067 times.
✓ Branch 10 taken 7469 times.
✓ Branch 11 taken 1067 times.
✓ Branch 12 taken 7469 times.
✓ Branch 13 taken 1067 times.
✓ Branch 14 taken 7469 times.
✓ Branch 15 taken 1067 times.
✓ Branch 16 taken 7469 times.
✓ Branch 17 taken 1067 times.
✓ Branch 18 taken 7469 times.
✓ Branch 19 taken 1067 times.
✓ Branch 20 taken 7469 times.
✓ Branch 21 taken 1067 times.
✓ Branch 22 taken 7469 times.
✓ Branch 23 taken 1067 times.
|
181390 | FUNCS(LUMA, luma); |
441 |
24/24✓ Branch 0 taken 7469 times.
✓ Branch 1 taken 1067 times.
✓ Branch 2 taken 7469 times.
✓ Branch 3 taken 1067 times.
✓ Branch 4 taken 7469 times.
✓ Branch 5 taken 1067 times.
✓ Branch 6 taken 7469 times.
✓ Branch 7 taken 1067 times.
✓ Branch 8 taken 7469 times.
✓ Branch 9 taken 1067 times.
✓ Branch 10 taken 7469 times.
✓ Branch 11 taken 1067 times.
✓ Branch 12 taken 7469 times.
✓ Branch 13 taken 1067 times.
✓ Branch 14 taken 7469 times.
✓ Branch 15 taken 1067 times.
✓ Branch 16 taken 7469 times.
✓ Branch 17 taken 1067 times.
✓ Branch 18 taken 7469 times.
✓ Branch 19 taken 1067 times.
✓ Branch 20 taken 7469 times.
✓ Branch 21 taken 1067 times.
✓ Branch 22 taken 7469 times.
✓ Branch 23 taken 1067 times.
|
181390 | FUNCS(CHROMA, chroma); |
442 | |||
443 | 2134 | inter->avg = FUNC(avg); | |
444 | 2134 | inter->w_avg = FUNC(w_avg); | |
445 | |||
446 | 2134 | inter->dmvr[0][0] = FUNC(dmvr); | |
447 | 2134 | inter->dmvr[0][1] = FUNC(dmvr_h); | |
448 | 2134 | inter->dmvr[1][0] = FUNC(dmvr_v); | |
449 | 2134 | inter->dmvr[1][1] = FUNC(dmvr_hv); | |
450 | |||
451 | 2134 | inter->put_ciip = FUNC(put_ciip); | |
452 | 2134 | inter->put_gpm = FUNC(put_gpm); | |
453 | |||
454 | 2134 | inter->fetch_samples = FUNC(fetch_samples); | |
455 | 2134 | inter->bdof_fetch_samples = FUNC(bdof_fetch_samples); | |
456 | 2134 | inter->apply_prof = FUNC(apply_prof); | |
457 | 2134 | inter->apply_prof_uni = FUNC(apply_prof_uni); | |
458 | 2134 | inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); | |
459 | 2134 | inter->apply_bdof = FUNC(apply_bdof); | |
460 | 2134 | inter->prof_grad_filter = FUNC(prof_grad_filter); | |
461 | 2134 | inter->sad = vvc_sad; | |
462 | 2134 | } | |
463 | |||
464 | #undef FUNCS | ||
465 | #undef PEL_FUNC | ||
466 | #undef DMVR_FUNCS | ||
467 |