FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/inter_template.c
Date: 2024-05-03 15:42:48
Exec Total Coverage
Lines: 268 270 99.3%
Functions: 24 54 44.4%
Branches: 130 130 100.0%

Line Branch Exec Source
1 /*
2 * VVC inter prediction DSP
3 *
4 * Copyright (C) 2022 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "libavcodec/h26x/h2656_inter_template.c"
24
25 10504168 static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
26 const int16_t *src0, const int16_t *src1, const int width, const int height)
27 {
28 10504168 pixel *dst = (pixel*)_dst;
29 10504168 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
30 10504168 const int shift = FFMAX(3, 15 - BIT_DEPTH);
31 10504168 const int offset = 1 << (shift - 1);
32
33
2/2
✓ Branch 0 taken 47434030 times.
✓ Branch 1 taken 5252084 times.
105372228 for (int y = 0; y < height; y++) {
34
2/2
✓ Branch 0 taken 621086660 times.
✓ Branch 1 taken 47434030 times.
1337041380 for (int x = 0; x < width; x++)
35 1242173320 dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift);
36 94868060 src0 += MAX_PB_SIZE;
37 94868060 src1 += MAX_PB_SIZE;
38 94868060 dst += dst_stride;
39 }
40 10504168 }
41
42 1026822 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
43 const int16_t *src0, const int16_t *src1, const int width, const int height,
44 const int denom, const int w0, const int w1, const int o0, const int o1)
45 {
46 1026822 pixel *dst = (pixel*)_dst;
47 1026822 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
48 1026822 const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
49 1026822 const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
50
51
2/2
✓ Branch 0 taken 3844918 times.
✓ Branch 1 taken 513411 times.
8716658 for (int y = 0; y < height; y++) {
52
2/2
✓ Branch 0 taken 115466980 times.
✓ Branch 1 taken 3844918 times.
238623796 for (int x = 0; x < width; x++)
53 230933960 dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift);
54 7689836 src0 += MAX_PB_SIZE;
55 7689836 src1 += MAX_PB_SIZE;
56 7689836 dst += dst_stride;
57 }
58 1026822 }
59
60 82326 static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride,
61 const int width, const int height,
62 const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight)
63 {
64 82326 pixel *dst = (pixel *)_dst;
65 82326 pixel *inter = (pixel *)_inter;
66 82326 const size_t dst_stride = _dst_stride / sizeof(pixel);
67 82326 const size_t inter_stride = _inter_stride / sizeof(pixel);
68 82326 const int inter_weight = 4 - intra_weight;
69
70
2/2
✓ Branch 0 taken 496072 times.
✓ Branch 1 taken 41163 times.
1074470 for (int y = 0; y < height; y++) {
71
2/2
✓ Branch 0 taken 6568384 times.
✓ Branch 1 taken 496072 times.
14128912 for (int x = 0; x < width; x++)
72 13136768 dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2;
73 992144 dst += dst_stride;
74 992144 inter += inter_stride;
75 }
76 82326 }
77
78 137264 static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride,
79 const int width, const int height,
80 const int16_t *src0, const int16_t *src1,
81 const uint8_t *weights, const int step_x, const int step_y)
82 {
83 137264 const int shift = FFMAX(5, 17 - BIT_DEPTH);
84 137264 const int offset = 1 << (shift - 1);
85 137264 pixel *dst = (pixel *)_dst;
86
87 137264 dst_stride /= sizeof(pixel);
88
2/2
✓ Branch 0 taken 841568 times.
✓ Branch 1 taken 68632 times.
1820400 for (int y = 0; y < height; y++) {
89
2/2
✓ Branch 0 taken 12243616 times.
✓ Branch 1 taken 841568 times.
26170368 for (int x = 0; x < width; x++) {
90 24487232 const uint8_t w = weights[x * step_x];
91 24487232 dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift);
92 }
93 1683136 dst += dst_stride;
94 1683136 src0 += MAX_PB_SIZE;
95 1683136 src1 += MAX_PB_SIZE;
96 1683136 weights += step_y;
97 }
98 137264 }
99
100 //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line
101 8336572 static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
102 const int x_frac, const int y_frac, const int width, const int height)
103 {
104 8336572 const int x_off = (x_frac >> 3) - 1;
105 8336572 const int y_off = (y_frac >> 3) - 1;
106 8336572 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
107 8336572 const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride;
108 8336572 int16_t *dst = _dst - 1 - MAX_PB_SIZE;
109 8336572 const int shift = 14 - BIT_DEPTH;
110 8336572 const int bdof_width = width + 2 * BDOF_BORDER_EXT;
111
112 // top
113
2/2
✓ Branch 0 taken 33103164 times.
✓ Branch 1 taken 4168286 times.
74542900 for (int i = 0; i < bdof_width; i++)
114 66206328 dst[i] = src[i] << shift;
115
116 8336572 dst += MAX_PB_SIZE;
117 8336572 src += src_stride;
118
119
2/2
✓ Branch 0 taken 24845920 times.
✓ Branch 1 taken 4168286 times.
58028412 for (int i = 0; i < height; i++) {
120 49691840 dst[0] = src[0] << shift;
121 49691840 dst[1 + width] = src[1 + width] << shift;
122 49691840 dst += MAX_PB_SIZE;
123 49691840 src += src_stride;
124 }
125
2/2
✓ Branch 0 taken 33103164 times.
✓ Branch 1 taken 4168286 times.
74542900 for (int i = 0; i < bdof_width; i++)
126 66206328 dst[i] = src[i] << shift;
127 8336572 }
128
129 //8.5.6.3.3 Luma integer sample fetching process
130 6935984 static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac)
131 {
132 6935984 FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
133 6935984 }
134
135 8336572 static void FUNC(prof_grad_filter)(int16_t *_gradient_h, int16_t *_gradient_v, const ptrdiff_t gradient_stride,
136 const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height, const int pad)
137 {
138 8336572 const int shift = 6;
139 8336572 const int16_t *src = _src;
140 8336572 int16_t *gradient_h = _gradient_h + pad * (1 + gradient_stride);
141 8336572 int16_t *gradient_v = _gradient_v + pad * (1 + gradient_stride);
142
143
2/2
✓ Branch 0 taken 24845920 times.
✓ Branch 1 taken 4168286 times.
58028412 for (int y = 0; y < height; y++) {
144 49691840 const int16_t *p = src;
145
2/2
✓ Branch 0 taken 226109824 times.
✓ Branch 1 taken 24845920 times.
501911488 for (int x = 0; x < width; x++) {
146 452219648 gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift);
147 452219648 gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift);
148 452219648 p++;
149 }
150 49691840 gradient_h += gradient_stride;
151 49691840 gradient_v += gradient_stride;
152 49691840 src += src_stride;
153 }
154
2/2
✓ Branch 0 taken 700294 times.
✓ Branch 1 taken 3467992 times.
8336572 if (pad) {
155 1400588 pad_int16(_gradient_h + 1 + gradient_stride, gradient_stride, width, height);
156 1400588 pad_int16(_gradient_v + 1 + gradient_stride, gradient_stride, width, height);
157 }
158 8336572 }
159
160 2726032 static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
161 {
162 2726032 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
163
164 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
165 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
166 2726032 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
167
168
2/2
✓ Branch 0 taken 5452064 times.
✓ Branch 1 taken 1363016 times.
13630160 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
169
2/2
✓ Branch 0 taken 21808256 times.
✓ Branch 1 taken 5452064 times.
54520640 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
170 43616512 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
171 43616512 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
172 43616512 const int val = src[x] + av_clip(di, -limit, limit - 1);
173 43616512 dst[x] = val;
174
175 }
176 10904128 src += MAX_PB_SIZE;
177 10904128 dst += MAX_PB_SIZE;
178 }
179 2726032 }
180
181 4199384 static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
182 {
183 4199384 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
184 4199384 pixel *dst = (pixel*)_dst;
185 4199384 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
186 4199384 const int shift = 14 - BIT_DEPTH;
187 #if BIT_DEPTH < 14
188 4199384 const int offset = 1 << (shift - 1);
189 #else
190 const int offset = 0;
191 #endif
192 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
193 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
194
195 4199384 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
196
197
2/2
✓ Branch 0 taken 8398768 times.
✓ Branch 1 taken 2099692 times.
20996920 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
198
2/2
✓ Branch 0 taken 33595072 times.
✓ Branch 1 taken 8398768 times.
83987680 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
199 67190144 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
200 67190144 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
201 67190144 const int val = src[x] + av_clip(di, -limit, limit - 1);
202 67190144 dst[x] = av_clip_pixel((val + offset) >> shift);
203
204 }
205 16797536 src += MAX_PB_SIZE;
206 16797536 dst += dst_stride;
207 }
208 4199384 }
209
210 10568 static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
211 const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
212 const int denom, const int wx, const int _ox)
213 {
214 10568 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
215 10568 pixel *dst = (pixel*)_dst;
216 10568 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
217 10568 const int shift = denom + FFMAX(2, 14 - BIT_DEPTH);
218 10568 const int offset = 1 << (shift - 1);
219 10568 const int ox = _ox * (1 << (BIT_DEPTH - 8));
220 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
221 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
222
223 10568 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
224
225
2/2
✓ Branch 0 taken 21136 times.
✓ Branch 1 taken 5284 times.
52840 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
226
2/2
✓ Branch 0 taken 84544 times.
✓ Branch 1 taken 21136 times.
211360 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
227 169088 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
228 169088 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
229 169088 const int val = src[x] + av_clip(di, -limit, limit - 1);
230 169088 dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox);
231 }
232 42272 src += MAX_PB_SIZE;
233 42272 dst += dst_stride;
234 }
235 10568 }
236
237 10663872 static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
238 const int16_t **gradient_h, const int16_t **gradient_v, ptrdiff_t gradient_stride,
239 int* vx, int* vy)
240 {
241 10663872 const int shift2 = 4;
242 10663872 const int shift3 = 1;
243 10663872 const int thres = 1 << 4;
244 10663872 int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0;
245 10663872 const int16_t *src0 = _src0 - 1 - MAX_PB_SIZE;
246 10663872 const int16_t *src1 = _src1 - 1 - MAX_PB_SIZE;
247
248
2/2
✓ Branch 0 taken 31991616 times.
✓ Branch 1 taken 5331936 times.
74647104 for (int y = 0; y < BDOF_GRADIENT_SIZE; y++) {
249
2/2
✓ Branch 0 taken 191949696 times.
✓ Branch 1 taken 31991616 times.
447882624 for (int x = 0; x < BDOF_GRADIENT_SIZE; x++) {
250 383899392 const int diff = (src0[x] >> shift2) - (src1[x] >> shift2);
251 383899392 const int idx = gradient_stride * y + x;
252 383899392 const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3;
253 383899392 const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3;
254 383899392 sgx2 += FFABS(temph);
255 383899392 sgy2 += FFABS(tempv);
256
2/2
✓ Branch 0 taken 104033460 times.
✓ Branch 1 taken 87916236 times.
383899392 sgxgy += VVC_SIGN(tempv) * temph;
257
2/2
✓ Branch 0 taken 109571111 times.
✓ Branch 1 taken 82378585 times.
383899392 sgxdi += -VVC_SIGN(temph) * diff;
258
2/2
✓ Branch 0 taken 104033460 times.
✓ Branch 1 taken 87916236 times.
383899392 sgydi += -VVC_SIGN(tempv) * diff;
259 }
260 63983232 src0 += MAX_PB_SIZE;
261 63983232 src1 += MAX_PB_SIZE;
262 }
263
2/2
✓ Branch 0 taken 5223650 times.
✓ Branch 1 taken 108286 times.
10663872 *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0;
264
2/2
✓ Branch 0 taken 5224555 times.
✓ Branch 1 taken 107381 times.
10663872 *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0;
265 10663872 }
266
267 10663872 static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1,
268 const int16_t **gradient_h, const int16_t **gradient_v, const int vx, const int vy)
269 {
270 10663872 const int shift4 = 15 - BIT_DEPTH;
271 10663872 const int offset4 = 1 << (shift4 - 1);
272
273 10663872 const int16_t* gh[] = { gradient_h[0] + 1 + BDOF_PADDED_SIZE, gradient_h[1] + 1 + BDOF_PADDED_SIZE };
274 10663872 const int16_t* gv[] = { gradient_v[0] + 1 + BDOF_PADDED_SIZE, gradient_v[1] + 1 + BDOF_PADDED_SIZE };
275
276
2/2
✓ Branch 0 taken 21327744 times.
✓ Branch 1 taken 5331936 times.
53319360 for (int y = 0; y < BDOF_BLOCK_SIZE; y++) {
277
2/2
✓ Branch 0 taken 85310976 times.
✓ Branch 1 taken 21327744 times.
213277440 for (int x = 0; x < BDOF_BLOCK_SIZE; x++) {
278 170621952 const int idx = y * BDOF_PADDED_SIZE + x;
279 170621952 const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]);
280 170621952 dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4);
281 }
282 42655488 dst += dst_stride;
283 42655488 src0 += MAX_PB_SIZE;
284 42655488 src1 += MAX_PB_SIZE;
285 }
286 10663872 }
287
288 700294 static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, int16_t *_src0, int16_t *_src1,
289 const int block_w, const int block_h)
290 {
291 int16_t gradient_h[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE];
292 int16_t gradient_v[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE];
293 int vx, vy;
294 700294 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
295 700294 pixel* dst = (pixel*)_dst;
296
297 700294 FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_PADDED_SIZE,
298 _src0, MAX_PB_SIZE, block_w, block_h, 1);
299 700294 pad_int16(_src0, MAX_PB_SIZE, block_w, block_h);
300 700294 FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_PADDED_SIZE,
301 _src1, MAX_PB_SIZE, block_w, block_h, 1);
302 700294 pad_int16(_src1, MAX_PB_SIZE, block_w, block_h);
303
304
2/2
✓ Branch 0 taken 1371744 times.
✓ Branch 1 taken 350147 times.
3443782 for (int y = 0; y < block_h; y += BDOF_BLOCK_SIZE) {
305
2/2
✓ Branch 0 taken 5331936 times.
✓ Branch 1 taken 1371744 times.
13407360 for (int x = 0; x < block_w; x += BDOF_BLOCK_SIZE) {
306 10663872 const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x;
307 10663872 const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x;
308 10663872 pixel *d = dst + x;
309 10663872 const int idx = BDOF_PADDED_SIZE * y + x;
310 10663872 const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
311 10663872 const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
312 10663872 FUNC(derive_bdof_vx_vy)(src0, src1, gh, gv, BDOF_PADDED_SIZE, &vx, &vy);
313 10663872 FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy);
314 }
315 2743488 dst += BDOF_BLOCK_SIZE * dst_stride;
316 }
317 700294 }
318
319 #define DMVR_FILTER(src, stride) \
320 (filter[0] * src[x] + \
321 filter[1] * src[x + stride])
322
323 //8.5.3.2.2 Luma sample bilinear interpolation process
324 2079224 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
325 const int height, const intptr_t mx, const intptr_t my, const int width)
326 {
327 2079224 const pixel *src = (const pixel *)_src;
328 2079224 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
329 #if BIT_DEPTH > 10
330 const int shift4 = BIT_DEPTH - 10;
331 const int offset4 = 1 << (shift4 - 1);
332 #define DMVR_SHIFT(s) (((s) + offset4) >> shift4)
333 #else
334 #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH))
335 #endif
336
337
2/2
✓ Branch 0 taken 20669552 times.
✓ Branch 1 taken 1039612 times.
43418328 for (int y = 0; y < height; y++) {
338
2/2
✓ Branch 0 taken 412548960 times.
✓ Branch 1 taken 20669552 times.
866437024 for (int x = 0; x < width; x++)
339 825097920 dst[x] = DMVR_SHIFT(src[x]);
340 41339104 src += src_stride;
341 41339104 dst += MAX_PB_SIZE;
342 }
343 #undef DMVR_SHIFT
344 2079224 }
345
346 //8.5.3.2.2 Luma sample bilinear interpolation process
347 301270 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
348 const int height, const intptr_t mx, const intptr_t my, const int width)
349 {
350 301270 const pixel *src = (const pixel*)_src;
351 301270 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
352 301270 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
353 301270 const int shift1 = BIT_DEPTH - 6;
354 301270 const int offset1 = 1 << (shift1 - 1);
355
356
2/2
✓ Branch 0 taken 2981452 times.
✓ Branch 1 taken 150635 times.
6264174 for (int y = 0; y < height; y++) {
357
2/2
✓ Branch 0 taken 58947280 times.
✓ Branch 1 taken 2981452 times.
123857464 for (int x = 0; x < width; x++)
358 117894560 dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
359 5962904 src += src_stride;
360 5962904 dst += MAX_PB_SIZE;
361 }
362 301270 }
363
364 //8.5.3.2.2 Luma sample bilinear interpolation process
365 413184 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
366 const int height, const intptr_t mx, const intptr_t my, const int width)
367 {
368 413184 const pixel *src = (pixel*)_src;
369 413184 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
370 413184 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my];
371 413184 const int shift1 = BIT_DEPTH - 6;
372 413184 const int offset1 = 1 << (shift1 - 1);
373
374
2/2
✓ Branch 0 taken 4091688 times.
✓ Branch 1 taken 206592 times.
8596560 for (int y = 0; y < height; y++) {
375
2/2
✓ Branch 0 taken 81195520 times.
✓ Branch 1 taken 4091688 times.
170574416 for (int x = 0; x < width; x++)
376 162391040 dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1;
377 8183376 src += src_stride;
378 8183376 dst += MAX_PB_SIZE;
379 }
380
381 413184 }
382
383 //8.5.3.2.2 Luma sample bilinear interpolation process
384 1151786 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
385 const int height, const intptr_t mx, const intptr_t my, const int width)
386 {
387 int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
388 1151786 int16_t *tmp = tmp_array;
389 1151786 const pixel *src = (const pixel*)_src;
390 1151786 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
391 1151786 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
392 1151786 const int shift1 = BIT_DEPTH - 6;
393 1151786 const int offset1 = 1 << (shift1 - 1);
394 1151786 const int shift2 = 4;
395 1151786 const int offset2 = 1 << (shift2 - 1);
396
397 1151786 src -= BILINEAR_EXTRA_BEFORE * src_stride;
398
2/2
✓ Branch 0 taken 11952065 times.
✓ Branch 1 taken 575893 times.
25055916 for (int y = 0; y < height + BILINEAR_EXTRA; y++) {
399
2/2
✓ Branch 0 taken 235692556 times.
✓ Branch 1 taken 11952065 times.
495289242 for (int x = 0; x < width; x++)
400 471385112 tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
401 23904130 src += src_stride;
402 23904130 tmp += MAX_PB_SIZE;
403 }
404
405 1151786 tmp = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE;
406 1151786 filter = ff_vvc_inter_luma_dmvr_filters[my];
407
2/2
✓ Branch 0 taken 11376172 times.
✓ Branch 1 taken 575893 times.
23904130 for (int y = 0; y < height; y++) {
408
2/2
✓ Branch 0 taken 224334160 times.
✓ Branch 1 taken 11376172 times.
471420664 for (int x = 0; x < width; x++)
409 448668320 dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2;
410 22752344 tmp += MAX_PB_SIZE;
411 22752344 dst += MAX_PB_SIZE;
412 }
413 1151786 }
414
415 #define PEL_FUNC(dst, C, idx1, idx2, a) \
416 do { \
417 for (int w = 0; w < 7; w++) \
418 inter->dst[C][w][idx1][idx2] = FUNC(a); \
419 } while (0) \
420
421 #define DIR_FUNCS(d, C, c) \
422 PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \
423 PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \
424 PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \
425 PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \
426 PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \
427 PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \
428 PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \
429 PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);
430
431 #define FUNCS(C, c) \
432 PEL_FUNC(put, C, 0, 0, put_pixels); \
433 PEL_FUNC(put, C, 0, 1, put_##c##_h); \
434 PEL_FUNC(put, C, 1, 0, put_##c##_v); \
435 PEL_FUNC(put, C, 1, 1, put_##c##_hv); \
436 DIR_FUNCS(uni, C, c); \
437
438 2134 static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
439 {
440
24/24
✓ Branch 0 taken 7469 times.
✓ Branch 1 taken 1067 times.
✓ Branch 2 taken 7469 times.
✓ Branch 3 taken 1067 times.
✓ Branch 4 taken 7469 times.
✓ Branch 5 taken 1067 times.
✓ Branch 6 taken 7469 times.
✓ Branch 7 taken 1067 times.
✓ Branch 8 taken 7469 times.
✓ Branch 9 taken 1067 times.
✓ Branch 10 taken 7469 times.
✓ Branch 11 taken 1067 times.
✓ Branch 12 taken 7469 times.
✓ Branch 13 taken 1067 times.
✓ Branch 14 taken 7469 times.
✓ Branch 15 taken 1067 times.
✓ Branch 16 taken 7469 times.
✓ Branch 17 taken 1067 times.
✓ Branch 18 taken 7469 times.
✓ Branch 19 taken 1067 times.
✓ Branch 20 taken 7469 times.
✓ Branch 21 taken 1067 times.
✓ Branch 22 taken 7469 times.
✓ Branch 23 taken 1067 times.
181390 FUNCS(LUMA, luma);
441
24/24
✓ Branch 0 taken 7469 times.
✓ Branch 1 taken 1067 times.
✓ Branch 2 taken 7469 times.
✓ Branch 3 taken 1067 times.
✓ Branch 4 taken 7469 times.
✓ Branch 5 taken 1067 times.
✓ Branch 6 taken 7469 times.
✓ Branch 7 taken 1067 times.
✓ Branch 8 taken 7469 times.
✓ Branch 9 taken 1067 times.
✓ Branch 10 taken 7469 times.
✓ Branch 11 taken 1067 times.
✓ Branch 12 taken 7469 times.
✓ Branch 13 taken 1067 times.
✓ Branch 14 taken 7469 times.
✓ Branch 15 taken 1067 times.
✓ Branch 16 taken 7469 times.
✓ Branch 17 taken 1067 times.
✓ Branch 18 taken 7469 times.
✓ Branch 19 taken 1067 times.
✓ Branch 20 taken 7469 times.
✓ Branch 21 taken 1067 times.
✓ Branch 22 taken 7469 times.
✓ Branch 23 taken 1067 times.
181390 FUNCS(CHROMA, chroma);
442
443 2134 inter->avg = FUNC(avg);
444 2134 inter->w_avg = FUNC(w_avg);
445
446 2134 inter->dmvr[0][0] = FUNC(dmvr);
447 2134 inter->dmvr[0][1] = FUNC(dmvr_h);
448 2134 inter->dmvr[1][0] = FUNC(dmvr_v);
449 2134 inter->dmvr[1][1] = FUNC(dmvr_hv);
450
451 2134 inter->put_ciip = FUNC(put_ciip);
452 2134 inter->put_gpm = FUNC(put_gpm);
453
454 2134 inter->fetch_samples = FUNC(fetch_samples);
455 2134 inter->bdof_fetch_samples = FUNC(bdof_fetch_samples);
456 2134 inter->apply_prof = FUNC(apply_prof);
457 2134 inter->apply_prof_uni = FUNC(apply_prof_uni);
458 2134 inter->apply_prof_uni_w = FUNC(apply_prof_uni_w);
459 2134 inter->apply_bdof = FUNC(apply_bdof);
460 2134 inter->prof_grad_filter = FUNC(prof_grad_filter);
461 2134 inter->sad = vvc_sad;
462 2134 }
463
464 #undef FUNCS
465 #undef PEL_FUNC
466 #undef DMVR_FUNCS
467