FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/inter_template.c
Date: 2025-08-19 23:55:23
Exec Total Coverage
Lines: 321 364 88.2%
Functions: 45 78 57.7%
Branches: 166 184 90.2%

Line Branch Exec Source
1 /*
2 * VVC inter prediction DSP
3 *
4 * Copyright (C) 2022 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "libavcodec/h26x/h2656_inter_template.c"
24 #include "libavutil/imgutils.h"
25
26 #define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE
27 65406 static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
28 const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
29 const int _x, const int _y, const int dx, const int dy,
30 const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
31 {
32 int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
33 65406 int16_t *tmp = tmp_array;
34 65406 pixel *dst = (pixel*)_dst;
35 65406 int16_t *dst16 = (int16_t*)_dst;
36 65406 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
37 65406 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
38 65406 const int shift = FFMAX(2, 14 - BIT_DEPTH);
39 65406 const int offset = 1 << (shift - 1);
40
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
41
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
42
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
43 65406 const int shift1 = 6 - is_chroma;
44 65406 const int shift2 = 4 + is_chroma;
45 65406 const int x0 = SCALED_INT(_x);
46 65406 const int y0 = SCALED_INT(_y);
47
48
2/2
✓ Branch 0 taken 191000 times.
✓ Branch 1 taken 32703 times.
447406 for (int i = 0; i < width; i++) {
49 382000 const int tx = _x + dx * i;
50 382000 const int x = SCALED_INT(tx) - x0;
51 382000 const int mx = av_zero_extend(tx >> shift1, shift2);
52 382000 const int8_t *filter = hf + mx * taps;
53 382000 const pixel *src = (pixel*)_src - extra_before * src_stride;
54
55
2/2
✓ Branch 0 taken 2455364 times.
✓ Branch 1 taken 191000 times.
5292728 for (int j = 0; j < src_height + extra; j++) {
56
2/2
✓ Branch 0 taken 718092 times.
✓ Branch 1 taken 1737272 times.
4910728 tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
57 4910728 src += src_stride;
58 }
59 382000 tmp += TMP_STRIDE;
60 }
61
62
2/2
✓ Branch 0 taken 208640 times.
✓ Branch 1 taken 32703 times.
482686 for (int i = 0; i < height; i++) {
63 417280 const int ty = _y + dy * i;
64 417280 const int x = SCALED_INT(ty) - y0;
65 417280 const int mx = av_zero_extend(ty >> shift1, shift2);
66 417280 const int8_t *filter = vf + mx * taps;
67
68 417280 tmp = tmp_array + extra_before;
69
2/2
✓ Branch 0 taken 2656704 times.
✓ Branch 1 taken 208640 times.
5730688 for (int j = 0; j < width; j++) {
70
2/2
✓ Branch 0 taken 885568 times.
✓ Branch 1 taken 1771136 times.
5313408 const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
71
2/2
✓ Branch 0 taken 1820352 times.
✓ Branch 1 taken 836352 times.
5313408 if (is_uni)
72 3640704 dst[j] = av_clip_pixel((val + offset) >> shift);
73 else
74 1672704 dst16[j] = val;
75 5313408 tmp += TMP_STRIDE;
76 }
77
2/2
✓ Branch 0 taken 159280 times.
✓ Branch 1 taken 49360 times.
417280 if (is_uni)
78 318560 dst += dst_stride;
79 else
80 98720 dst16 += dst_stride;
81 }
82 65406 }
83
84 7036 static void FUNC(put_luma_scaled)(int16_t *_dst,
85 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
86 const int x, const int y, const int dx, const int dy,
87 const int height, const int8_t *hf, const int8_t *vf, const int width)
88 {
89 7036 FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 0);
90 7036 }
91
92 4856 static void FUNC(put_chroma_scaled)(int16_t *_dst,
93 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
94 const int x, const int y, const int dx, const int dy,
95 const int height, const int8_t *hf, const int8_t *vf, const int width)
96 {
97 4856 FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 1);
98 4856 }
99
100 33846 static void FUNC(put_uni_luma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
101 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
102 const int x, const int y, const int dx, const int dy,
103 const int height, const int8_t *hf, const int8_t *vf, const int width)
104 {
105 33846 FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 0);
106 33846 }
107
108 19668 static void FUNC(put_uni_chroma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
109 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
110 const int x, const int y, const int dx, const int dy,
111 const int height, const int8_t *hf, const int8_t *vf, const int width)
112 {
113 19668 FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 1);
114 19668 }
115
116 static void av_always_inline FUNC(put_uni_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
117 const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
118 const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox,
119 const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma)
120 {
121 int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
122 int16_t *tmp = tmp_array;
123 pixel *dst = (pixel*)_dst;
124 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
125 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
126 const int shift = FFMAX(2, 14 - BIT_DEPTH);
127 const int offset = 1 << (shift - 1);
128 const int ox = _ox * (1 << (BIT_DEPTH - 8));
129 const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
130 const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
131 const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
132 const int shift1 = 6 - is_chroma;
133 const int shift2 = 4 + is_chroma;
134 const int x0 = SCALED_INT(_x);
135 const int y0 = SCALED_INT(_y);
136
137 for (int i = 0; i < width; i++) {
138 const int tx = _x + dx * i;
139 const int x = SCALED_INT(tx) - x0;
140 const int mx = av_zero_extend(tx >> shift1, shift2);
141 const int8_t *filter = hf + mx * taps;
142 const pixel *src = (pixel*)_src - extra_before * src_stride;
143
144 for (int j = 0; j < src_height + extra; j++) {
145 tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
146 src += src_stride;
147 }
148 tmp += TMP_STRIDE;
149 }
150
151 for (int i = 0; i < height; i++) {
152 const int ty = _y + dy * i;
153 const int x = SCALED_INT(ty) - y0;
154 const int mx = av_zero_extend(ty >> shift1, shift2);
155 const int8_t *filter = vf + mx * taps;
156
157 tmp = tmp_array + extra_before;
158 for (int j = 0; j < width; j++) {
159 const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
160 dst[j] = av_clip_pixel(((wx * val + offset) >> shift) + ox);
161 tmp += TMP_STRIDE;
162 }
163 dst += dst_stride;
164 }
165 }
166
167 static void FUNC(put_uni_luma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
168 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
169 const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
170 const int height, const int8_t *hf, const int8_t *vf, const int width)
171 {
172 FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 0);
173 }
174
175 static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
176 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
177 const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
178 const int height, const int8_t *hf, const int8_t *vf, const int width)
179 {
180 FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 1);
181 }
182
183 #undef TMP_STRIDE
184
185 12825408 static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
186 const int16_t *src0, const int16_t *src1, const int width, const int height)
187 {
188 12825408 pixel *dst = (pixel*)_dst;
189 12825408 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
190 12825408 const int shift = FFMAX(3, 15 - BIT_DEPTH);
191 12825408 const int offset = 1 << (shift - 1);
192
193
2/2
✓ Branch 0 taken 58804226 times.
✓ Branch 1 taken 6412704 times.
130433860 for (int y = 0; y < height; y++) {
194
2/2
✓ Branch 0 taken 754882468 times.
✓ Branch 1 taken 58804226 times.
1627373388 for (int x = 0; x < width; x++)
195 1509764936 dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift);
196 117608452 src0 += MAX_PB_SIZE;
197 117608452 src1 += MAX_PB_SIZE;
198 117608452 dst += dst_stride;
199 }
200 12825408 }
201
202 1336866 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
203 const int16_t *src0, const int16_t *src1, const int width, const int height,
204 const int denom, const int w0, const int w1, const int o0, const int o1)
205 {
206 1336866 pixel *dst = (pixel*)_dst;
207 1336866 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
208 1336866 const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
209 1336866 const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
210
211
2/2
✓ Branch 0 taken 4619374 times.
✓ Branch 1 taken 668433 times.
10575614 for (int y = 0; y < height; y++) {
212
2/2
✓ Branch 0 taken 124831876 times.
✓ Branch 1 taken 4619374 times.
258902500 for (int x = 0; x < width; x++)
213 249663752 dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift);
214 9238748 src0 += MAX_PB_SIZE;
215 9238748 src1 += MAX_PB_SIZE;
216 9238748 dst += dst_stride;
217 }
218 1336866 }
219
220 87666 static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride,
221 const int width, const int height,
222 const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight)
223 {
224 87666 pixel *dst = (pixel *)_dst;
225 87666 pixel *inter = (pixel *)_inter;
226 87666 const size_t dst_stride = _dst_stride / sizeof(pixel);
227 87666 const size_t inter_stride = _inter_stride / sizeof(pixel);
228 87666 const int inter_weight = 4 - intra_weight;
229
230
2/2
✓ Branch 0 taken 533728 times.
✓ Branch 1 taken 43833 times.
1155122 for (int y = 0; y < height; y++) {
231
2/2
✓ Branch 0 taken 7331040 times.
✓ Branch 1 taken 533728 times.
15729536 for (int x = 0; x < width; x++)
232 14662080 dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2;
233 1067456 dst += dst_stride;
234 1067456 inter += inter_stride;
235 }
236 87666 }
237
238 156422 static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride,
239 const int width, const int height,
240 const int16_t *src0, const int16_t *src1,
241 const uint8_t *weights, const int step_x, const int step_y)
242 {
243 156422 const int shift = FFMAX(5, 17 - BIT_DEPTH);
244 156422 const int offset = 1 << (shift - 1);
245 156422 pixel *dst = (pixel *)_dst;
246
247 156422 dst_stride /= sizeof(pixel);
248
2/2
✓ Branch 0 taken 998480 times.
✓ Branch 1 taken 78211 times.
2153382 for (int y = 0; y < height; y++) {
249
2/2
✓ Branch 0 taken 16048192 times.
✓ Branch 1 taken 998480 times.
34093344 for (int x = 0; x < width; x++) {
250 32096384 const uint8_t w = weights[x * step_x];
251 32096384 dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift);
252 }
253 1996960 dst += dst_stride;
254 1996960 src0 += MAX_PB_SIZE;
255 1996960 src1 += MAX_PB_SIZE;
256 1996960 weights += step_y;
257 }
258 156422 }
259
260 //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line
261 8539920 static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
262 const int x_frac, const int y_frac, const int width, const int height)
263 {
264 8539920 const int x_off = (x_frac >> 3) - 1;
265 8539920 const int y_off = (y_frac >> 3) - 1;
266 8539920 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
267 8539920 const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride;
268 8539920 int16_t *dst = _dst - 1 - MAX_PB_SIZE;
269 8539920 const int shift = 14 - BIT_DEPTH;
270 8539920 const int bdof_width = width + 2 * BDOF_BORDER_EXT;
271
272 // top
273
2/2
✓ Branch 0 taken 33839984 times.
✓ Branch 1 taken 4269960 times.
76219888 for (int i = 0; i < bdof_width; i++)
274 67679968 dst[i] = src[i] << shift;
275
276 8539920 dst += MAX_PB_SIZE;
277 8539920 src += src_stride;
278
279
2/2
✓ Branch 0 taken 25373184 times.
✓ Branch 1 taken 4269960 times.
59286288 for (int i = 0; i < height; i++) {
280 50746368 dst[0] = src[0] << shift;
281 50746368 dst[1 + width] = src[1 + width] << shift;
282 50746368 dst += MAX_PB_SIZE;
283 50746368 src += src_stride;
284 }
285
2/2
✓ Branch 0 taken 33839984 times.
✓ Branch 1 taken 4269960 times.
76219888 for (int i = 0; i < bdof_width; i++)
286 67679968 dst[i] = src[i] << shift;
287 8539920 }
288
289 //8.5.6.3.3 Luma integer sample fetching process
290 7117392 static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac)
291 {
292 7117392 FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
293 7117392 }
294
295 8540028 static void FUNC(prof_grad_filter)(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride,
296 const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height)
297 {
298 8540028 const int shift = 6;
299 8540028 const int16_t *src = _src;
300
301
2/2
✓ Branch 0 taken 25373904 times.
✓ Branch 1 taken 4270014 times.
59287836 for (int y = 0; y < height; y++) {
302 50747808 const int16_t *p = src;
303
2/2
✓ Branch 0 taken 230123648 times.
✓ Branch 1 taken 25373904 times.
510995104 for (int x = 0; x < width; x++) {
304 460247296 gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift);
305 460247296 gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift);
306 460247296 p++;
307 }
308 50747808 gradient_h += gradient_stride;
309 50747808 gradient_v += gradient_stride;
310 50747808 src += src_stride;
311 }
312 8540028 }
313
314 2818944 static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
315 {
316 2818944 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
317
318 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
319 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
320 2818944 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
321
322
2/2
✓ Branch 0 taken 5637888 times.
✓ Branch 1 taken 1409472 times.
14094720 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
323
2/2
✓ Branch 0 taken 22551552 times.
✓ Branch 1 taken 5637888 times.
56378880 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
324 45103104 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
325 45103104 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
326 45103104 const int val = src[x] + av_clip(di, -limit, limit - 1);
327 45103104 dst[x] = val;
328
329 }
330 11275776 src += MAX_PB_SIZE;
331 11275776 dst += MAX_PB_SIZE;
332 }
333 2818944 }
334
335 4269784 static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
336 {
337 4269784 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
338 4269784 pixel *dst = (pixel*)_dst;
339 4269784 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
340 4269784 const int shift = 14 - BIT_DEPTH;
341 #if BIT_DEPTH < 14
342 4269784 const int offset = 1 << (shift - 1);
343 #else
344 const int offset = 0;
345 #endif
346 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
347 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
348
349 4269784 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
350
351
2/2
✓ Branch 0 taken 8539568 times.
✓ Branch 1 taken 2134892 times.
21348920 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
352
2/2
✓ Branch 0 taken 34158272 times.
✓ Branch 1 taken 8539568 times.
85395680 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
353 68316544 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
354 68316544 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
355 68316544 const int val = src[x] + av_clip(di, -limit, limit - 1);
356 68316544 dst[x] = av_clip_pixel((val + offset) >> shift);
357
358 }
359 17079136 src += MAX_PB_SIZE;
360 17079136 dst += dst_stride;
361 }
362 4269784 }
363
364 28664 static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
365 const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
366 const int denom, const int wx, const int _ox)
367 {
368 28664 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
369 28664 pixel *dst = (pixel*)_dst;
370 28664 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
371 28664 const int shift = denom + FFMAX(2, 14 - BIT_DEPTH);
372 28664 const int offset = 1 << (shift - 1);
373 28664 const int ox = _ox * (1 << (BIT_DEPTH - 8));
374 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
375 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
376
377 28664 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
378
379
2/2
✓ Branch 0 taken 57328 times.
✓ Branch 1 taken 14332 times.
143320 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
380
2/2
✓ Branch 0 taken 229312 times.
✓ Branch 1 taken 57328 times.
573280 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
381 458624 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
382 458624 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
383 458624 const int val = src[x] + av_clip(di, -limit, limit - 1);
384 458624 dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox);
385 }
386 114656 src += MAX_PB_SIZE;
387 114656 dst += dst_stride;
388 }
389 28664 }
390
391 10824032 static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
392 const int pad_left, const int pad_top, const int pad_right, const int pad_bottom,
393 const int16_t **gradient_h, const int16_t **gradient_v,
394 int* vx, int* vy)
395 {
396 10824032 const int shift2 = 4;
397 10824032 const int shift3 = 1;
398 10824032 const int thres = 1 << 4;
399 10824032 int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0;
400
401
2/2
✓ Branch 0 taken 32472096 times.
✓ Branch 1 taken 5412016 times.
75768224 for (int y = -1; y < BDOF_MIN_BLOCK_SIZE + 1; y++) {
402
8/8
✓ Branch 0 taken 8299500 times.
✓ Branch 1 taken 24172596 times.
✓ Branch 2 taken 1383250 times.
✓ Branch 3 taken 6916250 times.
✓ Branch 4 taken 8299500 times.
✓ Branch 5 taken 24172596 times.
✓ Branch 6 taken 1383250 times.
✓ Branch 7 taken 6916250 times.
64944192 const int dy = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last row
403 64944192 const int16_t *src0 = _src0 + dy * MAX_PB_SIZE;
404 64944192 const int16_t *src1 = _src1 + dy * MAX_PB_SIZE;
405
406
2/2
✓ Branch 0 taken 194832576 times.
✓ Branch 1 taken 32472096 times.
454609344 for (int x = -1; x < BDOF_MIN_BLOCK_SIZE + 1; x++) {
407
8/8
✓ Branch 0 taken 50126040 times.
✓ Branch 1 taken 144706536 times.
✓ Branch 2 taken 8354340 times.
✓ Branch 3 taken 41771700 times.
✓ Branch 4 taken 50126040 times.
✓ Branch 5 taken 144706536 times.
✓ Branch 6 taken 8354340 times.
✓ Branch 7 taken 41771700 times.
389665152 const int dx = x + (pad_left && x < 0) - (pad_right && x == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last col
408 389665152 const int diff = (src0[dx] >> shift2) - (src1[dx] >> shift2);
409 389665152 const int idx = BDOF_BLOCK_SIZE * dy + dx;
410 389665152 const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3;
411 389665152 const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3;
412
413 389665152 sgx2 += FFABS(temph);
414 389665152 sgy2 += FFABS(tempv);
415
2/2
✓ Branch 0 taken 105575621 times.
✓ Branch 1 taken 89256955 times.
389665152 sgxgy += VVC_SIGN(tempv) * temph;
416
2/2
✓ Branch 0 taken 111406523 times.
✓ Branch 1 taken 83426053 times.
389665152 sgxdi += -VVC_SIGN(temph) * diff;
417
2/2
✓ Branch 0 taken 105575621 times.
✓ Branch 1 taken 89256955 times.
389665152 sgydi += -VVC_SIGN(tempv) * diff;
418 }
419 }
420
2/2
✓ Branch 0 taken 5295464 times.
✓ Branch 1 taken 116552 times.
10824032 *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0;
421
2/2
✓ Branch 0 taken 5301476 times.
✓ Branch 1 taken 110540 times.
10824032 *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0;
422 10824032 }
423
424 10824032 static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1,
425 const int16_t **gh, const int16_t **gv, const int vx, const int vy)
426 {
427 10824032 const int shift4 = 15 - BIT_DEPTH;
428 10824032 const int offset4 = 1 << (shift4 - 1);
429
430
2/2
✓ Branch 0 taken 21648064 times.
✓ Branch 1 taken 5412016 times.
54120160 for (int y = 0; y < BDOF_MIN_BLOCK_SIZE; y++) {
431
2/2
✓ Branch 0 taken 86592256 times.
✓ Branch 1 taken 21648064 times.
216480640 for (int x = 0; x < BDOF_MIN_BLOCK_SIZE; x++) {
432 173184512 const int idx = y * BDOF_BLOCK_SIZE + x;
433 173184512 const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]);
434 173184512 dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4);
435 }
436 43296128 dst += dst_stride;
437 43296128 src0 += MAX_PB_SIZE;
438 43296128 src1 += MAX_PB_SIZE;
439 }
440 10824032 }
441
442 711318 static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1,
443 const int block_w, const int block_h)
444 {
445 int16_t gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
446 int16_t gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
447 int vx, vy;
448 711318 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
449 711318 pixel* dst = (pixel*)_dst;
450
451 711318 FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_BLOCK_SIZE,
452 _src0, MAX_PB_SIZE, block_w, block_h);
453 711318 FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_BLOCK_SIZE,
454 _src1, MAX_PB_SIZE, block_w, block_h);
455
456
2/2
✓ Branch 0 taken 1392390 times.
✓ Branch 1 taken 355659 times.
3496098 for (int y = 0; y < block_h; y += BDOF_MIN_BLOCK_SIZE) {
457
2/2
✓ Branch 0 taken 5412016 times.
✓ Branch 1 taken 1392390 times.
13608812 for (int x = 0; x < block_w; x += BDOF_MIN_BLOCK_SIZE) {
458 10824032 const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x;
459 10824032 const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x;
460 10824032 pixel *d = dst + x;
461 10824032 const int idx = BDOF_BLOCK_SIZE * y + x;
462 10824032 const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
463 10824032 const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
464 10824032 FUNC(derive_bdof_vx_vy)(src0, src1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy);
465 10824032 FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy);
466 }
467 2784780 dst += BDOF_MIN_BLOCK_SIZE * dst_stride;
468 }
469 711318 }
470
/*
 * 2-tap filter helpers for the DMVR functions below.
 *
 * DMVR_FILTER() picks up `filter` and the loop index `x` from the calling
 * scope and combines src[x] with src[x + stride].
 * DMVR_FILTER2() combines two already-fetched samples with an explicit filter.
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define DMVR_FILTER(src, stride)                                               \
    (filter[0] * (src)[x] +                                                    \
     filter[1] * (src)[x + (stride)])

#define DMVR_FILTER2(filter, src0, src1)                                       \
    ((filter)[0] * (src0) + (filter)[1] * (src1))
477
478 //8.5.3.2.2 Luma sample bilinear interpolation process
479 2579762 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
480 const int height, const intptr_t mx, const intptr_t my, const int width)
481 {
482 #if BIT_DEPTH != 10
483 36 const pixel *src = (const pixel *)_src;
484 36 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
485 #if BIT_DEPTH > 10
486 18 const int shift4 = BIT_DEPTH - 10;
487 18 const int offset4 = 1 << (shift4 - 1);
488 #define DMVR_SHIFT(s) (((s) + offset4) >> shift4)
489 #else
490 #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH))
491 #endif // BIT_DEPTH > 10
492
493
2/2
✓ Branch 0 taken 312 times.
✓ Branch 1 taken 18 times.
660 for (int y = 0; y < height; y++) {
494
2/2
✓ Branch 0 taken 5280 times.
✓ Branch 1 taken 312 times.
11184 for (int x = 0; x < width; x++)
495 10560 dst[x] = DMVR_SHIFT(src[x]);
496 624 src += src_stride;
497 624 dst += MAX_PB_SIZE;
498 }
499 #undef DMVR_SHIFT
500 #else
501 2579726 av_image_copy_plane((uint8_t*)dst, sizeof(int16_t) * MAX_PB_SIZE, _src, _src_stride,
502 width * sizeof(pixel), height);
503 #endif // BIT_DEPTH != 10
504 2579762 }
505
506 //8.5.3.2.2 Luma sample bilinear interpolation process
507 307172 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
508 const int height, const intptr_t mx, const intptr_t my, const int width)
509 {
510 307172 const pixel *src = (const pixel*)_src;
511 307172 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
512 307172 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
513 307172 const int shift1 = BIT_DEPTH - 6;
514 307172 const int offset1 = 1 << (shift1 - 1);
515
516
2/2
✓ Branch 0 taken 3038496 times.
✓ Branch 1 taken 153586 times.
6384164 for (int y = 0; y < height; y++) {
517
2/2
✓ Branch 0 taken 60069120 times.
✓ Branch 1 taken 3038496 times.
126215232 for (int x = 0; x < width; x++)
518 120138240 dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
519 6076992 src += src_stride;
520 6076992 dst += MAX_PB_SIZE;
521 }
522 307172 }
523
524 //8.5.3.2.2 Luma sample bilinear interpolation process
525 414328 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
526 const int height, const intptr_t mx, const intptr_t my, const int width)
527 {
528 414328 const pixel *src = (pixel*)_src;
529 414328 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
530 414328 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my];
531 414328 const int shift1 = BIT_DEPTH - 6;
532 414328 const int offset1 = 1 << (shift1 - 1);
533
534
2/2
✓ Branch 0 taken 4102464 times.
✓ Branch 1 taken 207164 times.
8619256 for (int y = 0; y < height; y++) {
535
2/2
✓ Branch 0 taken 81403840 times.
✓ Branch 1 taken 4102464 times.
171012608 for (int x = 0; x < width; x++)
536 162807680 dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1;
537 8204928 src += src_stride;
538 8204928 dst += MAX_PB_SIZE;
539 }
540
541 414328 }
542
543 //8.5.3.2.2 Luma sample bilinear interpolation process
544 1152482 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
545 const int height, const intptr_t mx, const intptr_t my, const int width)
546 {
547 int16_t tmp_array[MAX_PB_SIZE * 2];
548 1152482 int16_t *tmp0 = tmp_array;
549 1152482 int16_t *tmp1 = tmp_array + MAX_PB_SIZE;
550 1152482 const pixel *src = (const pixel*)_src;
551 1152482 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
552 1152482 const int8_t *filter_x = ff_vvc_inter_luma_dmvr_filters[mx];
553 1152482 const int8_t *filter_y = ff_vvc_inter_luma_dmvr_filters[my];
554 1152482 const int shift1 = BIT_DEPTH - 6;
555 1152482 const int offset1 = 1 << (shift1 - 1);
556 1152482 const int shift2 = 4;
557 1152482 const int offset2 = 1 << (shift2 - 1);
558
559 1152482 src -= BILINEAR_EXTRA_BEFORE * src_stride;
560
2/2
✓ Branch 0 taken 11364356 times.
✓ Branch 1 taken 576241 times.
23881194 for (int x = 0; x < width; x++)
561 22728712 tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
562 1152482 src += src_stride;
563
564
2/2
✓ Branch 0 taken 11381260 times.
✓ Branch 1 taken 576241 times.
23915002 for (int y = 1; y < height + BILINEAR_EXTRA; y++) {
565
2/2
✓ Branch 0 taken 224415920 times.
✓ Branch 1 taken 11381260 times.
471594360 for (int x = 0; x < width; x++) {
566 448831840 tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
567 448831840 dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) >> shift2;
568 }
569 22762520 src += src_stride;
570 22762520 dst += MAX_PB_SIZE;
571 22762520 FFSWAP(int16_t *, tmp0, tmp1);
572 }
573 1152482 }
574
/*
 * Helpers used by the init function below to fill the function tables.
 *
 * PEL_FUNC()  stores FUNC(a) into inter->dst[C][w][idx1][idx2] for all 7
 *             values of w.
 * DIR_FUNCS() fills the four [idx1][idx2] slots of put_<d> and put_<d>_w.
 * FUNCS()     fills the four slots of `put` and then the uni variants.
 *
 * None of the definitions ends in a line continuation, so they no longer
 * depend on the following line being blank.
 */
#define PEL_FUNC(dst, C, idx1, idx2, a)                                        \
    do {                                                                       \
        for (int w = 0; w < 7; w++)                                            \
            inter->dst[C][w][idx1][idx2] = FUNC(a);                            \
    } while (0)

#define DIR_FUNCS(d, C, c)                                                     \
        PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels);                          \
        PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h);                         \
        PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v);                         \
        PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv);                        \
        PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels);                    \
        PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h);                   \
        PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v);                   \
        PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);

#define FUNCS(C, c)                                                            \
        PEL_FUNC(put, C, 0, 0, put_pixels);                                    \
        PEL_FUNC(put, C, 0, 1, put_##c##_h);                                   \
        PEL_FUNC(put, C, 1, 0, put_##c##_v);                                   \
        PEL_FUNC(put, C, 1, 1, put_##c##_hv);                                  \
        DIR_FUNCS(uni, C, c);
597
598 2860 static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
599 {
600
24/24
✓ Branch 0 taken 10010 times.
✓ Branch 1 taken 1430 times.
✓ Branch 2 taken 10010 times.
✓ Branch 3 taken 1430 times.
✓ Branch 4 taken 10010 times.
✓ Branch 5 taken 1430 times.
✓ Branch 6 taken 10010 times.
✓ Branch 7 taken 1430 times.
✓ Branch 8 taken 10010 times.
✓ Branch 9 taken 1430 times.
✓ Branch 10 taken 10010 times.
✓ Branch 11 taken 1430 times.
✓ Branch 12 taken 10010 times.
✓ Branch 13 taken 1430 times.
✓ Branch 14 taken 10010 times.
✓ Branch 15 taken 1430 times.
✓ Branch 16 taken 10010 times.
✓ Branch 17 taken 1430 times.
✓ Branch 18 taken 10010 times.
✓ Branch 19 taken 1430 times.
✓ Branch 20 taken 10010 times.
✓ Branch 21 taken 1430 times.
✓ Branch 22 taken 10010 times.
✓ Branch 23 taken 1430 times.
243100 FUNCS(LUMA, luma);
601
24/24
✓ Branch 0 taken 10010 times.
✓ Branch 1 taken 1430 times.
✓ Branch 2 taken 10010 times.
✓ Branch 3 taken 1430 times.
✓ Branch 4 taken 10010 times.
✓ Branch 5 taken 1430 times.
✓ Branch 6 taken 10010 times.
✓ Branch 7 taken 1430 times.
✓ Branch 8 taken 10010 times.
✓ Branch 9 taken 1430 times.
✓ Branch 10 taken 10010 times.
✓ Branch 11 taken 1430 times.
✓ Branch 12 taken 10010 times.
✓ Branch 13 taken 1430 times.
✓ Branch 14 taken 10010 times.
✓ Branch 15 taken 1430 times.
✓ Branch 16 taken 10010 times.
✓ Branch 17 taken 1430 times.
✓ Branch 18 taken 10010 times.
✓ Branch 19 taken 1430 times.
✓ Branch 20 taken 10010 times.
✓ Branch 21 taken 1430 times.
✓ Branch 22 taken 10010 times.
✓ Branch 23 taken 1430 times.
243100 FUNCS(CHROMA, chroma);
602
603
2/2
✓ Branch 0 taken 10010 times.
✓ Branch 1 taken 1430 times.
22880 for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) {
604 20020 inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled);
605 20020 inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled);
606 20020 inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled);
607 20020 inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled);
608 20020 inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled);
609 20020 inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled);
610 }
611
612 2860 inter->avg = FUNC(avg);
613 2860 inter->w_avg = FUNC(w_avg);
614
615 2860 inter->dmvr[0][0] = FUNC(dmvr);
616 2860 inter->dmvr[0][1] = FUNC(dmvr_h);
617 2860 inter->dmvr[1][0] = FUNC(dmvr_v);
618 2860 inter->dmvr[1][1] = FUNC(dmvr_hv);
619
620 2860 inter->put_ciip = FUNC(put_ciip);
621 2860 inter->put_gpm = FUNC(put_gpm);
622
623 2860 inter->fetch_samples = FUNC(fetch_samples);
624 2860 inter->bdof_fetch_samples = FUNC(bdof_fetch_samples);
625 2860 inter->apply_prof = FUNC(apply_prof);
626 2860 inter->apply_prof_uni = FUNC(apply_prof_uni);
627 2860 inter->apply_prof_uni_w = FUNC(apply_prof_uni_w);
628 2860 inter->apply_bdof = FUNC(apply_bdof);
629 2860 inter->sad = vvc_sad;
630 2860 }
631
/* Drop the table-filling helpers so they do not leak out of this template.
 * DIR_FUNCS is the macro actually defined above; DMVR_FUNCS never was. */
#undef FUNCS
#undef DIR_FUNCS
#undef PEL_FUNC
635