FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/inter_template.c
Date: 2024-07-16 12:46:59
Coverage summary (executed / total, percent covered):
Lines: 328 / 373 (87.9%)
Functions: 29 / 78 (37.2%)
Branches: 154 / 172 (89.5%)

Line Branch Exec Source
1 /*
2 * VVC inter prediction DSP
3 *
4 * Copyright (C) 2022 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "libavcodec/h26x/h2656_inter_template.c"
24
25 #define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE
26 65406 static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
27 const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
28 const int _x, const int _y, const int dx, const int dy,
29 const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
30 {
31 int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
32 65406 int16_t *tmp = tmp_array;
33 65406 pixel *dst = (pixel*)_dst;
34 65406 int16_t *dst16 = (int16_t*)_dst;
35 65406 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
36 65406 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
37 65406 const int shift = FFMAX(2, 14 - BIT_DEPTH);
38 65406 const int offset = 1 << (shift - 1);
39
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
40
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
41
2/2
✓ Branch 0 taken 12262 times.
✓ Branch 1 taken 20441 times.
65406 const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
42 65406 const int shift1 = 6 - is_chroma;
43 65406 const int shift2 = 4 + is_chroma;
44 65406 const int x0 = SCALED_INT(_x);
45 65406 const int y0 = SCALED_INT(_y);
46
47
2/2
✓ Branch 0 taken 191000 times.
✓ Branch 1 taken 32703 times.
447406 for (int i = 0; i < width; i++) {
48 382000 const int tx = _x + dx * i;
49 382000 const int x = SCALED_INT(tx) - x0;
50 382000 const int mx = av_zero_extend(tx >> shift1, shift2);
51 382000 const int8_t *filter = hf + mx * taps;
52 382000 const pixel *src = (pixel*)_src - extra_before * src_stride;
53
54
2/2
✓ Branch 0 taken 2455364 times.
✓ Branch 1 taken 191000 times.
5292728 for (int j = 0; j < src_height + extra; j++) {
55
2/2
✓ Branch 0 taken 718092 times.
✓ Branch 1 taken 1737272 times.
4910728 tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
56 4910728 src += src_stride;
57 }
58 382000 tmp += TMP_STRIDE;
59 }
60
61
2/2
✓ Branch 0 taken 208640 times.
✓ Branch 1 taken 32703 times.
482686 for (int i = 0; i < height; i++) {
62 417280 const int ty = _y + dy * i;
63 417280 const int x = SCALED_INT(ty) - y0;
64 417280 const int mx = av_zero_extend(ty >> shift1, shift2);
65 417280 const int8_t *filter = vf + mx * taps;
66
67 417280 tmp = tmp_array + extra_before;
68
2/2
✓ Branch 0 taken 2656704 times.
✓ Branch 1 taken 208640 times.
5730688 for (int j = 0; j < width; j++) {
69
2/2
✓ Branch 0 taken 885568 times.
✓ Branch 1 taken 1771136 times.
5313408 const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
70
2/2
✓ Branch 0 taken 1820352 times.
✓ Branch 1 taken 836352 times.
5313408 if (is_uni)
71 3640704 dst[j] = av_clip_pixel((val + offset) >> shift);
72 else
73 1672704 dst16[j] = val;
74 5313408 tmp += TMP_STRIDE;
75 }
76
2/2
✓ Branch 0 taken 159280 times.
✓ Branch 1 taken 49360 times.
417280 if (is_uni)
77 318560 dst += dst_stride;
78 else
79 98720 dst16 += dst_stride;
80 }
81 65406 }
82
83 7036 static void FUNC(put_luma_scaled)(int16_t *_dst,
84 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
85 const int x, const int y, const int dx, const int dy,
86 const int height, const int8_t *hf, const int8_t *vf, const int width)
87 {
88 7036 FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 0);
89 7036 }
90
91 4856 static void FUNC(put_chroma_scaled)(int16_t *_dst,
92 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
93 const int x, const int y, const int dx, const int dy,
94 const int height, const int8_t *hf, const int8_t *vf, const int width)
95 {
96 4856 FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 1);
97 4856 }
98
99 33846 static void FUNC(put_uni_luma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
100 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
101 const int x, const int y, const int dx, const int dy,
102 const int height, const int8_t *hf, const int8_t *vf, const int width)
103 {
104 33846 FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 0);
105 33846 }
106
107 19668 static void FUNC(put_uni_chroma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
108 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
109 const int x, const int y, const int dx, const int dy,
110 const int height, const int8_t *hf, const int8_t *vf, const int width)
111 {
112 19668 FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 1);
113 19668 }
114
115 static void av_always_inline FUNC(put_uni_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
116 const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
117 const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox,
118 const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma)
119 {
120 int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
121 int16_t *tmp = tmp_array;
122 pixel *dst = (pixel*)_dst;
123 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
124 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
125 const int shift = FFMAX(2, 14 - BIT_DEPTH);
126 const int offset = 1 << (shift - 1);
127 const int ox = _ox * (1 << (BIT_DEPTH - 8));
128 const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
129 const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
130 const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
131 const int shift1 = 6 - is_chroma;
132 const int shift2 = 4 + is_chroma;
133 const int x0 = SCALED_INT(_x);
134 const int y0 = SCALED_INT(_y);
135
136 for (int i = 0; i < width; i++) {
137 const int tx = _x + dx * i;
138 const int x = SCALED_INT(tx) - x0;
139 const int mx = av_zero_extend(tx >> shift1, shift2);
140 const int8_t *filter = hf + mx * taps;
141 const pixel *src = (pixel*)_src - extra_before * src_stride;
142
143 for (int j = 0; j < src_height + extra; j++) {
144 tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
145 src += src_stride;
146 }
147 tmp += TMP_STRIDE;
148 }
149
150 for (int i = 0; i < height; i++) {
151 const int ty = _y + dy * i;
152 const int x = SCALED_INT(ty) - y0;
153 const int mx = av_zero_extend(ty >> shift1, shift2);
154 const int8_t *filter = vf + mx * taps;
155
156 tmp = tmp_array + extra_before;
157 for (int j = 0; j < width; j++) {
158 const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
159 dst[j] = av_clip_pixel(((wx * val + offset) >> shift) + ox);
160 tmp += TMP_STRIDE;
161 }
162 dst += dst_stride;
163 }
164 }
165
166 static void FUNC(put_uni_luma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
167 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
168 const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
169 const int height, const int8_t *hf, const int8_t *vf, const int width)
170 {
171 FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 0);
172 }
173
174 static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
175 const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
176 const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
177 const int height, const int8_t *hf, const int8_t *vf, const int width)
178 {
179 FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 1);
180 }
181
182 #undef TMP_STRIDE
183
184 10521390 static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
185 const int16_t *src0, const int16_t *src1, const int width, const int height)
186 {
187 10521390 pixel *dst = (pixel*)_dst;
188 10521390 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
189 10521390 const int shift = FFMAX(3, 15 - BIT_DEPTH);
190 10521390 const int offset = 1 << (shift - 1);
191
192
2/2
✓ Branch 0 taken 47490974 times.
✓ Branch 1 taken 5260695 times.
105503338 for (int y = 0; y < height; y++) {
193
2/2
✓ Branch 0 taken 621894340 times.
✓ Branch 1 taken 47490974 times.
1338770628 for (int x = 0; x < width; x++)
194 1243788680 dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift);
195 94981948 src0 += MAX_PB_SIZE;
196 94981948 src1 += MAX_PB_SIZE;
197 94981948 dst += dst_stride;
198 }
199 10521390 }
200
201 1026822 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
202 const int16_t *src0, const int16_t *src1, const int width, const int height,
203 const int denom, const int w0, const int w1, const int o0, const int o1)
204 {
205 1026822 pixel *dst = (pixel*)_dst;
206 1026822 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
207 1026822 const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
208 1026822 const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
209
210
2/2
✓ Branch 0 taken 3844918 times.
✓ Branch 1 taken 513411 times.
8716658 for (int y = 0; y < height; y++) {
211
2/2
✓ Branch 0 taken 115466980 times.
✓ Branch 1 taken 3844918 times.
238623796 for (int x = 0; x < width; x++)
212 230933960 dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift);
213 7689836 src0 += MAX_PB_SIZE;
214 7689836 src1 += MAX_PB_SIZE;
215 7689836 dst += dst_stride;
216 }
217 1026822 }
218
219 82950 static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride,
220 const int width, const int height,
221 const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight)
222 {
223 82950 pixel *dst = (pixel *)_dst;
224 82950 pixel *inter = (pixel *)_inter;
225 82950 const size_t dst_stride = _dst_stride / sizeof(pixel);
226 82950 const size_t inter_stride = _inter_stride / sizeof(pixel);
227 82950 const int inter_weight = 4 - intra_weight;
228
229
2/2
✓ Branch 0 taken 502608 times.
✓ Branch 1 taken 41475 times.
1088166 for (int y = 0; y < height; y++) {
230
2/2
✓ Branch 0 taken 6707648 times.
✓ Branch 1 taken 502608 times.
14420512 for (int x = 0; x < width; x++)
231 13415296 dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2;
232 1005216 dst += dst_stride;
233 1005216 inter += inter_stride;
234 }
235 82950 }
236
237 139298 static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride,
238 const int width, const int height,
239 const int16_t *src0, const int16_t *src1,
240 const uint8_t *weights, const int step_x, const int step_y)
241 {
242 139298 const int shift = FFMAX(5, 17 - BIT_DEPTH);
243 139298 const int offset = 1 << (shift - 1);
244 139298 pixel *dst = (pixel *)_dst;
245
246 139298 dst_stride /= sizeof(pixel);
247
2/2
✓ Branch 0 taken 862640 times.
✓ Branch 1 taken 69649 times.
1864578 for (int y = 0; y < height; y++) {
248
2/2
✓ Branch 0 taken 12683296 times.
✓ Branch 1 taken 862640 times.
27091872 for (int x = 0; x < width; x++) {
249 25366592 const uint8_t w = weights[x * step_x];
250 25366592 dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift);
251 }
252 1725280 dst += dst_stride;
253 1725280 src0 += MAX_PB_SIZE;
254 1725280 src1 += MAX_PB_SIZE;
255 1725280 weights += step_y;
256 }
257 139298 }
258
259 //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line
260 8390108 static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
261 const int x_frac, const int y_frac, const int width, const int height)
262 {
263 8390108 const int x_off = (x_frac >> 3) - 1;
264 8390108 const int y_off = (y_frac >> 3) - 1;
265 8390108 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
266 8390108 const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride;
267 8390108 int16_t *dst = _dst - 1 - MAX_PB_SIZE;
268 8390108 const int shift = 14 - BIT_DEPTH;
269 8390108 const int bdof_width = width + 2 * BDOF_BORDER_EXT;
270
271 // top
272
2/2
✓ Branch 0 taken 33263676 times.
✓ Branch 1 taken 4195054 times.
74917460 for (int i = 0; i < bdof_width; i++)
273 66527352 dst[i] = src[i] << shift;
274
275 8390108 dst += MAX_PB_SIZE;
276 8390108 src += src_stride;
277
278
2/2
✓ Branch 0 taken 24952912 times.
✓ Branch 1 taken 4195054 times.
58295932 for (int i = 0; i < height; i++) {
279 49905824 dst[0] = src[0] << shift;
280 49905824 dst[1 + width] = src[1 + width] << shift;
281 49905824 dst += MAX_PB_SIZE;
282 49905824 src += src_stride;
283 }
284
2/2
✓ Branch 0 taken 33263676 times.
✓ Branch 1 taken 4195054 times.
74917460 for (int i = 0; i < bdof_width; i++)
285 66527352 dst[i] = src[i] << shift;
286 8390108 }
287
288 //8.5.6.3.3 Luma integer sample fetching process
289 6989536 static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac)
290 {
291 6989536 FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE);
292 6989536 }
293
294 8390108 static void FUNC(prof_grad_filter)(int16_t *_gradient_h, int16_t *_gradient_v, const ptrdiff_t gradient_stride,
295 const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height, const int pad)
296 {
297 8390108 const int shift = 6;
298 8390108 const int16_t *src = _src;
299 8390108 int16_t *gradient_h = _gradient_h + pad * (1 + gradient_stride);
300 8390108 int16_t *gradient_v = _gradient_v + pad * (1 + gradient_stride);
301
302
2/2
✓ Branch 0 taken 24952912 times.
✓ Branch 1 taken 4195054 times.
58295932 for (int y = 0; y < height; y++) {
303 49905824 const int16_t *p = src;
304
2/2
✓ Branch 0 taken 226536448 times.
✓ Branch 1 taken 24952912 times.
502978720 for (int x = 0; x < width; x++) {
305 453072896 gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift);
306 453072896 gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift);
307 453072896 p++;
308 }
309 49905824 gradient_h += gradient_stride;
310 49905824 gradient_v += gradient_stride;
311 49905824 src += src_stride;
312 }
313
2/2
✓ Branch 0 taken 700286 times.
✓ Branch 1 taken 3494768 times.
8390108 if (pad) {
314 1400572 pad_int16(_gradient_h + 1 + gradient_stride, gradient_stride, width, height);
315 1400572 pad_int16(_gradient_v + 1 + gradient_stride, gradient_stride, width, height);
316 }
317 8390108 }
318
319 2732816 static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
320 {
321 2732816 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
322
323 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
324 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
325 2732816 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
326
327
2/2
✓ Branch 0 taken 5465632 times.
✓ Branch 1 taken 1366408 times.
13664080 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
328
2/2
✓ Branch 0 taken 21862528 times.
✓ Branch 1 taken 5465632 times.
54656320 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
329 43725056 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
330 43725056 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
331 43725056 const int val = src[x] + av_clip(di, -limit, limit - 1);
332 43725056 dst[x] = val;
333
334 }
335 10931264 src += MAX_PB_SIZE;
336 10931264 dst += MAX_PB_SIZE;
337 }
338 2732816 }
339
340 4246152 static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
341 {
342 4246152 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
343 4246152 pixel *dst = (pixel*)_dst;
344 4246152 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
345 4246152 const int shift = 14 - BIT_DEPTH;
346 #if BIT_DEPTH < 14
347 4246152 const int offset = 1 << (shift - 1);
348 #else
349 const int offset = 0;
350 #endif
351 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
352 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
353
354 4246152 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
355
356
2/2
✓ Branch 0 taken 8492304 times.
✓ Branch 1 taken 2123076 times.
21230760 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
357
2/2
✓ Branch 0 taken 33969216 times.
✓ Branch 1 taken 8492304 times.
84923040 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
358 67938432 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
359 67938432 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
360 67938432 const int val = src[x] + av_clip(di, -limit, limit - 1);
361 67938432 dst[x] = av_clip_pixel((val + offset) >> shift);
362
363 }
364 16984608 src += MAX_PB_SIZE;
365 16984608 dst += dst_stride;
366 }
367 4246152 }
368
369 10568 static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
370 const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
371 const int denom, const int wx, const int _ox)
372 {
373 10568 const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
374 10568 pixel *dst = (pixel*)_dst;
375 10568 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
376 10568 const int shift = denom + FFMAX(2, 14 - BIT_DEPTH);
377 10568 const int offset = 1 << (shift - 1);
378 10568 const int ox = _ox * (1 << (BIT_DEPTH - 8));
379 int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
380 int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
381
382 10568 FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE, 0);
383
384
2/2
✓ Branch 0 taken 21136 times.
✓ Branch 1 taken 5284 times.
52840 for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
385
2/2
✓ Branch 0 taken 84544 times.
✓ Branch 1 taken 21136 times.
211360 for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
386 169088 const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
387 169088 const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
388 169088 const int val = src[x] + av_clip(di, -limit, limit - 1);
389 169088 dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox);
390 }
391 42272 src += MAX_PB_SIZE;
392 42272 dst += dst_stride;
393 }
394 10568 }
395
396 10663760 static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
397 const int16_t **gradient_h, const int16_t **gradient_v, ptrdiff_t gradient_stride,
398 int* vx, int* vy)
399 {
400 10663760 const int shift2 = 4;
401 10663760 const int shift3 = 1;
402 10663760 const int thres = 1 << 4;
403 10663760 int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0;
404 10663760 const int16_t *src0 = _src0 - 1 - MAX_PB_SIZE;
405 10663760 const int16_t *src1 = _src1 - 1 - MAX_PB_SIZE;
406
407
2/2
✓ Branch 0 taken 31991280 times.
✓ Branch 1 taken 5331880 times.
74646320 for (int y = 0; y < BDOF_GRADIENT_SIZE; y++) {
408
2/2
✓ Branch 0 taken 191947680 times.
✓ Branch 1 taken 31991280 times.
447877920 for (int x = 0; x < BDOF_GRADIENT_SIZE; x++) {
409 383895360 const int diff = (src0[x] >> shift2) - (src1[x] >> shift2);
410 383895360 const int idx = gradient_stride * y + x;
411 383895360 const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3;
412 383895360 const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3;
413 383895360 sgx2 += FFABS(temph);
414 383895360 sgy2 += FFABS(tempv);
415
2/2
✓ Branch 0 taken 104029594 times.
✓ Branch 1 taken 87918086 times.
383895360 sgxgy += VVC_SIGN(tempv) * temph;
416
2/2
✓ Branch 0 taken 109566968 times.
✓ Branch 1 taken 82380712 times.
383895360 sgxdi += -VVC_SIGN(temph) * diff;
417
2/2
✓ Branch 0 taken 104029594 times.
✓ Branch 1 taken 87918086 times.
383895360 sgydi += -VVC_SIGN(tempv) * diff;
418 }
419 63982560 src0 += MAX_PB_SIZE;
420 63982560 src1 += MAX_PB_SIZE;
421 }
422
2/2
✓ Branch 0 taken 5223826 times.
✓ Branch 1 taken 108054 times.
10663760 *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0;
423
2/2
✓ Branch 0 taken 5224604 times.
✓ Branch 1 taken 107276 times.
10663760 *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0;
424 10663760 }
425
426 10663760 static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1,
427 const int16_t **gradient_h, const int16_t **gradient_v, const int vx, const int vy)
428 {
429 10663760 const int shift4 = 15 - BIT_DEPTH;
430 10663760 const int offset4 = 1 << (shift4 - 1);
431
432 10663760 const int16_t* gh[] = { gradient_h[0] + 1 + BDOF_PADDED_SIZE, gradient_h[1] + 1 + BDOF_PADDED_SIZE };
433 10663760 const int16_t* gv[] = { gradient_v[0] + 1 + BDOF_PADDED_SIZE, gradient_v[1] + 1 + BDOF_PADDED_SIZE };
434
435
2/2
✓ Branch 0 taken 21327520 times.
✓ Branch 1 taken 5331880 times.
53318800 for (int y = 0; y < BDOF_BLOCK_SIZE; y++) {
436
2/2
✓ Branch 0 taken 85310080 times.
✓ Branch 1 taken 21327520 times.
213275200 for (int x = 0; x < BDOF_BLOCK_SIZE; x++) {
437 170620160 const int idx = y * BDOF_PADDED_SIZE + x;
438 170620160 const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]);
439 170620160 dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4);
440 }
441 42655040 dst += dst_stride;
442 42655040 src0 += MAX_PB_SIZE;
443 42655040 src1 += MAX_PB_SIZE;
444 }
445 10663760 }
446
447 700286 static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, int16_t *_src0, int16_t *_src1,
448 const int block_w, const int block_h)
449 {
450 int16_t gradient_h[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE];
451 int16_t gradient_v[2][BDOF_PADDED_SIZE * BDOF_PADDED_SIZE];
452 int vx, vy;
453 700286 const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
454 700286 pixel* dst = (pixel*)_dst;
455
456 700286 FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_PADDED_SIZE,
457 _src0, MAX_PB_SIZE, block_w, block_h, 1);
458 700286 pad_int16(_src0, MAX_PB_SIZE, block_w, block_h);
459 700286 FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_PADDED_SIZE,
460 _src1, MAX_PB_SIZE, block_w, block_h, 1);
461 700286 pad_int16(_src1, MAX_PB_SIZE, block_w, block_h);
462
463
2/2
✓ Branch 0 taken 1371730 times.
✓ Branch 1 taken 350143 times.
3443746 for (int y = 0; y < block_h; y += BDOF_BLOCK_SIZE) {
464
2/2
✓ Branch 0 taken 5331880 times.
✓ Branch 1 taken 1371730 times.
13407220 for (int x = 0; x < block_w; x += BDOF_BLOCK_SIZE) {
465 10663760 const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x;
466 10663760 const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x;
467 10663760 pixel *d = dst + x;
468 10663760 const int idx = BDOF_PADDED_SIZE * y + x;
469 10663760 const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
470 10663760 const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
471 10663760 FUNC(derive_bdof_vx_vy)(src0, src1, gh, gv, BDOF_PADDED_SIZE, &vx, &vy);
472 10663760 FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy);
473 }
474 2743460 dst += BDOF_BLOCK_SIZE * dst_stride;
475 }
476 700286 }
477
478 #define DMVR_FILTER(src, stride) \
479 (filter[0] * src[x] + \
480 filter[1] * src[x + stride])
481
482 //8.5.3.2.2 Luma sample bilinear interpolation process
483 2079224 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
484 const int height, const intptr_t mx, const intptr_t my, const int width)
485 {
486 2079224 const pixel *src = (const pixel *)_src;
487 2079224 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
488 #if BIT_DEPTH > 10
489 const int shift4 = BIT_DEPTH - 10;
490 const int offset4 = 1 << (shift4 - 1);
491 #define DMVR_SHIFT(s) (((s) + offset4) >> shift4)
492 #else
493 #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH))
494 #endif
495
496
2/2
✓ Branch 0 taken 20669552 times.
✓ Branch 1 taken 1039612 times.
43418328 for (int y = 0; y < height; y++) {
497
2/2
✓ Branch 0 taken 412548960 times.
✓ Branch 1 taken 20669552 times.
866437024 for (int x = 0; x < width; x++)
498 825097920 dst[x] = DMVR_SHIFT(src[x]);
499 41339104 src += src_stride;
500 41339104 dst += MAX_PB_SIZE;
501 }
502 #undef DMVR_SHIFT
503 2079224 }
504
505 //8.5.3.2.2 Luma sample bilinear interpolation process
506 301270 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
507 const int height, const intptr_t mx, const intptr_t my, const int width)
508 {
509 301270 const pixel *src = (const pixel*)_src;
510 301270 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
511 301270 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
512 301270 const int shift1 = BIT_DEPTH - 6;
513 301270 const int offset1 = 1 << (shift1 - 1);
514
515
2/2
✓ Branch 0 taken 2981452 times.
✓ Branch 1 taken 150635 times.
6264174 for (int y = 0; y < height; y++) {
516
2/2
✓ Branch 0 taken 58947280 times.
✓ Branch 1 taken 2981452 times.
123857464 for (int x = 0; x < width; x++)
517 117894560 dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
518 5962904 src += src_stride;
519 5962904 dst += MAX_PB_SIZE;
520 }
521 301270 }
522
523 //8.5.3.2.2 Luma sample bilinear interpolation process
524 413184 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
525 const int height, const intptr_t mx, const intptr_t my, const int width)
526 {
527 413184 const pixel *src = (pixel*)_src;
528 413184 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
529 413184 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my];
530 413184 const int shift1 = BIT_DEPTH - 6;
531 413184 const int offset1 = 1 << (shift1 - 1);
532
533
2/2
✓ Branch 0 taken 4091688 times.
✓ Branch 1 taken 206592 times.
8596560 for (int y = 0; y < height; y++) {
534
2/2
✓ Branch 0 taken 81195520 times.
✓ Branch 1 taken 4091688 times.
170574416 for (int x = 0; x < width; x++)
535 162391040 dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1;
536 8183376 src += src_stride;
537 8183376 dst += MAX_PB_SIZE;
538 }
539
540 413184 }
541
542 //8.5.3.2.2 Luma sample bilinear interpolation process
543 1151786 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
544 const int height, const intptr_t mx, const intptr_t my, const int width)
545 {
546 int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
547 1151786 int16_t *tmp = tmp_array;
548 1151786 const pixel *src = (const pixel*)_src;
549 1151786 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
550 1151786 const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
551 1151786 const int shift1 = BIT_DEPTH - 6;
552 1151786 const int offset1 = 1 << (shift1 - 1);
553 1151786 const int shift2 = 4;
554 1151786 const int offset2 = 1 << (shift2 - 1);
555
556 1151786 src -= BILINEAR_EXTRA_BEFORE * src_stride;
557
2/2
✓ Branch 0 taken 11952065 times.
✓ Branch 1 taken 575893 times.
25055916 for (int y = 0; y < height + BILINEAR_EXTRA; y++) {
558
2/2
✓ Branch 0 taken 235692556 times.
✓ Branch 1 taken 11952065 times.
495289242 for (int x = 0; x < width; x++)
559 471385112 tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
560 23904130 src += src_stride;
561 23904130 tmp += MAX_PB_SIZE;
562 }
563
564 1151786 tmp = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE;
565 1151786 filter = ff_vvc_inter_luma_dmvr_filters[my];
566
2/2
✓ Branch 0 taken 11376172 times.
✓ Branch 1 taken 575893 times.
23904130 for (int y = 0; y < height; y++) {
567
2/2
✓ Branch 0 taken 224334160 times.
✓ Branch 1 taken 11376172 times.
471420664 for (int x = 0; x < width; x++)
568 448668320 dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2;
569 22752344 tmp += MAX_PB_SIZE;
570 22752344 dst += MAX_PB_SIZE;
571 }
572 1151786 }
573
574 #define PEL_FUNC(dst, C, idx1, idx2, a) \
575 do { \
576 for (int w = 0; w < 7; w++) \
577 inter->dst[C][w][idx1][idx2] = FUNC(a); \
578 } while (0) \
579
580 #define DIR_FUNCS(d, C, c) \
581 PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \
582 PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \
583 PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \
584 PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \
585 PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \
586 PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \
587 PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \
588 PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);
589
590 #define FUNCS(C, c) \
591 PEL_FUNC(put, C, 0, 0, put_pixels); \
592 PEL_FUNC(put, C, 0, 1, put_##c##_h); \
593 PEL_FUNC(put, C, 1, 0, put_##c##_v); \
594 PEL_FUNC(put, C, 1, 1, put_##c##_hv); \
595 DIR_FUNCS(uni, C, c); \
596
597 2326 static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
598 {
599
24/24
✓ Branch 0 taken 8141 times.
✓ Branch 1 taken 1163 times.
✓ Branch 2 taken 8141 times.
✓ Branch 3 taken 1163 times.
✓ Branch 4 taken 8141 times.
✓ Branch 5 taken 1163 times.
✓ Branch 6 taken 8141 times.
✓ Branch 7 taken 1163 times.
✓ Branch 8 taken 8141 times.
✓ Branch 9 taken 1163 times.
✓ Branch 10 taken 8141 times.
✓ Branch 11 taken 1163 times.
✓ Branch 12 taken 8141 times.
✓ Branch 13 taken 1163 times.
✓ Branch 14 taken 8141 times.
✓ Branch 15 taken 1163 times.
✓ Branch 16 taken 8141 times.
✓ Branch 17 taken 1163 times.
✓ Branch 18 taken 8141 times.
✓ Branch 19 taken 1163 times.
✓ Branch 20 taken 8141 times.
✓ Branch 21 taken 1163 times.
✓ Branch 22 taken 8141 times.
✓ Branch 23 taken 1163 times.
197710 FUNCS(LUMA, luma);
600
24/24
✓ Branch 0 taken 8141 times.
✓ Branch 1 taken 1163 times.
✓ Branch 2 taken 8141 times.
✓ Branch 3 taken 1163 times.
✓ Branch 4 taken 8141 times.
✓ Branch 5 taken 1163 times.
✓ Branch 6 taken 8141 times.
✓ Branch 7 taken 1163 times.
✓ Branch 8 taken 8141 times.
✓ Branch 9 taken 1163 times.
✓ Branch 10 taken 8141 times.
✓ Branch 11 taken 1163 times.
✓ Branch 12 taken 8141 times.
✓ Branch 13 taken 1163 times.
✓ Branch 14 taken 8141 times.
✓ Branch 15 taken 1163 times.
✓ Branch 16 taken 8141 times.
✓ Branch 17 taken 1163 times.
✓ Branch 18 taken 8141 times.
✓ Branch 19 taken 1163 times.
✓ Branch 20 taken 8141 times.
✓ Branch 21 taken 1163 times.
✓ Branch 22 taken 8141 times.
✓ Branch 23 taken 1163 times.
197710 FUNCS(CHROMA, chroma);
601
602
2/2
✓ Branch 0 taken 8141 times.
✓ Branch 1 taken 1163 times.
18608 for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) {
603 16282 inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled);
604 16282 inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled);
605 16282 inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled);
606 16282 inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled);
607 16282 inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled);
608 16282 inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled);
609 }
610
611 2326 inter->avg = FUNC(avg);
612 2326 inter->w_avg = FUNC(w_avg);
613
614 2326 inter->dmvr[0][0] = FUNC(dmvr);
615 2326 inter->dmvr[0][1] = FUNC(dmvr_h);
616 2326 inter->dmvr[1][0] = FUNC(dmvr_v);
617 2326 inter->dmvr[1][1] = FUNC(dmvr_hv);
618
619 2326 inter->put_ciip = FUNC(put_ciip);
620 2326 inter->put_gpm = FUNC(put_gpm);
621
622 2326 inter->fetch_samples = FUNC(fetch_samples);
623 2326 inter->bdof_fetch_samples = FUNC(bdof_fetch_samples);
624 2326 inter->apply_prof = FUNC(apply_prof);
625 2326 inter->apply_prof_uni = FUNC(apply_prof_uni);
626 2326 inter->apply_prof_uni_w = FUNC(apply_prof_uni_w);
627 2326 inter->apply_bdof = FUNC(apply_bdof);
628 2326 inter->prof_grad_filter = FUNC(prof_grad_filter);
629 2326 inter->sad = vvc_sad;
630 2326 }
631
632 #undef FUNCS
633 #undef PEL_FUNC
634 #undef DMVR_FUNCS
635