FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/filter_template.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 514 523 98.3%
Functions: 48 72 66.7%
Branches: 316 336 94.0%

Line Branch Exec Source
1 /*
2 * VVC filters DSP
3 *
4 * Copyright (C) 2022 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "libavcodec/h26x/h2656_sao_template.c"
24
/*
 * Remap a width x height block of samples in place through a lookup table:
 * every sample value v in the block is replaced by lut[v].
 *
 * _dst        top-left sample of the block, accessed as pixel
 * dst_stride  row pitch of _dst; it is divided by sizeof(pixel) before use,
 *             so it is taken in byte units on entry
 * width       number of samples remapped in each row
 * height      number of rows remapped
 * _lut        lookup table, read as an array of pixel indexed by sample value
 */
25 387312 static void FUNC(lmcs_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const int width, const int height, const void *_lut)
26 {
27 387312 const pixel *lut = _lut;
28 387312 pixel *dst = (pixel*)_dst;
// convert the row pitch to pixel units so it can be added to dst directly
29 387312 dst_stride /= sizeof(pixel);
30
31
2/2
✓ Branch 0 taken 7010508 times.
✓ Branch 1 taken 193656 times.
14408328 for (int y = 0; y < height; y++) {
32
2/2
✓ Branch 0 taken 550203296 times.
✓ Branch 1 taken 7010508 times.
1114427608 for (int x = 0; x < width; x++)
33 1100406592 dst[x] = lut[dst[x]];
34 14021016 dst += dst_stride;
35 }
36 387312 }
37
/*
 * Helper for one symmetric ALF tap pair: takes the differences of the two
 * neighbours v0 and v1 against the centre sample curr, passes each through
 * av_clip with the bounds -clip and clip, and returns the sum of both results.
 * NOTE(review): av_clip is defined outside this view; it is presumably the
 * usual clamp of the first argument to [min, max].
 */
38 9114352128 static av_always_inline int16_t FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, int16_t clip)
39 {
40 9114352128 return av_clip(v0 - curr, -clip, clip) + av_clip(v1 - curr, -clip, clip);
41 }
42
/*
 * Luma adaptive loop filter.
 *
 * The width x height area is walked in ALF_BLOCK_SIZE x ALF_BLOCK_SIZE blocks.
 * Every block consumes its own set of 12 coefficients and 12 clip values
 * (filter and clip are advanced by ALF_NUM_COEFF_LUMA after each block).
 * Each output sample is the centre sample plus a rounded, weighted sum of 12
 * clipped neighbour pairs taken from the current row and the three rows above
 * and below it.
 *
 * _dst, dst_stride  destination samples and row pitch (pitch is divided by
 *                   sizeof(pixel) before use)
 * _src, src_stride  source samples and row pitch (same conversion); rows and
 *                   columns up to 3 samples outside the area are read
 * width, height     size of the filtered area in samples
 * filter, clip      per-block coefficient and clip arrays
 * vb_pos            row index, relative to the first row of the area, around
 *                   which the vertical taps are folded back and the rounding
 *                   is changed (presumably the ALF virtual boundary - confirm
 *                   against the caller)
 */
43 42280 static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride,
44 const int width, const int height, const int16_t *filter, const int16_t *clip, const int vb_pos)
45 {
46 42280 const pixel *src = (pixel *)_src;
47 42280 const int shift = 7;
48 42280 const int offset = 1 << ( shift - 1 );
// exclusive bounds of the row range whose vertical taps get adjusted below
49 42280 const int vb_above = vb_pos - 4;
50 42280 const int vb_below = vb_pos + 3;
51
52 42280 dst_stride /= sizeof(pixel);
53 42280 src_stride /= sizeof(pixel);
54
55
2/2
✓ Branch 0 taken 600484 times.
✓ Branch 1 taken 21140 times.
1243248 for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
56
2/2
✓ Branch 0 taken 18086248 times.
✓ Branch 1 taken 600484 times.
37373464 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
// s0 is the first row of the block; s1/s3/s5 are 1/2/3 rows below it,
// s2/s4/s6 are 1/2/3 rows above it
57 36172496 const pixel *s0 = src + y * src_stride + x;
58 36172496 const pixel *s1 = s0 + src_stride;
59 36172496 const pixel *s2 = s0 - src_stride;
60 36172496 const pixel *s3 = s1 + src_stride;
61 36172496 const pixel *s4 = s2 - src_stride;
62 36172496 const pixel *s5 = s3 + src_stride;
63 36172496 const pixel *s6 = s4 - src_stride;
64
65
2/2
✓ Branch 0 taken 72344992 times.
✓ Branch 1 taken 18086248 times.
180862480 for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
66 144689984 pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
67
// same seven rows, shifted down to row i of the block
68 144689984 const pixel *p0 = s0 + i * src_stride;
69 144689984 const pixel *p1 = s1 + i * src_stride;
70 144689984 const pixel *p2 = s2 + i * src_stride;
71 144689984 const pixel *p3 = s3 + i * src_stride;
72 144689984 const pixel *p4 = s4 + i * src_stride;
73 144689984 const pixel *p5 = s5 + i * src_stride;
74 144689984 const pixel *p6 = s6 + i * src_stride;
75
// the two tests below each select exactly one row: vb_pos - 1 and vb_pos;
// those rows use the wider rounding shift further down
76
4/4
✓ Branch 0 taken 70363848 times.
✓ Branch 1 taken 1981144 times.
✓ Branch 2 taken 495286 times.
✓ Branch 3 taken 69868562 times.
144689984 const int is_near_vb_above = (y + i < vb_pos) && (y + i >= vb_pos - 1);
77
4/4
✓ Branch 0 taken 1981144 times.
✓ Branch 1 taken 70363848 times.
✓ Branch 2 taken 495286 times.
✓ Branch 3 taken 1485858 times.
144689984 const int is_near_vb_below = (y + i >= vb_pos) && (y + i <= vb_pos);
78
4/4
✓ Branch 0 taken 71849706 times.
✓ Branch 1 taken 495286 times.
✓ Branch 2 taken 495286 times.
✓ Branch 3 taken 71354420 times.
144689984 const int is_near_vb = is_near_vb_above || is_near_vb_below;
79
// rows vb_pos - 3 .. vb_pos - 1: fold the rows below (p1, p3, p5) back so
// that none of them points at row vb_pos or beyond, and reduce the rows
// above (p2, p4, p6) by the same distance
80
4/4
✓ Branch 0 taken 70363848 times.
✓ Branch 1 taken 1981144 times.
✓ Branch 2 taken 1485858 times.
✓ Branch 3 taken 68877990 times.
144689984 if ((y + i < vb_pos) && ((y + i) > vb_above)) {
81
2/2
✓ Branch 0 taken 495286 times.
✓ Branch 1 taken 990572 times.
2971716 p1 = (y + i == vb_pos - 1) ? p0 : p1;
82
2/2
✓ Branch 0 taken 990572 times.
✓ Branch 1 taken 495286 times.
2971716 p3 = (y + i >= vb_pos - 2) ? p1 : p3;
// this condition always holds here: the enclosing test already requires
// y + i > vb_pos - 4
83
1/2
✓ Branch 0 taken 1485858 times.
✗ Branch 1 not taken.
2971716 p5 = (y + i >= vb_pos - 3) ? p3 : p5;
84
85
2/2
✓ Branch 0 taken 495286 times.
✓ Branch 1 taken 990572 times.
2971716 p2 = (y + i == vb_pos - 1) ? p0 : p2;
86
2/2
✓ Branch 0 taken 990572 times.
✓ Branch 1 taken 495286 times.
2971716 p4 = (y + i >= vb_pos - 2) ? p2 : p4;
87
1/2
✓ Branch 0 taken 1485858 times.
✗ Branch 1 not taken.
2971716 p6 = (y + i >= vb_pos - 3) ? p4 : p6;
// rows vb_pos .. vb_pos + 2: mirror case, fold the rows above (p2, p4, p6)
// so that none of them points above row vb_pos, and reduce the rows below
// (p1, p3, p5) by the same distance
88
4/4
✓ Branch 0 taken 1981144 times.
✓ Branch 1 taken 68877990 times.
✓ Branch 2 taken 1485858 times.
✓ Branch 3 taken 495286 times.
141718268 } else if ((y + i >= vb_pos) && ((y + i) < vb_below)) {
89
2/2
✓ Branch 0 taken 495286 times.
✓ Branch 1 taken 990572 times.
2971716 p2 = (y + i == vb_pos ) ? p0 : p2;
90
2/2
✓ Branch 0 taken 990572 times.
✓ Branch 1 taken 495286 times.
2971716 p4 = (y + i <= vb_pos + 1) ? p2 : p4;
// always true here: the enclosing test already requires y + i < vb_pos + 3
91
1/2
✓ Branch 0 taken 1485858 times.
✗ Branch 1 not taken.
2971716 p6 = (y + i <= vb_pos + 2) ? p4 : p6;
92
93
2/2
✓ Branch 0 taken 495286 times.
✓ Branch 1 taken 990572 times.
2971716 p1 = (y + i == vb_pos ) ? p0 : p1;
94
2/2
✓ Branch 0 taken 990572 times.
✓ Branch 1 taken 495286 times.
2971716 p3 = (y + i <= vb_pos + 1) ? p1 : p3;
95
1/2
✓ Branch 0 taken 1485858 times.
✗ Branch 1 not taken.
2971716 p5 = (y + i <= vb_pos + 2) ? p3 : p5;
96 }
97
98
2/2
✓ Branch 0 taken 289379968 times.
✓ Branch 1 taken 72344992 times.
723449920 for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
99 578759936 int sum = 0;
100 578759936 const pixel curr = *p0;
101
// 12 tap pairs; the two samples of a pair sit at opposite offsets from the
// centre sample and share one coefficient and one clip value
102 578759936 sum += filter[0] * FUNC(alf_clip)(curr, p5[+0], p6[+0], clip[0]);
103 578759936 sum += filter[1] * FUNC(alf_clip)(curr, p3[+1], p4[-1], clip[1]);
104 578759936 sum += filter[2] * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[2]);
105 578759936 sum += filter[3] * FUNC(alf_clip)(curr, p3[-1], p4[+1], clip[3]);
106 578759936 sum += filter[4] * FUNC(alf_clip)(curr, p1[+2], p2[-2], clip[4]);
107 578759936 sum += filter[5] * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[5]);
108 578759936 sum += filter[6] * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[6]);
109 578759936 sum += filter[7] * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[7]);
110 578759936 sum += filter[8] * FUNC(alf_clip)(curr, p1[-2], p2[+2], clip[8]);
111 578759936 sum += filter[9] * FUNC(alf_clip)(curr, p0[+3], p0[-3], clip[9]);
112 578759936 sum += filter[10] * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[10]);
113 578759936 sum += filter[11] * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[11]);
114
// round to nearest: shift by 7 normally, by 10 (shift + 3) on the two rows
// directly next to vb_pos
115
2/2
✓ Branch 0 taken 285417680 times.
✓ Branch 1 taken 3962288 times.
578759936 if (!is_near_vb)
116 570835360 sum = (sum + offset) >> shift;
117 else
118 7924576 sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3);
// the filtered value is a correction added to the centre sample
// NOTE(review): CLIP is defined outside this view; presumably it clamps to
// the valid sample range for the current bit depth
119 578759936 sum += curr;
120 578759936 dst[j] = CLIP(sum);
121
122 578759936 p0++;
123 578759936 p1++;
124 578759936 p2++;
125 578759936 p3++;
126 578759936 p4++;
127 578759936 p5++;
128 578759936 p6++;
129 }
130 }
// step to the coefficient and clip set of the next block
131 36172496 filter += ALF_NUM_COEFF_LUMA;
132 36172496 clip += ALF_NUM_COEFF_LUMA;
133 }
134 }
135 42280 }
136
/*
 * Chroma adaptive loop filter.
 *
 * Same structure as the luma filter in this file but with a smaller kernel:
 * 6 clipped tap pairs taken from the current row and the two rows above and
 * below it. Unlike the luma filter, filter and clip are never advanced, so
 * the same 6 coefficients and clip values are used for every block.
 *
 * _dst, dst_stride  destination samples and row pitch (pitch is divided by
 *                   sizeof(pixel) before use)
 * _src, src_stride  source samples and row pitch (same conversion); rows and
 *                   columns up to 2 samples outside the area are read
 * width, height     size of the filtered area in samples
 * filter, clip      6 coefficients and 6 clip values
 * vb_pos            row index around which the vertical taps are folded back
 *                   and the rounding is changed (presumably the ALF virtual
 *                   boundary - confirm against the caller)
 */
137 56832 static void FUNC(alf_filter_chroma)(uint8_t* _dst, ptrdiff_t dst_stride, const uint8_t* _src, ptrdiff_t src_stride,
138 const int width, const int height, const int16_t* filter, const int16_t* clip, const int vb_pos)
139 {
140 56832 const pixel *src = (pixel *)_src;
141 56832 const int shift = 7;
142 56832 const int offset = 1 << ( shift - 1 );
// inclusive bounds of the row range whose vertical taps get adjusted below
143 56832 const int vb_above = vb_pos - 2;
144 56832 const int vb_below = vb_pos + 1;
145
146 56832 dst_stride /= sizeof(pixel);
147 56832 src_stride /= sizeof(pixel);
148
149
2/2
✓ Branch 0 taken 521707 times.
✓ Branch 1 taken 28416 times.
1100246 for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
150
2/2
✓ Branch 0 taken 11298088 times.
✓ Branch 1 taken 521707 times.
23639590 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
// s0 is the first row of the block; s1/s3 are 1/2 rows below it,
// s2/s4 are 1/2 rows above it
151 22596176 const pixel *s0 = src + y * src_stride + x;
152 22596176 const pixel *s1 = s0 + src_stride;
153 22596176 const pixel *s2 = s0 - src_stride;
154 22596176 const pixel *s3 = s1 + src_stride;
155 22596176 const pixel *s4 = s2 - src_stride;
156
157
2/2
✓ Branch 0 taken 45192352 times.
✓ Branch 1 taken 11298088 times.
112980880 for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
158 90384704 pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
159
160 90384704 const pixel *p0 = s0 + i * src_stride;
161 90384704 const pixel *p1 = s1 + i * src_stride;
162 90384704 const pixel *p2 = s2 + i * src_stride;
163 90384704 const pixel *p3 = s3 + i * src_stride;
164 90384704 const pixel *p4 = s4 + i * src_stride;
165
// the two tests below each select exactly one row: vb_pos - 1 and vb_pos
166
4/4
✓ Branch 0 taken 44328236 times.
✓ Branch 1 taken 864116 times.
✓ Branch 2 taken 432058 times.
✓ Branch 3 taken 43896178 times.
90384704 const int is_near_vb_above = (y + i < vb_pos) && (y + i >= vb_pos - 1);
167
4/4
✓ Branch 0 taken 864116 times.
✓ Branch 1 taken 44328236 times.
✓ Branch 2 taken 432058 times.
✓ Branch 3 taken 432058 times.
90384704 const int is_near_vb_below = (y + i >= vb_pos) && (y + i <= vb_pos);
168
4/4
✓ Branch 0 taken 44760294 times.
✓ Branch 1 taken 432058 times.
✓ Branch 2 taken 432058 times.
✓ Branch 3 taken 44328236 times.
90384704 const int is_near_vb = is_near_vb_above || is_near_vb_below;
169
// rows vb_pos - 2 .. vb_pos - 1: fold the rows below (p1, p3) back so that
// neither points at row vb_pos or beyond, and reduce the rows above (p2, p4)
// by the same distance
170
4/4
✓ Branch 0 taken 44328236 times.
✓ Branch 1 taken 864116 times.
✓ Branch 2 taken 864116 times.
✓ Branch 3 taken 43464120 times.
90384704 if ((y + i < vb_pos) && ((y + i) >= vb_above)) {
171
2/2
✓ Branch 0 taken 432058 times.
✓ Branch 1 taken 432058 times.
1728232 p1 = (y + i == vb_pos - 1) ? p0 : p1;
// always true here: the enclosing test already requires y + i >= vb_pos - 2
172
1/2
✓ Branch 0 taken 864116 times.
✗ Branch 1 not taken.
1728232 p3 = (y + i >= vb_pos - 2) ? p1 : p3;
173
174
2/2
✓ Branch 0 taken 432058 times.
✓ Branch 1 taken 432058 times.
1728232 p2 = (y + i == vb_pos - 1) ? p0 : p2;
175
1/2
✓ Branch 0 taken 864116 times.
✗ Branch 1 not taken.
1728232 p4 = (y + i >= vb_pos - 2) ? p2 : p4;
// rows vb_pos .. vb_pos + 1: mirror case for the rows above (p2, p4), with
// the rows below (p1, p3) reduced by the same distance
176
3/4
✓ Branch 0 taken 864116 times.
✓ Branch 1 taken 43464120 times.
✓ Branch 2 taken 864116 times.
✗ Branch 3 not taken.
88656472 } else if ((y + i >= vb_pos) && ((y + i) <= vb_below)) {
177
2/2
✓ Branch 0 taken 432058 times.
✓ Branch 1 taken 432058 times.
1728232 p2 = (y + i == vb_pos ) ? p0 : p2;
// always true here: the enclosing test already requires y + i <= vb_pos + 1
178
1/2
✓ Branch 0 taken 864116 times.
✗ Branch 1 not taken.
1728232 p4 = (y + i <= vb_pos + 1) ? p2 : p4;
179
180
2/2
✓ Branch 0 taken 432058 times.
✓ Branch 1 taken 432058 times.
1728232 p1 = (y + i == vb_pos ) ? p0 : p1;
181
1/2
✓ Branch 0 taken 864116 times.
✗ Branch 1 not taken.
1728232 p3 = (y + i <= vb_pos + 1) ? p1 : p3;
182 }
183
184
2/2
✓ Branch 0 taken 180769408 times.
✓ Branch 1 taken 45192352 times.
451923520 for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
185 361538816 int sum = 0;
186 361538816 const pixel curr = *p0;
187
// 6 tap pairs; the two samples of a pair sit at opposite offsets from the
// centre sample and share one coefficient and one clip value
188 361538816 sum += filter[0] * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[0]);
189 361538816 sum += filter[1] * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[1]);
190 361538816 sum += filter[2] * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[2]);
191 361538816 sum += filter[3] * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[3]);
192 361538816 sum += filter[4] * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[4]);
193 361538816 sum += filter[5] * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[5]);
194
// round to nearest: shift by 7 normally, by 10 (shift + 3) on the two rows
// directly next to vb_pos
195
2/2
✓ Branch 0 taken 177312944 times.
✓ Branch 1 taken 3456464 times.
361538816 if (!is_near_vb)
196 354625888 sum = (sum + offset) >> shift;
197 else
198 6912928 sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3);
// the filtered value is a correction added to the centre sample
199 361538816 sum += curr;
200 361538816 dst[j] = CLIP(sum);
201
202 361538816 p0++;
203 361538816 p1++;
204 361538816 p2++;
205 361538816 p3++;
206 361538816 p4++;
207 }
208 }
209 }
210 }
211 56832 }
212
/*
 * Cross-component ALF: adds a correction computed from co-located luma
 * samples to every sample of a width x height destination block.
 *
 * _dst, dst_stride    destination samples, updated in place (pitch is
 *                     divided by sizeof(pixel) before use)
 * _luma, luma_stride  luma samples and row pitch (same conversion)
 * width, height       size of the destination block in samples
 * hs, vs              left shifts applied to x and y to locate the luma
 *                     sample (presumably the log2 chroma subsampling
 *                     factors - confirm against the caller)
 * filter              7 coefficients, one per luma tap
 * vb_pos              luma row around which the tap rows are collapsed
 */
213 18894 static void FUNC(alf_filter_cc)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_luma, const ptrdiff_t luma_stride,
214 const int width, const int height, const int hs, const int vs, const int16_t *filter, const int vb_pos)
215 {
216 18894 const ptrdiff_t stride = luma_stride / sizeof(pixel);
217
218 18894 dst_stride /= sizeof(pixel);
219
220
2/2
✓ Branch 0 taken 730324 times.
✓ Branch 1 taken 9447 times.
1479542 for (int y = 0; y < height; y++) {
221
2/2
✓ Branch 0 taken 67181056 times.
✓ Branch 1 taken 730324 times.
135822760 for (int x = 0; x < width; x++) {
222 134362112 int sum = 0;
223 134362112 pixel *dst = (pixel *)_dst + y * dst_stride + x;
224 134362112 const pixel *src = (pixel *)_luma + (y << vs) * stride + (x << hs);
225
// four luma rows feed the filter: one above src, the src row itself and the
// two rows below it
226 134362112 const pixel *s0 = src - stride;
227 134362112 const pixel *s1 = src;
228 134362112 const pixel *s2 = src + stride;
229 134362112 const pixel *s3 = src + 2 * stride;
230
231 134362112 const int pos = y << vs;
// with vs == 0, rows vb_pos and vb_pos + 1 are left untouched
232
6/6
✓ Branch 0 taken 41829376 times.
✓ Branch 1 taken 25351680 times.
✓ Branch 2 taken 41522688 times.
✓ Branch 3 taken 306688 times.
✓ Branch 4 taken 306688 times.
✓ Branch 5 taken 41216000 times.
134362112 if (!vs && (pos == vb_pos || pos == vb_pos + 1))
233 1226752 continue;
234
// near vb_pos the tap rows are collapsed: either only the lowest row is
// replaced by the one above it, or all four rows become the src row
235
3/4
✓ Branch 0 taken 65896448 times.
✓ Branch 1 taken 671232 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 65896448 times.
133135360 if (pos == (vb_pos - 2) || pos == (vb_pos + 1))
236 1342464 s3 = s2;
237
4/4
✓ Branch 0 taken 65589760 times.
✓ Branch 1 taken 306688 times.
✓ Branch 2 taken 364544 times.
✓ Branch 3 taken 65225216 times.
131792896 else if (pos == (vb_pos - 1) || pos == vb_pos)
238 1342464 s3 = s2 = s0 = s1;
239
240
// every tap contributes its difference to the centre luma sample *src
241 133135360 sum += filter[0] * (*s0 - *src);
242 133135360 sum += filter[1] * (*(s1 - 1) - *src);
243 133135360 sum += filter[2] * (*(s1 + 1) - *src);
244 133135360 sum += filter[3] * (*(s2 - 1) - *src);
245 133135360 sum += filter[4] * (*s2 - *src);
246 133135360 sum += filter[5] * (*(s2 + 1) - *src);
247 133135360 sum += filter[6] * (*s3 - *src);
// round (+64, >> 7), bound the correction by -(1 << (BIT_DEPTH - 1)) and
// (1 << (BIT_DEPTH - 1)) - 1, then add it to the destination sample
248 133135360 sum = av_clip((sum + 64) >> 7, -(1 << (BIT_DEPTH - 1)), (1 << (BIT_DEPTH - 1)) - 1);
249 133135360 sum += *dst;
250 133135360 *dst = av_clip_pixel(sum);
251 }
252 }
253 18894 }
254
/*
 * Indices of the four gradient directions (vertical, horizontal and the two
 * diagonals) inside one gradient entry; used by the classification code that
 * follows and undefined again after it.
 */
255 #define ALF_DIR_VERT 0
256 #define ALF_DIR_HORZ 1
257 #define ALF_DIR_DIGA0 2
258 #define ALF_DIR_DIGA1 3
259
/*
 * Derive the class index and transpose index of one block from its four
 * summed directional gradients.
 *
 * class_idx      out: activity class taken from arg_var, plus a
 *                directionality offset of 0, 5, 10, 15 or 20
 * transpose_idx  out: dir_d * 2 + dir_hv, i.e. a value in 0..3
 * sum            gradient sums indexed by ALF_DIR_VERT, ALF_DIR_HORZ,
 *                ALF_DIR_DIGA0 and ALF_DIR_DIGA1
 * ac             scale applied to the horizontal + vertical sum before it is
 *                shifted and looked up in arg_var
 */
260 36172496 static void FUNC(alf_get_idx)(int *class_idx, int *transpose_idx, const int *sum, const int ac)
261 {
// maps the 16 possible clipped activity values to 5 activity classes
262 static const int arg_var[] = {0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
263
264 int hv0, hv1, dir_hv, d0, d1, dir_d, hvd1, hvd0, sum_hv, dir1;
265
// larger (hv1) and smaller (hv0) of the vertical and horizontal sums
266 36172496 dir_hv = sum[ALF_DIR_VERT] <= sum[ALF_DIR_HORZ];
267 36172496 hv1 = FFMAX(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
268 36172496 hv0 = FFMIN(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
269
// larger (d1) and smaller (d0) of the two diagonal sums
270 36172496 dir_d = sum[ALF_DIR_DIGA0] <= sum[ALF_DIR_DIGA1];
271 36172496 d1 = FFMAX(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
272 36172496 d0 = FFMIN(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
273
// cross-multiplied comparison of the ratios d1 / d0 and hv1 / hv0, done in
// 64 bits so the products cannot overflow
274 //promote to avoid overflow
275 36172496 dir1 = (uint64_t)d1 * hv0 <= (uint64_t)hv1 * d0;
// keep the pair with the larger ratio: horizontal/vertical when dir1 is set,
// diagonal otherwise
276
2/2
✓ Branch 0 taken 13178722 times.
✓ Branch 1 taken 4907526 times.
36172496 hvd1 = dir1 ? hv1 : d1;
277
2/2
✓ Branch 0 taken 13178722 times.
✓ Branch 1 taken 4907526 times.
36172496 hvd0 = dir1 ? hv0 : d0;
278
// NOTE(review): av_clip_uintp2 is defined outside this view; presumably it
// clamps to [0, 15] here, which matches the 16 entries of arg_var
279 36172496 sum_hv = sum[ALF_DIR_HORZ] + sum[ALF_DIR_VERT];
280 36172496 *class_idx = arg_var[av_clip_uintp2(sum_hv * ac >> (BIT_DEPTH - 1), 4)];
// directionality offset: ratio above 4.5 adds 10 or 20, ratio above 2 adds
// 5 or 15, depending on dir1
281
2/2
✓ Branch 0 taken 3461416 times.
✓ Branch 1 taken 14624832 times.
36172496 if (hvd1 * 2 > 9 * hvd0)
282 6922832 *class_idx += ((dir1 << 1) + 2) * 5;
283
2/2
✓ Branch 0 taken 4614674 times.
✓ Branch 1 taken 10010158 times.
29249664 else if (hvd1 > 2 * hvd0)
284 9229348 *class_idx += ((dir1 << 1) + 1) * 5;
285
286 36172496 *transpose_idx = dir_d * 2 + dir_hv;
287 36172496 }
288
/*
 * ALF block classification.
 *
 * Pass 1 fills gradient_tmp with directional gradients for the area extended
 * by ALF_GRADIENT_BORDER samples on every side, sampled every
 * ALF_GRADIENT_STEP rows and columns, ALF_NUM_DIR ints per sampled position.
 * Pass 2 sums those gradients over a window around every
 * ALF_BLOCK_SIZE x ALF_BLOCK_SIZE block and hands the sums to alf_get_idx,
 * writing one class_idx and one transpose_idx entry per block in raster
 * order.
 *
 * class_idx, transpose_idx  out: one entry per block
 * _src, _src_stride         samples of the area and row pitch (pitch is
 *                           divided by sizeof(pixel) before use)
 * width, height             size of the classified area in samples
 * vb_pos                    row at which the gradient rows and the summing
 *                           window are cut (presumably the ALF virtual
 *                           boundary - confirm against the caller)
 * gradient_tmp              scratch buffer for the pass 1 gradients; must be
 *                           large enough for the extended area
 */
289 42280 static void FUNC(alf_classify)(int *class_idx, int *transpose_idx,
290 const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
291 const int vb_pos, int *gradient_tmp)
292 {
293 int *grad;
294
295 42280 const int h = height + ALF_GRADIENT_BORDER * 2;
296 42280 const int w = width + ALF_GRADIENT_BORDER * 2;
// window size, in gradient entries, summed per block in each direction
297 42280 const int size = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP;
// number of ints in one row of gradient_tmp
298 42280 const int gstride = (w / ALF_GRADIENT_STEP) * ALF_NUM_DIR;
299
300 42280 const pixel *src = (const pixel *)_src;
301 42280 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
// move back by the border plus one extra row, so that s1 in the loop below
// is the first row of the extended area
302 42280 src -= (ALF_GRADIENT_BORDER + 1) * src_stride + ALF_GRADIENT_BORDER;
303
304 42280 grad = gradient_tmp;
305
2/2
✓ Branch 0 taken 1243248 times.
✓ Branch 1 taken 21140 times.
2528776 for (int y = 0; y < h; y += ALF_GRADIENT_STEP) {
306 2486496 const pixel *s0 = src + y * src_stride;
307 2486496 const pixel *s1 = s0 + src_stride;
308 2486496 const pixel *s2 = s1 + src_stride;
309 2486496 const pixel *s3 = s2 + src_stride;
310
// at the two row groups next to vb_pos, the outermost row is replaced by its
// inner neighbour
311
2/2
✓ Branch 0 taken 16452 times.
✓ Branch 1 taken 1226796 times.
2486496 if (y == vb_pos) //above
312 32904 s3 = s2;
313
2/2
✓ Branch 0 taken 16452 times.
✓ Branch 1 taken 1210344 times.
2453592 else if (y == vb_pos + ALF_GRADIENT_BORDER)
314 32904 s0 = s1;
315
316
2/2
✓ Branch 0 taken 77326560 times.
✓ Branch 1 taken 1243248 times.
157139616 for (int x = 0; x < w; x += ALF_GRADIENT_STEP) {
// each gradient entry combines two samples: p0 at (row s1, column x) and p1
// at (row s2, column x + 1); a and b are the rows above and below each
317 //two points a time
318 154653120 const pixel *a0 = s0 + x;
319 154653120 const pixel *p0 = s1 + x;
320 154653120 const pixel *b0 = s2 + x;
321 154653120 const int val0 = (*p0) << 1;
322
323 154653120 const pixel *a1 = s1 + x + 1;
324 154653120 const pixel *p1 = s2 + x + 1;
325 154653120 const pixel *b1 = s3 + x + 1;
326 154653120 const int val1 = (*p1) << 1;
327
// absolute second differences along the four directions
328 154653120 grad[ALF_DIR_VERT] = FFABS(val0 - *a0 - *b0) + FFABS(val1 - *a1 - *b1);
329 154653120 grad[ALF_DIR_HORZ] = FFABS(val0 - *(p0 - 1) - *(p0 + 1)) + FFABS(val1 - *(p1 - 1) - *(p1 + 1));
330 154653120 grad[ALF_DIR_DIGA0] = FFABS(val0 - *(a0 - 1) - *(b0 + 1)) + FFABS(val1 - *(a1 - 1) - *(b1 + 1));
331 154653120 grad[ALF_DIR_DIGA1] = FFABS(val0 - *(a0 + 1) - *(b0 - 1)) + FFABS(val1 - *(a1 + 1) - *(b1 - 1));
332 154653120 grad += ALF_NUM_DIR;
333 }
334 }
335
336
2/2
✓ Branch 0 taken 600484 times.
✓ Branch 1 taken 21140 times.
1243248 for (int y = 0; y < height ; y += ALF_BLOCK_SIZE ) {
// rows of the summing window; trimmed on the side that would cross vb_pos
337 1200968 int start = 0;
338 1200968 int end = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP;
// activity scale passed to alf_get_idx; raised to 3 when the window is
// trimmed (presumably to compensate for the fewer rows summed)
339 1200968 int ac = 2;
340
2/2
✓ Branch 0 taken 16452 times.
✓ Branch 1 taken 584032 times.
1200968 if (y + ALF_BLOCK_SIZE == vb_pos) {
341 32904 end -= ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
342 32904 ac = 3;
343
2/2
✓ Branch 0 taken 16452 times.
✓ Branch 1 taken 567580 times.
1168064 } else if (y == vb_pos) {
344 32904 start += ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
345 32904 ac = 3;
346 }
347
2/2
✓ Branch 0 taken 18086248 times.
✓ Branch 1 taken 600484 times.
37373464 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
348 36172496 const int xg = x / ALF_GRADIENT_STEP;
349 36172496 const int yg = y / ALF_GRADIENT_STEP;
350 36172496 int sum[ALF_NUM_DIR] = { 0 };
351
// first gradient entry of the window belonging to this block
352 36172496 grad = gradient_tmp + (yg + start) * gstride + xg * ALF_NUM_DIR;
353 //todo: optimize this loop
354
2/2
✓ Branch 0 taken 71352148 times.
✓ Branch 1 taken 18086248 times.
178876792 for (int i = start; i < end; i++) {
355
2/2
✓ Branch 0 taken 285408592 times.
✓ Branch 1 taken 71352148 times.
713521480 for (int j = 0; j < size; j++) {
356 570817184 sum[ALF_DIR_VERT] += grad[ALF_DIR_VERT];
357 570817184 sum[ALF_DIR_HORZ] += grad[ALF_DIR_HORZ];
358 570817184 sum[ALF_DIR_DIGA0] += grad[ALF_DIR_DIGA0];
359 570817184 sum[ALF_DIR_DIGA1] += grad[ALF_DIR_DIGA1];
360 570817184 grad += ALF_NUM_DIR;
361 }
// jump to the same column in the next gradient row
362 142704296 grad += gstride - size * ALF_NUM_DIR;
363 }
364 36172496 FUNC(alf_get_idx)(class_idx, transpose_idx, sum, ac);
365
366 36172496 class_idx++;
367 36172496 transpose_idx++;
368 }
369 }
370
371 42280 }
372
/*
 * Expand per-class filter data into per-block luma coefficients and clip
 * values.
 *
 * For each of the size blocks, ALF_NUM_COEFF_LUMA coefficients and clip
 * values are appended to coeff and clip. The coefficient set is selected
 * through class_to_filt[class_idx[i]], the clip index set directly through
 * class_idx[i], and both are reordered by the permutation that belongs to
 * transpose_idx[i].
 *
 * coeff, clip       out: size * ALF_NUM_COEFF_LUMA entries each
 * class_idx         class of every block
 * transpose_idx     permutation selector of every block, used as a row index
 *                   into the 4-row index table
 * size              number of blocks
 * coeff_set         coefficient sets, ALF_NUM_COEFF_LUMA entries per filter
 * clip_idx_set      clip indices, ALF_NUM_COEFF_LUMA entries per class, each
 *                   used as an index into the 4-entry clip_set
 * class_to_filt     maps a class to the filter whose coefficients it uses
 */
373 37672 static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
374 const int *class_idx, const int *transpose_idx, const int size,
375 const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt)
376 {
// one coefficient permutation per transpose index; row 0 is the identity
377 const static int index[][ALF_NUM_COEFF_LUMA] = {
378 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
379 { 9, 4, 10, 8, 1, 5, 11, 7, 3, 0, 2, 6 },
380 { 0, 3, 2, 1, 8, 7, 6, 5, 4, 9, 10, 11 },
381 { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 },
382 };
383
// clip bounds selectable by a clip index, scaled with the bit depth
384 37672 const int16_t clip_set[] = {
385 1 << BIT_DEPTH, 1 << (BIT_DEPTH - 3), 1 << (BIT_DEPTH - 5), 1 << (BIT_DEPTH - 7)
386 };
387
388
2/2
✓ Branch 0 taken 17420392 times.
✓ Branch 1 taken 18836 times.
34878456 for (int i = 0; i < size; i++) {
389 34840784 const int16_t *src_coeff = coeff_set + class_to_filt[class_idx[i]] * ALF_NUM_COEFF_LUMA;
390 34840784 const uint8_t *clip_idx = clip_idx_set + class_idx[i] * ALF_NUM_COEFF_LUMA;
391
392
2/2
✓ Branch 0 taken 209044704 times.
✓ Branch 1 taken 17420392 times.
452930192 for (int j = 0; j < ALF_NUM_COEFF_LUMA; j++) {
// output position j takes the coefficient and clip stored at position idx
393 418089408 const int idx = index[transpose_idx[i]][j];
394 418089408 *coeff++ = src_coeff[idx];
395 418089408 *clip++ = clip_set[clip_idx[idx]];
396 }
397 }
398 37672 }
399
// the gradient direction indices are only needed by the ALF classification
// code above, so they are removed again here
400 #undef ALF_DIR_HORZ
401 #undef ALF_DIR_VERT
402 #undef ALF_DIR_DIGA0
403 #undef ALF_DIR_DIGA1
404
/*
 * Sample accessors for the deblocking code. All of them index a local
 * variable named pix using local variables xstride and ystride, so they can
 * only be used where those three names are in scope. pix[0] is Q0; the P
 * samples lie at negative multiples of xstride (P0 is the nearest) and the Q
 * samples at non-negative multiples (Q0 is the nearest). P(x) and Q(x) are
 * the same accessors with a run-time distance x.
 */
405 // line zero
406 #define P7 pix[-8 * xstride]
407 #define P6 pix[-7 * xstride]
408 #define P5 pix[-6 * xstride]
409 #define P4 pix[-5 * xstride]
410 #define P3 pix[-4 * xstride]
411 #define P2 pix[-3 * xstride]
412 #define P1 pix[-2 * xstride]
413 #define P0 pix[-1 * xstride]
414 #define Q0 pix[0 * xstride]
415 #define Q1 pix[1 * xstride]
416 #define Q2 pix[2 * xstride]
417 #define Q3 pix[3 * xstride]
418 #define Q4 pix[4 * xstride]
419 #define Q5 pix[5 * xstride]
420 #define Q6 pix[6 * xstride]
421 #define Q7 pix[7 * xstride]
422 #define P(x) pix[(-(x)-1) * xstride]
423 #define Q(x) pix[(x) * xstride]
424
// TP and TQ: the same samples three ystride steps further along the edge
425 // line three. used only for deblocking decision
426 #define TP7 pix[-8 * xstride + 3 * ystride]
427 #define TP6 pix[-7 * xstride + 3 * ystride]
428 #define TP5 pix[-6 * xstride + 3 * ystride]
429 #define TP4 pix[-5 * xstride + 3 * ystride]
430 #define TP3 pix[-4 * xstride + 3 * ystride]
431 #define TP2 pix[-3 * xstride + 3 * ystride]
432 #define TP1 pix[-2 * xstride + 3 * ystride]
433 #define TP0 pix[-1 * xstride + 3 * ystride]
434 #define TQ0 pix[0 * xstride + 3 * ystride]
435 #define TQ1 pix[1 * xstride + 3 * ystride]
436 #define TQ2 pix[2 * xstride + 3 * ystride]
437 #define TQ3 pix[3 * xstride + 3 * ystride]
438 #define TQ4 pix[4 * xstride + 3 * ystride]
439 #define TQ5 pix[5 * xstride + 3 * ystride]
440 #define TQ6 pix[6 * xstride + 3 * ystride]
441 #define TQ7 pix[7 * xstride + 3 * ystride]
442 #define TP(x) pix[(-(x)-1) * xstride + 3 * ystride]
443 #define TQ(x) pix[(x) * xstride + 3 * ystride]
444
// FP and FQ: the four nearest samples on each side, one ystride step along
// the edge (line one)
445 #define FP3 pix[-4 * xstride + 1 * ystride]
446 #define FP2 pix[-3 * xstride + 1 * ystride]
447 #define FP1 pix[-2 * xstride + 1 * ystride]
448 #define FP0 pix[-1 * xstride + 1 * ystride]
449 #define FQ0 pix[0 * xstride + 1 * ystride]
450 #define FQ1 pix[1 * xstride + 1 * ystride]
451 #define FQ2 pix[2 * xstride + 1 * ystride]
452 #define FQ3 pix[3 * xstride + 1 * ystride]
453
454 #include "libavcodec/h26x/h2656_deblock_template.c"
455
/*
 * Long-tap luma deblocking of four consecutive lines across one edge.
 *
 * For every line a middle value m is computed from the samples on both sides
 * of the edge. Each filtered sample is then replaced by a weighted blend of m
 * and a reference value taken at the far end of its side (weights sum to 64),
 * with the change against the original sample limited to a tc dependent
 * bound that shrinks with the distance from the edge.
 *
 * pix                   sample Q0 of the first line; advanced by ystride
 *                       after each line
 * xstride               step between neighbouring samples across the edge
 * ystride               step between consecutive lines along the edge
 * tc                    clipping strength
 * no_p, no_q            when non-zero the P side or Q side is left unmodified
 * max_len_p, max_len_q  number of samples modified on each side; the code
 *                       has dedicated paths for 3 and 5 and treats any other
 *                       value as 7 (presumably 3, 5 and 7 are the only values
 *                       passed - confirm against the caller)
 */
456 1479856 static void FUNC(loop_filter_luma_large)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc,
457 const uint8_t no_p, const uint8_t no_q, const uint8_t max_len_p, const uint8_t max_len_q)
458 {
459
2/2
✓ Branch 0 taken 2959712 times.
✓ Branch 1 taken 739928 times.
7399280 for (int d = 0; d < 4; d++) {
// snapshot of the unfiltered samples of this line; all sums below use these
// copies, not the values written back further down
460 5919424 const int p6 = P6;
461 5919424 const int p5 = P5;
462 5919424 const int p4 = P4;
463 5919424 const int p3 = P3;
464 5919424 const int p2 = P2;
465 5919424 const int p1 = P1;
466 5919424 const int p0 = P0;
467 5919424 const int q0 = Q0;
468 5919424 const int q1 = Q1;
469 5919424 const int q2 = Q2;
470 5919424 const int q3 = Q3;
471 5919424 const int q4 = Q4;
472 5919424 const int q5 = Q5;
473 5919424 const int q6 = Q6;
474 int m;
475
// middle value: a rounded weighted average whose support depends on the two
// filter lengths; every variant divides by 16 except the length-sum 8 case,
// which divides by 8
4/4
✓ Branch 0 taken 423640 times.
✓ Branch 1 taken 2536072 times.
✓ Branch 2 taken 238828 times.
✓ Branch 3 taken 184812 times.
5919424 if (max_len_p == 5 && max_len_q == 5)
476 477656 m = (p4 + p3 + 2 * (p2 + p1 + p0 + q0 + q1 + q2) + q3 + q4 + 8) >> 4;
477
2/2
✓ Branch 0 taken 1260656 times.
✓ Branch 1 taken 1460228 times.
5441768 else if (max_len_p == max_len_q)
478 2521312 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (p0 + q0) + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
479
2/2
✓ Branch 0 taken 273380 times.
✓ Branch 1 taken 1186848 times.
2920456 else if (max_len_p + max_len_q == 12)
480 546760 m = (p5 + p4 + p3 + p2 + 2 * (p1 + p0 + q0 + q1) + q2 + q3 + q4 + q5 + 8) >> 4;
481
2/2
✓ Branch 0 taken 263632 times.
✓ Branch 1 taken 923216 times.
2373696 else if (max_len_p + max_len_q == 8)
482 527264 m = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 + 4) >> 3;
483
2/2
✓ Branch 0 taken 669300 times.
✓ Branch 1 taken 253916 times.
1846432 else if (max_len_q == 7)
484 1338600 m = (2 * (p2 + p1 + p0 + q0) + p0 + p1 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
485 else
486 507832 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (q2 + q1 + q0 + p0) + q0 + q1 + 8) >> 4;
487
1/2
✓ Branch 0 taken 2959712 times.
✗ Branch 1 not taken.
5919424 if (!no_p) {
// reference value: rounded mean of the last modified P sample and the first
// unmodified one beyond it, read before any sample is written
488 5919424 const int refp = (P(max_len_p) + P(max_len_p - 1) + 1) >> 1;
489
2/2
✓ Branch 0 taken 881608 times.
✓ Branch 1 taken 2078104 times.
5919424 if (max_len_p == 3) {
490 1763216 P0 = p0 + av_clip(((m * 53 + refp * 11 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
491 1763216 P1 = p1 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p1, -(tc * 4 >> 1), (tc * 4 >> 1));
492 1763216 P2 = p2 + av_clip(((m * 11 + refp * 53 + 32) >> 6) - p2, -(tc * 2 >> 1), (tc * 2 >> 1));
493
2/2
✓ Branch 0 taken 423640 times.
✓ Branch 1 taken 1654464 times.
4156208 } else if (max_len_p == 5) {
494 847280 P0 = p0 + av_clip(((m * 58 + refp * 6 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
495 847280 P1 = p1 + av_clip(((m * 45 + refp * 19 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
496 847280 P2 = p2 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
497 847280 P3 = p3 + av_clip(((m * 19 + refp * 45 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
498 847280 P4 = p4 + av_clip(((m * 6 + refp * 58 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
499 } else {
// remaining case: seven samples are modified on the P side
500 3308928 P0 = p0 + av_clip(((m * 59 + refp * 5 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
501 3308928 P1 = p1 + av_clip(((m * 50 + refp * 14 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
502 3308928 P2 = p2 + av_clip(((m * 41 + refp * 23 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
503 3308928 P3 = p3 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
504 3308928 P4 = p4 + av_clip(((m * 23 + refp * 41 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
505 3308928 P5 = p5 + av_clip(((m * 14 + refp * 50 + 32) >> 6) - p5, -(tc * 1 >> 1), (tc * 1 >> 1));
506 3308928 P6 = p6 + av_clip(((m * 5 + refp * 59 + 32) >> 6) - p6, -(tc * 1 >> 1), (tc * 1 >> 1));
507 }
508 }
509
1/2
✓ Branch 0 taken 2959712 times.
✗ Branch 1 not taken.
5919424 if (!no_q) {
// Q side: same scheme and the same weight tables as the P side
510 5919424 const int refq = (Q(max_len_q) + Q(max_len_q - 1) + 1) >> 1;
511
2/2
✓ Branch 0 taken 305240 times.
✓ Branch 1 taken 2654472 times.
5919424 if (max_len_q == 3) {
512 610480 Q0 = q0 + av_clip(((m * 53 + refq * 11 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
513 610480 Q1 = q1 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q1, -(tc * 4 >> 1), (tc * 4 >> 1));
514 610480 Q2 = q2 + av_clip(((m * 11 + refq * 53 + 32) >> 6) - q2, -(tc * 2 >> 1), (tc * 2 >> 1));
515
2/2
✓ Branch 0 taken 591028 times.
✓ Branch 1 taken 2063444 times.
5308944 } else if (max_len_q == 5) {
516 1182056 Q0 = q0 + av_clip(((m * 58 + refq * 6 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
517 1182056 Q1 = q1 + av_clip(((m * 45 + refq * 19 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
518 1182056 Q2 = q2 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
519 1182056 Q3 = q3 + av_clip(((m * 19 + refq * 45 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
520 1182056 Q4 = q4 + av_clip(((m * 6 + refq * 58 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
521 } else {
522 4126888 Q0 = q0 + av_clip(((m * 59 + refq * 5 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
523 4126888 Q1 = q1 + av_clip(((m * 50 + refq * 14 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
524 4126888 Q2 = q2 + av_clip(((m * 41 + refq * 23 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
525 4126888 Q3 = q3 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
526 4126888 Q4 = q4 + av_clip(((m * 23 + refq * 41 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
527 4126888 Q5 = q5 + av_clip(((m * 14 + refq * 50 + 32) >> 6) - q5, -(tc * 1 >> 1), (tc * 1 >> 1));
528 4126888 Q6 = q6 + av_clip(((m * 5 + refq * 59 + 32) >> 6) - q6, -(tc * 1 >> 1), (tc * 1 >> 1));
529 }
530
531 }
// next line along the edge
532 5919424 pix += ystride;
533 }
534 1479856 }
535
536 7095970 static void FUNC(vvc_loop_filter_luma)(uint8_t* _pix, ptrdiff_t _xstride, ptrdiff_t _ystride,
537 const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
538 const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int hor_ctu_edge)
539 {
540 7095970 const ptrdiff_t xstride = _xstride / sizeof(pixel);
541 7095970 const ptrdiff_t ystride = _ystride / sizeof(pixel);
542
543
2/2
✓ Branch 0 taken 7095970 times.
✓ Branch 1 taken 3547985 times.
21287910 for (int i = 0; i < 2; i++) {
544 #if BIT_DEPTH < 10
545 179908 const int tc = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
546 #else
547 14012032 const int tc = _tc[i] << (BIT_DEPTH - 10);
548 #endif
549
2/2
✓ Branch 0 taken 6950073 times.
✓ Branch 1 taken 145897 times.
14191940 if (tc) {
550 13900146 pixel* pix = (pixel*)_pix + i * 4 * ystride;
551 13900146 const int dp0 = abs(P2 - 2 * P1 + P0);
552 13900146 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
553 13900146 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
554 13900146 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
555 13900146 const int d0 = dp0 + dq0;
556 13900146 const int d3 = dp3 + dq3;
557 13900146 const int tc25 = ((tc * 5 + 1) >> 1);
558
559 13900146 const int no_p = _no_p[i];
560 13900146 const int no_q = _no_q[i];
561
562 13900146 int max_len_p = _max_len_p[i];
563 13900146 int max_len_q = _max_len_q[i];
564
565
4/4
✓ Branch 0 taken 2014352 times.
✓ Branch 1 taken 4935721 times.
✓ Branch 2 taken 1600920 times.
✓ Branch 3 taken 413432 times.
13900146 const int large_p = (max_len_p > 3 && !hor_ctu_edge);
566 13900146 const int large_q = max_len_q > 3;
567
568 13900146 const int beta = _beta[i] << BIT_DEPTH - 8;
569 13900146 const int beta_3 = beta >> 3;
570 13900146 const int beta_2 = beta >> 2;
571
572
4/4
✓ Branch 0 taken 5349153 times.
✓ Branch 1 taken 1600920 times.
✓ Branch 2 taken 799471 times.
✓ Branch 3 taken 4549682 times.
13900146 if (large_p || large_q) {
573
2/2
✓ Branch 0 taken 1600920 times.
✓ Branch 1 taken 799471 times.
4800782 const int dp0l = large_p ? ((dp0 + abs(P5 - 2 * P4 + P3) + 1) >> 1) : dp0;
574
2/2
✓ Branch 0 taken 1994889 times.
✓ Branch 1 taken 405502 times.
4800782 const int dq0l = large_q ? ((dq0 + abs(Q5 - 2 * Q4 + Q3) + 1) >> 1) : dq0;
575
2/2
✓ Branch 0 taken 1600920 times.
✓ Branch 1 taken 799471 times.
4800782 const int dp3l = large_p ? ((dp3 + abs(TP5 - 2 * TP4 + TP3) + 1) >> 1) : dp3;
576
2/2
✓ Branch 0 taken 1994889 times.
✓ Branch 1 taken 405502 times.
4800782 const int dq3l = large_q ? ((dq3 + abs(TQ5 - 2 * TQ4 + TQ3) + 1) >> 1) : dq3;
577 4800782 const int d0l = dp0l + dq0l;
578 4800782 const int d3l = dp3l + dq3l;
579 4800782 const int beta53 = beta * 3 >> 5;
580 4800782 const int beta_4 = beta >> 4;
581
2/2
✓ Branch 0 taken 1600920 times.
✓ Branch 1 taken 799471 times.
4800782 max_len_p = large_p ? max_len_p : 3;
582
2/2
✓ Branch 0 taken 1994889 times.
✓ Branch 1 taken 405502 times.
4800782 max_len_q = large_q ? max_len_q : 3;
583
584
2/2
✓ Branch 0 taken 2245772 times.
✓ Branch 1 taken 154619 times.
4800782 if (d0l + d3l < beta) {
585
2/2
✓ Branch 0 taken 1236427 times.
✓ Branch 1 taken 1009345 times.
4491544 const int sp0l = abs(P3 - P0) + (max_len_p == 7 ? abs(P7 - P6 - P5 + P4) : 0);
586
2/2
✓ Branch 0 taken 1528317 times.
✓ Branch 1 taken 717455 times.
4491544 const int sq0l = abs(Q0 - Q3) + (max_len_q == 7 ? abs(Q4 - Q5 - Q6 + Q7) : 0);
587
2/2
✓ Branch 0 taken 1236427 times.
✓ Branch 1 taken 1009345 times.
4491544 const int sp3l = abs(TP3 - TP0) + (max_len_p == 7 ? abs(TP7 - TP6 - TP5 + TP4) : 0);
588
2/2
✓ Branch 0 taken 1528317 times.
✓ Branch 1 taken 717455 times.
4491544 const int sq3l = abs(TQ0 - TQ3) + (max_len_q == 7 ? abs(TQ4 - TQ5 - TQ6 + TQ7) : 0);
589
2/2
✓ Branch 0 taken 1505952 times.
✓ Branch 1 taken 739820 times.
4491544 const int sp0 = large_p ? ((sp0l + abs(P3 - P(max_len_p)) + 1) >> 1) : sp0l;
590
2/2
✓ Branch 0 taken 1505952 times.
✓ Branch 1 taken 739820 times.
4491544 const int sp3 = large_p ? ((sp3l + abs(TP3 - TP(max_len_p)) + 1) >> 1) : sp3l;
591
2/2
✓ Branch 0 taken 1883765 times.
✓ Branch 1 taken 362007 times.
4491544 const int sq0 = large_q ? ((sq0l + abs(Q3 - Q(max_len_q)) + 1) >> 1) : sq0l;
592
2/2
✓ Branch 0 taken 1883765 times.
✓ Branch 1 taken 362007 times.
4491544 const int sq3 = large_q ? ((sq3l + abs(TQ3 - TQ(max_len_q)) + 1) >> 1) : sq3l;
593
4/4
✓ Branch 0 taken 918379 times.
✓ Branch 1 taken 1327393 times.
✓ Branch 2 taken 916350 times.
✓ Branch 3 taken 2029 times.
4491544 if (sp0 + sq0 < beta53 && abs(P0 - Q0) < tc25 &&
594
4/4
✓ Branch 0 taken 810715 times.
✓ Branch 1 taken 105635 times.
✓ Branch 2 taken 810044 times.
✓ Branch 3 taken 671 times.
1832700 sp3 + sq3 < beta53 && abs(TP0 - TQ0) < tc25 &&
595
4/4
✓ Branch 0 taken 763519 times.
✓ Branch 1 taken 46525 times.
✓ Branch 2 taken 739928 times.
✓ Branch 3 taken 23591 times.
1620088 (d0l << 1) < beta_4 && (d3l << 1) < beta_4) {
596 1479856 FUNC(loop_filter_luma_large)(pix, xstride, ystride, tc, no_p, no_q, max_len_p, max_len_q);
597 1479856 continue;
598 }
599 }
600 }
601
2/2
✓ Branch 0 taken 4470025 times.
✓ Branch 1 taken 1740120 times.
12420290 if (d0 + d3 < beta) {
602
3/4
✓ Branch 0 taken 3286669 times.
✓ Branch 1 taken 1183356 times.
✓ Branch 2 taken 3286669 times.
✗ Branch 3 not taken.
8940050 if (max_len_p > 2 && max_len_q > 2 &&
603
4/4
✓ Branch 0 taken 1088497 times.
✓ Branch 1 taken 2198172 times.
✓ Branch 2 taken 1077434 times.
✓ Branch 3 taken 11063 times.
6573338 abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
604
4/4
✓ Branch 0 taken 818242 times.
✓ Branch 1 taken 259192 times.
✓ Branch 2 taken 815445 times.
✓ Branch 3 taken 2797 times.
2154868 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
605
4/4
✓ Branch 0 taken 803538 times.
✓ Branch 1 taken 11907 times.
✓ Branch 2 taken 798455 times.
✓ Branch 3 taken 5083 times.
1630890 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
606 1596910 FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc, tc << 1, tc * 3, no_p, no_q);
607 } else {
608 7343140 int nd_p = 1;
609 7343140 int nd_q = 1;
610
3/4
✓ Branch 0 taken 2649764 times.
✓ Branch 1 taken 1021806 times.
✓ Branch 2 taken 2649764 times.
✗ Branch 3 not taken.
7343140 if (max_len_p > 1 && max_len_q > 1) {
611
2/2
✓ Branch 0 taken 2134831 times.
✓ Branch 1 taken 514933 times.
5299528 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
612 4269662 nd_p = 2;
613
2/2
✓ Branch 0 taken 2044359 times.
✓ Branch 1 taken 605405 times.
5299528 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
614 4088718 nd_q = 2;
615 }
616 7343140 FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
617 }
618 }
619 }
620 }
621 7095970 }
622
623 3095272 static void FUNC(loop_filter_chroma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
624 const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
625 {
626
2/2
✓ Branch 0 taken 3472406 times.
✓ Branch 1 taken 1547636 times.
10040084 for (int d = 0; d < size; d++) {
627 6944812 const int p3 = P3;
628 6944812 const int p2 = P2;
629 6944812 const int p1 = P1;
630 6944812 const int p0 = P0;
631 6944812 const int q0 = Q0;
632 6944812 const int q1 = Q1;
633 6944812 const int q2 = Q2;
634 6944812 const int q3 = Q3;
635
1/2
✓ Branch 0 taken 3472406 times.
✗ Branch 1 not taken.
6944812 if (!no_p) {
636 6944812 P0 = av_clip((p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3, p0 - tc, p0 + tc);
637 6944812 P1 = av_clip((2 * p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3, p1 - tc, p1 + tc);
638 6944812 P2 = av_clip((3 * p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3, p2 - tc, p2 + tc );
639 }
640
1/2
✓ Branch 0 taken 3472406 times.
✗ Branch 1 not taken.
6944812 if (!no_q) {
641 6944812 Q0 = av_clip((p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3, q0 - tc, q0 + tc);
642 6944812 Q1 = av_clip((p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3, q1 - tc, q1 + tc);
643 6944812 Q2 = av_clip((p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3, q2 - tc, q2 + tc);
644 }
645 6944812 pix += ystride;
646 }
647 3095272 }
648
649 564806 static void FUNC(loop_filter_chroma_strong_one_side)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
650 const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
651 {
652
2/2
✓ Branch 0 taken 620064 times.
✓ Branch 1 taken 282403 times.
1804934 for (int d = 0; d < size; d++) {
653 1240128 const int p1 = P1;
654 1240128 const int p0 = P0;
655 1240128 const int q0 = Q0;
656 1240128 const int q1 = Q1;
657 1240128 const int q2 = Q2;
658 1240128 const int q3 = Q3;
659
1/2
✓ Branch 0 taken 620064 times.
✗ Branch 1 not taken.
1240128 if (!no_p) {
660 1240128 P0 = av_clip((3 * p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3, p0 - tc, p0 + tc);
661 }
662
1/2
✓ Branch 0 taken 620064 times.
✗ Branch 1 not taken.
1240128 if (!no_q) {
663 1240128 Q0 = av_clip((2 * p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3, q0 - tc, q0 + tc);
664 1240128 Q1 = av_clip((p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3, q1 - tc, q1 + tc);
665 1240128 Q2 = av_clip((p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3, q2 - tc, q2 + tc);
666 }
667 1240128 pix += ystride;
668 }
669 564806 }
670
671 2286726 static void FUNC(vvc_loop_filter_chroma)(uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride,
672 const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
673 const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int shift)
674 {
675 2286726 const ptrdiff_t xstride = _xstride / sizeof(pixel);
676 2286726 const ptrdiff_t ystride = _ystride / sizeof(pixel);
677
2/2
✓ Branch 0 taken 839454 times.
✓ Branch 1 taken 303909 times.
2286726 const int size = shift ? 2 : 4;
678 2286726 const int end = 8 / size; // 8 samples a loop
679
680
2/2
✓ Branch 0 taken 3965634 times.
✓ Branch 1 taken 1143363 times.
10217994 for (int i = 0; i < end; i++) {
681 #if BIT_DEPTH < 10
682 124656 const int tc = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
683 #else
684 7806612 const int tc = _tc[i] << (BIT_DEPTH - 10);
685 #endif
686
2/2
✓ Branch 0 taken 3811275 times.
✓ Branch 1 taken 154359 times.
7931268 if (tc) {
687 7622550 pixel *pix = (pixel *)_pix + i * size * ystride;
688 7622550 const uint8_t no_p = _no_p[i];
689 7622550 const uint8_t no_q = _no_q[i];
690
691 7622550 const int beta = _beta[i] << (BIT_DEPTH - 8);
692 7622550 const int beta_3 = beta >> 3;
693 7622550 const int beta_2 = beta >> 2;
694
695 7622550 const int tc25 = ((tc * 5 + 1) >> 1);
696
697 7622550 uint8_t max_len_p = _max_len_p[i];
698 7622550 uint8_t max_len_q = _max_len_q[i];
699
700
3/4
✓ Branch 0 taken 3733547 times.
✓ Branch 1 taken 77728 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3733547 times.
7622550 if (!max_len_p || !max_len_q)
701 155456 continue;
702
703
2/2
✓ Branch 0 taken 2710603 times.
✓ Branch 1 taken 1022944 times.
7467094 if (max_len_q == 3){
704
2/2
✓ Branch 0 taken 2220390 times.
✓ Branch 1 taken 490213 times.
5421206 const int p1n = shift ? FP1 : TP1;
705
4/4
✓ Branch 0 taken 2368542 times.
✓ Branch 1 taken 342061 times.
✓ Branch 2 taken 1924326 times.
✓ Branch 3 taken 444216 times.
5421206 const int p2n = max_len_p == 1 ? p1n : (shift ? FP2 : TP2);
706
2/2
✓ Branch 0 taken 2220390 times.
✓ Branch 1 taken 490213 times.
5421206 const int p0n = shift ? FP0 : TP0;
707
2/2
✓ Branch 0 taken 2220390 times.
✓ Branch 1 taken 490213 times.
5421206 const int q0n = shift ? FQ0 : TQ0;
708
2/2
✓ Branch 0 taken 2220390 times.
✓ Branch 1 taken 490213 times.
5421206 const int q1n = shift ? FQ1 : TQ1;
709
2/2
✓ Branch 0 taken 2220390 times.
✓ Branch 1 taken 490213 times.
5421206 const int q2n = shift ? FQ2 : TQ2;
710
2/2
✓ Branch 0 taken 342061 times.
✓ Branch 1 taken 2368542 times.
5421206 const int p3 = max_len_p == 1 ? P1 : P3;
711
2/2
✓ Branch 0 taken 342061 times.
✓ Branch 1 taken 2368542 times.
5421206 const int p2 = max_len_p == 1 ? P1 : P2;
712 5421206 const int p1 = P1;
713 5421206 const int p0 = P0;
714 5421206 const int dp0 = abs(p2 - 2 * p1 + p0);
715 5421206 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
716
717 5421206 const int dp1 = abs(p2n - 2 * p1n + p0n);
718 5421206 const int dq1 = abs(q2n - 2 * q1n + q0n);
719 5421206 const int d0 = dp0 + dq0;
720 5421206 const int d1 = dp1 + dq1;
721
722
2/2
✓ Branch 0 taken 2555685 times.
✓ Branch 1 taken 154918 times.
5421206 if (d0 + d1 < beta) {
723
4/4
✓ Branch 0 taken 2221666 times.
✓ Branch 1 taken 334019 times.
✓ Branch 2 taken 1880023 times.
✓ Branch 3 taken 341643 times.
5111370 const int p3n = max_len_p == 1 ? p1n : (shift ? FP3 : TP3);
724
2/2
✓ Branch 0 taken 2172901 times.
✓ Branch 1 taken 382784 times.
5111370 const int q3n = shift ? FQ3 : TQ3;
725
4/4
✓ Branch 0 taken 2287421 times.
✓ Branch 1 taken 268264 times.
✓ Branch 2 taken 1907152 times.
✓ Branch 3 taken 380269 times.
8925674 const int dsam0 = (d0 << 1) < beta_2 && (abs(p3 - p0) + abs(Q0 - Q3) < beta_3) &&
726
2/2
✓ Branch 0 taken 1900609 times.
✓ Branch 1 taken 6543 times.
3814304 abs(p0 - Q0) < tc25;
727
4/4
✓ Branch 0 taken 2289966 times.
✓ Branch 1 taken 265719 times.
✓ Branch 2 taken 1910196 times.
✓ Branch 3 taken 379770 times.
8931762 const int dsam1 = (d1 << 1) < beta_2 && (abs(p3n - p0n) + abs(q0n - q3n) < beta_3) &&
728
2/2
✓ Branch 0 taken 1903256 times.
✓ Branch 1 taken 6940 times.
3820392 abs(p0n - q0n) < tc25;
729
4/4
✓ Branch 0 taken 1900609 times.
✓ Branch 1 taken 655076 times.
✓ Branch 2 taken 70570 times.
✓ Branch 3 taken 1830039 times.
5111370 if (!dsam0 || !dsam1)
730 1451292 max_len_p = max_len_q = 1;
731 } else {
732 309836 max_len_p = max_len_q = 1;
733 }
734 }
735
736
3/4
✓ Branch 0 taken 1547636 times.
✓ Branch 1 taken 2185911 times.
✓ Branch 2 taken 1547636 times.
✗ Branch 3 not taken.
7467094 if (max_len_p == 3 && max_len_q == 3)
737 3095272 FUNC(loop_filter_chroma_strong)(pix, xstride, ystride, size, tc, no_p, no_q);
738
2/2
✓ Branch 0 taken 282403 times.
✓ Branch 1 taken 1903508 times.
4371822 else if (max_len_q == 3)
739 564806 FUNC(loop_filter_chroma_strong_one_side)(pix, xstride, ystride, size, tc, no_p, no_q);
740 else
741 3807016 FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
742 }
743 }
744 2286726 }
745
746 1174724 static void FUNC(vvc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
747 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
748 const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
749 {
750 1174724 FUNC(vvc_loop_filter_chroma)(pix, stride, sizeof(pixel), beta, tc,
751 no_p, no_q, max_len_p, max_len_q, shift);
752 1174724 }
753
754 1112002 static void FUNC(vvc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
755 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
756 const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
757 {
758 1112002 FUNC(vvc_loop_filter_chroma)(pix, sizeof(pixel), stride, beta, tc,
759 no_p, no_q, max_len_p, max_len_q, shift);
760 1112002 }
761
762 3674118 static void FUNC(vvc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
763 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
764 const uint8_t *max_len_p, const uint8_t *max_len_q, const int hor_ctu_edge)
765 {
766 3674118 FUNC(vvc_loop_filter_luma)(pix, stride, sizeof(pixel), beta, tc,
767 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
768 3674118 }
769
770 3421852 static void FUNC(vvc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
771 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
772 const uint8_t *max_len_p, const uint8_t *max_len_q, const int hor_ctu_edge)
773 {
774 3421852 FUNC(vvc_loop_filter_luma)(pix, sizeof(pixel), stride, beta, tc,
775 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
776 3421852 }
777
778 static int FUNC(vvc_loop_ladf_level)(const uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride)
779 {
780 const pixel *pix = (pixel *)_pix;
781 const ptrdiff_t xstride = _xstride / sizeof(pixel);
782 const ptrdiff_t ystride = _ystride / sizeof(pixel);
783 return (P0 + TP0 + Q0 + TQ0) >> 2;
784 }
785
786 static int FUNC(vvc_h_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
787 {
788 return FUNC(vvc_loop_ladf_level)(pix, stride, sizeof(pixel));
789 }
790
791 static int FUNC(vvc_v_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
792 {
793 return FUNC(vvc_loop_ladf_level)(pix, sizeof(pixel), stride);
794 }
795
796 #undef P7
797 #undef P6
798 #undef P5
799 #undef P4
800 #undef P3
801 #undef P2
802 #undef P1
803 #undef P0
804 #undef Q0
805 #undef Q1
806 #undef Q2
807 #undef Q3
808 #undef Q4
809 #undef Q5
810 #undef Q6
811 #undef Q7
812
813 #undef TP7
814 #undef TP6
815 #undef TP5
816 #undef TP4
817 #undef TP3
818 #undef TP2
819 #undef TP1
820 #undef TP0
821 #undef TQ0
822 #undef TQ1
823 #undef TQ2
824 #undef TQ3
825 #undef TQ4
826 #undef TQ5
827 #undef TQ6
828 #undef TQ7
829
830 2600 static void FUNC(ff_vvc_lmcs_dsp_init)(VVCLMCSDSPContext *const lmcs)
831 {
832 2600 lmcs->filter = FUNC(lmcs_filter_luma);
833 2600 }
834
835 2600 static void FUNC(ff_vvc_lf_dsp_init)(VVCLFDSPContext *const lf)
836 {
837 2600 lf->ladf_level[0] = FUNC(vvc_h_loop_ladf_level);
838 2600 lf->ladf_level[1] = FUNC(vvc_v_loop_ladf_level);
839 2600 lf->filter_luma[0] = FUNC(vvc_h_loop_filter_luma);
840 2600 lf->filter_luma[1] = FUNC(vvc_v_loop_filter_luma);
841 2600 lf->filter_chroma[0] = FUNC(vvc_h_loop_filter_chroma);
842 2600 lf->filter_chroma[1] = FUNC(vvc_v_loop_filter_chroma);
843 2600 }
844
845 2600 static void FUNC(ff_vvc_sao_dsp_init)(VVCSAODSPContext *const sao)
846 {
847
2/2
✓ Branch 0 taken 11700 times.
✓ Branch 1 taken 1300 times.
26000 for (int i = 0; i < FF_ARRAY_ELEMS(sao->band_filter); i++)
848 23400 sao->band_filter[i] = FUNC(sao_band_filter);
849
2/2
✓ Branch 0 taken 11700 times.
✓ Branch 1 taken 1300 times.
26000 for (int i = 0; i < FF_ARRAY_ELEMS(sao->edge_filter); i++)
850 23400 sao->edge_filter[i] = FUNC(sao_edge_filter);
851 2600 sao->edge_restore[0] = FUNC(sao_edge_restore_0);
852 2600 sao->edge_restore[1] = FUNC(sao_edge_restore_1);
853 2600 }
854
855 2600 static void FUNC(ff_vvc_alf_dsp_init)(VVCALFDSPContext *const alf)
856 {
857 2600 alf->filter[LUMA] = FUNC(alf_filter_luma);
858 2600 alf->filter[CHROMA] = FUNC(alf_filter_chroma);
859 2600 alf->filter_cc = FUNC(alf_filter_cc);
860 2600 alf->classify = FUNC(alf_classify);
861 2600 alf->recon_coeff_and_clip = FUNC(alf_recon_coeff_and_clip);
862 2600 }
863