FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/filter_template.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 523 523 100.0%
Functions: 51 72 70.8%
Branches: 316 336 94.0%

Line Branch Exec Source
1 /*
2 * VVC filters DSP
3 *
4 * Copyright (C) 2022 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "libavcodec/h26x/h2656_sao_template.c"
24
25 410676 static void FUNC(lmcs_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const int width, const int height, const void *_lut)
26 {
27 410676 const pixel *lut = _lut;
28 410676 pixel *dst = (pixel*)_dst;
29 410676 dst_stride /= sizeof(pixel);
30
31
2/2
✓ Branch 0 taken 7211480 times.
✓ Branch 1 taken 205338 times.
14833636 for (int y = 0; y < height; y++) {
32
2/2
✓ Branch 0 taken 560131872 times.
✓ Branch 1 taken 7211480 times.
1134686704 for (int x = 0; x < width; x++)
33 1120263744 dst[x] = lut[dst[x]];
34 14422960 dst += dst_stride;
35 }
36 410676 }
37
38 9400504320 static av_always_inline int16_t FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, int16_t clip)
39 {
40 9400504320 return av_clip(v0 - curr, -clip, clip) + av_clip(v1 - curr, -clip, clip);
41 }
42
43 43710 static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride,
44 const int width, const int height, const int16_t *filter, const int16_t *clip, const int vb_pos)
45 {
46 43710 const pixel *src = (pixel *)_src;
47 43710 const int shift = 7;
48 43710 const int offset = 1 << ( shift - 1 );
49 43710 const int vb_above = vb_pos - 4;
50 43710 const int vb_below = vb_pos + 3;
51
52 43710 dst_stride /= sizeof(pixel);
53 43710 src_stride /= sizeof(pixel);
54
55
2/2
✓ Branch 0 taken 622738 times.
✓ Branch 1 taken 21855 times.
1289186 for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
56
2/2
✓ Branch 0 taken 18725176 times.
✓ Branch 1 taken 622738 times.
38695828 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
57 37450352 const pixel *s0 = src + y * src_stride + x;
58 37450352 const pixel *s1 = s0 + src_stride;
59 37450352 const pixel *s2 = s0 - src_stride;
60 37450352 const pixel *s3 = s1 + src_stride;
61 37450352 const pixel *s4 = s2 - src_stride;
62 37450352 const pixel *s5 = s3 + src_stride;
63 37450352 const pixel *s6 = s4 - src_stride;
64
65
2/2
✓ Branch 0 taken 74900704 times.
✓ Branch 1 taken 18725176 times.
187251760 for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
66 149801408 pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
67
68 149801408 const pixel *p0 = s0 + i * src_stride;
69 149801408 const pixel *p1 = s1 + i * src_stride;
70 149801408 const pixel *p2 = s2 + i * src_stride;
71 149801408 const pixel *p3 = s3 + i * src_stride;
72 149801408 const pixel *p4 = s4 + i * src_stride;
73 149801408 const pixel *p5 = s5 + i * src_stride;
74 149801408 const pixel *p6 = s6 + i * src_stride;
75
76
4/4
✓ Branch 0 taken 72849880 times.
✓ Branch 1 taken 2050824 times.
✓ Branch 2 taken 512706 times.
✓ Branch 3 taken 72337174 times.
149801408 const int is_near_vb_above = (y + i < vb_pos) && (y + i >= vb_pos - 1);
77
4/4
✓ Branch 0 taken 2050824 times.
✓ Branch 1 taken 72849880 times.
✓ Branch 2 taken 512706 times.
✓ Branch 3 taken 1538118 times.
149801408 const int is_near_vb_below = (y + i >= vb_pos) && (y + i <= vb_pos);
78
4/4
✓ Branch 0 taken 74387998 times.
✓ Branch 1 taken 512706 times.
✓ Branch 2 taken 512706 times.
✓ Branch 3 taken 73875292 times.
149801408 const int is_near_vb = is_near_vb_above || is_near_vb_below;
79
80
4/4
✓ Branch 0 taken 72849880 times.
✓ Branch 1 taken 2050824 times.
✓ Branch 2 taken 1538118 times.
✓ Branch 3 taken 71311762 times.
149801408 if ((y + i < vb_pos) && ((y + i) > vb_above)) {
81
2/2
✓ Branch 0 taken 512706 times.
✓ Branch 1 taken 1025412 times.
3076236 p1 = (y + i == vb_pos - 1) ? p0 : p1;
82
2/2
✓ Branch 0 taken 1025412 times.
✓ Branch 1 taken 512706 times.
3076236 p3 = (y + i >= vb_pos - 2) ? p1 : p3;
83
1/2
✓ Branch 0 taken 1538118 times.
✗ Branch 1 not taken.
3076236 p5 = (y + i >= vb_pos - 3) ? p3 : p5;
84
85
2/2
✓ Branch 0 taken 512706 times.
✓ Branch 1 taken 1025412 times.
3076236 p2 = (y + i == vb_pos - 1) ? p0 : p2;
86
2/2
✓ Branch 0 taken 1025412 times.
✓ Branch 1 taken 512706 times.
3076236 p4 = (y + i >= vb_pos - 2) ? p2 : p4;
87
1/2
✓ Branch 0 taken 1538118 times.
✗ Branch 1 not taken.
3076236 p6 = (y + i >= vb_pos - 3) ? p4 : p6;
88
4/4
✓ Branch 0 taken 2050824 times.
✓ Branch 1 taken 71311762 times.
✓ Branch 2 taken 1538118 times.
✓ Branch 3 taken 512706 times.
146725172 } else if ((y + i >= vb_pos) && ((y + i) < vb_below)) {
89
2/2
✓ Branch 0 taken 512706 times.
✓ Branch 1 taken 1025412 times.
3076236 p2 = (y + i == vb_pos ) ? p0 : p2;
90
2/2
✓ Branch 0 taken 1025412 times.
✓ Branch 1 taken 512706 times.
3076236 p4 = (y + i <= vb_pos + 1) ? p2 : p4;
91
1/2
✓ Branch 0 taken 1538118 times.
✗ Branch 1 not taken.
3076236 p6 = (y + i <= vb_pos + 2) ? p4 : p6;
92
93
2/2
✓ Branch 0 taken 512706 times.
✓ Branch 1 taken 1025412 times.
3076236 p1 = (y + i == vb_pos ) ? p0 : p1;
94
2/2
✓ Branch 0 taken 1025412 times.
✓ Branch 1 taken 512706 times.
3076236 p3 = (y + i <= vb_pos + 1) ? p1 : p3;
95
1/2
✓ Branch 0 taken 1538118 times.
✗ Branch 1 not taken.
3076236 p5 = (y + i <= vb_pos + 2) ? p3 : p5;
96 }
97
98
2/2
✓ Branch 0 taken 299602816 times.
✓ Branch 1 taken 74900704 times.
749007040 for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
99 599205632 int sum = 0;
100 599205632 const pixel curr = *p0;
101
102 599205632 sum += filter[0] * FUNC(alf_clip)(curr, p5[+0], p6[+0], clip[0]);
103 599205632 sum += filter[1] * FUNC(alf_clip)(curr, p3[+1], p4[-1], clip[1]);
104 599205632 sum += filter[2] * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[2]);
105 599205632 sum += filter[3] * FUNC(alf_clip)(curr, p3[-1], p4[+1], clip[3]);
106 599205632 sum += filter[4] * FUNC(alf_clip)(curr, p1[+2], p2[-2], clip[4]);
107 599205632 sum += filter[5] * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[5]);
108 599205632 sum += filter[6] * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[6]);
109 599205632 sum += filter[7] * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[7]);
110 599205632 sum += filter[8] * FUNC(alf_clip)(curr, p1[-2], p2[+2], clip[8]);
111 599205632 sum += filter[9] * FUNC(alf_clip)(curr, p0[+3], p0[-3], clip[9]);
112 599205632 sum += filter[10] * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[10]);
113 599205632 sum += filter[11] * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[11]);
114
115
2/2
✓ Branch 0 taken 295501168 times.
✓ Branch 1 taken 4101648 times.
599205632 if (!is_near_vb)
116 591002336 sum = (sum + offset) >> shift;
117 else
118 8203296 sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3);
119 599205632 sum += curr;
120 599205632 dst[j] = CLIP(sum);
121
122 599205632 p0++;
123 599205632 p1++;
124 599205632 p2++;
125 599205632 p3++;
126 599205632 p4++;
127 599205632 p5++;
128 599205632 p6++;
129 }
130 }
131 37450352 filter += ALF_NUM_COEFF_LUMA;
132 37450352 clip += ALF_NUM_COEFF_LUMA;
133 }
134 }
135 43710 }
136
137 58746 static void FUNC(alf_filter_chroma)(uint8_t* _dst, ptrdiff_t dst_stride, const uint8_t* _src, ptrdiff_t src_stride,
138 const int width, const int height, const int16_t* filter, const int16_t* clip, const int vb_pos)
139 {
140 58746 const pixel *src = (pixel *)_src;
141 58746 const int shift = 7;
142 58746 const int offset = 1 << ( shift - 1 );
143 58746 const int vb_above = vb_pos - 2;
144 58746 const int vb_below = vb_pos + 1;
145
146 58746 dst_stride /= sizeof(pixel);
147 58746 src_stride /= sizeof(pixel);
148
149
2/2
✓ Branch 0 taken 536605 times.
✓ Branch 1 taken 29373 times.
1131956 for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
150
2/2
✓ Branch 0 taken 11510608 times.
✓ Branch 1 taken 536605 times.
24094426 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
151 23021216 const pixel *s0 = src + y * src_stride + x;
152 23021216 const pixel *s1 = s0 + src_stride;
153 23021216 const pixel *s2 = s0 - src_stride;
154 23021216 const pixel *s3 = s1 + src_stride;
155 23021216 const pixel *s4 = s2 - src_stride;
156
157
2/2
✓ Branch 0 taken 46042432 times.
✓ Branch 1 taken 11510608 times.
115106080 for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
158 92084864 pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
159
160 92084864 const pixel *p0 = s0 + i * src_stride;
161 92084864 const pixel *p1 = s1 + i * src_stride;
162 92084864 const pixel *p2 = s2 + i * src_stride;
163 92084864 const pixel *p3 = s3 + i * src_stride;
164 92084864 const pixel *p4 = s4 + i * src_stride;
165
166
4/4
✓ Branch 0 taken 45152398 times.
✓ Branch 1 taken 890034 times.
✓ Branch 2 taken 445017 times.
✓ Branch 3 taken 44707381 times.
92084864 const int is_near_vb_above = (y + i < vb_pos) && (y + i >= vb_pos - 1);
167
4/4
✓ Branch 0 taken 890034 times.
✓ Branch 1 taken 45152398 times.
✓ Branch 2 taken 445017 times.
✓ Branch 3 taken 445017 times.
92084864 const int is_near_vb_below = (y + i >= vb_pos) && (y + i <= vb_pos);
168
4/4
✓ Branch 0 taken 45597415 times.
✓ Branch 1 taken 445017 times.
✓ Branch 2 taken 445017 times.
✓ Branch 3 taken 45152398 times.
92084864 const int is_near_vb = is_near_vb_above || is_near_vb_below;
169
170
4/4
✓ Branch 0 taken 45152398 times.
✓ Branch 1 taken 890034 times.
✓ Branch 2 taken 890034 times.
✓ Branch 3 taken 44262364 times.
92084864 if ((y + i < vb_pos) && ((y + i) >= vb_above)) {
171
2/2
✓ Branch 0 taken 445017 times.
✓ Branch 1 taken 445017 times.
1780068 p1 = (y + i == vb_pos - 1) ? p0 : p1;
172
1/2
✓ Branch 0 taken 890034 times.
✗ Branch 1 not taken.
1780068 p3 = (y + i >= vb_pos - 2) ? p1 : p3;
173
174
2/2
✓ Branch 0 taken 445017 times.
✓ Branch 1 taken 445017 times.
1780068 p2 = (y + i == vb_pos - 1) ? p0 : p2;
175
1/2
✓ Branch 0 taken 890034 times.
✗ Branch 1 not taken.
1780068 p4 = (y + i >= vb_pos - 2) ? p2 : p4;
176
3/4
✓ Branch 0 taken 890034 times.
✓ Branch 1 taken 44262364 times.
✓ Branch 2 taken 890034 times.
✗ Branch 3 not taken.
90304796 } else if ((y + i >= vb_pos) && ((y + i) <= vb_below)) {
177
2/2
✓ Branch 0 taken 445017 times.
✓ Branch 1 taken 445017 times.
1780068 p2 = (y + i == vb_pos ) ? p0 : p2;
178
1/2
✓ Branch 0 taken 890034 times.
✗ Branch 1 not taken.
1780068 p4 = (y + i <= vb_pos + 1) ? p2 : p4;
179
180
2/2
✓ Branch 0 taken 445017 times.
✓ Branch 1 taken 445017 times.
1780068 p1 = (y + i == vb_pos ) ? p0 : p1;
181
1/2
✓ Branch 0 taken 890034 times.
✗ Branch 1 not taken.
1780068 p3 = (y + i <= vb_pos + 1) ? p1 : p3;
182 }
183
184
2/2
✓ Branch 0 taken 184169728 times.
✓ Branch 1 taken 46042432 times.
460424320 for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
185 368339456 int sum = 0;
186 368339456 const pixel curr = *p0;
187
188 368339456 sum += filter[0] * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[0]);
189 368339456 sum += filter[1] * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[1]);
190 368339456 sum += filter[2] * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[2]);
191 368339456 sum += filter[3] * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[3]);
192 368339456 sum += filter[4] * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[4]);
193 368339456 sum += filter[5] * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[5]);
194
195
2/2
✓ Branch 0 taken 180609592 times.
✓ Branch 1 taken 3560136 times.
368339456 if (!is_near_vb)
196 361219184 sum = (sum + offset) >> shift;
197 else
198 7120272 sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3);
199 368339456 sum += curr;
200 368339456 dst[j] = CLIP(sum);
201
202 368339456 p0++;
203 368339456 p1++;
204 368339456 p2++;
205 368339456 p3++;
206 368339456 p4++;
207 }
208 }
209 }
210 }
211 58746 }
212
213 21038 static void FUNC(alf_filter_cc)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_luma, const ptrdiff_t luma_stride,
214 const int width, const int height, const int hs, const int vs, const int16_t *filter, const int vb_pos)
215 {
216 21038 const ptrdiff_t stride = luma_stride / sizeof(pixel);
217
218 21038 dst_stride /= sizeof(pixel);
219
220
2/2
✓ Branch 0 taken 797156 times.
✓ Branch 1 taken 10519 times.
1615350 for (int y = 0; y < height; y++) {
221
2/2
✓ Branch 0 taken 70994816 times.
✓ Branch 1 taken 797156 times.
143583944 for (int x = 0; x < width; x++) {
222 141989632 int sum = 0;
223 141989632 pixel *dst = (pixel *)_dst + y * dst_stride + x;
224 141989632 const pixel *src = (pixel *)_luma + (y << vs) * stride + (x << hs);
225
226 141989632 const pixel *s0 = src - stride;
227 141989632 const pixel *s1 = src;
228 141989632 const pixel *s2 = src + stride;
229 141989632 const pixel *s3 = src + 2 * stride;
230
231 141989632 const int pos = y << vs;
232
6/6
✓ Branch 0 taken 41829376 times.
✓ Branch 1 taken 29165440 times.
✓ Branch 2 taken 41522688 times.
✓ Branch 3 taken 306688 times.
✓ Branch 4 taken 306688 times.
✓ Branch 5 taken 41216000 times.
141989632 if (!vs && (pos == vb_pos || pos == vb_pos + 1))
233 1226752 continue;
234
235
3/4
✓ Branch 0 taken 69657456 times.
✓ Branch 1 taken 723984 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 69657456 times.
140762880 if (pos == (vb_pos - 2) || pos == (vb_pos + 1))
236 1447968 s3 = s2;
237
4/4
✓ Branch 0 taken 69350768 times.
✓ Branch 1 taken 306688 times.
✓ Branch 2 taken 417296 times.
✓ Branch 3 taken 68933472 times.
139314912 else if (pos == (vb_pos - 1) || pos == vb_pos)
238 1447968 s3 = s2 = s0 = s1;
239
240
241 140762880 sum += filter[0] * (*s0 - *src);
242 140762880 sum += filter[1] * (*(s1 - 1) - *src);
243 140762880 sum += filter[2] * (*(s1 + 1) - *src);
244 140762880 sum += filter[3] * (*(s2 - 1) - *src);
245 140762880 sum += filter[4] * (*s2 - *src);
246 140762880 sum += filter[5] * (*(s2 + 1) - *src);
247 140762880 sum += filter[6] * (*s3 - *src);
248 140762880 sum = av_clip((sum + 64) >> 7, -(1 << (BIT_DEPTH - 1)), (1 << (BIT_DEPTH - 1)) - 1);
249 140762880 sum += *dst;
250 140762880 *dst = av_clip_pixel(sum);
251 }
252 }
253 21038 }
254
255 #define ALF_DIR_VERT 0
256 #define ALF_DIR_HORZ 1
257 #define ALF_DIR_DIGA0 2
258 #define ALF_DIR_DIGA1 3
259
260 37450352 static void FUNC(alf_get_idx)(int *class_idx, int *transpose_idx, const int *sum, const int ac)
261 {
262 static const int arg_var[] = {0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
263
264 int hv0, hv1, dir_hv, d0, d1, dir_d, hvd1, hvd0, sum_hv, dir1;
265
266 37450352 dir_hv = sum[ALF_DIR_VERT] <= sum[ALF_DIR_HORZ];
267 37450352 hv1 = FFMAX(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
268 37450352 hv0 = FFMIN(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
269
270 37450352 dir_d = sum[ALF_DIR_DIGA0] <= sum[ALF_DIR_DIGA1];
271 37450352 d1 = FFMAX(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
272 37450352 d0 = FFMIN(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
273
274 //promote to avoid overflow
275 37450352 dir1 = (uint64_t)d1 * hv0 <= (uint64_t)hv1 * d0;
276
2/2
✓ Branch 0 taken 13776204 times.
✓ Branch 1 taken 4948972 times.
37450352 hvd1 = dir1 ? hv1 : d1;
277
2/2
✓ Branch 0 taken 13776204 times.
✓ Branch 1 taken 4948972 times.
37450352 hvd0 = dir1 ? hv0 : d0;
278
279 37450352 sum_hv = sum[ALF_DIR_HORZ] + sum[ALF_DIR_VERT];
280 37450352 *class_idx = arg_var[av_clip_uintp2(sum_hv * ac >> (BIT_DEPTH - 1), 4)];
281
2/2
✓ Branch 0 taken 3704008 times.
✓ Branch 1 taken 15021168 times.
37450352 if (hvd1 * 2 > 9 * hvd0)
282 7408016 *class_idx += ((dir1 << 1) + 2) * 5;
283
2/2
✓ Branch 0 taken 4730731 times.
✓ Branch 1 taken 10290437 times.
30042336 else if (hvd1 > 2 * hvd0)
284 9461462 *class_idx += ((dir1 << 1) + 1) * 5;
285
286 37450352 *transpose_idx = dir_d * 2 + dir_hv;
287 37450352 }
288
289 43710 static void FUNC(alf_classify)(int *class_idx, int *transpose_idx,
290 const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
291 const int vb_pos, int *gradient_tmp)
292 {
293 int *grad;
294
295 43710 const int h = height + ALF_GRADIENT_BORDER * 2;
296 43710 const int w = width + ALF_GRADIENT_BORDER * 2;
297 43710 const int size = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP;
298 43710 const int gstride = (w / ALF_GRADIENT_STEP) * ALF_NUM_DIR;
299
300 43710 const pixel *src = (const pixel *)_src;
301 43710 const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
302 43710 src -= (ALF_GRADIENT_BORDER + 1) * src_stride + ALF_GRADIENT_BORDER;
303
304 43710 grad = gradient_tmp;
305
2/2
✓ Branch 0 taken 1289186 times.
✓ Branch 1 taken 21855 times.
2622082 for (int y = 0; y < h; y += ALF_GRADIENT_STEP) {
306 2578372 const pixel *s0 = src + y * src_stride;
307 2578372 const pixel *s1 = s0 + src_stride;
308 2578372 const pixel *s2 = s1 + src_stride;
309 2578372 const pixel *s3 = s2 + src_stride;
310
311
2/2
✓ Branch 0 taken 17023 times.
✓ Branch 1 taken 1272163 times.
2578372 if (y == vb_pos) //above
312 34046 s3 = s2;
313
2/2
✓ Branch 0 taken 17023 times.
✓ Branch 1 taken 1255140 times.
2544326 else if (y == vb_pos + ALF_GRADIENT_BORDER)
314 34046 s0 = s1;
315
316
2/2
✓ Branch 0 taken 80056260 times.
✓ Branch 1 taken 1289186 times.
162690892 for (int x = 0; x < w; x += ALF_GRADIENT_STEP) {
317 //two points a time
318 160112520 const pixel *a0 = s0 + x;
319 160112520 const pixel *p0 = s1 + x;
320 160112520 const pixel *b0 = s2 + x;
321 160112520 const int val0 = (*p0) << 1;
322
323 160112520 const pixel *a1 = s1 + x + 1;
324 160112520 const pixel *p1 = s2 + x + 1;
325 160112520 const pixel *b1 = s3 + x + 1;
326 160112520 const int val1 = (*p1) << 1;
327
328 160112520 grad[ALF_DIR_VERT] = FFABS(val0 - *a0 - *b0) + FFABS(val1 - *a1 - *b1);
329 160112520 grad[ALF_DIR_HORZ] = FFABS(val0 - *(p0 - 1) - *(p0 + 1)) + FFABS(val1 - *(p1 - 1) - *(p1 + 1));
330 160112520 grad[ALF_DIR_DIGA0] = FFABS(val0 - *(a0 - 1) - *(b0 + 1)) + FFABS(val1 - *(a1 - 1) - *(b1 + 1));
331 160112520 grad[ALF_DIR_DIGA1] = FFABS(val0 - *(a0 + 1) - *(b0 - 1)) + FFABS(val1 - *(a1 + 1) - *(b1 - 1));
332 160112520 grad += ALF_NUM_DIR;
333 }
334 }
335
336
2/2
✓ Branch 0 taken 622738 times.
✓ Branch 1 taken 21855 times.
1289186 for (int y = 0; y < height ; y += ALF_BLOCK_SIZE ) {
337 1245476 int start = 0;
338 1245476 int end = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP;
339 1245476 int ac = 2;
340
2/2
✓ Branch 0 taken 17023 times.
✓ Branch 1 taken 605715 times.
1245476 if (y + ALF_BLOCK_SIZE == vb_pos) {
341 34046 end -= ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
342 34046 ac = 3;
343
2/2
✓ Branch 0 taken 17023 times.
✓ Branch 1 taken 588692 times.
1211430 } else if (y == vb_pos) {
344 34046 start += ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
345 34046 ac = 3;
346 }
347
2/2
✓ Branch 0 taken 18725176 times.
✓ Branch 1 taken 622738 times.
38695828 for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
348 37450352 const int xg = x / ALF_GRADIENT_STEP;
349 37450352 const int yg = y / ALF_GRADIENT_STEP;
350 37450352 int sum[ALF_NUM_DIR] = { 0 };
351
352 37450352 grad = gradient_tmp + (yg + start) * gstride + xg * ALF_NUM_DIR;
353 //todo: optimize this loop
354
2/2
✓ Branch 0 taken 73874112 times.
✓ Branch 1 taken 18725176 times.
185198576 for (int i = start; i < end; i++) {
355
2/2
✓ Branch 0 taken 295496448 times.
✓ Branch 1 taken 73874112 times.
738741120 for (int j = 0; j < size; j++) {
356 590992896 sum[ALF_DIR_VERT] += grad[ALF_DIR_VERT];
357 590992896 sum[ALF_DIR_HORZ] += grad[ALF_DIR_HORZ];
358 590992896 sum[ALF_DIR_DIGA0] += grad[ALF_DIR_DIGA0];
359 590992896 sum[ALF_DIR_DIGA1] += grad[ALF_DIR_DIGA1];
360 590992896 grad += ALF_NUM_DIR;
361 }
362 147748224 grad += gstride - size * ALF_NUM_DIR;
363 }
364 37450352 FUNC(alf_get_idx)(class_idx, transpose_idx, sum, ac);
365
366 37450352 class_idx++;
367 37450352 transpose_idx++;
368 }
369 }
370
371 43710 }
372
373 39102 static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
374 const int *class_idx, const int *transpose_idx, const int size,
375 const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt)
376 {
377 const static int index[][ALF_NUM_COEFF_LUMA] = {
378 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
379 { 9, 4, 10, 8, 1, 5, 11, 7, 3, 0, 2, 6 },
380 { 0, 3, 2, 1, 8, 7, 6, 5, 4, 9, 10, 11 },
381 { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 },
382 };
383
384 39102 const int16_t clip_set[] = {
385 1 << BIT_DEPTH, 1 << (BIT_DEPTH - 3), 1 << (BIT_DEPTH - 5), 1 << (BIT_DEPTH - 7)
386 };
387
388
2/2
✓ Branch 0 taken 18059320 times.
✓ Branch 1 taken 19551 times.
36157742 for (int i = 0; i < size; i++) {
389 36118640 const int16_t *src_coeff = coeff_set + class_to_filt[class_idx[i]] * ALF_NUM_COEFF_LUMA;
390 36118640 const uint8_t *clip_idx = clip_idx_set + class_idx[i] * ALF_NUM_COEFF_LUMA;
391
392
2/2
✓ Branch 0 taken 216711840 times.
✓ Branch 1 taken 18059320 times.
469542320 for (int j = 0; j < ALF_NUM_COEFF_LUMA; j++) {
393 433423680 const int idx = index[transpose_idx[i]][j];
394 433423680 *coeff++ = src_coeff[idx];
395 433423680 *clip++ = clip_set[clip_idx[idx]];
396 }
397 }
398 39102 }
399
400 #undef ALF_DIR_HORZ
401 #undef ALF_DIR_VERT
402 #undef ALF_DIR_DIGA0
403 #undef ALF_DIR_DIGA1
404
405 // line zero
406 #define P7 pix[-8 * xstride]
407 #define P6 pix[-7 * xstride]
408 #define P5 pix[-6 * xstride]
409 #define P4 pix[-5 * xstride]
410 #define P3 pix[-4 * xstride]
411 #define P2 pix[-3 * xstride]
412 #define P1 pix[-2 * xstride]
413 #define P0 pix[-1 * xstride]
414 #define Q0 pix[0 * xstride]
415 #define Q1 pix[1 * xstride]
416 #define Q2 pix[2 * xstride]
417 #define Q3 pix[3 * xstride]
418 #define Q4 pix[4 * xstride]
419 #define Q5 pix[5 * xstride]
420 #define Q6 pix[6 * xstride]
421 #define Q7 pix[7 * xstride]
422 #define P(x) pix[(-(x)-1) * xstride]
423 #define Q(x) pix[(x) * xstride]
424
425 // line three. used only for deblocking decision
426 #define TP7 pix[-8 * xstride + 3 * ystride]
427 #define TP6 pix[-7 * xstride + 3 * ystride]
428 #define TP5 pix[-6 * xstride + 3 * ystride]
429 #define TP4 pix[-5 * xstride + 3 * ystride]
430 #define TP3 pix[-4 * xstride + 3 * ystride]
431 #define TP2 pix[-3 * xstride + 3 * ystride]
432 #define TP1 pix[-2 * xstride + 3 * ystride]
433 #define TP0 pix[-1 * xstride + 3 * ystride]
434 #define TQ0 pix[0 * xstride + 3 * ystride]
435 #define TQ1 pix[1 * xstride + 3 * ystride]
436 #define TQ2 pix[2 * xstride + 3 * ystride]
437 #define TQ3 pix[3 * xstride + 3 * ystride]
438 #define TQ4 pix[4 * xstride + 3 * ystride]
439 #define TQ5 pix[5 * xstride + 3 * ystride]
440 #define TQ6 pix[6 * xstride + 3 * ystride]
441 #define TQ7 pix[7 * xstride + 3 * ystride]
442 #define TP(x) pix[(-(x)-1) * xstride + 3 * ystride]
443 #define TQ(x) pix[(x) * xstride + 3 * ystride]
444
445 #define FP3 pix[-4 * xstride + 1 * ystride]
446 #define FP2 pix[-3 * xstride + 1 * ystride]
447 #define FP1 pix[-2 * xstride + 1 * ystride]
448 #define FP0 pix[-1 * xstride + 1 * ystride]
449 #define FQ0 pix[0 * xstride + 1 * ystride]
450 #define FQ1 pix[1 * xstride + 1 * ystride]
451 #define FQ2 pix[2 * xstride + 1 * ystride]
452 #define FQ3 pix[3 * xstride + 1 * ystride]
453
454 #include "libavcodec/h26x/h2656_deblock_template.c"
455
456 1577772 static void FUNC(loop_filter_luma_large)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc,
457 const uint8_t no_p, const uint8_t no_q, const uint8_t max_len_p, const uint8_t max_len_q)
458 {
459
2/2
✓ Branch 0 taken 3155544 times.
✓ Branch 1 taken 788886 times.
7888860 for (int d = 0; d < 4; d++) {
460 6311088 const int p6 = P6;
461 6311088 const int p5 = P5;
462 6311088 const int p4 = P4;
463 6311088 const int p3 = P3;
464 6311088 const int p2 = P2;
465 6311088 const int p1 = P1;
466 6311088 const int p0 = P0;
467 6311088 const int q0 = Q0;
468 6311088 const int q1 = Q1;
469 6311088 const int q2 = Q2;
470 6311088 const int q3 = Q3;
471 6311088 const int q4 = Q4;
472 6311088 const int q5 = Q5;
473 6311088 const int q6 = Q6;
474 int m;
475
4/4
✓ Branch 0 taken 477608 times.
✓ Branch 1 taken 2677936 times.
✓ Branch 2 taken 281092 times.
✓ Branch 3 taken 196516 times.
6311088 if (max_len_p == 5 && max_len_q == 5)
476 562184 m = (p4 + p3 + 2 * (p2 + p1 + p0 + q0 + q1 + q2) + q3 + q4 + 8) >> 4;
477
2/2
✓ Branch 0 taken 1349584 times.
✓ Branch 1 taken 1524868 times.
5748904 else if (max_len_p == max_len_q)
478 2699168 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (p0 + q0) + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
479
2/2
✓ Branch 0 taken 287652 times.
✓ Branch 1 taken 1237216 times.
3049736 else if (max_len_p + max_len_q == 12)
480 575304 m = (p5 + p4 + p3 + p2 + 2 * (p1 + p0 + q0 + q1) + q2 + q3 + q4 + q5 + 8) >> 4;
481
2/2
✓ Branch 0 taken 277264 times.
✓ Branch 1 taken 959952 times.
2474432 else if (max_len_p + max_len_q == 8)
482 554528 m = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 + 4) >> 3;
483
2/2
✓ Branch 0 taken 691356 times.
✓ Branch 1 taken 268596 times.
1919904 else if (max_len_q == 7)
484 1382712 m = (2 * (p2 + p1 + p0 + q0) + p0 + p1 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
485 else
486 537192 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (q2 + q1 + q0 + p0) + q0 + q1 + 8) >> 4;
487
1/2
✓ Branch 0 taken 3155544 times.
✗ Branch 1 not taken.
6311088 if (!no_p) {
488 6311088 const int refp = (P(max_len_p) + P(max_len_p - 1) + 1) >> 1;
489
2/2
✓ Branch 0 taken 912248 times.
✓ Branch 1 taken 2243296 times.
6311088 if (max_len_p == 3) {
490 1824496 P0 = p0 + av_clip(((m * 53 + refp * 11 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
491 1824496 P1 = p1 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p1, -(tc * 4 >> 1), (tc * 4 >> 1));
492 1824496 P2 = p2 + av_clip(((m * 11 + refp * 53 + 32) >> 6) - p2, -(tc * 2 >> 1), (tc * 2 >> 1));
493
2/2
✓ Branch 0 taken 477608 times.
✓ Branch 1 taken 1765688 times.
4486592 } else if (max_len_p == 5) {
494 955216 P0 = p0 + av_clip(((m * 58 + refp * 6 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
495 955216 P1 = p1 + av_clip(((m * 45 + refp * 19 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
496 955216 P2 = p2 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
497 955216 P3 = p3 + av_clip(((m * 19 + refp * 45 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
498 955216 P4 = p4 + av_clip(((m * 6 + refp * 58 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
499 } else {
500 3531376 P0 = p0 + av_clip(((m * 59 + refp * 5 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
501 3531376 P1 = p1 + av_clip(((m * 50 + refp * 14 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
502 3531376 P2 = p2 + av_clip(((m * 41 + refp * 23 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
503 3531376 P3 = p3 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
504 3531376 P4 = p4 + av_clip(((m * 23 + refp * 41 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
505 3531376 P5 = p5 + av_clip(((m * 14 + refp * 50 + 32) >> 6) - p5, -(tc * 1 >> 1), (tc * 1 >> 1));
506 3531376 P6 = p6 + av_clip(((m * 5 + refp * 59 + 32) >> 6) - p6, -(tc * 1 >> 1), (tc * 1 >> 1));
507 }
508 }
509
1/2
✓ Branch 0 taken 3155544 times.
✗ Branch 1 not taken.
6311088 if (!no_q) {
510 6311088 const int refq = (Q(max_len_q) + Q(max_len_q - 1) + 1) >> 1;
511
2/2
✓ Branch 0 taken 324968 times.
✓ Branch 1 taken 2830576 times.
6311088 if (max_len_q == 3) {
512 649936 Q0 = q0 + av_clip(((m * 53 + refq * 11 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
513 649936 Q1 = q1 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q1, -(tc * 4 >> 1), (tc * 4 >> 1));
514 649936 Q2 = q2 + av_clip(((m * 11 + refq * 53 + 32) >> 6) - q2, -(tc * 2 >> 1), (tc * 2 >> 1));
515
2/2
✓ Branch 0 taken 649492 times.
✓ Branch 1 taken 2181084 times.
5661152 } else if (max_len_q == 5) {
516 1298984 Q0 = q0 + av_clip(((m * 58 + refq * 6 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
517 1298984 Q1 = q1 + av_clip(((m * 45 + refq * 19 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
518 1298984 Q2 = q2 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
519 1298984 Q3 = q3 + av_clip(((m * 19 + refq * 45 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
520 1298984 Q4 = q4 + av_clip(((m * 6 + refq * 58 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
521 } else {
522 4362168 Q0 = q0 + av_clip(((m * 59 + refq * 5 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
523 4362168 Q1 = q1 + av_clip(((m * 50 + refq * 14 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
524 4362168 Q2 = q2 + av_clip(((m * 41 + refq * 23 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
525 4362168 Q3 = q3 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
526 4362168 Q4 = q4 + av_clip(((m * 23 + refq * 41 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
527 4362168 Q5 = q5 + av_clip(((m * 14 + refq * 50 + 32) >> 6) - q5, -(tc * 1 >> 1), (tc * 1 >> 1));
528 4362168 Q6 = q6 + av_clip(((m * 5 + refq * 59 + 32) >> 6) - q6, -(tc * 1 >> 1), (tc * 1 >> 1));
529 }
530
531 }
532 6311088 pix += ystride;
533 }
534 1577772 }
535
536 7602638 static void FUNC(vvc_loop_filter_luma)(uint8_t* _pix, ptrdiff_t _xstride, ptrdiff_t _ystride,
537 const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
538 const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int hor_ctu_edge)
539 {
540 7602638 const ptrdiff_t xstride = _xstride / sizeof(pixel);
541 7602638 const ptrdiff_t ystride = _ystride / sizeof(pixel);
542
543
2/2
✓ Branch 0 taken 7602638 times.
✓ Branch 1 taken 3801319 times.
22807914 for (int i = 0; i < 2; i++) {
544 #if BIT_DEPTH < 10
545 179908 const int tc = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
546 #else
547 15025368 const int tc = _tc[i] << (BIT_DEPTH - 10);
548 #endif
549
2/2
✓ Branch 0 taken 7384332 times.
✓ Branch 1 taken 218306 times.
15205276 if (tc) {
550 14768664 pixel* pix = (pixel*)_pix + i * 4 * ystride;
551 14768664 const int dp0 = abs(P2 - 2 * P1 + P0);
552 14768664 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
553 14768664 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
554 14768664 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
555 14768664 const int d0 = dp0 + dq0;
556 14768664 const int d3 = dp3 + dq3;
557 14768664 const int tc25 = ((tc * 5 + 1) >> 1);
558
559 14768664 const int no_p = _no_p[i];
560 14768664 const int no_q = _no_q[i];
561
562 14768664 int max_len_p = _max_len_p[i];
563 14768664 int max_len_q = _max_len_q[i];
564
565
4/4
✓ Branch 0 taken 2111007 times.
✓ Branch 1 taken 5273325 times.
✓ Branch 2 taken 1689710 times.
✓ Branch 3 taken 421297 times.
14768664 const int large_p = (max_len_p > 3 && !hor_ctu_edge);
566 14768664 const int large_q = max_len_q > 3;
567
568 14768664 const int beta = _beta[i] << BIT_DEPTH - 8;
569 14768664 const int beta_3 = beta >> 3;
570 14768664 const int beta_2 = beta >> 2;
571
572
4/4
✓ Branch 0 taken 5694622 times.
✓ Branch 1 taken 1689710 times.
✓ Branch 2 taken 818711 times.
✓ Branch 3 taken 4875911 times.
14768664 if (large_p || large_q) {
573
2/2
✓ Branch 0 taken 1689710 times.
✓ Branch 1 taken 818711 times.
5016842 const int dp0l = large_p ? ((dp0 + abs(P5 - 2 * P4 + P3) + 1) >> 1) : dp0;
574
2/2
✓ Branch 0 taken 2090340 times.
✓ Branch 1 taken 418081 times.
5016842 const int dq0l = large_q ? ((dq0 + abs(Q5 - 2 * Q4 + Q3) + 1) >> 1) : dq0;
575
2/2
✓ Branch 0 taken 1689710 times.
✓ Branch 1 taken 818711 times.
5016842 const int dp3l = large_p ? ((dp3 + abs(TP5 - 2 * TP4 + TP3) + 1) >> 1) : dp3;
576
2/2
✓ Branch 0 taken 2090340 times.
✓ Branch 1 taken 418081 times.
5016842 const int dq3l = large_q ? ((dq3 + abs(TQ5 - 2 * TQ4 + TQ3) + 1) >> 1) : dq3;
577 5016842 const int d0l = dp0l + dq0l;
578 5016842 const int d3l = dp3l + dq3l;
579 5016842 const int beta53 = beta * 3 >> 5;
580 5016842 const int beta_4 = beta >> 4;
581
2/2
✓ Branch 0 taken 1689710 times.
✓ Branch 1 taken 818711 times.
5016842 max_len_p = large_p ? max_len_p : 3;
582
2/2
✓ Branch 0 taken 2090340 times.
✓ Branch 1 taken 418081 times.
5016842 max_len_q = large_q ? max_len_q : 3;
583
584
2/2
✓ Branch 0 taken 2335004 times.
✓ Branch 1 taken 173417 times.
5016842 if (d0l + d3l < beta) {
585
2/2
✓ Branch 0 taken 1288593 times.
✓ Branch 1 taken 1046411 times.
4670008 const int sp0l = abs(P3 - P0) + (max_len_p == 7 ? abs(P7 - P6 - P5 + P4) : 0);
586
2/2
✓ Branch 0 taken 1583805 times.
✓ Branch 1 taken 751199 times.
4670008 const int sq0l = abs(Q0 - Q3) + (max_len_q == 7 ? abs(Q4 - Q5 - Q6 + Q7) : 0);
587
2/2
✓ Branch 0 taken 1288593 times.
✓ Branch 1 taken 1046411 times.
4670008 const int sp3l = abs(TP3 - TP0) + (max_len_p == 7 ? abs(TP7 - TP6 - TP5 + TP4) : 0);
588
2/2
✓ Branch 0 taken 1583805 times.
✓ Branch 1 taken 751199 times.
4670008 const int sq3l = abs(TQ0 - TQ3) + (max_len_q == 7 ? abs(TQ4 - TQ5 - TQ6 + TQ7) : 0);
589
2/2
✓ Branch 0 taken 1580074 times.
✓ Branch 1 taken 754930 times.
4670008 const int sp0 = large_p ? ((sp0l + abs(P3 - P(max_len_p)) + 1) >> 1) : sp0l;
590
2/2
✓ Branch 0 taken 1580074 times.
✓ Branch 1 taken 754930 times.
4670008 const int sp3 = large_p ? ((sp3l + abs(TP3 - TP(max_len_p)) + 1) >> 1) : sp3l;
591
2/2
✓ Branch 0 taken 1962624 times.
✓ Branch 1 taken 372380 times.
4670008 const int sq0 = large_q ? ((sq0l + abs(Q3 - Q(max_len_q)) + 1) >> 1) : sq0l;
592
2/2
✓ Branch 0 taken 1962624 times.
✓ Branch 1 taken 372380 times.
4670008 const int sq3 = large_q ? ((sq3l + abs(TQ3 - TQ(max_len_q)) + 1) >> 1) : sq3l;
593
4/4
✓ Branch 0 taken 978062 times.
✓ Branch 1 taken 1356942 times.
✓ Branch 2 taken 975396 times.
✓ Branch 3 taken 2666 times.
4670008 if (sp0 + sq0 < beta53 && abs(P0 - Q0) < tc25 &&
594
4/4
✓ Branch 0 taken 865545 times.
✓ Branch 1 taken 109851 times.
✓ Branch 2 taken 864689 times.
✓ Branch 3 taken 856 times.
1950792 sp3 + sq3 < beta53 && abs(TP0 - TQ0) < tc25 &&
595
4/4
✓ Branch 0 taken 813834 times.
✓ Branch 1 taken 50855 times.
✓ Branch 2 taken 788886 times.
✓ Branch 3 taken 24948 times.
1729378 (d0l << 1) < beta_4 && (d3l << 1) < beta_4) {
596 1577772 FUNC(loop_filter_luma_large)(pix, xstride, ystride, tc, no_p, no_q, max_len_p, max_len_q);
597 1577772 continue;
598 }
599 }
600 }
601
2/2
✓ Branch 0 taken 4780111 times.
✓ Branch 1 taken 1815335 times.
13190892 if (d0 + d3 < beta) {
602
3/4
✓ Branch 0 taken 3361387 times.
✓ Branch 1 taken 1418724 times.
✓ Branch 2 taken 3361387 times.
✗ Branch 3 not taken.
9560222 if (max_len_p > 2 && max_len_q > 2 &&
603
4/4
✓ Branch 0 taken 1130195 times.
✓ Branch 1 taken 2231192 times.
✓ Branch 2 taken 1117611 times.
✓ Branch 3 taken 12584 times.
6722774 abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
604
4/4
✓ Branch 0 taken 851416 times.
✓ Branch 1 taken 266195 times.
✓ Branch 2 taken 848278 times.
✓ Branch 3 taken 3138 times.
2235222 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
605
4/4
✓ Branch 0 taken 832653 times.
✓ Branch 1 taken 15625 times.
✓ Branch 2 taken 826397 times.
✓ Branch 3 taken 6256 times.
1696556 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
606 1652794 FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc, tc << 1, tc * 3, no_p, no_q);
607 } else {
608 7907428 int nd_p = 1;
609 7907428 int nd_q = 1;
610
3/4
✓ Branch 0 taken 2700627 times.
✓ Branch 1 taken 1253087 times.
✓ Branch 2 taken 2700627 times.
✗ Branch 3 not taken.
7907428 if (max_len_p > 1 && max_len_q > 1) {
611
2/2
✓ Branch 0 taken 2171438 times.
✓ Branch 1 taken 529189 times.
5401254 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
612 4342876 nd_p = 2;
613
2/2
✓ Branch 0 taken 2079768 times.
✓ Branch 1 taken 620859 times.
5401254 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
614 4159536 nd_q = 2;
615 }
616 7907428 FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
617 }
618 }
619 }
620 }
621 7602638 }
622
623 3164934 static void FUNC(loop_filter_chroma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
624 const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
625 {
626
2/2
✓ Branch 0 taken 3542068 times.
✓ Branch 1 taken 1582467 times.
10249070 for (int d = 0; d < size; d++) {
627 7084136 const int p3 = P3;
628 7084136 const int p2 = P2;
629 7084136 const int p1 = P1;
630 7084136 const int p0 = P0;
631 7084136 const int q0 = Q0;
632 7084136 const int q1 = Q1;
633 7084136 const int q2 = Q2;
634 7084136 const int q3 = Q3;
635
1/2
✓ Branch 0 taken 3542068 times.
✗ Branch 1 not taken.
7084136 if (!no_p) {
636 7084136 P0 = av_clip((p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3, p0 - tc, p0 + tc);
637 7084136 P1 = av_clip((2 * p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3, p1 - tc, p1 + tc);
638 7084136 P2 = av_clip((3 * p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3, p2 - tc, p2 + tc );
639 }
640
1/2
✓ Branch 0 taken 3542068 times.
✗ Branch 1 not taken.
7084136 if (!no_q) {
641 7084136 Q0 = av_clip((p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3, q0 - tc, q0 + tc);
642 7084136 Q1 = av_clip((p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3, q1 - tc, q1 + tc);
643 7084136 Q2 = av_clip((p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3, q2 - tc, q2 + tc);
644 }
645 7084136 pix += ystride;
646 }
647 3164934 }
648
649 566218 static void FUNC(loop_filter_chroma_strong_one_side)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
650 const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
651 {
652
2/2
✓ Branch 0 taken 621476 times.
✓ Branch 1 taken 283109 times.
1809170 for (int d = 0; d < size; d++) {
653 1242952 const int p1 = P1;
654 1242952 const int p0 = P0;
655 1242952 const int q0 = Q0;
656 1242952 const int q1 = Q1;
657 1242952 const int q2 = Q2;
658 1242952 const int q3 = Q3;
659
1/2
✓ Branch 0 taken 621476 times.
✗ Branch 1 not taken.
1242952 if (!no_p) {
660 1242952 P0 = av_clip((3 * p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3, p0 - tc, p0 + tc);
661 }
662
1/2
✓ Branch 0 taken 621476 times.
✗ Branch 1 not taken.
1242952 if (!no_q) {
663 1242952 Q0 = av_clip((2 * p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3, q0 - tc, q0 + tc);
664 1242952 Q1 = av_clip((p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3, q1 - tc, q1 + tc);
665 1242952 Q2 = av_clip((p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3, q2 - tc, q2 + tc);
666 }
667 1242952 pix += ystride;
668 }
669 566218 }
670
671 2546954 static void FUNC(vvc_loop_filter_chroma)(uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride,
672 const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
673 const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int shift)
674 {
675 2546954 const ptrdiff_t xstride = _xstride / sizeof(pixel);
676 2546954 const ptrdiff_t ystride = _ystride / sizeof(pixel);
677
2/2
✓ Branch 0 taken 969568 times.
✓ Branch 1 taken 303909 times.
2546954 const int size = shift ? 2 : 4;
678 2546954 const int end = 8 / size; // 8 samples a loop
679
680
2/2
✓ Branch 0 taken 4486090 times.
✓ Branch 1 taken 1273477 times.
11519134 for (int i = 0; i < end; i++) {
681 #if BIT_DEPTH < 10
682 124656 const int tc = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
683 #else
684 8847524 const int tc = _tc[i] << (BIT_DEPTH - 10);
685 #endif
686
2/2
✓ Branch 0 taken 4188855 times.
✓ Branch 1 taken 297235 times.
8972180 if (tc) {
687 8377710 pixel *pix = (pixel *)_pix + i * size * ystride;
688 8377710 const uint8_t no_p = _no_p[i];
689 8377710 const uint8_t no_q = _no_q[i];
690
691 8377710 const int beta = _beta[i] << (BIT_DEPTH - 8);
692 8377710 const int beta_3 = beta >> 3;
693 8377710 const int beta_2 = beta >> 2;
694
695 8377710 const int tc25 = ((tc * 5 + 1) >> 1);
696
697 8377710 uint8_t max_len_p = _max_len_p[i];
698 8377710 uint8_t max_len_q = _max_len_q[i];
699
700
3/4
✓ Branch 0 taken 4069669 times.
✓ Branch 1 taken 119186 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4069669 times.
8377710 if (!max_len_p || !max_len_q)
701 238372 continue;
702
703
2/2
✓ Branch 0 taken 2933197 times.
✓ Branch 1 taken 1136472 times.
8139338 if (max_len_q == 3){
704
2/2
✓ Branch 0 taken 2442984 times.
✓ Branch 1 taken 490213 times.
5866394 const int p1n = shift ? FP1 : TP1;
705
4/4
✓ Branch 0 taken 2575470 times.
✓ Branch 1 taken 357727 times.
✓ Branch 2 taken 2131254 times.
✓ Branch 3 taken 444216 times.
5866394 const int p2n = max_len_p == 1 ? p1n : (shift ? FP2 : TP2);
706
2/2
✓ Branch 0 taken 2442984 times.
✓ Branch 1 taken 490213 times.
5866394 const int p0n = shift ? FP0 : TP0;
707
2/2
✓ Branch 0 taken 2442984 times.
✓ Branch 1 taken 490213 times.
5866394 const int q0n = shift ? FQ0 : TQ0;
708
2/2
✓ Branch 0 taken 2442984 times.
✓ Branch 1 taken 490213 times.
5866394 const int q1n = shift ? FQ1 : TQ1;
709
2/2
✓ Branch 0 taken 2442984 times.
✓ Branch 1 taken 490213 times.
5866394 const int q2n = shift ? FQ2 : TQ2;
710
2/2
✓ Branch 0 taken 357727 times.
✓ Branch 1 taken 2575470 times.
5866394 const int p3 = max_len_p == 1 ? P1 : P3;
711
2/2
✓ Branch 0 taken 357727 times.
✓ Branch 1 taken 2575470 times.
5866394 const int p2 = max_len_p == 1 ? P1 : P2;
712 5866394 const int p1 = P1;
713 5866394 const int p0 = P0;
714 5866394 const int dp0 = abs(p2 - 2 * p1 + p0);
715 5866394 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
716
717 5866394 const int dp1 = abs(p2n - 2 * p1n + p0n);
718 5866394 const int dq1 = abs(q2n - 2 * q1n + q0n);
719 5866394 const int d0 = dp0 + dq0;
720 5866394 const int d1 = dp1 + dq1;
721
722
2/2
✓ Branch 0 taken 2661594 times.
✓ Branch 1 taken 271603 times.
5866394 if (d0 + d1 < beta) {
723
4/4
✓ Branch 0 taken 2322526 times.
✓ Branch 1 taken 339068 times.
✓ Branch 2 taken 1980883 times.
✓ Branch 3 taken 341643 times.
5323188 const int p3n = max_len_p == 1 ? p1n : (shift ? FP3 : TP3);
724
2/2
✓ Branch 0 taken 2278810 times.
✓ Branch 1 taken 382784 times.
5323188 const int q3n = shift ? FQ3 : TQ3;
725
4/4
✓ Branch 0 taken 2364683 times.
✓ Branch 1 taken 296911 times.
✓ Branch 2 taken 1966317 times.
✓ Branch 3 taken 398366 times.
9255822 const int dsam0 = (d0 << 1) < beta_2 && (abs(p3 - p0) + abs(Q0 - Q3) < beta_3) &&
726
2/2
✓ Branch 0 taken 1941083 times.
✓ Branch 1 taken 25234 times.
3932634 abs(p0 - Q0) < tc25;
727
4/4
✓ Branch 0 taken 2367857 times.
✓ Branch 1 taken 293737 times.
✓ Branch 2 taken 1969615 times.
✓ Branch 3 taken 398242 times.
9262418 const int dsam1 = (d1 << 1) < beta_2 && (abs(p3n - p0n) + abs(q0n - q3n) < beta_3) &&
728
2/2
✓ Branch 0 taken 1943917 times.
✓ Branch 1 taken 25698 times.
3939230 abs(p0n - q0n) < tc25;
729
4/4
✓ Branch 0 taken 1941083 times.
✓ Branch 1 taken 720511 times.
✓ Branch 2 taken 75507 times.
✓ Branch 3 taken 1865576 times.
5323188 if (!dsam0 || !dsam1)
730 1592036 max_len_p = max_len_q = 1;
731 } else {
732 543206 max_len_p = max_len_q = 1;
733 }
734 }
735
736
3/4
✓ Branch 0 taken 1582467 times.
✓ Branch 1 taken 2487202 times.
✓ Branch 2 taken 1582467 times.
✗ Branch 3 not taken.
8139338 if (max_len_p == 3 && max_len_q == 3)
737 3164934 FUNC(loop_filter_chroma_strong)(pix, xstride, ystride, size, tc, no_p, no_q);
738
2/2
✓ Branch 0 taken 283109 times.
✓ Branch 1 taken 2204093 times.
4974404 else if (max_len_q == 3)
739 566218 FUNC(loop_filter_chroma_strong_one_side)(pix, xstride, ystride, size, tc, no_p, no_q);
740 else
741 4408186 FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
742 }
743 }
744 2546954 }
745
746 1303412 static void FUNC(vvc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
747 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
748 const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
749 {
750 1303412 FUNC(vvc_loop_filter_chroma)(pix, stride, sizeof(pixel), beta, tc,
751 no_p, no_q, max_len_p, max_len_q, shift);
752 1303412 }
753
754 1243542 static void FUNC(vvc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
755 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
756 const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
757 {
758 1243542 FUNC(vvc_loop_filter_chroma)(pix, sizeof(pixel), stride, beta, tc,
759 no_p, no_q, max_len_p, max_len_q, shift);
760 1243542 }
761
762 3930048 static void FUNC(vvc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
763 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
764 const uint8_t *max_len_p, const uint8_t *max_len_q, const int hor_ctu_edge)
765 {
766 3930048 FUNC(vvc_loop_filter_luma)(pix, stride, sizeof(pixel), beta, tc,
767 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
768 3930048 }
769
770 3672590 static void FUNC(vvc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
771 const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
772 const uint8_t *max_len_p, const uint8_t *max_len_q, const int hor_ctu_edge)
773 {
774 3672590 FUNC(vvc_loop_filter_luma)(pix, sizeof(pixel), stride, beta, tc,
775 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
776 3672590 }
777
778 943758 static int FUNC(vvc_loop_ladf_level)(const uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride)
779 {
780 943758 const pixel *pix = (pixel *)_pix;
781 943758 const ptrdiff_t xstride = _xstride / sizeof(pixel);
782 943758 const ptrdiff_t ystride = _ystride / sizeof(pixel);
783 943758 return (P0 + TP0 + Q0 + TQ0) >> 2;
784 }
785
786 477242 static int FUNC(vvc_h_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
787 {
788 477242 return FUNC(vvc_loop_ladf_level)(pix, stride, sizeof(pixel));
789 }
790
791 466516 static int FUNC(vvc_v_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
792 {
793 466516 return FUNC(vvc_loop_ladf_level)(pix, sizeof(pixel), stride);
794 }
795
796 #undef P7
797 #undef P6
798 #undef P5
799 #undef P4
800 #undef P3
801 #undef P2
802 #undef P1
803 #undef P0
804 #undef Q0
805 #undef Q1
806 #undef Q2
807 #undef Q3
808 #undef Q4
809 #undef Q5
810 #undef Q6
811 #undef Q7
812
813 #undef TP7
814 #undef TP6
815 #undef TP5
816 #undef TP4
817 #undef TP3
818 #undef TP2
819 #undef TP1
820 #undef TP0
821 #undef TQ0
822 #undef TQ1
823 #undef TQ2
824 #undef TQ3
825 #undef TQ4
826 #undef TQ5
827 #undef TQ6
828 #undef TQ7
829
830 2646 static void FUNC(ff_vvc_lmcs_dsp_init)(VVCLMCSDSPContext *const lmcs)
831 {
832 2646 lmcs->filter = FUNC(lmcs_filter_luma);
833 2646 }
834
835 2646 static void FUNC(ff_vvc_lf_dsp_init)(VVCLFDSPContext *const lf)
836 {
837 2646 lf->ladf_level[0] = FUNC(vvc_h_loop_ladf_level);
838 2646 lf->ladf_level[1] = FUNC(vvc_v_loop_ladf_level);
839 2646 lf->filter_luma[0] = FUNC(vvc_h_loop_filter_luma);
840 2646 lf->filter_luma[1] = FUNC(vvc_v_loop_filter_luma);
841 2646 lf->filter_chroma[0] = FUNC(vvc_h_loop_filter_chroma);
842 2646 lf->filter_chroma[1] = FUNC(vvc_v_loop_filter_chroma);
843 2646 }
844
845 2646 static void FUNC(ff_vvc_sao_dsp_init)(VVCSAODSPContext *const sao)
846 {
847
2/2
✓ Branch 0 taken 11907 times.
✓ Branch 1 taken 1323 times.
26460 for (int i = 0; i < FF_ARRAY_ELEMS(sao->band_filter); i++)
848 23814 sao->band_filter[i] = FUNC(sao_band_filter);
849
2/2
✓ Branch 0 taken 11907 times.
✓ Branch 1 taken 1323 times.
26460 for (int i = 0; i < FF_ARRAY_ELEMS(sao->edge_filter); i++)
850 23814 sao->edge_filter[i] = FUNC(sao_edge_filter);
851 2646 sao->edge_restore[0] = FUNC(sao_edge_restore_0);
852 2646 sao->edge_restore[1] = FUNC(sao_edge_restore_1);
853 2646 }
854
855 2646 static void FUNC(ff_vvc_alf_dsp_init)(VVCALFDSPContext *const alf)
856 {
857 2646 alf->filter[LUMA] = FUNC(alf_filter_luma);
858 2646 alf->filter[CHROMA] = FUNC(alf_filter_chroma);
859 2646 alf->filter_cc = FUNC(alf_filter_cc);
860 2646 alf->classify = FUNC(alf_classify);
861 2646 alf->recon_coeff_and_clip = FUNC(alf_recon_coeff_and_clip);
862 2646 }
863