Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2023-2024 Nuo Mi | ||
3 | * Copyright (c) 2023-2024 Wu Jianhua | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along | ||
18 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include <string.h> | ||
23 | |||
24 | #include "checkasm.h" | ||
25 | #include "libavcodec/vvc/ctu.h" | ||
26 | #include "libavcodec/vvc/data.h" | ||
27 | #include "libavcodec/vvc/dsp.h" | ||
28 | |||
29 | #include "libavutil/common.h" | ||
30 | #include "libavutil/intreadwrite.h" | ||
31 | #include "libavutil/mem_internal.h" | ||
32 | |||
/*
 * Per-bit-depth masks applied to each random 32-bit word by randomize_pixels();
 * indexed by (bit_depth - 8) >> 1.
 */
static const uint32_t pixel_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff, 0x3fff3fff, 0xffffffff };
static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };

/* Bytes per pixel for the bit_depth variable in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Row pitch, in bytes, used for the source and the "uni" destination buffers. */
#define PIXEL_STRIDE (MAX_CTU_SIZE * 2)
/* Extra rows/columns kept before and after the block inside the source buffer. */
#define EXTRA_BEFORE 3
#define EXTRA_AFTER 4
/*
 * Every macro below is fully parenthesized so that it can be used safely
 * inside a larger expression (e.g. "x / SRC_EXTRA").
 */
#define SRC_EXTRA ((EXTRA_BEFORE + EXTRA_AFTER) * 2)
#define SRC_BUF_SIZE ((PIXEL_STRIDE + SRC_EXTRA) * (PIXEL_STRIDE + SRC_EXTRA))
#define DST_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE * 2)
/* Offset of the first tested pixel inside a source buffer. */
#define SRC_OFFSET ((PIXEL_STRIDE + EXTRA_BEFORE * 2) * EXTRA_BEFORE)
44 | |||
/*
 * Fill buf0 and buf1 with the same masked random data, one 32-bit word at a
 * time. "size" is counted in elements of *buf0; both buffers must be valid
 * for aligned 32-bit stores. All arguments are parenthesized so compound
 * expressions (casts, "a | b" masks, "n * sizeof(x)" sizes) expand correctly.
 */
#define randomize_buffers(buf0, buf1, size, mask)                   \
    do {                                                            \
        int k;                                                      \
        for (k = 0; k < (size); k += 4 / sizeof(*(buf0))) {         \
            uint32_t r = rnd() & (mask);                            \
            AV_WN32A((buf0) + k, r);                                \
            AV_WN32A((buf1) + k, r);                                \
        }                                                           \
    } while (0)

/* Random pixels limited to the bit_depth variable in scope at the call site. */
#define randomize_pixels(buf0, buf1, size)                          \
    do {                                                            \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];           \
        randomize_buffers(buf0, buf1, size, mask);                  \
    } while (0)

/* Random 14-bit values packed two per 32-bit word. */
#define randomize_avg_src(buf0, buf1, size)                         \
    do {                                                            \
        uint32_t mask = 0x3fff3fff;                                 \
        randomize_buffers(buf0, buf1, size, mask);                  \
    } while (0)

/*
 * Random 14-bit values whose low (14 - bit_depth) bits are cleared,
 * packed two per 32-bit word.
 */
#define randomize_prof_src(buf0, buf1, size)                        \
    do {                                                            \
        const int shift   = 14 - bit_depth;                         \
        const int mask16  = 0x3fff >> shift << shift;               \
        uint32_t mask = (mask16 << 16) | mask16;                    \
        randomize_buffers(buf0, buf1, size, mask);                  \
    } while (0)
74 | |||
75 | 13 | static void check_put_vvc_luma(void) | |
76 | { | ||
77 | 13 | LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]); | |
78 | 13 | LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]); | |
79 | 13 | LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]); | |
80 | 13 | LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]); | |
81 | VVCDSPContext c; | ||
82 | |||
83 | 13 | declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride, | |
84 | const int height, const int8_t *hf, const int8_t *vf, const int width); | ||
85 | |||
86 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
87 |
2/2✓ Branch 1 taken 710775 times.
✓ Branch 2 taken 39 times.
|
710814 | randomize_pixels(src0, src1, SRC_BUF_SIZE); |
88 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
89 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int i = 0; i < 2; i++) { |
90 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int j = 0; j < 2; j++) { |
91 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
|
1092 | for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) { |
92 |
2/2✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 936 times.
|
6552 | for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) { |
93 | 5616 | const int idx = av_log2(w) - 1; | |
94 | 5616 | const int mx = rnd() % 16; | |
95 | 5616 | const int my = rnd() % 16; | |
96 | 5616 | const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % 3][mx]; | |
97 | 5616 | const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % 3][my]; | |
98 | const char *type; | ||
99 |
4/5✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 1404 times.
✓ Branch 2 taken 1404 times.
✓ Branch 3 taken 1404 times.
✗ Branch 4 not taken.
|
5616 | switch ((j << 1) | i) { |
100 | 1404 | case 0: type = "put_luma_pixels"; break; // 0 0 | |
101 | 1404 | case 1: type = "put_luma_h"; break; // 0 1 | |
102 | 1404 | case 2: type = "put_luma_v"; break; // 1 0 | |
103 | 1404 | case 3: type = "put_luma_hv"; break; // 1 1 | |
104 | } | ||
105 |
2/2✓ Branch 3 taken 1110 times.
✓ Branch 4 taken 4506 times.
|
5616 | if (check_func(c.inter.put[LUMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) { |
106 | 1110 | memset(dst0, 0, DST_BUF_SIZE); | |
107 | 1110 | memset(dst1, 0, DST_BUF_SIZE); | |
108 | 1110 | call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
109 | 1110 | call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
110 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1110 times.
|
1110 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
111 | ✗ | fail(); | |
112 |
2/2✓ Branch 0 taken 185 times.
✓ Branch 1 taken 925 times.
|
1110 | if (w == h) |
113 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 185 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
185 | bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); |
114 | } | ||
115 | } | ||
116 | } | ||
117 | } | ||
118 | } | ||
119 | } | ||
120 | 13 | report("put_luma"); | |
121 | 13 | } | |
122 | |||
123 | 13 | static void check_put_vvc_luma_uni(void) | |
124 | { | ||
125 | 13 | LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]); | |
126 | 13 | LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]); | |
127 | 13 | LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]); | |
128 | 13 | LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]); | |
129 | |||
130 | VVCDSPContext c; | ||
131 | 13 | declare_func(void, uint8_t *dst, ptrdiff_t dststride, | |
132 | const uint8_t *src, ptrdiff_t srcstride, int height, | ||
133 | const int8_t *hf, const int8_t *vf, int width); | ||
134 | |||
135 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
136 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
137 |
2/2✓ Branch 1 taken 710775 times.
✓ Branch 2 taken 39 times.
|
710814 | randomize_pixels(src0, src1, SRC_BUF_SIZE); |
138 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int i = 0; i < 2; i++) { |
139 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int j = 0; j < 2; j++) { |
140 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
|
1092 | for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) { |
141 |
2/2✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 936 times.
|
6552 | for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) { |
142 | 5616 | const int idx = av_log2(w) - 1; | |
143 | 5616 | const int mx = rnd() % VVC_INTER_LUMA_FACTS; | |
144 | 5616 | const int my = rnd() % VVC_INTER_LUMA_FACTS; | |
145 | 5616 | const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][mx]; | |
146 | 5616 | const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][my]; | |
147 | const char *type; | ||
148 | |||
149 |
4/5✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 1404 times.
✓ Branch 2 taken 1404 times.
✓ Branch 3 taken 1404 times.
✗ Branch 4 not taken.
|
5616 | switch ((j << 1) | i) { |
150 | 1404 | case 0: type = "put_uni_pixels"; break; // 0 0 | |
151 | 1404 | case 1: type = "put_uni_h"; break; // 0 1 | |
152 | 1404 | case 2: type = "put_uni_v"; break; // 1 0 | |
153 | 1404 | case 3: type = "put_uni_hv"; break; // 1 1 | |
154 | } | ||
155 | |||
156 |
2/2✓ Branch 3 taken 1110 times.
✓ Branch 4 taken 4506 times.
|
5616 | if (check_func(c.inter.put_uni[LUMA][idx][j][i], "%s_luma_%d_%dx%d", type, bit_depth, w, h)) { |
157 | 1110 | memset(dst0, 0, DST_BUF_SIZE); | |
158 | 1110 | memset(dst1, 0, DST_BUF_SIZE); | |
159 | 1110 | call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
160 | 1110 | call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
161 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1110 times.
|
1110 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
162 | ✗ | fail(); | |
163 |
2/2✓ Branch 0 taken 185 times.
✓ Branch 1 taken 925 times.
|
1110 | if (w == h) |
164 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 185 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
185 | bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); |
165 | } | ||
166 | } | ||
167 | } | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | 13 | report("put_uni_luma"); | |
172 | 13 | } | |
173 | |||
174 | 13 | static void check_put_vvc_chroma(void) | |
175 | { | ||
176 | 13 | LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]); | |
177 | 13 | LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]); | |
178 | 13 | LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]); | |
179 | 13 | LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]); | |
180 | VVCDSPContext c; | ||
181 | |||
182 | 13 | declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride, | |
183 | const int height, const int8_t *hf, const int8_t *vf, const int width); | ||
184 | |||
185 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
186 |
2/2✓ Branch 1 taken 710775 times.
✓ Branch 2 taken 39 times.
|
710814 | randomize_pixels(src0, src1, SRC_BUF_SIZE); |
187 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
188 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int i = 0; i < 2; i++) { |
189 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int j = 0; j < 2; j++) { |
190 |
2/2✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 156 times.
|
1248 | for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) { |
191 |
2/2✓ Branch 0 taken 7644 times.
✓ Branch 1 taken 1092 times.
|
8736 | for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) { |
192 | 7644 | const int idx = av_log2(w) - 1; | |
193 | 7644 | const int mx = rnd() % VVC_INTER_CHROMA_FACTS; | |
194 | 7644 | const int my = rnd() % VVC_INTER_CHROMA_FACTS; | |
195 | 7644 | const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx]; | |
196 | 7644 | const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my]; | |
197 | const char *type; | ||
198 |
4/5✓ Branch 0 taken 1911 times.
✓ Branch 1 taken 1911 times.
✓ Branch 2 taken 1911 times.
✓ Branch 3 taken 1911 times.
✗ Branch 4 not taken.
|
7644 | switch ((j << 1) | i) { |
199 | 1911 | case 0: type = "put_chroma_pixels"; break; // 0 0 | |
200 | 1911 | case 1: type = "put_chroma_h"; break; // 0 1 | |
201 | 1911 | case 2: type = "put_chroma_v"; break; // 1 0 | |
202 | 1911 | case 3: type = "put_chroma_hv"; break; // 1 1 | |
203 | } | ||
204 |
2/2✓ Branch 3 taken 1463 times.
✓ Branch 4 taken 6181 times.
|
7644 | if (check_func(c.inter.put[CHROMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) { |
205 | 1463 | memset(dst0, 0, DST_BUF_SIZE); | |
206 | 1463 | memset(dst1, 0, DST_BUF_SIZE); | |
207 | 1463 | call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
208 | 1463 | call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
209 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1463 times.
|
1463 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
210 | ✗ | fail(); | |
211 |
2/2✓ Branch 0 taken 209 times.
✓ Branch 1 taken 1254 times.
|
1463 | if (w == h) |
212 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 209 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
209 | bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); |
213 | } | ||
214 | } | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | } | ||
219 | 13 | report("put_chroma"); | |
220 | 13 | } | |
221 | |||
222 | 13 | static void check_put_vvc_chroma_uni(void) | |
223 | { | ||
224 | 13 | LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]); | |
225 | 13 | LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]); | |
226 | 13 | LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]); | |
227 | 13 | LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]); | |
228 | |||
229 | VVCDSPContext c; | ||
230 | 13 | declare_func(void, uint8_t *dst, ptrdiff_t dststride, | |
231 | const uint8_t *src, ptrdiff_t srcstride, int height, | ||
232 | const int8_t *hf, const int8_t *vf, int width); | ||
233 | |||
234 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
235 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
236 |
2/2✓ Branch 1 taken 710775 times.
✓ Branch 2 taken 39 times.
|
710814 | randomize_pixels(src0, src1, SRC_BUF_SIZE); |
237 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int i = 0; i < 2; i++) { |
238 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int j = 0; j < 2; j++) { |
239 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
|
1092 | for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) { |
240 |
2/2✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 936 times.
|
6552 | for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) { |
241 | 5616 | const int idx = av_log2(w) - 1; | |
242 | 5616 | const int mx = rnd() % VVC_INTER_CHROMA_FACTS; | |
243 | 5616 | const int my = rnd() % VVC_INTER_CHROMA_FACTS; | |
244 | 5616 | const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx]; | |
245 | 5616 | const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my]; | |
246 | const char *type; | ||
247 | |||
248 |
4/5✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 1404 times.
✓ Branch 2 taken 1404 times.
✓ Branch 3 taken 1404 times.
✗ Branch 4 not taken.
|
5616 | switch ((j << 1) | i) { |
249 | 1404 | case 0: type = "put_uni_pixels"; break; // 0 0 | |
250 | 1404 | case 1: type = "put_uni_h"; break; // 0 1 | |
251 | 1404 | case 2: type = "put_uni_v"; break; // 1 0 | |
252 | 1404 | case 3: type = "put_uni_hv"; break; // 1 1 | |
253 | } | ||
254 | |||
255 |
2/2✓ Branch 3 taken 1110 times.
✓ Branch 4 taken 4506 times.
|
5616 | if (check_func(c.inter.put_uni[CHROMA][idx][j][i], "%s_chroma_%d_%dx%d", type, bit_depth, w, h)) { |
256 | 1110 | memset(dst0, 0, DST_BUF_SIZE); | |
257 | 1110 | memset(dst1, 0, DST_BUF_SIZE); | |
258 | 1110 | call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
259 | 1110 | call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); | |
260 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1110 times.
|
1110 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
261 | ✗ | fail(); | |
262 |
2/2✓ Branch 0 taken 185 times.
✓ Branch 1 taken 925 times.
|
1110 | if (w == h) |
263 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 185 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
185 | bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w); |
264 | } | ||
265 | } | ||
266 | } | ||
267 | } | ||
268 | } | ||
269 | } | ||
270 | 13 | report("put_uni_chroma"); | |
271 | 13 | } | |
272 | |||
273 | #define AVG_SRC_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE) | ||
274 | #define AVG_DST_BUF_SIZE (MAX_PB_SIZE * MAX_PB_SIZE * 2) | ||
275 | |||
276 | 13 | static void check_avg(void) | |
277 | { | ||
278 | 13 | LOCAL_ALIGNED_32(int16_t, src00, [AVG_SRC_BUF_SIZE]); | |
279 | 13 | LOCAL_ALIGNED_32(int16_t, src01, [AVG_SRC_BUF_SIZE]); | |
280 | 13 | LOCAL_ALIGNED_32(int16_t, src10, [AVG_SRC_BUF_SIZE]); | |
281 | 13 | LOCAL_ALIGNED_32(int16_t, src11, [AVG_SRC_BUF_SIZE]); | |
282 | 13 | LOCAL_ALIGNED_32(uint8_t, dst0, [AVG_DST_BUF_SIZE]); | |
283 | 13 | LOCAL_ALIGNED_32(uint8_t, dst1, [AVG_DST_BUF_SIZE]); | |
284 | VVCDSPContext c; | ||
285 | |||
286 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
287 |
2/2✓ Branch 1 taken 319488 times.
✓ Branch 2 taken 39 times.
|
319527 | randomize_avg_src((uint8_t*)src00, (uint8_t*)src10, AVG_SRC_BUF_SIZE * sizeof(int16_t)); |
288 |
2/2✓ Branch 1 taken 319488 times.
✓ Branch 2 taken 39 times.
|
319527 | randomize_avg_src((uint8_t*)src01, (uint8_t*)src11, AVG_SRC_BUF_SIZE * sizeof(int16_t)); |
289 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
290 |
2/2✓ Branch 0 taken 273 times.
✓ Branch 1 taken 39 times.
|
312 | for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) { |
291 |
2/2✓ Branch 0 taken 1911 times.
✓ Branch 1 taken 273 times.
|
2184 | for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) { |
292 | { | ||
293 | 1911 | declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, | |
294 | const int16_t *src0, const int16_t *src1, int width, int height); | ||
295 |
2/2✓ Branch 3 taken 294 times.
✓ Branch 4 taken 1617 times.
|
1911 | if (check_func(c.inter.avg, "avg_%d_%dx%d", bit_depth, w, h)) { |
296 | 294 | memset(dst0, 0, AVG_DST_BUF_SIZE); | |
297 | 294 | memset(dst1, 0, AVG_DST_BUF_SIZE); | |
298 | 294 | call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h); | |
299 | 294 | call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h); | |
300 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 294 times.
|
294 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
301 | ✗ | fail(); | |
302 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 252 times.
|
294 | if (w == h) |
303 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 42 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
42 | bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h); |
304 | } | ||
305 | } | ||
306 | { | ||
307 | 1911 | declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, | |
308 | const int16_t *src0, const int16_t *src1, int width, int height, | ||
309 | int denom, int w0, int w1, int o0, int o1); | ||
310 | { | ||
311 | 1911 | const int denom = rnd() % 8; | |
312 | 1911 | const int w0 = rnd() % 256 - 128; | |
313 | 1911 | const int w1 = rnd() % 256 - 128; | |
314 | 1911 | const int o0 = rnd() % 256 - 128; | |
315 | 1911 | const int o1 = rnd() % 256 - 128; | |
316 |
2/2✓ Branch 3 taken 294 times.
✓ Branch 4 taken 1617 times.
|
1911 | if (check_func(c.inter.w_avg, "w_avg_%d_%dx%d", bit_depth, w, h)) { |
317 | 294 | memset(dst0, 0, AVG_DST_BUF_SIZE); | |
318 | 294 | memset(dst1, 0, AVG_DST_BUF_SIZE); | |
319 | |||
320 | 294 | call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1); | |
321 | 294 | call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1); | |
322 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 294 times.
|
294 | if (memcmp(dst0, dst1, DST_BUF_SIZE)) |
323 | ✗ | fail(); | |
324 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 252 times.
|
294 | if (w == h) |
325 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 42 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
42 | bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1); |
326 | } | ||
327 | } | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | 13 | report("avg"); | |
333 | 13 | } | |
334 | |||
335 | #define SR_RANGE 2 | ||
336 | 13 | static void check_dmvr(void) | |
337 | { | ||
338 | 13 | LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]); | |
339 | 13 | LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]); | |
340 | 13 | LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]); | |
341 | 13 | LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]); | |
342 | 13 | const int dst_stride = MAX_PB_SIZE * sizeof(int16_t); | |
343 | |||
344 | VVCDSPContext c; | ||
345 | 13 | declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height, | |
346 | intptr_t mx, intptr_t my, int width); | ||
347 | |||
348 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
349 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
350 |
2/2✓ Branch 1 taken 710775 times.
✓ Branch 2 taken 39 times.
|
710814 | randomize_pixels(src0, src1, SRC_BUF_SIZE); |
351 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int i = 0; i < 2; i++) { |
352 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int j = 0; j < 2; j++) { |
353 |
2/2✓ Branch 0 taken 312 times.
✓ Branch 1 taken 156 times.
|
468 | for (int h = 8; h <= 16; h *= 2) { |
354 |
2/2✓ Branch 0 taken 624 times.
✓ Branch 1 taken 312 times.
|
936 | for (int w = 8; w <= 16; w *= 2) { |
355 | 624 | const int pred_w = w + 2 * SR_RANGE; | |
356 | 624 | const int pred_h = h + 2 * SR_RANGE; | |
357 | 624 | const int mx = rnd() % VVC_INTER_LUMA_DMVR_FACTS; | |
358 | 624 | const int my = rnd() % VVC_INTER_LUMA_DMVR_FACTS; | |
359 | const char *type; | ||
360 | |||
361 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 468 times.
|
624 | if (w * h < 128) |
362 | 156 | continue; | |
363 | |||
364 |
4/5✓ Branch 0 taken 117 times.
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 117 times.
✓ Branch 3 taken 117 times.
✗ Branch 4 not taken.
|
468 | switch ((j << 1) | i) { |
365 | 117 | case 0: type = "dmvr"; break; // 0 0 | |
366 | 117 | case 1: type = "dmvr_h"; break; // 0 1 | |
367 | 117 | case 2: type = "dmvr_v"; break; // 1 0 | |
368 | 117 | case 3: type = "dmvr_hv"; break; // 1 1 | |
369 | } | ||
370 | |||
371 |
2/2✓ Branch 3 taken 72 times.
✓ Branch 4 taken 396 times.
|
468 | if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) { |
372 | 72 | memset(dst0, 0, DST_BUF_SIZE); | |
373 | 72 | memset(dst1, 0, DST_BUF_SIZE); | |
374 | 72 | call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w); | |
375 | 72 | call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w); | |
376 |
2/2✓ Branch 0 taken 1248 times.
✓ Branch 1 taken 72 times.
|
1320 | for (int k = 0; k < pred_h; k++) { |
377 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1248 times.
|
1248 | if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) { |
378 | ✗ | fail(); | |
379 | ✗ | break; | |
380 | } | ||
381 | } | ||
382 | |||
383 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
72 | bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w); |
384 | } | ||
385 | } | ||
386 | } | ||
387 | } | ||
388 | } | ||
389 | } | ||
390 | 13 | report("dmvr"); | |
391 | 13 | } | |
392 | |||
393 | #define BDOF_BLOCK_SIZE 16 | ||
394 | #define BDOF_SRC_SIZE (MAX_PB_SIZE* (BDOF_BLOCK_SIZE + 2)) | ||
395 | #define BDOF_SRC_OFFSET (MAX_PB_SIZE + 1) | ||
396 | #define BDOF_DST_SIZE (BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE * 2) | ||
397 | 13 | static void check_bdof(void) | |
398 | { | ||
399 | 13 | LOCAL_ALIGNED_32(uint8_t, dst0, [BDOF_DST_SIZE]); | |
400 | 13 | LOCAL_ALIGNED_32(uint8_t, dst1, [BDOF_DST_SIZE]); | |
401 | 13 | LOCAL_ALIGNED_32(uint16_t, src00, [BDOF_SRC_SIZE]); | |
402 | 13 | LOCAL_ALIGNED_32(uint16_t, src01, [BDOF_SRC_SIZE]); | |
403 | 13 | LOCAL_ALIGNED_32(uint16_t, src10, [BDOF_SRC_SIZE]); | |
404 | 13 | LOCAL_ALIGNED_32(uint16_t, src11, [BDOF_SRC_SIZE]); | |
405 | |||
406 | VVCDSPContext c; | ||
407 | 13 | declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h); | |
408 | |||
409 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 13 times.
|
52 | for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { |
410 | 39 | const int dst_stride = BDOF_BLOCK_SIZE * SIZEOF_PIXEL; | |
411 | |||
412 | 39 | ff_vvc_dsp_init(&c, bit_depth); | |
413 |
2/2✓ Branch 1 taken 44928 times.
✓ Branch 2 taken 39 times.
|
44967 | randomize_prof_src(src00, src10, BDOF_SRC_SIZE); |
414 |
2/2✓ Branch 1 taken 44928 times.
✓ Branch 2 taken 39 times.
|
44967 | randomize_prof_src(src01, src11, BDOF_SRC_SIZE); |
415 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 39 times.
|
117 | for (int h = 8; h <= 16; h *= 2) { |
416 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 78 times.
|
234 | for (int w = 8; w <= 16; w *= 2) { |
417 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 117 times.
|
156 | if (w * h < 128) |
418 | 39 | continue; | |
419 |
2/2✓ Branch 3 taken 18 times.
✓ Branch 4 taken 99 times.
|
117 | if (check_func(c.inter.apply_bdof, "apply_bdof_%d_%dx%d", bit_depth, w, h)) { |
420 | 18 | memset(dst0, 0, BDOF_DST_SIZE); | |
421 | 18 | memset(dst1, 0, BDOF_DST_SIZE); | |
422 | 18 | call_ref(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h); | |
423 | 18 | call_new(dst1, dst_stride, src10 + BDOF_SRC_OFFSET, src11 + BDOF_SRC_OFFSET, w, h); | |
424 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (memcmp(dst0, dst1, BDOF_DST_SIZE)) |
425 | ✗ | fail(); | |
426 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 18 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
18 | bench_new(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h); |
427 | } | ||
428 | } | ||
429 | } | ||
430 | } | ||
431 | 13 | report("apply_bdof"); | |
432 | 13 | } | |
433 | |||
434 | 13 | static void check_vvc_sad(void) | |
435 | { | ||
436 | 13 | const int bit_depth = 10; | |
437 | VVCDSPContext c; | ||
438 | 13 | LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]); | |
439 | 13 | LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]); | |
440 | 13 | declare_func(int, const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); | |
441 | |||
442 | 13 | ff_vvc_dsp_init(&c, bit_depth); | |
443 |
2/2✓ Branch 1 taken 425984 times.
✓ Branch 2 taken 13 times.
|
425997 | randomize_pixels(src0, src1, MAX_CTU_SIZE * MAX_CTU_SIZE * 4); |
444 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 13 times.
|
39 | for (int h = 8; h <= 16; h *= 2) { |
445 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 26 times.
|
78 | for (int w = 8; w <= 16; w *= 2) { |
446 |
2/2✓ Branch 0 taken 260 times.
✓ Branch 1 taken 52 times.
|
312 | for(int offy = 0; offy <= 4; offy++) { |
447 |
2/2✓ Branch 0 taken 1300 times.
✓ Branch 1 taken 260 times.
|
1560 | for(int offx = 0; offx <= 4; offx++) { |
448 |
2/2✓ Branch 0 taken 325 times.
✓ Branch 1 taken 975 times.
|
1300 | if (w * h < 128) |
449 | 325 | continue; | |
450 | |||
451 |
2/2✓ Branch 3 taken 6 times.
✓ Branch 4 taken 969 times.
|
975 | if (check_func(c.inter.sad, "sad_%dx%d", w, h)) { |
452 | int result0; | ||
453 | int result1; | ||
454 | |||
455 | 6 | result0 = call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); | |
456 | 6 | result1 = call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); | |
457 | |||
458 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | if (result1 != result0) |
459 | ✗ | fail(); | |
460 |
2/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
6 | if(offx == 0 && offy == 0) |
461 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
6 | bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); |
462 | } | ||
463 | } | ||
464 | } | ||
465 | } | ||
466 | } | ||
467 | |||
468 | 13 | report("sad"); | |
469 | 13 | } | |
470 | |||
/*
 * Entry point for the VVC motion-compensation checks: runs every check in
 * this file in a fixed order. Each check emits its own report().
 */
void checkasm_check_vvc_mc(void)
{
    /* Decoder-side refinement helpers. */
    check_dmvr();
    check_bdof();
    check_vvc_sad();

    /* Interpolation kernels. */
    check_put_vvc_luma();
    check_put_vvc_luma_uni();
    check_put_vvc_chroma();
    check_put_vvc_chroma_uni();

    /* Bi-prediction averaging. */
    check_avg();
}
482 |