Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Assembly testing and benchmarking tool | ||
3 | * Copyright (c) 2015 Henrik Gramner | ||
4 | * Copyright (c) 2008 Loren Merritt | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along | ||
19 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | * Copyright © 2018, VideoLAN and dav1d authors | ||
23 | * Copyright © 2018, Two Orioles, LLC | ||
24 | * All rights reserved. | ||
25 | * | ||
26 | * Redistribution and use in source and binary forms, with or without | ||
27 | * modification, are permitted provided that the following conditions are met: | ||
28 | * | ||
29 | * 1. Redistributions of source code must retain the above copyright notice, this | ||
30 | * list of conditions and the following disclaimer. | ||
31 | * | ||
32 | * 2. Redistributions in binary form must reproduce the above copyright notice, | ||
33 | * this list of conditions and the following disclaimer in the documentation | ||
34 | * and/or other materials provided with the distribution. | ||
35 | * | ||
36 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
37 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
38 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
39 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
40 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
41 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
42 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
43 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
44 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
45 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
46 | */ | ||
47 | |||
48 | #include "config.h" | ||
49 | #include "config_components.h" | ||
50 | |||
51 | #ifndef _GNU_SOURCE | ||
52 | # define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() | ||
53 | #endif | ||
54 | |||
55 | #include <signal.h> | ||
56 | #include <stdarg.h> | ||
57 | #include <stdio.h> | ||
58 | #include <stdlib.h> | ||
59 | #include <string.h> | ||
60 | #include "checkasm.h" | ||
61 | #include "libavutil/common.h" | ||
62 | #include "libavutil/cpu.h" | ||
63 | #include "libavutil/intfloat.h" | ||
64 | #include "libavutil/random_seed.h" | ||
65 | |||
66 | #if HAVE_IO_H | ||
67 | #include <io.h> | ||
68 | #endif | ||
69 | #if HAVE_PRCTL | ||
70 | #include <sys/prctl.h> | ||
71 | #endif | ||
72 | |||
73 | #if defined(_WIN32) && !defined(SIGBUS) | ||
74 | /* non-standard, use the same value as mingw-w64 */ | ||
75 | #define SIGBUS 10 | ||
76 | #endif | ||
77 | |||
78 | #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE | ||
79 | #include <windows.h> | ||
80 | #define COLOR_RED FOREGROUND_RED | ||
81 | #define COLOR_GREEN FOREGROUND_GREEN | ||
82 | #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) | ||
83 | #else | ||
84 | #define COLOR_RED 1 | ||
85 | #define COLOR_GREEN 2 | ||
86 | #define COLOR_YELLOW 3 | ||
87 | #endif | ||
88 | |||
89 | #if HAVE_UNISTD_H | ||
90 | #include <unistd.h> | ||
91 | #endif | ||
92 | |||
93 | #if !HAVE_ISATTY | ||
94 | #define isatty(fd) 1 | ||
95 | #endif | ||
96 | |||
97 | #if ARCH_AARCH64 | ||
98 | #include "libavutil/aarch64/cpu.h" | ||
99 | #elif ARCH_RISCV | ||
100 | #include "libavutil/riscv/cpu.h" | ||
101 | #endif | ||
102 | |||
103 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL | ||
104 | #include "libavutil/arm/cpu.h" | ||
105 | |||
106 | void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp; | ||
107 | #endif | ||
108 | |||
109 | /* Trade-off between speed and accuracy */ | ||
110 | uint64_t bench_runs = 1U << 10; | ||
111 | |||
112 | /* List of tests to invoke */ | ||
113 | static const struct { | ||
114 | const char *name; | ||
115 | void (*func)(void); | ||
116 | } tests[] = { | ||
117 | #if CONFIG_AVCODEC | ||
118 | #if CONFIG_AAC_DECODER | ||
119 | { "aacpsdsp", checkasm_check_aacpsdsp }, | ||
120 | { "sbrdsp", checkasm_check_sbrdsp }, | ||
121 | #endif | ||
122 | #if CONFIG_AAC_ENCODER | ||
123 | { "aacencdsp", checkasm_check_aacencdsp }, | ||
124 | #endif | ||
125 | #if CONFIG_AC3DSP | ||
126 | { "ac3dsp", checkasm_check_ac3dsp }, | ||
127 | #endif | ||
128 | #if CONFIG_ALAC_DECODER | ||
129 | { "alacdsp", checkasm_check_alacdsp }, | ||
130 | #endif | ||
131 | #if CONFIG_AUDIODSP | ||
132 | { "audiodsp", checkasm_check_audiodsp }, | ||
133 | #endif | ||
134 | #if CONFIG_BLOCKDSP | ||
135 | { "blockdsp", checkasm_check_blockdsp }, | ||
136 | #endif | ||
137 | #if CONFIG_BSWAPDSP | ||
138 | { "bswapdsp", checkasm_check_bswapdsp }, | ||
139 | #endif | ||
140 | #if CONFIG_DCA_DECODER | ||
141 | { "synth_filter", checkasm_check_synth_filter }, | ||
142 | #endif | ||
143 | #if CONFIG_DIRAC_DECODER | ||
144 | { "diracdsp", checkasm_check_diracdsp }, | ||
145 | #endif | ||
146 | #if CONFIG_EXR_DECODER | ||
147 | { "exrdsp", checkasm_check_exrdsp }, | ||
148 | #endif | ||
149 | #if CONFIG_FDCTDSP | ||
150 | { "fdctdsp", checkasm_check_fdctdsp }, | ||
151 | #endif | ||
152 | #if CONFIG_FLAC_DECODER | ||
153 | { "flacdsp", checkasm_check_flacdsp }, | ||
154 | #endif | ||
155 | #if CONFIG_FMTCONVERT | ||
156 | { "fmtconvert", checkasm_check_fmtconvert }, | ||
157 | #endif | ||
158 | #if CONFIG_G722DSP | ||
159 | { "g722dsp", checkasm_check_g722dsp }, | ||
160 | #endif | ||
161 | #if CONFIG_H263DSP | ||
162 | { "h263dsp", checkasm_check_h263dsp }, | ||
163 | #endif | ||
164 | #if CONFIG_H264CHROMA | ||
165 | { "h264chroma", checkasm_check_h264chroma }, | ||
166 | #endif | ||
167 | #if CONFIG_H264DSP | ||
168 | { "h264dsp", checkasm_check_h264dsp }, | ||
169 | #endif | ||
170 | #if CONFIG_H264PRED | ||
171 | { "h264pred", checkasm_check_h264pred }, | ||
172 | #endif | ||
173 | #if CONFIG_H264QPEL | ||
174 | { "h264qpel", checkasm_check_h264qpel }, | ||
175 | #endif | ||
176 | #if CONFIG_HEVC_DECODER | ||
177 | { "hevc_add_res", checkasm_check_hevc_add_res }, | ||
178 | { "hevc_deblock", checkasm_check_hevc_deblock }, | ||
179 | { "hevc_idct", checkasm_check_hevc_idct }, | ||
180 | { "hevc_pel", checkasm_check_hevc_pel }, | ||
181 | { "hevc_sao", checkasm_check_hevc_sao }, | ||
182 | #endif | ||
183 | #if CONFIG_HUFFYUV_DECODER | ||
184 | { "huffyuvdsp", checkasm_check_huffyuvdsp }, | ||
185 | #endif | ||
186 | #if CONFIG_IDCTDSP | ||
187 | { "idctdsp", checkasm_check_idctdsp }, | ||
188 | #endif | ||
189 | #if CONFIG_JPEG2000_DECODER | ||
190 | { "jpeg2000dsp", checkasm_check_jpeg2000dsp }, | ||
191 | #endif | ||
192 | #if CONFIG_LLAUDDSP | ||
193 | { "llauddsp", checkasm_check_llauddsp }, | ||
194 | #endif | ||
195 | #if CONFIG_HUFFYUVDSP | ||
196 | { "llviddsp", checkasm_check_llviddsp }, | ||
197 | #endif | ||
198 | #if CONFIG_LLVIDENCDSP | ||
199 | { "llviddspenc", checkasm_check_llviddspenc }, | ||
200 | #endif | ||
201 | #if CONFIG_LPC | ||
202 | { "lpc", checkasm_check_lpc }, | ||
203 | #endif | ||
204 | #if CONFIG_ME_CMP | ||
205 | { "motion", checkasm_check_motion }, | ||
206 | #endif | ||
207 | #if CONFIG_MPEGVIDEOENC | ||
208 | { "mpegvideoencdsp", checkasm_check_mpegvideoencdsp }, | ||
209 | #endif | ||
210 | #if CONFIG_OPUS_DECODER | ||
211 | { "opusdsp", checkasm_check_opusdsp }, | ||
212 | #endif | ||
213 | #if CONFIG_PIXBLOCKDSP | ||
214 | { "pixblockdsp", checkasm_check_pixblockdsp }, | ||
215 | #endif | ||
216 | #if CONFIG_RV34DSP | ||
217 | { "rv34dsp", checkasm_check_rv34dsp }, | ||
218 | #endif | ||
219 | #if CONFIG_RV40_DECODER | ||
220 | { "rv40dsp", checkasm_check_rv40dsp }, | ||
221 | #endif | ||
222 | #if CONFIG_SVQ1_ENCODER | ||
223 | { "svq1enc", checkasm_check_svq1enc }, | ||
224 | #endif | ||
225 | #if CONFIG_TAK_DECODER | ||
226 | { "takdsp", checkasm_check_takdsp }, | ||
227 | #endif | ||
228 | #if CONFIG_UTVIDEO_DECODER | ||
229 | { "utvideodsp", checkasm_check_utvideodsp }, | ||
230 | #endif | ||
231 | #if CONFIG_V210_DECODER | ||
232 | { "v210dec", checkasm_check_v210dec }, | ||
233 | #endif | ||
234 | #if CONFIG_V210_ENCODER | ||
235 | { "v210enc", checkasm_check_v210enc }, | ||
236 | #endif | ||
237 | #if CONFIG_VC1DSP | ||
238 | { "vc1dsp", checkasm_check_vc1dsp }, | ||
239 | #endif | ||
240 | #if CONFIG_VP8DSP | ||
241 | { "vp8dsp", checkasm_check_vp8dsp }, | ||
242 | #endif | ||
243 | #if CONFIG_VP9_DECODER | ||
244 | { "vp9dsp", checkasm_check_vp9dsp }, | ||
245 | #endif | ||
246 | #if CONFIG_VIDEODSP | ||
247 | { "videodsp", checkasm_check_videodsp }, | ||
248 | #endif | ||
249 | #if CONFIG_VORBIS_DECODER | ||
250 | { "vorbisdsp", checkasm_check_vorbisdsp }, | ||
251 | #endif | ||
252 | #if CONFIG_VVC_DECODER | ||
253 | { "vvc_alf", checkasm_check_vvc_alf }, | ||
254 | { "vvc_mc", checkasm_check_vvc_mc }, | ||
255 | #endif | ||
256 | #endif | ||
257 | #if CONFIG_AVFILTER | ||
258 | #if CONFIG_AFIR_FILTER | ||
259 | { "af_afir", checkasm_check_afir }, | ||
260 | #endif | ||
261 | #if CONFIG_BLEND_FILTER | ||
262 | { "vf_blend", checkasm_check_blend }, | ||
263 | #endif | ||
264 | #if CONFIG_BWDIF_FILTER | ||
265 | { "vf_bwdif", checkasm_check_vf_bwdif }, | ||
266 | #endif | ||
267 | #if CONFIG_COLORSPACE_FILTER | ||
268 | { "vf_colorspace", checkasm_check_colorspace }, | ||
269 | #endif | ||
270 | #if CONFIG_EQ_FILTER | ||
271 | { "vf_eq", checkasm_check_vf_eq }, | ||
272 | #endif | ||
273 | #if CONFIG_GBLUR_FILTER | ||
274 | { "vf_gblur", checkasm_check_vf_gblur }, | ||
275 | #endif | ||
276 | #if CONFIG_HFLIP_FILTER | ||
277 | { "vf_hflip", checkasm_check_vf_hflip }, | ||
278 | #endif | ||
279 | #if CONFIG_NLMEANS_FILTER | ||
280 | { "vf_nlmeans", checkasm_check_nlmeans }, | ||
281 | #endif | ||
282 | #if CONFIG_THRESHOLD_FILTER | ||
283 | { "vf_threshold", checkasm_check_vf_threshold }, | ||
284 | #endif | ||
285 | #if CONFIG_SOBEL_FILTER | ||
286 | { "vf_sobel", checkasm_check_vf_sobel }, | ||
287 | #endif | ||
288 | #endif | ||
289 | #if CONFIG_SWSCALE | ||
290 | { "sw_gbrp", checkasm_check_sw_gbrp }, | ||
291 | { "sw_range_convert", checkasm_check_sw_range_convert }, | ||
292 | { "sw_rgb", checkasm_check_sw_rgb }, | ||
293 | { "sw_scale", checkasm_check_sw_scale }, | ||
294 | { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb }, | ||
295 | { "sw_yuv2yuv", checkasm_check_sw_yuv2yuv }, | ||
296 | #endif | ||
297 | #if CONFIG_AVUTIL | ||
298 | { "fixed_dsp", checkasm_check_fixed_dsp }, | ||
299 | { "float_dsp", checkasm_check_float_dsp }, | ||
300 | { "lls", checkasm_check_lls }, | ||
301 | { "av_tx", checkasm_check_av_tx }, | ||
302 | #endif | ||
303 | { NULL } | ||
304 | }; | ||
305 | |||
306 | /* List of cpu flags to check */ | ||
307 | static const struct { | ||
308 | const char *name; | ||
309 | const char *suffix; | ||
310 | int flag; | ||
311 | } cpus[] = { | ||
312 | #if ARCH_AARCH64 | ||
313 | { "ARMV8", "armv8", AV_CPU_FLAG_ARMV8 }, | ||
314 | { "NEON", "neon", AV_CPU_FLAG_NEON }, | ||
315 | { "DOTPROD", "dotprod", AV_CPU_FLAG_DOTPROD }, | ||
316 | { "I8MM", "i8mm", AV_CPU_FLAG_I8MM }, | ||
317 | { "SVE", "sve", AV_CPU_FLAG_SVE }, | ||
318 | { "SVE2", "sve2", AV_CPU_FLAG_SVE2 }, | ||
319 | #elif ARCH_ARM | ||
320 | { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, | ||
321 | { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 }, | ||
322 | { "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2 }, | ||
323 | { "VFP", "vfp", AV_CPU_FLAG_VFP }, | ||
324 | { "VFP_VM", "vfp_vm", AV_CPU_FLAG_VFP_VM }, | ||
325 | { "VFPV3", "vfp3", AV_CPU_FLAG_VFPV3 }, | ||
326 | { "NEON", "neon", AV_CPU_FLAG_NEON }, | ||
327 | #elif ARCH_PPC | ||
328 | { "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC }, | ||
329 | { "VSX", "vsx", AV_CPU_FLAG_VSX }, | ||
330 | { "POWER8", "power8", AV_CPU_FLAG_POWER8 }, | ||
331 | #elif ARCH_RISCV | ||
332 | { "RVI", "rvi", AV_CPU_FLAG_RVI }, | ||
333 | { "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED }, | ||
334 | { "RV_zbb", "rvb_b", AV_CPU_FLAG_RVB_BASIC }, | ||
335 | { "RVB", "rvb", AV_CPU_FLAG_RVB }, | ||
336 | { "RV_zve32x","rvv_i32", AV_CPU_FLAG_RVV_I32 }, | ||
337 | { "RV_zve32f","rvv_f32", AV_CPU_FLAG_RVV_F32 }, | ||
338 | { "RV_zve64x","rvv_i64", AV_CPU_FLAG_RVV_I64 }, | ||
339 | { "RV_zve64d","rvv_f64", AV_CPU_FLAG_RVV_F64 }, | ||
340 | { "RV_zvbb", "rv_zvbb", AV_CPU_FLAG_RV_ZVBB }, | ||
341 | #elif ARCH_MIPS | ||
342 | { "MMI", "mmi", AV_CPU_FLAG_MMI }, | ||
343 | { "MSA", "msa", AV_CPU_FLAG_MSA }, | ||
344 | #elif ARCH_X86 | ||
345 | { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, | ||
346 | { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, | ||
347 | { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, | ||
348 | { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, | ||
349 | { "SSE", "sse", AV_CPU_FLAG_SSE }, | ||
350 | { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, | ||
351 | { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, | ||
352 | { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, | ||
353 | { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, | ||
354 | { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, | ||
355 | { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, | ||
356 | { "AVX", "avx", AV_CPU_FLAG_AVX }, | ||
357 | { "XOP", "xop", AV_CPU_FLAG_XOP }, | ||
358 | { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, | ||
359 | { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, | ||
360 | { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, | ||
361 | { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, | ||
362 | { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL }, | ||
363 | #elif ARCH_LOONGARCH | ||
364 | { "LSX", "lsx", AV_CPU_FLAG_LSX }, | ||
365 | { "LASX", "lasx", AV_CPU_FLAG_LASX }, | ||
366 | #endif | ||
367 | { NULL } | ||
368 | }; | ||
369 | |||
370 | typedef struct CheckasmFuncVersion { | ||
371 | struct CheckasmFuncVersion *next; | ||
372 | void *func; | ||
373 | int ok; | ||
374 | int cpu; | ||
375 | CheckasmPerf perf; | ||
376 | } CheckasmFuncVersion; | ||
377 | |||
378 | /* Binary search tree node */ | ||
379 | typedef struct CheckasmFunc { | ||
380 | struct CheckasmFunc *child[2]; | ||
381 | CheckasmFuncVersion versions; | ||
382 | uint8_t color; /* 0 = red, 1 = black */ | ||
383 | char name[1]; | ||
384 | } CheckasmFunc; | ||
385 | |||
386 | /* Internal state */ | ||
387 | static struct { | ||
388 | CheckasmFunc *funcs; | ||
389 | CheckasmFunc *current_func; | ||
390 | CheckasmFuncVersion *current_func_ver; | ||
391 | const char *current_test_name; | ||
392 | const char *bench_pattern; | ||
393 | int bench_pattern_len; | ||
394 | int num_checked; | ||
395 | int num_failed; | ||
396 | |||
397 | /* perf */ | ||
398 | int nop_time; | ||
399 | int sysfd; | ||
400 | |||
401 | int cpu_flag; | ||
402 | const char *cpu_flag_name; | ||
403 | const char *test_pattern; | ||
404 | int verbose; | ||
405 | int csv; | ||
406 | int tsv; | ||
407 | volatile sig_atomic_t catch_signals; | ||
408 | } state; | ||
409 | |||
410 | /* PRNG state */ | ||
411 | AVLFG checkasm_lfg; | ||
412 | |||
413 | /* float compare support code */ | ||
414 | 4493960 | static int is_negative(union av_intfloat32 u) | |
415 | { | ||
416 | 4493960 | return u.i >> 31; | |
417 | } | ||
418 | |||
419 | 2246980 | int float_near_ulp(float a, float b, unsigned max_ulp) | |
420 | { | ||
421 | union av_intfloat32 x, y; | ||
422 | |||
423 | 2246980 | x.f = a; | |
424 | 2246980 | y.f = b; | |
425 | |||
426 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 2246980 times.
|
2246980 | if (is_negative(x) != is_negative(y)) { |
427 | // handle -0.0 == +0.0 | ||
428 | ✗ | return a == b; | |
429 | } | ||
430 | |||
431 |
1/2✓ Branch 0 taken 2246980 times.
✗ Branch 1 not taken.
|
2246980 | if (llabs((int64_t)x.i - y.i) <= max_ulp) |
432 | 2246980 | return 1; | |
433 | |||
434 | ✗ | return 0; | |
435 | } | ||
436 | |||
437 | 4379 | int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, | |
438 | unsigned len) | ||
439 | { | ||
440 | unsigned i; | ||
441 | |||
442 |
2/2✓ Branch 0 taken 2241860 times.
✓ Branch 1 taken 4379 times.
|
2246239 | for (i = 0; i < len; i++) { |
443 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2241860 times.
|
2241860 | if (!float_near_ulp(a[i], b[i], max_ulp)) |
444 | ✗ | return 0; | |
445 | } | ||
446 | 4379 | return 1; | |
447 | } | ||
448 | |||
449 | 882133 | int float_near_abs_eps(float a, float b, float eps) | |
450 | { | ||
451 | 882133 | float abs_diff = fabsf(a - b); | |
452 |
1/2✓ Branch 0 taken 882133 times.
✗ Branch 1 not taken.
|
882133 | if (abs_diff < eps) |
453 | 882133 | return 1; | |
454 | |||
455 | ✗ | fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps); | |
456 | |||
457 | ✗ | return 0; | |
458 | } | ||
459 | |||
460 | 189 | int float_near_abs_eps_array(const float *a, const float *b, float eps, | |
461 | unsigned len) | ||
462 | { | ||
463 | unsigned i; | ||
464 | |||
465 |
2/2✓ Branch 0 taken 872138 times.
✓ Branch 1 taken 189 times.
|
872327 | for (i = 0; i < len; i++) { |
466 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 872138 times.
|
872138 | if (!float_near_abs_eps(a[i], b[i], eps)) |
467 | ✗ | return 0; | |
468 | } | ||
469 | 189 | return 1; | |
470 | } | ||
471 | |||
472 | 5120 | int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp) | |
473 | { | ||
474 |
1/4✗ Branch 1 not taken.
✓ Branch 2 taken 5120 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
5120 | return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); |
475 | } | ||
476 | |||
477 | ✗ | int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, | |
478 | unsigned max_ulp, unsigned len) | ||
479 | { | ||
480 | unsigned i; | ||
481 | |||
482 | ✗ | for (i = 0; i < len; i++) { | |
483 | ✗ | if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) | |
484 | ✗ | return 0; | |
485 | } | ||
486 | ✗ | return 1; | |
487 | } | ||
488 | |||
489 | 69731 | int double_near_abs_eps(double a, double b, double eps) | |
490 | { | ||
491 | 69731 | double abs_diff = fabs(a - b); | |
492 | |||
493 | 69731 | return abs_diff < eps; | |
494 | } | ||
495 | |||
496 | 11 | int double_near_abs_eps_array(const double *a, const double *b, double eps, | |
497 | unsigned len) | ||
498 | { | ||
499 | unsigned i; | ||
500 | |||
501 |
2/2✓ Branch 0 taken 41068 times.
✓ Branch 1 taken 11 times.
|
41079 | for (i = 0; i < len; i++) { |
502 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 41068 times.
|
41068 | if (!double_near_abs_eps(a[i], b[i], eps)) |
503 | ✗ | return 0; | |
504 | } | ||
505 | 11 | return 1; | |
506 | } | ||
507 | |||
508 | /* Print colored text to stderr if the terminal supports it */ | ||
509 | 576 | static void color_printf(int color, const char *fmt, ...) | |
510 | { | ||
511 | static int use_color = -1; | ||
512 | va_list arg; | ||
513 | |||
514 | #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE | ||
515 | static HANDLE con; | ||
516 | static WORD org_attributes; | ||
517 | |||
518 | if (use_color < 0) { | ||
519 | CONSOLE_SCREEN_BUFFER_INFO con_info; | ||
520 | con = GetStdHandle(STD_ERROR_HANDLE); | ||
521 | if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) { | ||
522 | org_attributes = con_info.wAttributes; | ||
523 | use_color = 1; | ||
524 | } else | ||
525 | use_color = 0; | ||
526 | } | ||
527 | if (use_color) | ||
528 | SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); | ||
529 | #else | ||
530 |
2/2✓ Branch 0 taken 67 times.
✓ Branch 1 taken 509 times.
|
576 | if (use_color < 0) { |
531 | 67 | const char *term = getenv("TERM"); | |
532 |
3/6✓ Branch 0 taken 67 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 67 times.
|
67 | use_color = term && strcmp(term, "dumb") && isatty(2); |
533 | } | ||
534 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 576 times.
|
576 | if (use_color) |
535 | ✗ | fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); | |
536 | #endif | ||
537 | |||
538 | 576 | va_start(arg, fmt); | |
539 | 576 | vfprintf(stderr, fmt, arg); | |
540 | 576 | va_end(arg); | |
541 | |||
542 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 576 times.
|
576 | if (use_color) { |
543 | #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE | ||
544 | SetConsoleTextAttribute(con, org_attributes); | ||
545 | #else | ||
546 | ✗ | fprintf(stderr, "\x1b[0m"); | |
547 | #endif | ||
548 | } | ||
549 | 576 | } | |
550 | |||
551 | /* Deallocate a tree */ | ||
552 | 20880 | static void destroy_func_tree(CheckasmFunc *f) | |
553 | { | ||
554 |
2/2✓ Branch 0 taken 10405 times.
✓ Branch 1 taken 10475 times.
|
20880 | if (f) { |
555 | 10405 | CheckasmFuncVersion *v = f->versions.next; | |
556 |
2/2✓ Branch 0 taken 13454 times.
✓ Branch 1 taken 10405 times.
|
23859 | while (v) { |
557 | 13454 | CheckasmFuncVersion *next = v->next; | |
558 | 13454 | free(v); | |
559 | 13454 | v = next; | |
560 | } | ||
561 | |||
562 | 10405 | destroy_func_tree(f->child[0]); | |
563 | 10405 | destroy_func_tree(f->child[1]); | |
564 | 10405 | free(f); | |
565 | } | ||
566 | 20880 | } | |
567 | |||
568 | /* Allocate a zero-initialized block, clean up and exit on failure */ | ||
569 | 23859 | static void *checkasm_malloc(size_t size) | |
570 | { | ||
571 | 23859 | void *ptr = calloc(1, size); | |
572 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23859 times.
|
23859 | if (!ptr) { |
573 | ✗ | fprintf(stderr, "checkasm: malloc failed\n"); | |
574 | ✗ | destroy_func_tree(state.funcs); | |
575 | ✗ | exit(1); | |
576 | } | ||
577 | 23859 | return ptr; | |
578 | } | ||
579 | |||
580 | /* Get the suffix of the specified cpu flag */ | ||
581 | ✗ | static const char *cpu_suffix(int cpu) | |
582 | { | ||
583 | ✗ | int i = FF_ARRAY_ELEMS(cpus); | |
584 | |||
585 | ✗ | while (--i >= 0) | |
586 | ✗ | if (cpu & cpus[i].flag) | |
587 | ✗ | return cpus[i].suffix; | |
588 | |||
589 | ✗ | return "c"; | |
590 | } | ||
591 | |||
592 | ✗ | static int cmp_nop(const void *a, const void *b) | |
593 | { | ||
594 | ✗ | return *(const uint16_t*)a - *(const uint16_t*)b; | |
595 | } | ||
596 | |||
597 | /* Measure the overhead of the timing code (in decicycles) */ | ||
598 | ✗ | static int measure_nop_time(void) | |
599 | { | ||
600 | uint16_t nops[10000]; | ||
601 | ✗ | int i, nop_sum = 0; | |
602 | ✗ | av_unused const int sysfd = state.sysfd; | |
603 | |||
604 | ✗ | uint64_t t = 0; | |
605 | ✗ | for (i = 0; i < 10000; i++) { | |
606 | ✗ | PERF_START(t); | |
607 | ✗ | PERF_STOP(t); | |
608 | ✗ | nops[i] = t; | |
609 | } | ||
610 | |||
611 | ✗ | qsort(nops, 10000, sizeof(uint16_t), cmp_nop); | |
612 | ✗ | for (i = 2500; i < 7500; i++) | |
613 | ✗ | nop_sum += nops[i]; | |
614 | |||
615 | ✗ | return nop_sum / 500; | |
616 | } | ||
617 | |||
618 | ✗ | static inline double avg_cycles_per_call(const CheckasmPerf *const p) | |
619 | { | ||
620 | ✗ | if (p->iterations) { | |
621 | ✗ | const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time; | |
622 | ✗ | if (cycles > 0.0) | |
623 | ✗ | return cycles / 4.0; /* 4 calls per iteration */ | |
624 | } | ||
625 | ✗ | return 0.0; | |
626 | } | ||
627 | |||
628 | /* Print benchmark results */ | ||
629 | ✗ | static void print_benchs(CheckasmFunc *f) | |
630 | { | ||
631 | ✗ | if (f) { | |
632 | ✗ | print_benchs(f->child[0]); | |
633 | |||
634 | /* Only print functions with at least one assembly version */ | ||
635 | ✗ | if (f->versions.cpu || f->versions.next) { | |
636 | ✗ | CheckasmFuncVersion *v = &f->versions; | |
637 | ✗ | const CheckasmPerf *p = &v->perf; | |
638 | ✗ | const double baseline = avg_cycles_per_call(p); | |
639 | double decicycles; | ||
640 | do { | ||
641 | ✗ | if (p->iterations) { | |
642 | ✗ | p = &v->perf; | |
643 | ✗ | decicycles = avg_cycles_per_call(p); | |
644 | ✗ | if (state.csv || state.tsv) { | |
645 | ✗ | const char sep = state.csv ? ',' : '\t'; | |
646 | ✗ | printf("%s%c%s%c%.1f\n", f->name, sep, | |
647 | cpu_suffix(v->cpu), sep, | ||
648 | decicycles / 10.0); | ||
649 | } else { | ||
650 | ✗ | const int pad_length = 10 + 50 - | |
651 | ✗ | printf("%s_%s:", f->name, cpu_suffix(v->cpu)); | |
652 | ✗ | const double ratio = decicycles ? | |
653 | ✗ | baseline / decicycles : 0.0; | |
654 | ✗ | printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0), | |
655 | decicycles / 10.0, ratio); | ||
656 | } | ||
657 | } | ||
658 | ✗ | } while ((v = v->next)); | |
659 | } | ||
660 | |||
661 | ✗ | print_benchs(f->child[1]); | |
662 | } | ||
663 | ✗ | } | |
664 | |||
665 | /* ASCIIbetical sort except preserving natural order for numbers */ | ||
666 | 1348075 | static int cmp_func_names(const char *a, const char *b) | |
667 | { | ||
668 | 1348075 | const char *start = a; | |
669 | int ascii_diff, digit_diff; | ||
670 | |||
671 |
4/4✓ Branch 0 taken 23253724 times.
✓ Branch 1 taken 1216585 times.
✓ Branch 2 taken 23122234 times.
✓ Branch 3 taken 131490 times.
|
24470309 | for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++); |
672 |
4/4✓ Branch 0 taken 1363066 times.
✓ Branch 1 taken 1130845 times.
✓ Branch 2 taken 1145836 times.
✓ Branch 3 taken 217230 times.
|
2493911 | for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); |
673 | |||
674 |
6/6✓ Branch 0 taken 1294284 times.
✓ Branch 1 taken 53791 times.
✓ Branch 2 taken 915729 times.
✓ Branch 3 taken 378555 times.
✓ Branch 4 taken 328201 times.
✓ Branch 5 taken 587528 times.
|
1348075 | if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b))) |
675 | 328201 | return digit_diff; | |
676 | |||
677 | 1019874 | return ascii_diff; | |
678 | } | ||
679 | |||
680 | /* Perform a tree rotation in the specified direction and return the new root */ | ||
681 | 9761 | static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) | |
682 | { | ||
683 | 9761 | CheckasmFunc *r = f->child[dir^1]; | |
684 | 9761 | f->child[dir^1] = r->child[dir]; | |
685 | 9761 | r->child[dir] = f; | |
686 | 9761 | r->color = f->color; | |
687 | 9761 | f->color = 0; | |
688 | 9761 | return r; | |
689 | } | ||
690 | |||
691 | #define is_red(f) ((f) && !(f)->color) | ||
692 | |||
693 | /* Balance a left-leaning red-black tree at the specified node */ | ||
694 | 96497 | static void balance_tree(CheckasmFunc **root) | |
695 | { | ||
696 | 96497 | CheckasmFunc *f = *root; | |
697 | |||
698 |
8/8✓ Branch 0 taken 91507 times.
✓ Branch 1 taken 4990 times.
✓ Branch 2 taken 37287 times.
✓ Branch 3 taken 54220 times.
✓ Branch 4 taken 33868 times.
✓ Branch 5 taken 3419 times.
✓ Branch 6 taken 7390 times.
✓ Branch 7 taken 26478 times.
|
96497 | if (is_red(f->child[0]) && is_red(f->child[1])) { |
699 | 7390 | f->color ^= 1; | |
700 | 7390 | f->child[0]->color = f->child[1]->color = 1; | |
701 | } | ||
702 | |||
703 |
7/8✓ Branch 0 taken 91507 times.
✓ Branch 1 taken 4990 times.
✓ Branch 2 taken 61610 times.
✓ Branch 3 taken 29897 times.
✓ Branch 4 taken 66600 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 7372 times.
✓ Branch 7 taken 59228 times.
|
96497 | if (!is_red(f->child[0]) && is_red(f->child[1])) |
704 | 7372 | *root = rotate_tree(f, 0); /* Rotate left */ | |
705 |
7/8✓ Branch 0 taken 89125 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 29897 times.
✓ Branch 3 taken 59228 times.
✓ Branch 4 taken 27615 times.
✓ Branch 5 taken 2282 times.
✓ Branch 6 taken 2389 times.
✓ Branch 7 taken 25226 times.
|
89125 | else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) |
706 | 2389 | *root = rotate_tree(f, 1); /* Rotate right */ | |
707 | 96497 | } | |
708 | |||
709 | /* Get a node with the specified name, creating it if it doesn't exist */ | ||
710 | 1358480 | static CheckasmFunc *get_func(CheckasmFunc **root, const char *name) | |
711 | { | ||
712 | 1358480 | CheckasmFunc *f = *root; | |
713 | |||
714 |
2/2✓ Branch 0 taken 1348075 times.
✓ Branch 1 taken 10405 times.
|
1358480 | if (f) { |
715 | /* Search the tree for a matching node */ | ||
716 | 1348075 | int cmp = cmp_func_names(name, f->name); | |
717 |
2/2✓ Branch 0 taken 1216585 times.
✓ Branch 1 taken 131490 times.
|
1348075 | if (cmp) { |
718 | 1216585 | f = get_func(&f->child[cmp > 0], name); | |
719 | |||
720 | /* Rebalance the tree on the way up if a new node was inserted */ | ||
721 |
2/2✓ Branch 0 taken 96497 times.
✓ Branch 1 taken 1120088 times.
|
1216585 | if (!f->versions.func) |
722 | 96497 | balance_tree(root); | |
723 | } | ||
724 | } else { | ||
725 | /* Allocate and insert a new node into the tree */ | ||
726 | 10405 | int name_length = strlen(name); | |
727 | 10405 | f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); | |
728 | 10405 | memcpy(f->name, name, name_length + 1); | |
729 | } | ||
730 | |||
731 | 1358480 | return f; | |
732 | } | ||
733 | |||
734 | checkasm_context checkasm_context_buf; | ||
735 | |||
736 | /* Crash handling: attempt to catch crashes and handle them | ||
737 | * gracefully instead of just aborting abruptly. */ | ||
738 | #ifdef _WIN32 | ||
739 | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) | ||
740 | static LONG NTAPI signal_handler(EXCEPTION_POINTERS *e) { | ||
741 | int s; | ||
742 | |||
743 | if (!state.catch_signals) | ||
744 | return EXCEPTION_CONTINUE_SEARCH; | ||
745 | |||
746 | switch (e->ExceptionRecord->ExceptionCode) { | ||
747 | case EXCEPTION_FLT_DIVIDE_BY_ZERO: | ||
748 | case EXCEPTION_INT_DIVIDE_BY_ZERO: | ||
749 | s = SIGFPE; | ||
750 | break; | ||
751 | case EXCEPTION_ILLEGAL_INSTRUCTION: | ||
752 | case EXCEPTION_PRIV_INSTRUCTION: | ||
753 | s = SIGILL; | ||
754 | break; | ||
755 | case EXCEPTION_ACCESS_VIOLATION: | ||
756 | case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: | ||
757 | case EXCEPTION_DATATYPE_MISALIGNMENT: | ||
758 | case EXCEPTION_STACK_OVERFLOW: | ||
759 | s = SIGSEGV; | ||
760 | break; | ||
761 | case EXCEPTION_IN_PAGE_ERROR: | ||
762 | s = SIGBUS; | ||
763 | break; | ||
764 | default: | ||
765 | return EXCEPTION_CONTINUE_SEARCH; | ||
766 | } | ||
767 | state.catch_signals = 0; | ||
768 | checkasm_load_context(s); | ||
769 | return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */ | ||
770 | } | ||
771 | #endif | ||
772 | #else | ||
773 | static void signal_handler(int s); | ||
774 | |||
775 | static const struct sigaction signal_handler_act = { | ||
776 | .sa_handler = signal_handler, | ||
777 | .sa_flags = SA_RESETHAND, | ||
778 | }; | ||
779 | |||
780 | ✗ | static void signal_handler(int s) { | |
781 | ✗ | if (state.catch_signals) { | |
782 | ✗ | state.catch_signals = 0; | |
783 | ✗ | sigaction(s, &signal_handler_act, NULL); | |
784 | ✗ | checkasm_load_context(s); | |
785 | } | ||
786 | ✗ | } | |
787 | #endif | ||
788 | |||
789 | /* Compares a string with a wildcard pattern. */ | ||
790 | 63700 | static int wildstrcmp(const char *str, const char *pattern) | |
791 | { | ||
792 | 63700 | const char *wild = strchr(pattern, '*'); | |
793 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 63700 times.
|
63700 | if (wild) { |
794 | ✗ | const size_t len = wild - pattern; | |
795 | ✗ | if (strncmp(str, pattern, len)) return 1; | |
796 | ✗ | while (*++wild == '*'); | |
797 | ✗ | if (!*wild) return 0; | |
798 | ✗ | str += len; | |
799 | ✗ | while (*str && wildstrcmp(str, wild)) str++; | |
800 | ✗ | return !*str; | |
801 | } | ||
802 | 63700 | return strcmp(str, pattern); | |
803 | } | ||
804 | |||
805 | /* Perform tests and benchmarks for the specified cpu flag if supported by the host */ | ||
806 | 1330 | static void check_cpu_flag(const char *name, int flag) | |
807 | { | ||
808 | 1330 | int old_cpu_flag = state.cpu_flag; | |
809 | |||
810 | 1330 | flag |= old_cpu_flag; | |
811 | 1330 | av_force_cpu_flags(-1); | |
812 | 1330 | state.cpu_flag = flag & av_get_cpu_flags(); | |
813 | 1330 | av_force_cpu_flags(state.cpu_flag); | |
814 | |||
815 |
4/4✓ Branch 0 taken 1260 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 840 times.
✓ Branch 3 taken 420 times.
|
1330 | if (!flag || state.cpu_flag != old_cpu_flag) { |
816 | int i; | ||
817 | |||
818 | 910 | state.cpu_flag_name = name; | |
819 |
2/2✓ Branch 0 taken 63700 times.
✓ Branch 1 taken 910 times.
|
64610 | for (i = 0; tests[i].func; i++) { |
820 |
3/4✓ Branch 0 taken 63700 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 62790 times.
✓ Branch 4 taken 910 times.
|
63700 | if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern)) |
821 | 62790 | continue; | |
822 | 910 | state.current_test_name = tests[i].name; | |
823 | 910 | tests[i].func(); | |
824 | } | ||
825 | } | ||
826 | 1330 | } | |
827 | |||
828 | /* Print the name of the current CPU flag, but only do it once */ | ||
829 | 381 | static void print_cpu_name(void) | |
830 | { | ||
831 |
2/2✓ Branch 0 taken 195 times.
✓ Branch 1 taken 186 times.
|
381 | if (state.cpu_flag_name) { |
832 | 195 | color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); | |
833 | 195 | state.cpu_flag_name = NULL; | |
834 | } | ||
835 | 381 | } | |
836 | |||
837 | #if CONFIG_LINUX_PERF | ||
/* Set up benchmarking through the Linux perf_event_open() system call.
 * Opens a hardware CPU-cycle counter, initially disabled, that excludes
 * kernel and hypervisor time (and guest time on non-x86 builds) and stores
 * the resulting descriptor in state.sysfd.
 * Returns 0 on success, -1 (after perror()) if the counter cannot be opened. */
static int bench_init_linux(void)
{
    struct perf_event_attr attr = {
        .type           = PERF_TYPE_HARDWARE,
        .size           = sizeof(struct perf_event_attr),
        .config         = PERF_COUNT_HW_CPU_CYCLES,
        .disabled       = 1, // start counting only on demand
        .exclude_kernel = 1,
        .exclude_hv     = 1,
#if !ARCH_X86
        .exclude_guest  = 1,
#endif
    };

    fprintf(stderr, "benchmarking with Linux Perf Monitoring API\n");

    /* Arguments after attr are pid, cpu, group_fd and flags; see the
     * perf_event_open(2) manual page for their meaning. */
    state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (state.sysfd == -1) {
        perror("perf_event_open");
        return -1;
    }
    return 0;
}
861 | #elif CONFIG_MACOS_KPERF | ||
/* Set up benchmarking through the macOS kperf backend by calling
 * ff_kperf_init(). Always returns 0; no failure is reported from here. */
static int bench_init_kperf(void)
{
    ff_kperf_init();
    return 0;
}
867 | #else | ||
/* Set up benchmarking with FFmpeg's own AV_READ_TIME() timer.
 * When the macro exists it is executed once with signal catching enabled as
 * a probe. Returns 0 if the probe completes, -1 if the else branch is taken
 * or if AV_READ_TIME is not defined for this platform. */
static int bench_init_ffmpeg(void)
{
#ifdef AV_READ_TIME
    /* NOTE(review): checkasm_save_context() presumably behaves like setjmp(),
     * yielding non-zero when the signal handler jumps back here after the
     * timer faulted; confirm against its definition. */
    if (!checkasm_save_context()) {
        checkasm_set_signal_handler_state(1);
        AV_READ_TIME();
        checkasm_set_signal_handler_state(0);
    } else {
        fprintf(stderr, "checkasm: unable to execute platform specific timer\n");
        return -1;
    }
    fprintf(stderr, "benchmarking with native FFmpeg timers\n");
    return 0;
#else
    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
    return -1;
#endif
}
886 | #endif | ||
887 | |||
888 | ✗ | static int bench_init(void) | |
889 | { | ||
890 | #if CONFIG_LINUX_PERF | ||
891 | int ret = bench_init_linux(); | ||
892 | #elif CONFIG_MACOS_KPERF | ||
893 | int ret = bench_init_kperf(); | ||
894 | #else | ||
895 | ✗ | int ret = bench_init_ffmpeg(); | |
896 | #endif | ||
897 | ✗ | if (ret < 0) | |
898 | ✗ | return ret; | |
899 | |||
900 | ✗ | state.nop_time = measure_nop_time(); | |
901 | ✗ | fprintf(stderr, "nop: %d.%d\n", state.nop_time/10, state.nop_time%10); | |
902 | ✗ | return 0; | |
903 | } | ||
904 | |||
/* Release benchmarking resources. Only the Linux perf backend holds one:
 * the descriptor stored in state.sysfd.
 * NOTE(review): main() calls this unconditionally, so with CONFIG_LINUX_PERF
 * the descriptor is closed even when --bench was never given; verify what
 * state.sysfd holds in that case. */
static void bench_uninit(void)
{
#if CONFIG_LINUX_PERF
    close(state.sysfd);
#endif
}
911 | |||
/* Print the command line synopsis to stderr, using `path` as the program
 * name. Always returns 1 so that callers can write `return usage(...)` to
 * exit with a failure status. */
static int usage(const char *path)
{
    fprintf(stderr,
            "Usage: %s [options...] [seed]\n"
            " --test=<pattern> Run specific test.\n"
            " --bench Run benchmark.\n"
            " --csv, --tsv Output results in rows of comma or tab separated values.\n"
            " --runs=<ptwo> Manual number of benchmark iterations to run 2**<ptwo>.\n"
            " --verbose Increase verbosity.\n",
            path);
    return 1;
}
924 | |||
925 | 70 | int main(int argc, char *argv[]) | |
926 | { | ||
927 | 70 | unsigned int seed = av_get_random_seed(); | |
928 | 70 | int i, ret = 0; | |
929 | 70 | char arch_info_buf[50] = ""; | |
930 | |||
931 | #ifdef _WIN32 | ||
932 | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) | ||
933 | AddVectoredExceptionHandler(0, signal_handler); | ||
934 | #endif | ||
935 | #else | ||
936 | 70 | sigaction(SIGBUS, &signal_handler_act, NULL); | |
937 | 70 | sigaction(SIGFPE, &signal_handler_act, NULL); | |
938 | 70 | sigaction(SIGILL, &signal_handler_act, NULL); | |
939 | 70 | sigaction(SIGSEGV, &signal_handler_act, NULL); | |
940 | #endif | ||
941 | #if HAVE_PRCTL && defined(PR_SET_UNALIGN) | ||
942 | 70 | prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS); | |
943 | #endif | ||
944 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL | ||
945 | if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) | ||
946 | checkasm_checked_call = checkasm_checked_call_vfp; | ||
947 | #endif | ||
948 | |||
949 |
2/4✓ Branch 0 taken 70 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 70 times.
|
70 | if (!tests[0].func || !cpus[0].flag) { |
950 | ✗ | fprintf(stderr, "checkasm: no tests to perform\n"); | |
951 | ✗ | return 0; | |
952 | } | ||
953 | |||
954 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 70 times.
|
140 | for (i = 1; i < argc; i++) { |
955 | 70 | const char *arg = argv[i]; | |
956 | unsigned long l; | ||
957 | char *end; | ||
958 | |||
959 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (!strncmp(arg, "--bench", 7)) { |
960 | ✗ | if (bench_init() < 0) | |
961 | ✗ | return 1; | |
962 | ✗ | if (arg[7] == '=') { | |
963 | ✗ | state.bench_pattern = arg + 8; | |
964 | ✗ | state.bench_pattern_len = strlen(state.bench_pattern); | |
965 | } else | ||
966 | ✗ | state.bench_pattern = "*"; | |
967 |
1/2✓ Branch 0 taken 70 times.
✗ Branch 1 not taken.
|
70 | } else if (!strncmp(arg, "--test=", 7)) { |
968 | 70 | state.test_pattern = arg + 7; | |
969 | ✗ | } else if (!strcmp(arg, "--csv")) { | |
970 | ✗ | state.csv = 1; state.tsv = 0; | |
971 | ✗ | } else if (!strcmp(arg, "--tsv")) { | |
972 | ✗ | state.csv = 0; state.tsv = 1; | |
973 | ✗ | } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) { | |
974 | ✗ | state.verbose = 1; | |
975 | ✗ | } else if (!strncmp(arg, "--runs=", 7)) { | |
976 | ✗ | l = strtoul(arg + 7, &end, 10); | |
977 | ✗ | if (*end == '\0') { | |
978 | ✗ | if (l > 30) { | |
979 | ✗ | fprintf(stderr, "checkasm: error: runs exponent must be within the range 0 <= 30\n"); | |
980 | ✗ | usage(argv[0]); | |
981 | } | ||
982 | ✗ | bench_runs = 1U << l; | |
983 | } else { | ||
984 | ✗ | return usage(argv[0]); | |
985 | } | ||
986 | ✗ | } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX && | |
987 | ✗ | *end == '\0') { | |
988 | ✗ | seed = l; | |
989 | } else { | ||
990 | ✗ | return usage(argv[0]); | |
991 | } | ||
992 | } | ||
993 | |||
994 | #if ARCH_AARCH64 && HAVE_SVE | ||
995 | if (have_sve(av_get_cpu_flags())) | ||
996 | snprintf(arch_info_buf, sizeof(arch_info_buf), | ||
997 | "SVE %d bits, ", 8 * ff_aarch64_sve_length()); | ||
998 | #elif ARCH_RISCV && HAVE_RVV | ||
999 | if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32) | ||
1000 | snprintf(arch_info_buf, sizeof (arch_info_buf), | ||
1001 | "%zu-bit vectors, ", 8 * ff_get_rv_vlenb()); | ||
1002 | #endif | ||
1003 | 70 | fprintf(stderr, "checkasm: %susing random seed %u\n", arch_info_buf, seed); | |
1004 | 70 | av_lfg_init(&checkasm_lfg, seed); | |
1005 | |||
1006 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (state.bench_pattern) |
1007 | ✗ | fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", bench_runs, av_log2(bench_runs)); | |
1008 | |||
1009 | 70 | check_cpu_flag(NULL, 0); | |
1010 |
2/2✓ Branch 0 taken 1260 times.
✓ Branch 1 taken 70 times.
|
1330 | for (i = 0; cpus[i].flag; i++) |
1011 | 1260 | check_cpu_flag(cpus[i].name, cpus[i].flag); | |
1012 | |||
1013 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (state.num_failed) { |
1014 | ✗ | fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); | |
1015 | ✗ | ret = 1; | |
1016 | } else { | ||
1017 | 70 | fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); | |
1018 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (state.bench_pattern) { |
1019 | ✗ | print_benchs(state.funcs); | |
1020 | } | ||
1021 | } | ||
1022 | |||
1023 | 70 | destroy_func_tree(state.funcs); | |
1024 | 70 | bench_uninit(); | |
1025 | 70 | return ret; | |
1026 | } | ||
1027 | |||
1028 | /* Decide whether or not the specified function needs to be tested and | ||
1029 | * allocate/initialize data structures if needed. Returns a pointer to a | ||
1030 | * reference function if the function should be tested, otherwise NULL */ | ||
1031 | 141895 | void *checkasm_check_func(void *func, const char *name, ...) | |
1032 | { | ||
1033 | char name_buf[256]; | ||
1034 | 141895 | void *ref = func; | |
1035 | CheckasmFuncVersion *v; | ||
1036 | int name_length; | ||
1037 | va_list arg; | ||
1038 | |||
1039 | 141895 | va_start(arg, name); | |
1040 | 141895 | name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); | |
1041 | 141895 | va_end(arg); | |
1042 | |||
1043 |
3/6✓ Branch 0 taken 141895 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 141895 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 141895 times.
|
141895 | if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) |
1044 | ✗ | return NULL; | |
1045 | |||
1046 | 141895 | state.current_func = get_func(&state.funcs, name_buf); | |
1047 | 141895 | state.funcs->color = 1; | |
1048 | 141895 | v = &state.current_func->versions; | |
1049 | |||
1050 |
2/2✓ Branch 0 taken 131490 times.
✓ Branch 1 taken 10405 times.
|
141895 | if (v->func) { |
1051 | CheckasmFuncVersion *prev; | ||
1052 | do { | ||
1053 | /* Only test functions that haven't already been tested */ | ||
1054 |
2/2✓ Branch 0 taken 118036 times.
✓ Branch 1 taken 60623 times.
|
178659 | if (v->func == func) |
1055 | 118036 | return NULL; | |
1056 | |||
1057 |
1/2✓ Branch 0 taken 60623 times.
✗ Branch 1 not taken.
|
60623 | if (v->ok) |
1058 | 60623 | ref = v->func; | |
1059 | |||
1060 | 60623 | prev = v; | |
1061 |
2/2✓ Branch 0 taken 47169 times.
✓ Branch 1 taken 13454 times.
|
60623 | } while ((v = v->next)); |
1062 | |||
1063 | 13454 | v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); | |
1064 | } | ||
1065 | |||
1066 | 23859 | v->func = func; | |
1067 | 23859 | v->ok = 1; | |
1068 | 23859 | v->cpu = state.cpu_flag; | |
1069 | 23859 | state.current_func_ver = v; | |
1070 | |||
1071 |
2/2✓ Branch 0 taken 13454 times.
✓ Branch 1 taken 10405 times.
|
23859 | if (state.cpu_flag) |
1072 | 13454 | state.num_checked++; | |
1073 | |||
1074 | 23859 | return ref; | |
1075 | } | ||
1076 | |||
1077 | /* Decide whether or not the current function needs to be benchmarked */ | ||
1078 | 38694 | int checkasm_bench_func(void) | |
1079 | { | ||
1080 |
2/6✓ Branch 0 taken 38694 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 38694 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
38694 | return !state.num_failed && state.bench_pattern && |
1081 | ✗ | !wildstrcmp(state.current_func->name, state.bench_pattern); | |
1082 | } | ||
1083 | |||
1084 | /* Indicate that the current test has failed */ | ||
1085 | ✗ | void checkasm_fail_func(const char *msg, ...) | |
1086 | { | ||
1087 | ✗ | if (state.current_func_ver && state.current_func_ver->cpu && | |
1088 | ✗ | state.current_func_ver->ok) | |
1089 | { | ||
1090 | va_list arg; | ||
1091 | |||
1092 | ✗ | print_cpu_name(); | |
1093 | ✗ | fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); | |
1094 | ✗ | va_start(arg, msg); | |
1095 | ✗ | vfprintf(stderr, msg, arg); | |
1096 | ✗ | va_end(arg); | |
1097 | ✗ | fprintf(stderr, ")\n"); | |
1098 | |||
1099 | ✗ | state.current_func_ver->ok = 0; | |
1100 | ✗ | state.num_failed++; | |
1101 | } | ||
1102 | ✗ | } | |
1103 | |||
/* Enable (non-zero) or disable (zero) catching of fault signals by storing
 * `enabled` in state.catch_signals, which the signal handler consults. */
void checkasm_set_signal_handler_state(int enabled) {
    state.catch_signals = enabled;
}
1107 | |||
/* Report a caught fatal signal as a test failure. A value of 0 means that no
 * signal occurred and nothing is reported. The argument is returned
 * unchanged in either case. */
int checkasm_handle_signal(int s) {
    if (!s)
        return s;

#ifdef __GLIBC__
    checkasm_fail_func("fatal signal %d: %s", s, strsignal(s));
#else
    /* Without strsignal(), fall back to a fixed description per signal. */
    checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
                       s == SIGILL ? "illegal instruction" :
                       s == SIGBUS ? "bus error" :
                                     "segmentation fault");
#endif
    return s;
}
1121 | |||
/* Get the benchmark context of the current function.
 * The context embedded in the current function version is zeroed, given the
 * descriptor from state.sysfd and returned, so every call starts from a
 * fresh set of counters. */
CheckasmPerf *checkasm_get_perf_context(void)
{
    CheckasmPerf *perf = &state.current_func_ver->perf;
    memset(perf, 0, sizeof(*perf));
    perf->sysfd = state.sysfd;
    return perf;
}
1130 | |||
1131 | /* Print the outcome of all tests performed since the last time this function was called */ | ||
1132 | 2514 | void checkasm_report(const char *name, ...) | |
1133 | { | ||
1134 | static int prev_checked, prev_failed, max_length; | ||
1135 | |||
1136 |
2/2✓ Branch 0 taken 381 times.
✓ Branch 1 taken 2133 times.
|
2514 | if (state.num_checked > prev_checked) { |
1137 | 381 | int pad_length = max_length + 4; | |
1138 | va_list arg; | ||
1139 | |||
1140 | 381 | print_cpu_name(); | |
1141 | 381 | pad_length -= fprintf(stderr, " - %s.", state.current_test_name); | |
1142 | 381 | va_start(arg, name); | |
1143 | 381 | pad_length -= vfprintf(stderr, name, arg); | |
1144 | 381 | va_end(arg); | |
1145 | 381 | fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); | |
1146 | |||
1147 |
1/2✓ Branch 0 taken 381 times.
✗ Branch 1 not taken.
|
381 | if (state.num_failed == prev_failed) |
1148 | 381 | color_printf(COLOR_GREEN, "OK"); | |
1149 | else | ||
1150 | ✗ | color_printf(COLOR_RED, "FAILED"); | |
1151 | 381 | fprintf(stderr, "]\n"); | |
1152 | |||
1153 | 381 | prev_checked = state.num_checked; | |
1154 | 381 | prev_failed = state.num_failed; | |
1155 |
2/2✓ Branch 0 taken 198 times.
✓ Branch 1 taken 1935 times.
|
2133 | } else if (!state.cpu_flag) { |
1156 | /* Calculate the amount of padding required to make the output vertically aligned */ | ||
1157 | 198 | int length = strlen(state.current_test_name); | |
1158 | va_list arg; | ||
1159 | |||
1160 | 198 | va_start(arg, name); | |
1161 | 198 | length += vsnprintf(NULL, 0, name, arg); | |
1162 | 198 | va_end(arg); | |
1163 | |||
1164 |
2/2✓ Branch 0 taken 122 times.
✓ Branch 1 taken 76 times.
|
198 | if (length > max_length) |
1165 | 122 | max_length = length; | |
1166 | } | ||
1167 | 2514 | } | |
1168 | |||
/* Define checkasm_check_<type>(), which compares two w x h blocks of `type`
 * elements row by row with memcmp().
 *
 * stride1/stride2 are given in bytes and converted to elements internally.
 * The generated function returns 0 when all rows are identical. Otherwise it
 * records a failure tagged with file:line and returns 1; in verbose mode it
 * first dumps every row of both buffers (formatted with `fmt`) followed by a
 * per-element map in which 'x' marks a difference and '.' a match. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *file, int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          int w, int h, const char *name) \
{ \
    int y = 0; \
    stride1 /= sizeof(*buf1); \
    stride2 /= sizeof(*buf2); \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
            break; \
    if (y == h) \
        return 0; \
    checkasm_fail_func("%s:%d", file, line); \
    if (!state.verbose) \
        return 1; \
    fprintf(stderr, "%s:\n", name); \
    while (h--) { \
        for (int x = 0; x < w; x++) \
            fprintf(stderr, " " fmt, buf1[x]); \
        fprintf(stderr, " "); \
        for (int x = 0; x < w; x++) \
            fprintf(stderr, " " fmt, buf2[x]); \
        fprintf(stderr, " "); \
        for (int x = 0; x < w; x++) \
            fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
        buf1 += stride1; \
        buf2 += stride2; \
        fprintf(stderr, "\n"); \
    } \
    return 1; \
}
1202 | |||
/* Buffer comparison helpers, one per element type: unsigned types are dumped
 * as zero-padded hexadecimal, signed types as right-aligned decimal. */
DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
1208 |