| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Assembly testing and benchmarking tool | ||
| 3 | * Copyright (c) 2015 Henrik Gramner | ||
| 4 | * Copyright (c) 2008 Loren Merritt | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | * GNU General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License along | ||
| 19 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 21 | * | ||
| 22 | * Copyright © 2018, VideoLAN and dav1d authors | ||
| 23 | * Copyright © 2018, Two Orioles, LLC | ||
| 24 | * All rights reserved. | ||
| 25 | * | ||
| 26 | * Redistribution and use in source and binary forms, with or without | ||
| 27 | * modification, are permitted provided that the following conditions are met: | ||
| 28 | * | ||
| 29 | * 1. Redistributions of source code must retain the above copyright notice, this | ||
| 30 | * list of conditions and the following disclaimer. | ||
| 31 | * | ||
| 32 | * 2. Redistributions in binary form must reproduce the above copyright notice, | ||
| 33 | * this list of conditions and the following disclaimer in the documentation | ||
| 34 | * and/or other materials provided with the distribution. | ||
| 35 | * | ||
| 36 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 37 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 38 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 39 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
| 40 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 41 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 42 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
| 43 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 44 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 45 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 46 | */ | ||
| 47 | |||
| 48 | #include "config.h" | ||
| 49 | #include "config_components.h" | ||
| 50 | |||
| 51 | #ifndef _GNU_SOURCE | ||
| 52 | # define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() | ||
| 53 | #endif | ||
| 54 | |||
| 55 | #include <signal.h> | ||
| 56 | #include <stdarg.h> | ||
| 57 | #include <stdio.h> | ||
| 58 | #include <stdlib.h> | ||
| 59 | #include <string.h> | ||
| 60 | #include "checkasm.h" | ||
| 61 | #include "libavutil/avassert.h" | ||
| 62 | #include "libavutil/common.h" | ||
| 63 | #include "libavutil/cpu.h" | ||
| 64 | #include "libavutil/intfloat.h" | ||
| 65 | #include "libavutil/random_seed.h" | ||
| 66 | |||
| 67 | #if HAVE_IO_H | ||
| 68 | #include <io.h> | ||
| 69 | #endif | ||
| 70 | #if HAVE_PRCTL | ||
| 71 | #include <sys/prctl.h> | ||
| 72 | #endif | ||
| 73 | |||
| 74 | #if defined(_WIN32) && !defined(SIGBUS) | ||
| 75 | /* non-standard, use the same value as mingw-w64 */ | ||
| 76 | #define SIGBUS 10 | ||
| 77 | #endif | ||
| 78 | |||
| 79 | #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE | ||
| 80 | #include <windows.h> | ||
| 81 | #define COLOR_RED FOREGROUND_RED | ||
| 82 | #define COLOR_GREEN FOREGROUND_GREEN | ||
| 83 | #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) | ||
| 84 | #else | ||
| 85 | #define COLOR_RED 1 | ||
| 86 | #define COLOR_GREEN 2 | ||
| 87 | #define COLOR_YELLOW 3 | ||
| 88 | #endif | ||
| 89 | |||
| 90 | #if HAVE_UNISTD_H | ||
| 91 | #include <unistd.h> | ||
| 92 | #endif | ||
| 93 | |||
| 94 | #if !HAVE_ISATTY | ||
| 95 | #define isatty(fd) 1 | ||
| 96 | #endif | ||
| 97 | |||
| 98 | #if ARCH_AARCH64 | ||
| 99 | #include "libavutil/aarch64/cpu.h" | ||
| 100 | #elif ARCH_RISCV | ||
| 101 | #include "libavutil/riscv/cpu.h" | ||
| 102 | #endif | ||
| 103 | |||
| 104 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL | ||
| 105 | #include "libavutil/arm/cpu.h" | ||
| 106 | |||
/* Entry point used for checked calls on 32-bit ARM. Only compiled when
 * ARCH_ARM && HAVE_ARMV5TE_EXTERNAL; starts out pointing at the non-VFP
 * variant.
 * NOTE(review): presumably re-pointed at a VFP-aware variant once the CPU
 * flags are known - confirm against the code that assigns it. */
void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
| 108 | #endif | ||
| 109 | |||
/* Trade-off between speed and accuracy.
 * Defaults to 1 << 10 = 1024; has external linkage, so it can be read or
 * overridden from outside this file. */
uint64_t bench_runs = 1U << 10;
| 112 | |||
/* List of tests to invoke.
 *
 * Each entry maps the name of a test to the function that runs it; the
 * table is terminated by a { NULL } sentinel. Entries are compiled in only
 * when the component they exercise is enabled in the build configuration. */
static const struct {
    const char *name;
    void (*func)(void);
} tests[] = {
    /* NOTE: When adding a new test to this list here, it also needs to be
     * added in tests/fate/checkasm.mak, otherwise it doesn't get executed
     * as part of "make fate" or "make fate-checkasm". */
#if CONFIG_AVCODEC
    #if CONFIG_AAC_DECODER
        { "aacpsdsp", checkasm_check_aacpsdsp },
        { "sbrdsp",   checkasm_check_sbrdsp },
    #endif
    #if CONFIG_AAC_ENCODER
        { "aacencdsp", checkasm_check_aacencdsp },
    #endif
    #if CONFIG_AC3DSP
        { "ac3dsp", checkasm_check_ac3dsp },
    #endif
    #if CONFIG_ALAC_DECODER
        { "alacdsp", checkasm_check_alacdsp },
    #endif
    #if CONFIG_APV_DECODER
        { "apv_dsp", checkasm_check_apv_dsp },
    #endif
    #if CONFIG_AUDIODSP
        { "audiodsp", checkasm_check_audiodsp },
    #endif
    #if CONFIG_BLOCKDSP
        { "blockdsp", checkasm_check_blockdsp },
    #endif
    #if CONFIG_BSWAPDSP
        { "bswapdsp", checkasm_check_bswapdsp },
    #endif
    #if CONFIG_CAVS_DECODER
        { "cavsdsp", checkasm_check_cavsdsp },
    #endif
    #if CONFIG_DCA_DECODER
        { "dcadsp", checkasm_check_dcadsp },
        { "synth_filter", checkasm_check_synth_filter },
    #endif
    #if CONFIG_DIRAC_DECODER
        { "diracdsp", checkasm_check_diracdsp },
    #endif
    #if CONFIG_EXR_DECODER
        { "exrdsp", checkasm_check_exrdsp },
    #endif
    #if CONFIG_FDCTDSP
        { "fdctdsp", checkasm_check_fdctdsp },
    #endif
    #if CONFIG_FLAC_DECODER
        { "flacdsp", checkasm_check_flacdsp },
    #endif
    #if CONFIG_FMTCONVERT
        { "fmtconvert", checkasm_check_fmtconvert },
    #endif
    #if CONFIG_G722DSP
        { "g722dsp", checkasm_check_g722dsp },
    #endif
    #if CONFIG_H263DSP
        { "h263dsp", checkasm_check_h263dsp },
    #endif
    #if CONFIG_H264CHROMA
        { "h264chroma", checkasm_check_h264chroma },
    #endif
    #if CONFIG_H264DSP
        { "h264dsp", checkasm_check_h264dsp },
    #endif
    #if CONFIG_H264PRED
        { "h264pred", checkasm_check_h264pred },
    #endif
    #if CONFIG_H264QPEL
        { "h264qpel", checkasm_check_h264qpel },
    #endif
    #if CONFIG_HEVC_DECODER
        { "hevc_add_res", checkasm_check_hevc_add_res },
        { "hevc_deblock", checkasm_check_hevc_deblock },
        { "hevc_dequant", checkasm_check_hevc_dequant },
        { "hevc_idct", checkasm_check_hevc_idct },
        { "hevc_pel", checkasm_check_hevc_pel },
        { "hevc_pred", checkasm_check_hevc_pred },
        { "hevc_sao", checkasm_check_hevc_sao },
    #endif
    #if CONFIG_HPELDSP
        { "hpeldsp", checkasm_check_hpeldsp },
    #endif
    #if CONFIG_HUFFYUVDSP
        { "huffyuvdsp", checkasm_check_huffyuvdsp },
    #endif
    #if CONFIG_HUFFYUVENCDSP
        { "huffyuvencdsp", checkasm_check_huffyuvencdsp },
    #endif
    #if CONFIG_IDCTDSP
        { "idctdsp", checkasm_check_idctdsp },
    #endif
    #if CONFIG_JPEG2000_DECODER
        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
    #endif
    #if CONFIG_LLAUDDSP
        { "llauddsp", checkasm_check_llauddsp },
    #endif
    /* Guarded by the component under test rather than by HUFFYUVDSP, so the
     * test is also registered when llviddsp is enabled without huffyuvdsp. */
    #if CONFIG_LLVIDDSP
        { "llviddsp", checkasm_check_llviddsp },
    #endif
    #if CONFIG_LLVIDENCDSP
        { "llvidencdsp", checkasm_check_llvidencdsp },
    #endif
    #if CONFIG_LPC
        { "lpc", checkasm_check_lpc },
    #endif
    #if CONFIG_ME_CMP
        { "motion", checkasm_check_motion },
    #endif
    #if CONFIG_MPEG4_DECODER
        { "mpeg4videodsp", checkasm_check_mpeg4videodsp },
    #endif
    #if CONFIG_MPEGVIDEO
        { "mpegvideo_unquantize", checkasm_check_mpegvideo_unquantize },
    #endif
    #if CONFIG_MPEGVIDEOENCDSP
        { "mpegvideoencdsp", checkasm_check_mpegvideoencdsp },
    #endif
    #if CONFIG_OPUS_DECODER
        { "opusdsp", checkasm_check_opusdsp },
    #endif
    #if CONFIG_PIXBLOCKDSP
        { "pixblockdsp", checkasm_check_pixblockdsp },
    #endif
    #if CONFIG_PNG_DECODER
        { "png", checkasm_check_png },
    #endif
    #if CONFIG_QPELDSP
        { "qpeldsp", checkasm_check_qpeldsp },
    #endif
    #if CONFIG_RV34DSP
        { "rv34dsp", checkasm_check_rv34dsp },
    #endif
    #if CONFIG_RV40_DECODER
        { "rv40dsp", checkasm_check_rv40dsp },
    #endif
    #if CONFIG_SBC_ENCODER
        { "sbcdsp", checkasm_check_sbcdsp },
    #endif
    #if CONFIG_SNOW_DECODER
        { "snowdsp", checkasm_check_snowdsp },
    #endif
    #if CONFIG_SVQ1_ENCODER
        { "svq1enc", checkasm_check_svq1enc },
    #endif
    #if CONFIG_TAK_DECODER
        { "takdsp", checkasm_check_takdsp },
    #endif
    #if CONFIG_UTVIDEO_DECODER
        { "utvideodsp", checkasm_check_utvideodsp },
    #endif
    #if CONFIG_V210_DECODER
        { "v210dec", checkasm_check_v210dec },
    #endif
    #if CONFIG_V210_ENCODER
        { "v210enc", checkasm_check_v210enc },
    #endif
    #if CONFIG_VC1DSP
        { "vc1dsp", checkasm_check_vc1dsp },
    #endif
    #if CONFIG_VP3DSP
        { "vp3dsp", checkasm_check_vp3dsp },
    #endif
    #if CONFIG_VP6_DECODER
        { "vp6dsp", checkasm_check_vp6dsp },
    #endif
    #if CONFIG_VP8DSP
        { "vp8dsp", checkasm_check_vp8dsp },
    #endif
    #if CONFIG_VP9_DECODER
        { "vp9dsp", checkasm_check_vp9dsp }, // all of the below
        { "vp9_ipred", checkasm_check_vp9_ipred },
        { "vp9_itxfm", checkasm_check_vp9_itxfm },
        { "vp9_loopfilter", checkasm_check_vp9_loopfilter },
        { "vp9_mc", checkasm_check_vp9_mc },
    #endif
    #if CONFIG_VIDEODSP
        { "videodsp", checkasm_check_videodsp },
    #endif
    #if CONFIG_VORBIS_DECODER
        { "vorbisdsp", checkasm_check_vorbisdsp },
    #endif
    #if CONFIG_VVC_DECODER
        { "vvc_alf", checkasm_check_vvc_alf },
        { "vvc_mc",  checkasm_check_vvc_mc  },
        { "vvc_sao", checkasm_check_vvc_sao },
    #endif
#endif
#if CONFIG_AVFILTER
    #if CONFIG_SCENE_SAD
        { "scene_sad", checkasm_check_scene_sad },
    #endif
    #if CONFIG_AFIR_FILTER
        { "af_afir", checkasm_check_afir },
    #endif
    #if CONFIG_BLACKDETECT_FILTER
        { "vf_blackdetect", checkasm_check_blackdetect },
    #endif
    #if CONFIG_BLEND_FILTER
        { "vf_blend", checkasm_check_blend },
    #endif
    #if CONFIG_BWDIF_FILTER
        { "vf_bwdif", checkasm_check_vf_bwdif },
    #endif
    #if CONFIG_COLORDETECT_FILTER
        { "vf_colordetect", checkasm_check_colordetect },
    #endif
    #if CONFIG_COLORSPACE_FILTER
        { "vf_colorspace", checkasm_check_colorspace },
    #endif
    #if CONFIG_EQ_FILTER
        { "vf_eq", checkasm_check_vf_eq },
    #endif
    #if CONFIG_FSPP_FILTER
        { "vf_fspp", checkasm_check_vf_fspp },
    #endif
    #if CONFIG_GBLUR_FILTER
        { "vf_gblur", checkasm_check_vf_gblur },
    #endif
    #if CONFIG_HFLIP_FILTER
        { "vf_hflip", checkasm_check_vf_hflip },
    #endif
    #if CONFIG_IDET_FILTER
        { "vf_idet", checkasm_check_idet },
    #endif
    #if CONFIG_NLMEANS_FILTER
        { "vf_nlmeans", checkasm_check_nlmeans },
    #endif
    #if CONFIG_THRESHOLD_FILTER
        { "vf_threshold", checkasm_check_vf_threshold },
    #endif
    #if CONFIG_SOBEL_FILTER
        { "vf_sobel", checkasm_check_vf_sobel },
    #endif
#endif
#if CONFIG_SWSCALE
    { "sw_gbrp", checkasm_check_sw_gbrp },
    { "sw_range_convert", checkasm_check_sw_range_convert },
    { "sw_rgb", checkasm_check_sw_rgb },
    { "sw_scale", checkasm_check_sw_scale },
    { "sw_xyz2rgb", checkasm_check_sw_xyz2rgb },
    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
    { "sw_yuv2yuv", checkasm_check_sw_yuv2yuv },
    { "sw_ops", checkasm_check_sw_ops },
#endif
#if CONFIG_AVUTIL
        { "aes",       checkasm_check_aes },
        { "crc",       checkasm_check_crc },
        { "fixed_dsp", checkasm_check_fixed_dsp },
        { "float_dsp", checkasm_check_float_dsp },
        { "lls",       checkasm_check_lls },
    #if CONFIG_PIXELUTILS
        { "pixelutils",checkasm_check_pixelutils },
    #endif
        { "av_tx",     checkasm_check_av_tx },
#endif
    { NULL }
    /* NOTE: When adding a new test to this list here, it also needs to be
     * added in tests/fate/checkasm.mak, otherwise it doesn't get executed
     * as part of "make fate" or "make fate-checkasm". */
};
| 378 | |||
/* List of cpu flags to check.
 *
 * One group of entries is compiled in per target architecture; the table
 * always ends with a { NULL } sentinel whose flag is 0.
 * cpu_suffix() scans this table from the last entry towards the first and
 * returns the suffix of the first entry whose flag intersects the queried
 * mask, so entries listed later take precedence there. */
static const struct {
    const char *name;   /* display name of the instruction set */
    const char *suffix; /* short suffix, as returned by cpu_suffix() */
    int flag;           /* AV_CPU_FLAG_* bit(s) covered by this entry */
} cpus[] = {
#if   ARCH_AARCH64
    { "ARMV8",      "armv8",      AV_CPU_FLAG_ARMV8 },
    { "NEON",       "neon",       AV_CPU_FLAG_NEON },
    { "DOTPROD",    "dotprod",    AV_CPU_FLAG_DOTPROD },
    { "I8MM",       "i8mm",       AV_CPU_FLAG_I8MM },
    { "SVE",        "sve",        AV_CPU_FLAG_SVE },
    { "SVE2",       "sve2",       AV_CPU_FLAG_SVE2 },
    { "SME",        "sme",        AV_CPU_FLAG_SME },
    { "SME-I16I64", "sme_i16i64", AV_CPU_FLAG_SME_I16I64 },
    { "CRC",        "crc",        AV_CPU_FLAG_ARM_CRC },
    { "SME2",       "sme2",       AV_CPU_FLAG_SME2 },
    { "PMULL",      "pmull_eor3", AV_CPU_FLAG_PMULL|AV_CPU_FLAG_EOR3 },
#elif ARCH_ARM
    { "ARMV5TE",    "armv5te",    AV_CPU_FLAG_ARMV5TE },
    { "ARMV6",      "armv6",      AV_CPU_FLAG_ARMV6 },
    { "ARMV6T2",    "armv6t2",    AV_CPU_FLAG_ARMV6T2 },
    { "VFP",        "vfp",        AV_CPU_FLAG_VFP },
    { "VFP_VM",     "vfp_vm",     AV_CPU_FLAG_VFP_VM },
    { "VFPV3",      "vfp3",       AV_CPU_FLAG_VFPV3 },
    { "NEON",       "neon",       AV_CPU_FLAG_NEON },
#elif ARCH_PPC
    { "ALTIVEC",    "altivec",    AV_CPU_FLAG_ALTIVEC },
    { "VSX",        "vsx",        AV_CPU_FLAG_VSX },
    { "POWER8",     "power8",     AV_CPU_FLAG_POWER8 },
#elif ARCH_RISCV
    { "RVI",        "rvi",        AV_CPU_FLAG_RVI },
    { "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED },
    { "RV_zbb",     "rvb_b",      AV_CPU_FLAG_RVB_BASIC },
    { "RVB",        "rvb",        AV_CPU_FLAG_RVB },
    { "RV_zve32x","rvv_i32",      AV_CPU_FLAG_RVV_I32 },
    { "RV_zve32f","rvv_f32",      AV_CPU_FLAG_RVV_F32 },
    { "RV_zve64x","rvv_i64",      AV_CPU_FLAG_RVV_I64 },
    { "RV_zve64d","rvv_f64",      AV_CPU_FLAG_RVV_F64 },
    { "RV_zvbb",    "rv_zvbb",    AV_CPU_FLAG_RV_ZVBB },
#elif ARCH_MIPS
    { "MMI",        "mmi",        AV_CPU_FLAG_MMI },
    { "MSA",        "msa",        AV_CPU_FLAG_MSA },
#elif ARCH_X86
    /* Several x86 entries OR an extra flag into the mask (CMOV, *SLOW,
     * ATOM) so those bits are covered by the same entry. */
    { "MMX",        "mmx",        AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
    { "MMXEXT",     "mmxext",     AV_CPU_FLAG_MMXEXT },
    { "SSE",        "sse",        AV_CPU_FLAG_SSE },
    { "SSE2",       "sse2",       AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
    { "SSE3",       "sse3",       AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
    { "SSSE3",      "ssse3",      AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
    { "SSE4.1",     "sse4",       AV_CPU_FLAG_SSE4 },
    { "SSE4.2",     "sse42",      AV_CPU_FLAG_SSE42 },
    { "AES-NI",     "aesni",      AV_CPU_FLAG_AESNI },
    { "CLMUL",      "clmul",      AV_CPU_FLAG_CLMUL },
    { "AVX",        "avx",        AV_CPU_FLAG_AVX },
    { "XOP",        "xop",        AV_CPU_FLAG_XOP },
    { "FMA3",       "fma3",       AV_CPU_FLAG_FMA3 },
    { "FMA4",       "fma4",       AV_CPU_FLAG_FMA4 },
    { "AVX2",       "avx2",       AV_CPU_FLAG_AVX2 },
    { "AVX-512",    "avx512",     AV_CPU_FLAG_AVX512 },
    { "AVX-512ICL", "avx512icl",  AV_CPU_FLAG_AVX512ICL },
#elif ARCH_LOONGARCH
    { "LSX",        "lsx",        AV_CPU_FLAG_LSX },
    { "LASX",       "lasx",       AV_CPU_FLAG_LASX },
#elif ARCH_WASM
    { "SIMD128",    "simd128",    AV_CPU_FLAG_SIMD128 },
#endif
    { NULL }
};
| 448 | |||
/* One implementation ("version") of a tested function. The versions of a
 * function form a singly linked list through `next`. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version, NULL at the end */
    void *func;        /* still NULL on a freshly created node (get_func()
                        * relies on this to detect an insertion) */
    int ok;            /* NOTE(review): presumably a pass/fail marker for
                        * this version - not used in the visible code */
    int cpu;           /* cpu flag mask; translated by cpu_suffix() when
                        * results are printed */
    CheckasmPerf perf; /* benchmark counters (cycles, iterations) */
} CheckasmFuncVersion;
| 456 | |||
/* Binary search tree node.
 * Nodes are ordered by cmp_func_names() and kept balanced as a left-leaning
 * red-black tree by balance_tree(). */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2]; /* [0]: names sorting before this one,
                                    * [1]: names sorting after it */
    CheckasmFuncVersion versions;  /* first version, embedded in the node;
                                    * further ones hang off versions.next */
    uint8_t color; /* 0 = red, 1 = black */
    char name[1];  /* NOTE(review): presumably over-allocated to hold the
                    * whole NUL-terminated name - confirm at the allocation
                    * site */
} CheckasmFunc;
| 464 | |||
/* Internal state */
static struct {
    CheckasmFunc *funcs;                   /* root of the function tree */
    CheckasmFunc *current_func;
    CheckasmFuncVersion *current_func_ver;
    const char *current_test_name;
    const char *bench_pattern;
    int bench_pattern_len;
    int num_checked;
    int num_failed;

    /* perf */
    int nop_time; /* subtracted from every measurement in
                   * avg_cycles_per_call(); NOTE(review): presumably set
                   * from measure_nop_time() (decicycles) - confirm */
    int sysfd;    /* copied into a local by measure_nop_time() for the
                   * PERF_* macros */

    int cpu_flag;
    const char *cpu_flag_name;
    const char *test_pattern;
    int verbose;
    int csv;      /* non-zero: print_benchs() emits comma-separated rows */
    int tsv;      /* non-zero (and csv zero): tab-separated rows */
    volatile sig_atomic_t catch_signals;
} state;
| 488 | |||
/* PRNG state.
 * Has external linkage; NOTE(review): presumably shared with the individual
 * test files through checkasm.h - confirm. */
AVLFG checkasm_lfg;
| 491 | |||
| 492 | /* float compare support code */ | ||
| 493 | 22381704 | static int is_negative(union av_intfloat32 u) | |
| 494 | { | ||
| 495 | 22381704 | return u.i >> 31; | |
| 496 | } | ||
| 497 | |||
| 498 | 11190852 | int float_near_ulp(float a, float b, unsigned max_ulp) | |
| 499 | { | ||
| 500 | union av_intfloat32 x, y; | ||
| 501 | |||
| 502 | 11190852 | x.f = a; | |
| 503 | 11190852 | y.f = b; | |
| 504 | |||
| 505 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 11190852 times.
|
11190852 | if (is_negative(x) != is_negative(y)) { |
| 506 | // handle -0.0 == +0.0 | ||
| 507 | ✗ | return a == b; | |
| 508 | } | ||
| 509 | |||
| 510 |
2/2✓ Branch 0 taken 11190850 times.
✓ Branch 1 taken 2 times.
|
11190852 | if (llabs((int64_t)x.i - y.i) <= max_ulp) |
| 511 | 11190850 | return 1; | |
| 512 | |||
| 513 | 2 | return 0; | |
| 514 | } | ||
| 515 | |||
/**
 * Element-wise float_near_ulp() over two arrays of len floats.
 *
 * @return 1 if every pair is within max_ulp, 0 at the first pair that is not
 *         (an empty range yields 1)
 */
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
                         unsigned len)
{
    unsigned remaining;

    for (remaining = len; remaining; remaining--, a++, b++) {
        if (!float_near_ulp(*a, *b, max_ulp))
            return 0;
    }

    return 1;
}
| 527 | |||
/**
 * Check whether |a - b| is strictly smaller than eps.
 *
 * On mismatch a diagnostic with both values, the difference and eps is
 * written to stderr.
 *
 * @return 1 if the values are close enough, 0 otherwise
 */
int float_near_abs_eps(float a, float b, float eps)
{
    const float abs_diff = fabsf(a - b);

    /* Negated "<" (rather than ">=") so that a NaN difference is reported
     * as a mismatch. */
    if (!(abs_diff < eps)) {
        fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps);
        return 0;
    }

    return 1;
}
| 538 | |||
/**
 * Element-wise float_near_abs_eps() over two arrays of len floats.
 *
 * @return 1 if every pair differs by less than eps, 0 at the first pair
 *         that does not (an empty range yields 1)
 */
int float_near_abs_eps_array(const float *a, const float *b, float eps,
                             unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_abs_eps(a[idx], b[idx], eps))
            return 0;
        idx++;
    }

    return 1;
}
| 550 | |||
/**
 * Accept a pair of floats if it passes either the ULP test or the absolute
 * epsilon test.
 *
 * The epsilon test (which logs to stderr on failure) only runs when the ULP
 * test has already failed.
 *
 * @return 1 if either criterion is met, 0 otherwise
 */
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps) ? 1 : 0;
}
| 555 | |||
/**
 * Element-wise float_near_abs_eps_ulp() over two arrays of len floats.
 *
 * @return 1 if every pair passes, 0 at the first pair that fails
 *         (an empty range yields 1)
 */
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
                                 unsigned max_ulp, unsigned len)
{
    unsigned pos;

    for (pos = 0; pos != len; pos++) {
        const int close_enough = float_near_abs_eps_ulp(a[pos], b[pos], eps, max_ulp);

        if (!close_enough)
            return 0;
    }

    return 1;
}
| 567 | |||
/**
 * Check whether |a - b| is strictly smaller than eps.
 * Unlike the float variant, nothing is logged on mismatch.
 *
 * @return 1 if the values are close enough, 0 otherwise
 */
int double_near_abs_eps(double a, double b, double eps)
{
    return fabs(a - b) < eps;
}
| 574 | |||
/**
 * Element-wise double_near_abs_eps() over two arrays of len doubles.
 *
 * @return 1 if every pair differs by less than eps, 0 at the first pair
 *         that does not (an empty range yields 1)
 */
int double_near_abs_eps_array(const double *a, const double *b, double eps,
                              unsigned len)
{
    unsigned left;

    for (left = len; left > 0; left--) {
        if (!double_near_abs_eps(*a++, *b++, eps))
            return 0;
    }

    return 1;
}
| 586 | |||
/* Print colored text to stderr if the terminal supports it.
 *
 * Color support is probed once, on the first call, and cached. With the
 * Windows console API the text attributes are switched around the output;
 * otherwise ANSI escape sequences are used when TERM is set, is not "dumb"
 * and fd 2 is a tty. */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1; /* -1 until probed, then 0 or 1 */
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;

        con = GetStdHandle(STD_ERROR_HANDLE);
        use_color = con && con != INVALID_HANDLE_VALUE &&
                    GetConsoleScreenBufferInfo(con, &con_info);
        /* Remember the original attributes so they can be restored */
        if (use_color)
            org_attributes = con_info.wAttributes;
    }
    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");

        use_color = 0;
        if (term && strcmp(term, "dumb") && isatty(2))
            use_color = 1;
    }
    /* Bit 3 of color selects the intensity, bits 0-2 the foreground color */
    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the default/original text attributes */
#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
| 629 | |||
| 630 | /* Deallocate a tree */ | ||
| 631 | 28858 | static void destroy_func_tree(CheckasmFunc *f) | |
| 632 | { | ||
| 633 |
2/2✓ Branch 0 taken 14381 times.
✓ Branch 1 taken 14477 times.
|
28858 | if (f) { |
| 634 | 14381 | CheckasmFuncVersion *v = f->versions.next; | |
| 635 |
2/2✓ Branch 0 taken 15605 times.
✓ Branch 1 taken 14381 times.
|
29986 | while (v) { |
| 636 | 15605 | CheckasmFuncVersion *next = v->next; | |
| 637 | 15605 | free(v); | |
| 638 | 15605 | v = next; | |
| 639 | } | ||
| 640 | |||
| 641 | 14381 | destroy_func_tree(f->child[0]); | |
| 642 | 14381 | destroy_func_tree(f->child[1]); | |
| 643 | 14381 | free(f); | |
| 644 | } | ||
| 645 | 28858 | } | |
| 646 | |||
| 647 | /* Allocate a zero-initialized block, clean up and exit on failure */ | ||
| 648 | 29986 | static void *checkasm_malloc(size_t size) | |
| 649 | { | ||
| 650 | 29986 | void *ptr = calloc(1, size); | |
| 651 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 29986 times.
|
29986 | if (!ptr) { |
| 652 | ✗ | fprintf(stderr, "checkasm: malloc failed\n"); | |
| 653 | ✗ | destroy_func_tree(state.funcs); | |
| 654 | ✗ | exit(1); | |
| 655 | } | ||
| 656 | 29986 | return ptr; | |
| 657 | } | ||
| 658 | |||
| 659 | /* Get the suffix of the specified cpu flag */ | ||
| 660 | ✗ | static const char *cpu_suffix(int cpu) | |
| 661 | { | ||
| 662 | ✗ | int i = FF_ARRAY_ELEMS(cpus); | |
| 663 | |||
| 664 | ✗ | while (--i >= 0) | |
| 665 | ✗ | if (cpu & cpus[i].flag) | |
| 666 | ✗ | return cpus[i].suffix; | |
| 667 | |||
| 668 | ✗ | return "c"; | |
| 669 | } | ||
| 670 | |||
/* qsort() comparator ordering uint16_t samples ascending. Both operands are
 * promoted to int before subtracting, so the difference cannot overflow. */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    return lhs - rhs;
}
| 675 | |||
| 676 | /* Measure the overhead of the timing code (in decicycles) */ | ||
| 677 | ✗ | static int measure_nop_time(void) | |
| 678 | { | ||
| 679 | uint16_t nops[10000]; | ||
| 680 | ✗ | int i, nop_sum = 0; | |
| 681 | ✗ | av_unused const int sysfd = state.sysfd; | |
| 682 | |||
| 683 | ✗ | uint64_t t = 0; | |
| 684 | ✗ | for (i = 0; i < 10000; i++) { | |
| 685 | ✗ | PERF_START(t); | |
| 686 | ✗ | PERF_STOP(t); | |
| 687 | ✗ | nops[i] = t; | |
| 688 | } | ||
| 689 | |||
| 690 | ✗ | qsort(nops, 10000, sizeof(uint16_t), cmp_nop); | |
| 691 | ✗ | for (i = 2500; i < 7500; i++) | |
| 692 | ✗ | nop_sum += nops[i]; | |
| 693 | |||
| 694 | ✗ | return nop_sum / 500; | |
| 695 | } | ||
| 696 | |||
| 697 | ✗ | static inline double avg_cycles_per_call(const CheckasmPerf *const p) | |
| 698 | { | ||
| 699 | ✗ | if (p->iterations) { | |
| 700 | ✗ | const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time; | |
| 701 | ✗ | if (cycles > 0.0) | |
| 702 | ✗ | return cycles / 32.0; /* 32 calls per iteration */ | |
| 703 | } | ||
| 704 | ✗ | return 0.0; | |
| 705 | } | ||
| 706 | |||
| 707 | /* Print benchmark results */ | ||
| 708 | ✗ | static void print_benchs(CheckasmFunc *f) | |
| 709 | { | ||
| 710 | ✗ | if (f) { | |
| 711 | ✗ | CheckasmFuncVersion *v = &f->versions; | |
| 712 | ✗ | const CheckasmPerf *p = &v->perf; | |
| 713 | ✗ | const double baseline = avg_cycles_per_call(p); | |
| 714 | double decicycles; | ||
| 715 | |||
| 716 | ✗ | print_benchs(f->child[0]); | |
| 717 | |||
| 718 | do { | ||
| 719 | ✗ | if (p->iterations) { | |
| 720 | ✗ | p = &v->perf; | |
| 721 | ✗ | decicycles = avg_cycles_per_call(p); | |
| 722 | ✗ | if (state.csv || state.tsv) { | |
| 723 | ✗ | const char sep = state.csv ? ',' : '\t'; | |
| 724 | ✗ | printf("%s%c%s%c%.1f\n", f->name, sep, | |
| 725 | cpu_suffix(v->cpu), sep, | ||
| 726 | decicycles / 10.0); | ||
| 727 | } else { | ||
| 728 | ✗ | const int pad_length = 10 + 50 - | |
| 729 | ✗ | printf("%s_%s:", f->name, cpu_suffix(v->cpu)); | |
| 730 | ✗ | const double ratio = decicycles ? | |
| 731 | ✗ | baseline / decicycles : 0.0; | |
| 732 | ✗ | printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0), | |
| 733 | decicycles / 10.0, ratio); | ||
| 734 | } | ||
| 735 | } | ||
| 736 | ✗ | } while ((v = v->next)); | |
| 737 | |||
| 738 | ✗ | print_benchs(f->child[1]); | |
| 739 | } | ||
| 740 | ✗ | } | |
| 741 | |||
/* ASCIIbetical sort except preserving natural order for numbers.
 *
 * Returns <0, 0 or >0 like strcmp(). When the first difference falls inside
 * a run of digits, the name whose run is longer sorts later, so that e.g.
 * "x2" orders before "x10". */
static int cmp_func_names(const char *a, const char *b)
{
    const char *const begin = a;
    int ascii_diff, digit_diff;

    /* Skip the common prefix; ascii_diff is the byte difference at the
     * first mismatch, or 0 if both strings ended together. */
    for (;;) {
        ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* Walk past the digits both names still have at this position */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    /* Not preceded by a digit: plain byte comparison decides */
    if (a == begin || !av_isdigit(a[-1]))
        return ascii_diff;

    /* Inside a number: the side with digits left over is the larger one */
    digit_diff = av_isdigit(*a) - av_isdigit(*b);

    return digit_diff ? digit_diff : ascii_diff;
}
| 756 | |||
| 757 | /* Perform a tree rotation in the specified direction and return the new root */ | ||
| 758 | 13930 | static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) | |
| 759 | { | ||
| 760 | 13930 | CheckasmFunc *r = f->child[dir^1]; | |
| 761 | 13930 | f->child[dir^1] = r->child[dir]; | |
| 762 | 13930 | r->child[dir] = f; | |
| 763 | 13930 | r->color = f->color; | |
| 764 | 13930 | f->color = 0; | |
| 765 | 13930 | return r; | |
| 766 | } | ||
| 767 | |||
/* True if the node exists and is red (color == 0); a NULL child is never red.
 * Evaluates its argument twice, so pass expressions without side effects. */
#define is_red(f) ((f) && !(f)->color)
| 769 | |||
| 770 | /* Balance a left-leaning red-black tree at the specified node */ | ||
| 771 | 132285 | static void balance_tree(CheckasmFunc **root) | |
| 772 | { | ||
| 773 | 132285 | CheckasmFunc *f = *root; | |
| 774 | |||
| 775 |
8/8✓ Branch 0 taken 125625 times.
✓ Branch 1 taken 6660 times.
✓ Branch 2 taken 53333 times.
✓ Branch 3 taken 72292 times.
✓ Branch 4 taken 47779 times.
✓ Branch 5 taken 5554 times.
✓ Branch 6 taken 10361 times.
✓ Branch 7 taken 37418 times.
|
132285 | if (is_red(f->child[0]) && is_red(f->child[1])) { |
| 776 | 10361 | f->color ^= 1; | |
| 777 | 10361 | f->child[0]->color = f->child[1]->color = 1; | |
| 778 | } | ||
| 779 | |||
| 780 |
7/8✓ Branch 0 taken 125625 times.
✓ Branch 1 taken 6660 times.
✓ Branch 2 taken 82653 times.
✓ Branch 3 taken 42972 times.
✓ Branch 4 taken 89313 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 10139 times.
✓ Branch 7 taken 79174 times.
|
132285 | if (!is_red(f->child[0]) && is_red(f->child[1])) |
| 781 | 10139 | *root = rotate_tree(f, 0); /* Rotate left */ | |
| 782 |
7/8✓ Branch 0 taken 122146 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 42972 times.
✓ Branch 3 taken 79174 times.
✓ Branch 4 taken 39450 times.
✓ Branch 5 taken 3522 times.
✓ Branch 6 taken 3791 times.
✓ Branch 7 taken 35659 times.
|
122146 | else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) |
| 783 | 3791 | *root = rotate_tree(f, 1); /* Rotate right */ | |
| 784 | 132285 | } | |
| 785 | |||
| 786 | /* Get a node with the specified name, creating it if it doesn't exist */ | ||
| 787 | 1783474 | static CheckasmFunc *get_func(CheckasmFunc **root, const char *name) | |
| 788 | { | ||
| 789 | 1783474 | CheckasmFunc *f = *root; | |
| 790 | |||
| 791 |
2/2✓ Branch 0 taken 1769093 times.
✓ Branch 1 taken 14381 times.
|
1783474 | if (f) { |
| 792 | /* Search the tree for a matching node */ | ||
| 793 | 1769093 | int cmp = cmp_func_names(name, f->name); | |
| 794 |
2/2✓ Branch 0 taken 1595031 times.
✓ Branch 1 taken 174062 times.
|
1769093 | if (cmp) { |
| 795 | 1595031 | f = get_func(&f->child[cmp > 0], name); | |
| 796 | |||
| 797 | /* Rebalance the tree on the way up if a new node was inserted */ | ||
| 798 |
2/2✓ Branch 0 taken 132285 times.
✓ Branch 1 taken 1462746 times.
|
1595031 | if (!f->versions.func) |
| 799 | 132285 | balance_tree(root); | |
| 800 | } | ||
| 801 | } else { | ||
| 802 | /* Allocate and insert a new node into the tree */ | ||
| 803 | 14381 | int name_length = strlen(name); | |
| 804 | 14381 | f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); | |
| 805 | 14381 | memcpy(f->name, name, name_length + 1); | |
| 806 | } | ||
| 807 | |||
| 808 | 1783474 | return f; | |
| 809 | } | ||
| 810 | |||
/* Storage for the saved execution context that the crash handlers below
 * return to through checkasm_load_context(). */
checkasm_context checkasm_context_buf;

/* Crash handling: attempt to catch crashes and handle them
 * gracefully instead of just aborting abruptly. */
#ifdef _WIN32
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
/* Vectored exception handler: maps the Windows exception code to the
 * matching POSIX signal number and passes it to checkasm_load_context().
 * Exceptions are left to other handlers when catching is disabled or the
 * code is not one of those listed. */
static LONG NTAPI signal_handler(EXCEPTION_POINTERS *e) {
    int s;

    if (!state.catch_signals)
        return EXCEPTION_CONTINUE_SEARCH;

    switch (e->ExceptionRecord->ExceptionCode) {
    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
    case EXCEPTION_INT_DIVIDE_BY_ZERO:
        s = SIGFPE;
        break;
    case EXCEPTION_ILLEGAL_INSTRUCTION:
    case EXCEPTION_PRIV_INSTRUCTION:
        s = SIGILL;
        break;
    case EXCEPTION_ACCESS_VIOLATION:
    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
    case EXCEPTION_DATATYPE_MISALIGNMENT:
    case EXCEPTION_STACK_OVERFLOW:
        s = SIGSEGV;
        break;
    case EXCEPTION_IN_PAGE_ERROR:
        s = SIGBUS;
        break;
    default:
        return EXCEPTION_CONTINUE_SEARCH;
    }
    /* Disarm before leaving so that a second fault is not caught again */
    state.catch_signals = 0;
    checkasm_load_context(s);
    return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
}
#endif
#elif !defined(_WASI_EMULATED_SIGNAL)
static void signal_handler(int s);

/* SA_RESETHAND restores the default disposition once the handler has been
 * entered, which is why the handler re-installs itself below. */
static const struct sigaction signal_handler_act = {
    .sa_handler = signal_handler,
    .sa_flags = SA_RESETHAND,
};

/* POSIX signal handler: when catching is enabled, disarm it, re-install the
 * handler for signal s and pass s to checkasm_load_context(). When catching
 * is disabled the handler simply returns. */
static void signal_handler(int s) {
    if (state.catch_signals) {
        state.catch_signals = 0;
        sigaction(s, &signal_handler_act, NULL);
        checkasm_load_context(s);
    }
}
#endif
| 865 | |||
/* Match 'str' against 'pattern', where each '*' in the pattern stands for
 * any (possibly empty) run of characters.
 * Returns 0 on a match and non-zero otherwise; without a '*' in the pattern
 * the result is that of strcmp(). */
static int wildstrcmp(const char *str, const char *pattern)
{
    const char *star = strchr(pattern, '*');
    size_t prefix_len;

    /* No wildcard at all: plain string comparison */
    if (!star)
        return strcmp(str, pattern);

    /* The literal text in front of the first '*' has to match exactly */
    prefix_len = star - pattern;
    if (strncmp(str, pattern, prefix_len) != 0)
        return 1;

    /* Consecutive stars are equivalent to a single one */
    do {
        star++;
    } while (*star == '*');

    /* A trailing '*' accepts whatever is left of the string */
    if (*star == '\0')
        return 0;

    /* Try to match the rest of the pattern at every remaining position */
    for (str += prefix_len; *str != '\0'; str++) {
        if (!wildstrcmp(str, star))
            break;
    }

    /* Running off the end of the string means no position matched */
    return *str == '\0';
}
| 881 | |||
| 882 | /* Perform tests and benchmarks for the specified cpu flag if supported by the host */ | ||
| 883 | 1728 | static void check_cpu_flag(const char *name, int flag) | |
| 884 | { | ||
| 885 | 1728 | int old_cpu_flag = state.cpu_flag; | |
| 886 | |||
| 887 | 1728 | flag |= old_cpu_flag; | |
| 888 | 1728 | av_force_cpu_flags(-1); | |
| 889 | 1728 | state.cpu_flag = flag & av_get_cpu_flags(); | |
| 890 | 1728 | av_force_cpu_flags(state.cpu_flag); | |
| 891 | |||
| 892 |
4/4✓ Branch 0 taken 1632 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 1248 times.
✓ Branch 3 taken 384 times.
|
1728 | if (!flag || state.cpu_flag != old_cpu_flag) { |
| 893 | int i; | ||
| 894 | |||
| 895 | 1344 | state.cpu_flag_name = name; | |
| 896 |
2/2✓ Branch 0 taken 134400 times.
✓ Branch 1 taken 1344 times.
|
135744 | for (i = 0; tests[i].func; i++) { |
| 897 |
3/4✓ Branch 0 taken 134400 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 133056 times.
✓ Branch 4 taken 1344 times.
|
134400 | if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern)) |
| 898 | 133056 | continue; | |
| 899 | 1344 | state.current_test_name = tests[i].name; | |
| 900 | 1344 | tests[i].func(); | |
| 901 | } | ||
| 902 | } | ||
| 903 | 1728 | } | |
| 904 | |||
| 905 | /* Print the name of the current CPU flag, but only do it once */ | ||
| 906 | 473 | static void print_cpu_name(void) | |
| 907 | { | ||
| 908 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 245 times.
|
473 | if (state.cpu_flag_name) { |
| 909 | 228 | color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); | |
| 910 | 228 | state.cpu_flag_name = NULL; | |
| 911 | } | ||
| 912 | 473 | } | |
| 913 | |||
/* Exactly one of the following timer back-end initialisers is compiled in.
 * Each returns 0 on success and -1 when benchmarking is unavailable. */
#if CONFIG_LINUX_PERF
/* Open a perf event counting CPU cycles and store its descriptor in
 * state.sysfd. */
static int bench_init_linux(void)
{
    struct perf_event_attr attr = {
        .type = PERF_TYPE_HARDWARE,
        .size = sizeof(struct perf_event_attr),
        .config = PERF_COUNT_HW_CPU_CYCLES,
        .disabled = 1, // start counting only on demand
        .exclude_kernel = 1,
        .exclude_hv = 1,
#if !ARCH_X86
        .exclude_guest = 1,
#endif
    };

    fprintf(stderr, "benchmarking with Linux Perf Monitoring API\n");

    state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (state.sysfd == -1) {
        perror("perf_event_open");
        return -1;
    }
    return 0;
}
#elif CONFIG_MACOS_KPERF
/* Initialise the kperf back-end; always reports success. */
static int bench_init_kperf(void)
{
    ff_kperf_init();
    return 0;
}
#else
/* Probe the native timer. AV_READ_TIME() is executed once with signal
 * catching enabled; if control comes back through the saved context
 * (checkasm_save_context() returning non-zero) the timer is reported as
 * unusable. */
static int bench_init_ffmpeg(void)
{
#ifdef AV_READ_TIME
    if (!checkasm_save_context()) {
        checkasm_set_signal_handler_state(1);
        AV_READ_TIME();
        checkasm_set_signal_handler_state(0);
    } else {
        fprintf(stderr, "checkasm: unable to execute platform specific timer\n");
        return -1;
    }
    fprintf(stderr, "benchmarking with native FFmpeg timers\n");
    return 0;
#else
    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
    return -1;
#endif
}
#endif
| 964 | |||
| 965 | ✗ | static int bench_init(void) | |
| 966 | { | ||
| 967 | #if CONFIG_LINUX_PERF | ||
| 968 | int ret = bench_init_linux(); | ||
| 969 | #elif CONFIG_MACOS_KPERF | ||
| 970 | int ret = bench_init_kperf(); | ||
| 971 | #else | ||
| 972 | ✗ | int ret = bench_init_ffmpeg(); | |
| 973 | #endif | ||
| 974 | ✗ | if (ret < 0) | |
| 975 | ✗ | return ret; | |
| 976 | |||
| 977 | ✗ | state.nop_time = measure_nop_time(); | |
| 978 | ✗ | fprintf(stderr, "nop: %d.%d\n", state.nop_time/10, state.nop_time%10); | |
| 979 | ✗ | return 0; | |
| 980 | } | ||
| 981 | |||
/* Release benchmark resources: closes the perf event descriptor when the
 * Linux perf back-end is compiled in, otherwise does nothing. */
static void bench_uninit(void)
{
#if CONFIG_LINUX_PERF
    close(state.sysfd);
#endif
}
| 988 | |||
/* Print the command line help to stderr, using 'path' as the program name.
 * Always returns 1 so that callers can write `return usage(argv[0]);`. */
static int usage(const char *path)
{
    fprintf(stderr,
            "Usage: %s [options...] [seed]\n"
            " --test=<pattern> Run specific test.\n"
            " --bench Run benchmark.\n"
            " --csv, --tsv Output results in rows of comma or tab separated values.\n"
            " --runs=<ptwo> Manual number of benchmark iterations to run 2**<ptwo>.\n"
            " --verbose Increase verbosity.\n",
            path);
    return 1;
}
| 1001 | |||
| 1002 | 96 | int main(int argc, char *argv[]) | |
| 1003 | { | ||
| 1004 | 96 | unsigned int seed = av_get_random_seed(); | |
| 1005 | 96 | int i, ret = 0; | |
| 1006 | 96 | char arch_info_buf[50] = ""; | |
| 1007 | |||
| 1008 | #ifdef _WIN32 | ||
| 1009 | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) | ||
| 1010 | AddVectoredExceptionHandler(0, signal_handler); | ||
| 1011 | #endif | ||
| 1012 | #elif !defined(_WASI_EMULATED_SIGNAL) | ||
| 1013 | 96 | sigaction(SIGBUS, &signal_handler_act, NULL); | |
| 1014 | 96 | sigaction(SIGFPE, &signal_handler_act, NULL); | |
| 1015 | 96 | sigaction(SIGILL, &signal_handler_act, NULL); | |
| 1016 | 96 | sigaction(SIGSEGV, &signal_handler_act, NULL); | |
| 1017 | #endif | ||
| 1018 | #if HAVE_PRCTL && defined(PR_SET_UNALIGN) | ||
| 1019 | 96 | prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS); | |
| 1020 | #endif | ||
| 1021 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL | ||
| 1022 | if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) | ||
| 1023 | checkasm_checked_call = checkasm_checked_call_vfp; | ||
| 1024 | #endif | ||
| 1025 | |||
| 1026 |
2/4✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 96 times.
|
96 | if (!tests[0].func || !cpus[0].flag) { |
| 1027 | ✗ | fprintf(stderr, "checkasm: no tests to perform\n"); | |
| 1028 | ✗ | return 0; | |
| 1029 | } | ||
| 1030 | |||
| 1031 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 96 times.
|
192 | for (i = 1; i < argc; i++) { |
| 1032 | 96 | const char *arg = argv[i]; | |
| 1033 | unsigned long l; | ||
| 1034 | char *end; | ||
| 1035 | |||
| 1036 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (!strncmp(arg, "--bench", 7)) { |
| 1037 | ✗ | if (bench_init() < 0) | |
| 1038 | ✗ | return 1; | |
| 1039 | ✗ | if (arg[7] == '=') { | |
| 1040 | ✗ | state.bench_pattern = arg + 8; | |
| 1041 | ✗ | state.bench_pattern_len = strlen(state.bench_pattern); | |
| 1042 | } else | ||
| 1043 | ✗ | state.bench_pattern = "*"; | |
| 1044 |
1/2✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
|
96 | } else if (!strncmp(arg, "--test=", 7)) { |
| 1045 | 96 | state.test_pattern = arg + 7; | |
| 1046 | ✗ | } else if (!strcmp(arg, "--csv")) { | |
| 1047 | ✗ | state.csv = 1; state.tsv = 0; | |
| 1048 | ✗ | } else if (!strcmp(arg, "--tsv")) { | |
| 1049 | ✗ | state.csv = 0; state.tsv = 1; | |
| 1050 | ✗ | } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) { | |
| 1051 | ✗ | state.verbose = 1; | |
| 1052 | ✗ | } else if (!strncmp(arg, "--runs=", 7)) { | |
| 1053 | ✗ | l = strtoul(arg + 7, &end, 10); | |
| 1054 | ✗ | if (*end == '\0') { | |
| 1055 | ✗ | if (l > 30) { | |
| 1056 | ✗ | fprintf(stderr, "checkasm: error: runs exponent must be within the range 0 <= 30\n"); | |
| 1057 | ✗ | usage(argv[0]); | |
| 1058 | } | ||
| 1059 | ✗ | bench_runs = 1U << l; | |
| 1060 | } else { | ||
| 1061 | ✗ | return usage(argv[0]); | |
| 1062 | } | ||
| 1063 | ✗ | } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX && | |
| 1064 | ✗ | *end == '\0') { | |
| 1065 | ✗ | seed = l; | |
| 1066 | } else { | ||
| 1067 | ✗ | return usage(argv[0]); | |
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | #if ARCH_AARCH64 && HAVE_SVE | ||
| 1072 | if (have_sve(av_get_cpu_flags())) | ||
| 1073 | snprintf(arch_info_buf, sizeof(arch_info_buf), | ||
| 1074 | "SVE %d bits, ", 8 * ff_aarch64_sve_length()); | ||
| 1075 | #endif | ||
| 1076 | #if ARCH_AARCH64 && HAVE_SME | ||
| 1077 | if (have_sme(av_get_cpu_flags())) | ||
| 1078 | av_strlcatf(arch_info_buf, sizeof(arch_info_buf), | ||
| 1079 | "SME %d bits, ", 8 * ff_aarch64_sme_length()); | ||
| 1080 | #endif | ||
| 1081 | #if ARCH_RISCV && HAVE_RVV | ||
| 1082 | if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32) | ||
| 1083 | snprintf(arch_info_buf, sizeof (arch_info_buf), | ||
| 1084 | "%zu-bit vectors, ", 8 * ff_get_rv_vlenb()); | ||
| 1085 | #endif | ||
| 1086 | 96 | fprintf(stderr, "checkasm: %susing random seed %u\n", arch_info_buf, seed); | |
| 1087 | 96 | av_lfg_init(&checkasm_lfg, seed); | |
| 1088 | |||
| 1089 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (state.bench_pattern) |
| 1090 | ✗ | fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", bench_runs, av_log2(bench_runs)); | |
| 1091 | |||
| 1092 | 96 | check_cpu_flag(NULL, 0); | |
| 1093 |
2/2✓ Branch 0 taken 1632 times.
✓ Branch 1 taken 96 times.
|
1728 | for (i = 0; cpus[i].flag; i++) |
| 1094 | 1632 | check_cpu_flag(cpus[i].name, cpus[i].flag); | |
| 1095 | |||
| 1096 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (state.num_failed) { |
| 1097 | ✗ | fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); | |
| 1098 | ✗ | ret = 1; | |
| 1099 | } else { | ||
| 1100 | 96 | fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); | |
| 1101 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (state.bench_pattern) { |
| 1102 | ✗ | print_benchs(state.funcs); | |
| 1103 | } | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | 96 | destroy_func_tree(state.funcs); | |
| 1107 | 96 | bench_uninit(); | |
| 1108 | 96 | return ret; | |
| 1109 | } | ||
| 1110 | |||
| 1111 | /* Decide whether or not the specified function needs to be tested and | ||
| 1112 | * allocate/initialize data structures if needed. Returns a pointer to a | ||
| 1113 | * reference function if the function should be tested, otherwise NULL */ | ||
| 1114 | 188807 | void *checkasm_check_func(void *func, const char *name, ...) | |
| 1115 | { | ||
| 1116 | char name_buf[256]; | ||
| 1117 | 188807 | void *ref = func; | |
| 1118 | CheckasmFuncVersion *v; | ||
| 1119 | int name_length; | ||
| 1120 | va_list arg; | ||
| 1121 | |||
| 1122 | 188807 | va_start(arg, name); | |
| 1123 | 188807 | name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); | |
| 1124 | 188807 | va_end(arg); | |
| 1125 | |||
| 1126 |
4/6✓ Branch 0 taken 188443 times.
✓ Branch 1 taken 364 times.
✓ Branch 2 taken 188443 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 188443 times.
|
188807 | if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) |
| 1127 | 364 | return NULL; | |
| 1128 | |||
| 1129 | 188443 | state.current_func = get_func(&state.funcs, name_buf); | |
| 1130 | 188443 | state.funcs->color = 1; | |
| 1131 | 188443 | v = &state.current_func->versions; | |
| 1132 | |||
| 1133 |
2/2✓ Branch 0 taken 174062 times.
✓ Branch 1 taken 14381 times.
|
188443 | if (v->func) { |
| 1134 | CheckasmFuncVersion *prev; | ||
| 1135 | do { | ||
| 1136 | /* Only test functions that haven't already been tested */ | ||
| 1137 |
2/2✓ Branch 0 taken 158457 times.
✓ Branch 1 taken 78494 times.
|
236951 | if (v->func == func) |
| 1138 | 158457 | return NULL; | |
| 1139 | |||
| 1140 |
1/2✓ Branch 0 taken 78494 times.
✗ Branch 1 not taken.
|
78494 | if (v->ok) |
| 1141 | 78494 | ref = v->func; | |
| 1142 | |||
| 1143 | 78494 | prev = v; | |
| 1144 |
2/2✓ Branch 0 taken 62889 times.
✓ Branch 1 taken 15605 times.
|
78494 | } while ((v = v->next)); |
| 1145 | |||
| 1146 | 15605 | v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); | |
| 1147 | } | ||
| 1148 | |||
| 1149 | 29986 | v->func = func; | |
| 1150 | 29986 | v->ok = 1; | |
| 1151 | 29986 | v->cpu = state.cpu_flag; | |
| 1152 | 29986 | state.current_func_ver = v; | |
| 1153 | |||
| 1154 |
2/2✓ Branch 0 taken 15474 times.
✓ Branch 1 taken 14512 times.
|
29986 | if (state.cpu_flag) |
| 1155 | 15474 | state.num_checked++; | |
| 1156 | |||
| 1157 | 29986 | return ref; | |
| 1158 | } | ||
| 1159 | |||
| 1160 | /* Decide whether or not the current function needs to be benchmarked */ | ||
| 1161 | 42896 | int checkasm_bench_func(void) | |
| 1162 | { | ||
| 1163 |
2/6✓ Branch 0 taken 42896 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 42896 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
42896 | return !state.num_failed && state.bench_pattern && |
| 1164 | ✗ | !wildstrcmp(state.current_func->name, state.bench_pattern); | |
| 1165 | } | ||
| 1166 | |||
| 1167 | /* Indicate that the current test has failed, return whether verbose printing | ||
| 1168 | * is requested. */ | ||
| 1169 | ✗ | int checkasm_fail_func(const char *msg, ...) | |
| 1170 | { | ||
| 1171 | ✗ | if (state.current_func_ver && state.current_func_ver->cpu && | |
| 1172 | ✗ | state.current_func_ver->ok) | |
| 1173 | { | ||
| 1174 | va_list arg; | ||
| 1175 | |||
| 1176 | ✗ | print_cpu_name(); | |
| 1177 | ✗ | fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); | |
| 1178 | ✗ | va_start(arg, msg); | |
| 1179 | ✗ | vfprintf(stderr, msg, arg); | |
| 1180 | ✗ | va_end(arg); | |
| 1181 | ✗ | fprintf(stderr, ")\n"); | |
| 1182 | |||
| 1183 | ✗ | state.current_func_ver->ok = 0; | |
| 1184 | ✗ | state.num_failed++; | |
| 1185 | } | ||
| 1186 | ✗ | return state.verbose; | |
| 1187 | } | ||
| 1188 | |||
/* Enable (non-zero) or disable (zero) crash catching; the signal handlers
 * only intercept faults while state.catch_signals is set. */
void checkasm_set_signal_handler_state(int enabled) {
    state.catch_signals = enabled;
}
| 1192 | |||
/* Report a caught signal as a failure of the current function. A value of 0
 * means no signal was caught and nothing is reported. Returns s unchanged. */
int checkasm_handle_signal(int s) {
    if (!s)
        return s;

#ifdef __GLIBC__
    checkasm_fail_func("fatal signal %d: %s", s, strsignal(s));
#else
    /* Without strsignal(), fall back to a fixed description per signal */
    checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
                       s == SIGILL ? "illegal instruction" :
                       s == SIGBUS ? "bus error" :
                                     "segmentation fault");
#endif

    return s;
}
| 1206 | |||
/* Get the benchmark context of the current function.
 * The context is zeroed on every call and its sysfd is set to state.sysfd,
 * so any previously accumulated values in it are discarded. */
CheckasmPerf *checkasm_get_perf_context(void)
{
    CheckasmPerf *perf = &state.current_func_ver->perf;
    memset(perf, 0, sizeof(*perf));
    perf->sysfd = state.sysfd;
    return perf;
}
| 1215 | |||
| 1216 | /* Print the outcome of all tests performed since the last time this function was called */ | ||
| 1217 | 3982 | void checkasm_report(const char *name, ...) | |
| 1218 | { | ||
| 1219 | static int prev_checked, prev_failed, max_length; | ||
| 1220 | |||
| 1221 |
2/2✓ Branch 0 taken 473 times.
✓ Branch 1 taken 3509 times.
|
3982 | if (state.num_checked > prev_checked) { |
| 1222 | 473 | int pad_length = max_length + 4; | |
| 1223 | va_list arg; | ||
| 1224 | |||
| 1225 | 473 | print_cpu_name(); | |
| 1226 | 473 | pad_length -= fprintf(stderr, " - %s.", state.current_test_name); | |
| 1227 | 473 | va_start(arg, name); | |
| 1228 | 473 | pad_length -= vfprintf(stderr, name, arg); | |
| 1229 | 473 | va_end(arg); | |
| 1230 | 473 | fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); | |
| 1231 | |||
| 1232 |
1/2✓ Branch 0 taken 473 times.
✗ Branch 1 not taken.
|
473 | if (state.num_failed == prev_failed) |
| 1233 | 473 | color_printf(COLOR_GREEN, "OK"); | |
| 1234 | else | ||
| 1235 | ✗ | color_printf(COLOR_RED, "FAILED"); | |
| 1236 | 473 | fprintf(stderr, "]\n"); | |
| 1237 | |||
| 1238 | 473 | prev_checked = state.num_checked; | |
| 1239 | 473 | prev_failed = state.num_failed; | |
| 1240 |
2/2✓ Branch 0 taken 290 times.
✓ Branch 1 taken 3219 times.
|
3509 | } else if (!state.cpu_flag) { |
| 1241 | /* Calculate the amount of padding required to make the output vertically aligned */ | ||
| 1242 | 290 | int length = strlen(state.current_test_name); | |
| 1243 | va_list arg; | ||
| 1244 | |||
| 1245 | 290 | va_start(arg, name); | |
| 1246 | 290 | length += vsnprintf(NULL, 0, name, arg); | |
| 1247 | 290 | va_end(arg); | |
| 1248 | |||
| 1249 |
2/2✓ Branch 0 taken 163 times.
✓ Branch 1 taken 127 times.
|
290 | if (length > max_length) |
| 1250 | 163 | max_length = length; | |
| 1251 | } | ||
| 1252 | 3982 | } | |
| 1253 | |||
/* Helper for the buffer checkers: record a mismatch once per check.
 * On the first mismatch (*err == 0) the failure is reported through
 * checkasm_fail_func(). If that returns zero, 1 is returned to tell the
 * caller to stop; otherwise *err is set and a "name (WxH):" header is
 * printed. Returns 0 whenever the caller should go on printing details. */
static int check_err(const char *file, int line,
                     const char *name, int w, int h,
                     int *err)
{
    if (!*err) {
        if (!checkasm_fail_func("%s:%d", file, line))
            return 1;

        *err = 1;
        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
    }

    return 0;
}
| 1266 | |||
/* Function body shared by all checkasm_check_*() buffer comparators.
 *
 * Expects these names in the enclosing function: file, line, buf1, stride1,
 * buf2, stride2, w, h, name, align_w, align_h and padding. 'compare' is a
 * macro or function (a, b, len) returning non-zero when len elements match;
 * 'fmt' is the printf format for one element. 'type' is not used in the body.
 *
 * Steps:
 *  - round w and h up to multiples of align_w / align_h (the mask arithmetic
 *    presumably assumes power-of-two alignments) and assert that the results
 *    fit into 32 bits;
 *  - divide both strides by the element size;
 *  - compare the w x h area row by row and, on a mismatch, dump both buffers
 *    followed by a map with 'x' for differing and '.' for equal elements;
 *  - compare 'padding' rows above, 'padding' rows below the aligned height,
 *    and 'padding' columns left and right of every row to detect overwrites.
 *
 * Returns (from the enclosing function) 1 as soon as check_err() returns
 * non-zero, otherwise err: 1 if any mismatch was recorded, 0 if none. */
#define DEF_CHECKASM_CHECK_BODY(compare, type, fmt) \
do { \
    int64_t aligned_w = (w - 1LL + align_w) & ~(align_w - 1); \
    int64_t aligned_h = (h - 1LL + align_h) & ~(align_h - 1); \
    int err = 0; \
    int y = 0; \
    av_assert0(aligned_w == (int32_t)aligned_w);\
    av_assert0(aligned_h == (int32_t)aligned_h);\
    stride1 /= sizeof(*buf1); \
    stride2 /= sizeof(*buf2); \
    for (y = 0; y < h; y++) \
        if (!compare(&buf1[y*stride1], &buf2[y*stride2], w)) \
            break; \
    if (y != h) { \
        if (check_err(file, line, name, w, h, &err)) \
            return 1; \
        for (y = 0; y < h; y++) { \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf1[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf2[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
            buf1 += stride1; \
            buf2 += stride2; \
            fprintf(stderr, "\n"); \
        } \
        buf1 -= h*stride1; \
        buf2 -= h*stride2; \
    } \
    for (y = -padding; y < 0; y++) \
        if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                     w + 2*padding)) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite above\n"); \
            break; \
        } \
    for (y = aligned_h; y < aligned_h + padding; y++) \
        if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                     w + 2*padding)) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite below\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                     padding)) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite left\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (!compare(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
                     padding)) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite right\n"); \
            break; \
        } \
    return err; \
} while (0)
| 1333 | |||
/* Comparator for integer element types: non-zero when the first 'len'
 * elements of a and b are bytewise identical. */
#define cmp_int(a, b, len) (!memcmp(a, b, (len) * sizeof(*(a))))
/* Define checkasm_check_<type>(), which compares two w x h buffers of 'type'
 * (including their padding) using cmp_int and prints elements with 'fmt'.
 * The strides are divided by the element size inside the body. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *file, int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          int w, int h, const char *name, \
                          int align_w, int align_h, \
                          int padding) \
{ \
    DEF_CHECKASM_CHECK_BODY(cmp_int, type, fmt); \
}

/* One comparator per supported integer element type */
DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
| 1351 | |||
/* Float counterpart of the checkasm_check_<type>() functions: identical
 * buffer and padding checks, but rows are compared with
 * float_near_ulp_array() using 'max_ulp' as the tolerance instead of a
 * bytewise comparison. Elements are printed with "%g".
 * Returns 0 when no mismatch was found and 1 otherwise. */
int checkasm_check_float_ulp(const char *file, int line,
                             const float *buf1, ptrdiff_t stride1,
                             const float *buf2, ptrdiff_t stride2,
                             int w, int h, const char *name,
                             unsigned max_ulp, int align_w, int align_h,
                             int padding)
{
    /* Local comparator so that the shared body picks up max_ulp */
#define cmp_float(a, b, len) float_near_ulp_array(a, b, max_ulp, len)
    DEF_CHECKASM_CHECK_BODY(cmp_float, float, "%g");
#undef cmp_float
}
| 1363 |