| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright © 2025, Niklas Haas | ||
| 3 | * Copyright © 2018, VideoLAN and dav1d authors | ||
| 4 | * Copyright © 2018, Two Orioles, LLC | ||
| 5 | * All rights reserved. | ||
| 6 | * | ||
| 7 | * Redistribution and use in source and binary forms, with or without | ||
| 8 | * modification, are permitted provided that the following conditions are met: | ||
| 9 | * | ||
| 10 | * 1. Redistributions of source code must retain the above copyright notice, this | ||
| 11 | * list of conditions and the following disclaimer. | ||
| 12 | * | ||
| 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, | ||
| 14 | * this list of conditions and the following disclaimer in the documentation | ||
| 15 | * and/or other materials provided with the distribution. | ||
| 16 | * | ||
| 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
| 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
| 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <stdint.h> | ||
| 30 | |||
| 31 | #include "checkasm_config.h" | ||
| 32 | #include "cpu.h" | ||
| 33 | #include "internal.h" | ||
| 34 | |||
| 35 | #if ARCH_X86 | ||
| 36 | |||
| 37 | int checkasm_check_vzeroupper = 0; | ||
| 38 | |||
| 39 | ✗ | COLD int checkasm_get_check_vzeroupper(void) | |
| 40 | { | ||
| 41 | ✗ | return checkasm_check_vzeroupper; | |
| 42 | } | ||
| 43 | |||
/* Routines defined outside of this file (presumably in assembly -- TODO
 * confirm). The two warmup routines are handed out by get_simd_warmup();
 * checkasm_dirty_ymm_state() is used by checkasm_init_x86(). */
extern void checkasm_warmup_avx(void);
extern void checkasm_warmup_avx512(void);
extern void checkasm_dirty_ymm_state(void);

/* Warmup routine that does nothing; used when no AVX warmup applies. */
static void noop(void)
{
    return;
}
| 51 | |||
| 52 | 97 | static size_t get_model_name(char *name) | |
| 53 | { | ||
| 54 | CpuidRegisters r; | ||
| 55 | |||
| 56 | 97 | checkasm_cpu_cpuid(&r, 0x80000000, 0); | |
| 57 |
1/2✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
|
97 | if (r.eax >= 0x80000004) { |
| 58 | /* processor brand string */ | ||
| 59 | 97 | CpuidRegisters *buf = (CpuidRegisters *) name; | |
| 60 | 97 | checkasm_cpu_cpuid(buf + 0, 0x80000002, 0); | |
| 61 | 97 | checkasm_cpu_cpuid(buf + 1, 0x80000003, 0); | |
| 62 | 97 | checkasm_cpu_cpuid(buf + 2, 0x80000004, 0); | |
| 63 | } else { | ||
| 64 | /* use manufacturer id as a fallback */ | ||
| 65 | ✗ | checkasm_cpu_cpuid(&r, 0, 0); | |
| 66 | ✗ | memcpy(name + 0, &r.ebx, 4); | |
| 67 | ✗ | memcpy(name + 4, &r.edx, 4); | |
| 68 | ✗ | memcpy(name + 8, &r.ecx, 4); | |
| 69 | ✗ | name[12] = '\0'; | |
| 70 | } | ||
| 71 | |||
| 72 | /* trim trailing whitespace */ | ||
| 73 | 97 | size_t len = strlen(name); | |
| 74 |
2/4✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 97 times.
|
97 | while (len && name[len - 1] == ' ') |
| 75 | ✗ | len--; | |
| 76 | 97 | name[len] = '\0'; | |
| 77 | 97 | return len; | |
| 78 | } | ||
| 79 | |||
| 80 | 97 | static unsigned get_cpuid(void) | |
| 81 | { | ||
| 82 | CpuidRegisters r; | ||
| 83 | |||
| 84 | 97 | checkasm_cpu_cpuid(&r, 0, 0); | |
| 85 | 97 | const uint32_t max_leaf = r.eax; | |
| 86 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (!max_leaf) |
| 87 | ✗ | return 0; | |
| 88 | |||
| 89 | 97 | checkasm_cpu_cpuid(&r, 1, 0); | |
| 90 | 97 | const uint32_t cpuid_sig = r.eax; | |
| 91 | 97 | return cpuid_sig; | |
| 92 | } | ||
| 93 | |||
| 94 | 97 | COLD char *checkasm_get_x86_cpuid(char *buf, size_t buflen) | |
| 95 | { | ||
| 96 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (buflen < 64) |
| 97 | ✗ | return NULL; | |
| 98 | |||
| 99 | 97 | const size_t len = get_model_name(buf); | |
| 100 | 97 | const unsigned cpuid = get_cpuid(); | |
| 101 |
1/2✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
|
97 | if (cpuid) |
| 102 | 97 | snprintf(buf + len, buflen - len, " (%08X)", cpuid); | |
| 103 | 97 | return buf; | |
| 104 | } | ||
| 105 | |||
| 106 | 97 | COLD void checkasm_init_x86(void) | |
| 107 | { | ||
| 108 | CpuidRegisters r; | ||
| 109 | |||
| 110 | 97 | checkasm_cpu_cpuid(&r, 0, 0); | |
| 111 | 97 | const uint32_t max_leaf = r.eax; | |
| 112 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (max_leaf < 13) |
| 113 | ✗ | return; | |
| 114 | |||
| 115 | 97 | checkasm_cpu_cpuid(&r, 1, 0); | |
| 116 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (~r.ecx & 0x18000000 /* OSXSAVE/AVX */) |
| 117 | ✗ | return; | |
| 118 | |||
| 119 | 97 | checkasm_cpu_cpuid(&r, 13, 1); | |
| 120 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (!(r.eax & 0x04)) /* XCR1 not supported */ |
| 121 | ✗ | return; | |
| 122 | |||
| 123 | /* Check that the state is clean after touching XMM registers (with a | ||
| 124 | * non-VEX-encoded instruction), without vzeroupper, after using YMM | ||
| 125 | * with vzeroupper. (This currently fails on Zen 4 CPUs.) */ | ||
| 126 | 97 | checkasm_dirty_ymm_state(); | |
| 127 | 97 | const uint64_t xcr1 = checkasm_cpu_xgetbv(1); | |
| 128 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (xcr1 & 0x04) /* always-dirty ymm state */ |
| 129 | ✗ | return; | |
| 130 | |||
| 131 | #if ARCH_X86_32 && defined(_WIN32) | ||
| 132 | /* x86_32 processes on Windows can spuriously get the dirty ymm bit set | ||
| 133 | * while running; skip checking this aspect. */ | ||
| 134 | #else | ||
| 135 | 97 | checkasm_check_vzeroupper = 1; | |
| 136 | #endif | ||
| 137 | } | ||
| 138 | |||
| 139 | typedef void (*checkasm_simd_warmup_func)(void); | ||
| 140 | ✗ | static COLD checkasm_simd_warmup_func get_simd_warmup(void) | |
| 141 | { | ||
| 142 | ✗ | checkasm_simd_warmup_func simd_warmup = noop; | |
| 143 | CpuidRegisters r; | ||
| 144 | ✗ | checkasm_cpu_cpuid(&r, 0, 0); | |
| 145 | ✗ | const uint32_t max_leaf = r.eax; | |
| 146 | ✗ | if (max_leaf < 1) | |
| 147 | ✗ | return simd_warmup; | |
| 148 | |||
| 149 | ✗ | checkasm_cpu_cpuid(&r, 1, 0); | |
| 150 | ✗ | if (~r.ecx & 0x18000000) /* OSXSAVE/AVX */ | |
| 151 | ✗ | return simd_warmup; | |
| 152 | |||
| 153 | ✗ | const uint64_t xcr0 = checkasm_cpu_xgetbv(0); | |
| 154 | ✗ | if (~xcr0 & 0x6) /* XMM/YMM */ | |
| 155 | ✗ | return simd_warmup; | |
| 156 | |||
| 157 | ✗ | simd_warmup = checkasm_warmup_avx; | |
| 158 | ✗ | if (max_leaf < 7 || ~xcr0 & 0xe0) /* ZMM/OPMASK */ | |
| 159 | ✗ | return simd_warmup; | |
| 160 | |||
| 161 | ✗ | checkasm_cpu_cpuid(&r, 7, 0); | |
| 162 | ✗ | if (r.ebx & 0x00000020) /* AVX512F */ | |
| 163 | ✗ | simd_warmup = checkasm_warmup_avx512; | |
| 164 | |||
| 165 | ✗ | return simd_warmup; | |
| 166 | } | ||
| 167 | |||
| 168 | ✗ | void checkasm_simd_warmup(void) | |
| 169 | { | ||
| 170 | static checkasm_simd_warmup_func simd_warmup = NULL; | ||
| 171 | ✗ | if (!simd_warmup) | |
| 172 | ✗ | simd_warmup = get_simd_warmup(); | |
| 173 | |||
| 174 | ✗ | simd_warmup(); | |
| 175 | ✗ | } | |
| 176 | |||
| 177 | #endif | ||
| 178 |