2 * cpu_features.c - runtime CPU feature detection
4 * Copyright 2022-2023 Eric Biggers
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
32 #include "wimlib/cpu_features.h"
34 #if CPU_FEATURES_ENABLED
36 #include "wimlib/util.h"
41 #if defined(__i386__) || defined(__x86_64__)
44 * With old GCC versions we have to manually save and restore the x86_32 PIC
45 * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
/*
 * EBX_CONSTRAINT is the asm output constraint that the cpuid() helper below
 * uses for the operand receiving the EBX result of CPUID.
 *
 * When both __i386__ and __PIC__ are defined it is "=&r" (an early-clobber
 * general register chosen by the compiler), so the asm has to move the value
 * out of ebx by hand.  The other definition, "=b", binds the operand directly
 * to ebx.
 *
 * NOTE(review): the two #define lines are presumably separated by an #else and
 * followed by an #endif that this excerpt does not show -- confirm.
 */
47 #if defined(__i386__) && defined(__PIC__)
48 # define EBX_CONSTRAINT "=&r"
50 # define EBX_CONSTRAINT "=b"
53 /* Execute the CPUID instruction. */
/*
 * Inputs: leaf is passed in EAX and subleaf in ECX (the "a" and "c" input
 * constraints).  Outputs: the resulting EAX, EBX, ECX and EDX values are
 * stored to *a, *b, *c and *d respectively.
 *
 * %1 is the operand that receives the EBX result.  Each .ifnc directive
 * assembles its mov/xchg only when %1 is not ebx itself, i.e. when
 * EBX_CONSTRAINT did not bind the operand to ebx.  In that case the mov saves
 * the incoming ebx into %1 beforehand, and the xchg afterwards places the
 * CPUID result into %1 while restoring ebx.
 *
 * NOTE(review): the line carrying the cpuid instruction itself, which should
 * sit between the two .ifnc lines, is not visible in this excerpt; neither is
 * the return-type line of this function.
 */
55 cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
57 asm volatile(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
59 ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
60 : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
61 : "a" (leaf), "c" (subleaf));
64 /* Read an extended control register. */
71 * Execute the "xgetbv" instruction. Old versions of binutils do not
72 * recognize this instruction, so list the raw bytes instead.
74 * This must be 'volatile' to prevent this code from being moved out
75 * from under the check for OSXSAVE.
/*
 * Operands of the raw-encoded instruction: index is supplied in ECX (the "c"
 * input constraint); the result is returned in EDX (bound to d) and EAX
 * (bound to a).
 * NOTE(review): the function header and the declarations of d and a are not
 * visible in this excerpt.
 */
77 asm volatile(".byte 0x0f, 0x01, 0xd0" :
78 "=d" (d), "=a" (a) : "c" (index));
/* Assemble the 64-bit result: d supplies bits 63..32, a is ORed in below. */
80 return ((u64)d << 32) | a;
/*
 * x86 implementation of get_cpu_features(): queries CPUID leaves 0, 1 and 7
 * and ORs the matching X86_CPU_FEATURE_* flags into features.
 *
 * NOTE(review): this excerpt omits several lines of the function (its return
 * type, the declarations of features and xcr0, the conditions guarding most
 * of the assignments below, and the return statement), so the comments here
 * describe only the statements that are visible.
 */
84 get_cpu_features(void)
86 u32 max_leaf, a, b, c, d;
90 /* EAX=0: Highest Function Parameter and Manufacturer ID */
/* The EAX result of leaf 0 is captured in max_leaf. */
91 cpuid(0, 0, &max_leaf, &b, &c, &d);
95 /* EAX=1: Processor Info and Feature Bits */
96 cpuid(1, 0, &a, &b, &c, &d);
98 features |= X86_CPU_FEATURE_SSSE3;
100 features |= X86_CPU_FEATURE_SSE4_1;
102 features |= X86_CPU_FEATURE_SSE4_2;
/*
 * AVX is reported only when bit 28 of c is set and bits 1 and 2 of xcr0
 * (mask 0x6) are both set.
 * NOTE(review): presumably xcr0 holds extended control register 0 and these
 * two bits mean the OS has enabled SSE and AVX register state -- confirm
 * against the Intel SDM; the assignment to xcr0 is not visible here.
 */
105 if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
106 features |= X86_CPU_FEATURE_AVX;
111 /* EAX=7, ECX=0: Extended Features */
112 cpuid(7, 0, &a, &b, &c, &d);
114 features |= X86_CPU_FEATURE_BMI2;
116 features |= X86_CPU_FEATURE_SHA;
121 #elif defined(__aarch64__) && defined(__linux__)
124 * On Linux, arm32 and arm64 CPU features can be detected by reading the
125 * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
127 * Ideally we'd use the C library function getauxval(), but it's not guaranteed
128 * to be available: it was only added to glibc in 2.16, and in Android it was
129 * added to API level 18 for arm32 and level 21 for arm64.
/*
 * Scan /proc/self/auxv for the AT_HWCAP and AT_HWCAP2 entries.  The caller
 * passes hwcap and hwcap2 as the locations associated with those two entries.
 * The file is read into a small stack buffer and consumed as (type, value)
 * pairs of unsigned long.
 *
 * NOTE(review): this excerpt omits lines of the function, including the
 * statements executed when a type matches, the handling of open()/read()
 * failures other than the EINTR test, and any close() of fd -- confirm against
 * the full file.
 */
140 static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
143 unsigned long auxbuf[32];
147 fd = open("/proc/self/auxv", O_RDONLY);
/* Read after the filled bytes already buffered, up to the buffer end. */
153 int ret = read(fd, &((char *)auxbuf)[filled],
154 sizeof(auxbuf) - filled);
/* A read that failed with EINTR is tested for separately. */
156 if (ret < 0 && errno == EINTR)
/* Keep reading until at least one whole (type, value) pair is buffered. */
161 } while (filled < 2 * sizeof(long));
/* Each entry is two words: the type at index i and its value at i + 1. */
165 unsigned long type = auxbuf[i];
166 unsigned long value = auxbuf[i + 1];
168 if (type == AT_HWCAP)
170 else if (type == AT_HWCAP2)
/* One pair consumed; continue while another whole pair remains buffered. */
173 filled -= 2 * sizeof(long);
174 } while (filled >= 2 * sizeof(long));
/* Move the remaining filled bytes (less than one pair) to the buffer start. */
176 memmove(auxbuf, &auxbuf[i], filled);
/*
 * Linux/arm64 implementation of get_cpu_features(): starts with both
 * capability words at zero, lets scan_auxv() fill them in, and sets
 * ARM_CPU_FEATURE_SHA1 when bit 5 of hwcap (HWCAP_SHA1) is set.
 *
 * NOTE(review): the return type, the declaration of features and the return
 * statement are not visible in this excerpt.
 */
183 get_cpu_features(void)
185 unsigned long hwcap = 0;
186 unsigned long hwcap2 = 0;
189 scan_auxv(&hwcap, &hwcap2);
191 if (hwcap & (1 << 5)) /* HWCAP_SHA1 */
192 features |= ARM_CPU_FEATURE_SHA1;
197 #elif defined(__aarch64__) && defined(__APPLE__)
199 /* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
201 #include <sys/types.h>
202 #include <sys/sysctl.h>
/*
 * Table pairing a sysctl name with the CPU feature flag it indicates.  The
 * Apple get_cpu_features() below iterates over it and reads the name and
 * feature members of each entry.
 * NOTE(review): the member declarations are not visible in this excerpt.
 */
204 static const struct {
207 } feature_sysctls[] = {
208 { "hw.optional.arm.FEAT_SHA1", ARM_CPU_FEATURE_SHA1 },
/*
 * Apple/arm64 implementation of get_cpu_features(): queries every sysctl
 * listed in feature_sysctls and ORs the associated flag into features for
 * each one that reports the value 1.
 *
 * NOTE(review): the return type, the declarations of features and val, and
 * the return statement are not visible in this excerpt.
 */
212 get_cpu_features(void)
216 for (size_t i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
217 const char *name = feature_sysctls[i].name;
219 size_t valsize = sizeof(val);
/*
 * Count the feature only if the lookup succeeded, the kernel wrote back
 * exactly sizeof(val) bytes, and the value read is 1.
 */
221 if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
222 valsize == sizeof(val) && val == 1)
223 features |= feature_sysctls[i].feature;
228 #elif defined(__aarch64__) && defined(_WIN32)
/*
 * Windows/arm64 implementation of get_cpu_features(): sets
 * ARM_CPU_FEATURE_SHA1 when IsProcessorFeaturePresent() reports
 * PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE.
 *
 * NOTE(review): the return type, the declaration of features and the return
 * statement are not visible in this excerpt.
 */
233 get_cpu_features(void)
237 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
238 features |= ARM_CPU_FEATURE_SHA1;
244 # error "CPU_FEATURES_ENABLED was set but no implementation is available!"
/*
 * Maps user-visible feature names to feature flags; find_cpu_feature() below
 * searches it by name.  The x86 entries are compiled in when __i386__ or
 * __x86_64__ is defined, the single arm64 entry when __aarch64__ is defined,
 * and any other target hits the #error.
 *
 * On x86 both "sha" and "sha1" map to X86_CPU_FEATURE_SHA, so either spelling
 * selects the same flag.
 *
 * NOTE(review): the member declarations and the closing lines of the table
 * are not visible in this excerpt.
 */
247 static const struct {
250 } feature_table[] = {
251 #if defined(__i386__) || defined(__x86_64__)
252 {"ssse3", X86_CPU_FEATURE_SSSE3},
253 {"sse4.1", X86_CPU_FEATURE_SSE4_1},
254 {"sse4.2", X86_CPU_FEATURE_SSE4_2},
255 {"avx", X86_CPU_FEATURE_AVX},
256 {"bmi2", X86_CPU_FEATURE_BMI2},
257 {"sha", X86_CPU_FEATURE_SHA},
258 {"sha1", X86_CPU_FEATURE_SHA},
259 #elif defined(__aarch64__)
260 {"sha1", ARM_CPU_FEATURE_SHA1},
262 # error "CPU_FEATURES_ENABLED was set but no features are defined!"
/*
 * Look up a feature flag by name in feature_table.
 *
 * name:    start of the feature name; it is compared by length, so it does
 *          not need to be NUL-terminated at namelen.
 * namelen: number of bytes of name to compare.
 *
 * Returns the feature member of the first entry whose name has exactly
 * namelen bytes and matches byte for byte.
 * NOTE(review): the return type and the value returned when nothing matches
 * are not visible in this excerpt.
 */
268 find_cpu_feature(const char *name, size_t namelen)
270 for (size_t i = 0; i < ARRAY_LEN(feature_table); i++) {
/* Check the length first so memcmp() only runs on equal-length names. */
271 if (namelen == strlen(feature_table[i].name) &&
272 memcmp(name, feature_table[i].name, namelen) == 0)
273 return feature_table[i].feature;
/*
 * Initialize the cpu_features variable: store the result of
 * get_cpu_features(), then clear every feature named in the
 * WIMLIB_DISABLE_CPU_FEATURES environment variable, if it is set.
 *
 * NOTE(review): the declarations of p and sep, and the statement taken when
 * the variable is unset, are not visible in this excerpt.
 */
280 void init_cpu_features(void)
284 cpu_features = get_cpu_features();
287 * Allow disabling CPU features via an environmental variable for
288 * testing purposes. Syntax is comma-separated list of feature names.
290 p = getenv("WIMLIB_DISABLE_CPU_FEATURES");
291 if (likely(p == NULL))
/* Handle each comma-terminated name; p ends up at the final segment. */
293 for (; (sep = strchr(p, ',')) != NULL; p = sep + 1)
294 cpu_features &= ~find_cpu_feature(p, sep - p);
/* The last (or only) name has no trailing comma and runs to the NUL. */
295 cpu_features &= ~find_cpu_feature(p, strlen(p));
298 #endif /* CPU_FEATURES_ENABLED */