2 * cpu_features.c - runtime CPU feature detection
4 * Copyright 2022 Eric Biggers
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
32 #include "wimlib/cpu_features.h"
34 #if CPU_FEATURES_ENABLED
36 #include "wimlib/util.h"
41 #if defined(__i386__) || defined(__x86_64__)
44 * With old GCC versions we have to manually save and restore the x86_32 PIC
45 * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
/*
 * EBX_CONSTRAINT is the inline-asm output constraint that cpuid() applies to
 * its *b operand.  For 32-bit x86 position-independent builds it is "=&r"
 * (an early-clobber general register); the other definition is "=b".
 * NOTE(review): the #else / #endif lines of this conditional are not visible
 * in this excerpt.
 */
47 #if defined(__i386__) && defined(__PIC__)
48 # define EBX_CONSTRAINT "=&r"
50 # define EBX_CONSTRAINT "=b"
53 /* Execute the CPUID instruction. */
/*
 * Inputs:  leaf is passed in eax ("a"), subleaf in ecx ("c").
 * Outputs: *a, *c and *d are bound to eax, ecx and edx; *b is operand %1 and
 *          is bound through EBX_CONSTRAINT.
 * NOTE(review): the return-type line, the braces and the asm line between
 * the two .ifnc lines are not visible in this excerpt.
 */
55 cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
/* If operand %1 is not ebx itself, first copy the current ebx into it ... */
57 asm(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
/*
 * ... and afterwards exchange the two, which puts the saved value back in
 * ebx and leaves the value that was in ebx in operand %1.
 */
59 ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
60 : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
61 : "a" (leaf), "c" (subleaf));
64 /* Read an extended control register. */
/*
 * The register number is supplied in ecx ("c" (index)).  The asm statement
 * has two 32-bit outputs, d (edx) and a (eax), which are combined into one
 * 64-bit return value.
 * NOTE(review): the function signature line and the declarations of d and a
 * are not visible in this excerpt.
 */
71 * Execute the "xgetbv" instruction. Old versions of binutils do not
72 * recognize this instruction, so list the raw bytes instead.
74 asm(".byte 0x0f, 0x01, 0xd0" : "=d" (d), "=a" (a) : "c" (index));
/* d supplies bits 63..32 of the result, a supplies bits 31..0. */
76 return ((u64)d << 32) | a;
/*
 * x86 implementation of get_cpu_features(): issues CPUID leaves 0, 1 and 7
 * and ORs the matching X86_CPU_FEATURE_* bits into features.
 * NOTE(review): the return-type line, the declarations of features and xcr0,
 * the use of max_leaf, the conditions guarding the SSSE3 / SSE4.1 / SSE4.2 /
 * BMI2 / SHA assignments and the return statement are not visible in this
 * excerpt.
 */
80 get_cpu_features(void)
82 u32 max_leaf, a, b, c, d;
86 /* EAX=0: Highest Function Parameter and Manufacturer ID */
/* The eax result of leaf 0 is stored in max_leaf. */
87 cpuid(0, 0, &max_leaf, &b, &c, &d);
91 /* EAX=1: Processor Info and Feature Bits */
92 cpuid(1, 0, &a, &b, &c, &d);
94 features |= X86_CPU_FEATURE_SSSE3;
96 features |= X86_CPU_FEATURE_SSE4_1;
98 features |= X86_CPU_FEATURE_SSE4_2;
/*
 * AVX is reported only when bit 28 of the leaf-1 ecx value is set AND bits 1
 * and 2 of xcr0 are both set (mask 0x6).
 * NOTE(review): presumably these xcr0 bits mean the OS saves the SSE and AVX
 * register state -- confirm against the Intel SDM.
 */
101 if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
102 features |= X86_CPU_FEATURE_AVX;
107 /* EAX=7, ECX=0: Extended Features */
108 cpuid(7, 0, &a, &b, &c, &d);
110 features |= X86_CPU_FEATURE_BMI2;
112 features |= X86_CPU_FEATURE_SHA;
117 #elif defined(__aarch64__) && defined(__linux__)
120 * On Linux, arm32 and arm64 CPU features can be detected by reading the
121 * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
123 * Ideally we'd use the C library function getauxval(), but it's not guaranteed
124 * to be available: it was only added to glibc in 2.16, and in Android it was
125 * added to API level 18 for arm32 and level 21 for arm64.
/*
 * scan_auxv() - look for AT_HWCAP and AT_HWCAP2 entries in /proc/self/auxv.
 * @hwcap:  output for the AT_HWCAP entry
 * @hwcap2: output for the AT_HWCAP2 entry
 *
 * The file is read into auxbuf and examined as consecutive pairs of
 * unsigned long: an entry type followed by its value.
 * NOTE(review): the braces, the declarations of fd, filled and i, the loop
 * heads, the stores through hwcap / hwcap2, the error and end-of-file paths
 * and the close of fd are not visible in this excerpt.
 */
136 static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
139 unsigned long auxbuf[32];
143 fd = open("/proc/self/auxv", O_RDONLY);
/* Append new data after the filled bytes already held in auxbuf. */
149 int ret = read(fd, &((char *)auxbuf)[filled],
150 sizeof(auxbuf) - filled);
/* NOTE(review): presumably an interrupted read is retried -- confirm. */
152 if (ret < 0 && errno == EINTR)
/* Keep reading until at least one whole (type, value) pair is buffered. */
157 } while (filled < 2 * sizeof(long));
161 unsigned long type = auxbuf[i];
162 unsigned long value = auxbuf[i + 1];
164 if (type == AT_HWCAP)
166 else if (type == AT_HWCAP2)
/* One pair consumed; continue while another whole pair remains. */
169 filled -= 2 * sizeof(long);
170 } while (filled >= 2 * sizeof(long));
/* Move the leftover partial pair to the front of auxbuf. */
172 memmove(auxbuf, &auxbuf[i], filled);
/*
 * aarch64 Linux implementation of get_cpu_features(): obtains hwcap and
 * hwcap2 from scan_auxv() and sets ARM_CPU_FEATURE_SHA1 when bit 5 of hwcap
 * is set.  Both values start at 0.
 * NOTE(review): the return-type line, the declaration of features and the
 * return statement are not visible in this excerpt; no use of hwcap2 is
 * visible either.
 */
179 get_cpu_features(void)
181 unsigned long hwcap = 0;
182 unsigned long hwcap2 = 0;
185 scan_auxv(&hwcap, &hwcap2);
187 if (hwcap & (1 << 5)) /* HWCAP_SHA1 */
188 features |= ARM_CPU_FEATURE_SHA1;
193 #elif defined(__aarch64__) && defined(__APPLE__)
195 /* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
197 #include <sys/types.h>
198 #include <sys/sysctl.h>
/*
 * Table pairing a sysctl name with the ARM_CPU_FEATURE_* bit to set when
 * that sysctl reports the feature; it is consumed by the Apple
 * get_cpu_features() through the name and feature members.
 * NOTE(review): the member declarations and the closing brace are not
 * visible in this excerpt.
 */
200 static const struct {
203 } feature_sysctls[] = {
204 { "hw.optional.arm.FEAT_SHA1", ARM_CPU_FEATURE_SHA1 },
/*
 * aarch64 Apple implementation of get_cpu_features(): queries every entry
 * of feature_sysctls with sysctlbyname() and ORs in the feature bit of each
 * entry that reports 1.
 * NOTE(review): the return-type line, the declarations of features and val
 * and the return statement are not visible in this excerpt.
 */
208 get_cpu_features(void)
212 for (size_t i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
213 const char *name = feature_sysctls[i].name;
215 size_t valsize = sizeof(val);
/*
 * Count the feature only if the lookup succeeded, the returned size equals
 * sizeof(val), and the value is exactly 1.
 */
217 if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
218 valsize == sizeof(val) && val == 1)
219 features |= feature_sysctls[i].feature;
224 #elif defined(__aarch64__) && defined(_WIN32)
/*
 * aarch64 Windows implementation of get_cpu_features(): sets
 * ARM_CPU_FEATURE_SHA1 when IsProcessorFeaturePresent() reports
 * PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE.
 * NOTE(review): the return-type line, the declaration of features and the
 * return statement are not visible in this excerpt.
 */
229 get_cpu_features(void)
233 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
234 features |= ARM_CPU_FEATURE_SHA1;
240 # error "CPU_FEATURES_ENABLED was set but no implementation is available!"
/*
 * Table mapping a feature name string to its feature bit.  It is searched
 * by find_cpu_feature() through the name and feature members.
 * NOTE(review): the member declarations, the #else / #endif lines and the
 * closing brace are not visible in this excerpt.
 */
243 static const struct {
246 } feature_table[] = {
247 #if defined(__i386__) || defined(__x86_64__)
248 {"ssse3", X86_CPU_FEATURE_SSSE3},
249 {"sse4.1", X86_CPU_FEATURE_SSE4_1},
250 {"sse4.2", X86_CPU_FEATURE_SSE4_2},
251 {"avx", X86_CPU_FEATURE_AVX},
252 {"bmi2", X86_CPU_FEATURE_BMI2},
/* On x86 both "sha" and "sha1" select the same bit. */
253 {"sha", X86_CPU_FEATURE_SHA},
254 {"sha1", X86_CPU_FEATURE_SHA},
255 #elif defined(__aarch64__)
256 {"sha1", ARM_CPU_FEATURE_SHA1},
258 # error "CPU_FEATURES_ENABLED was set but no features are defined!"
/*
 * find_cpu_feature() - look up a feature bit by name in feature_table.
 * @name:    start of the name; only the first namelen bytes are compared, so
 *           it does not need to be NUL-terminated at that point
 * @namelen: number of bytes of name to compare
 *
 * Returns the feature member of the first entry whose name has exactly
 * namelen bytes and matches them.
 * NOTE(review): the return-type line and the no-match return are not visible
 * in this excerpt; presumably 0 is returned so that callers clear no bits --
 * confirm.
 */
264 find_cpu_feature(const char *name, size_t namelen)
266 for (size_t i = 0; i < ARRAY_LEN(feature_table); i++) {
/* Check the length first so memcmp() compares equal-length names. */
267 if (namelen == strlen(feature_table[i].name) &&
268 memcmp(name, feature_table[i].name, namelen) == 0)
269 return feature_table[i].feature;
/*
 * init_cpu_features() - detect CPU features and apply the user's disable
 * list.
 *
 * Stores the result of get_cpu_features() in cpu_features, then clears every
 * feature named in the WIMLIB_DISABLE_CPU_FEATURES environment variable.
 * NOTE(review): the braces, the declarations of p and sep and the statement
 * taken when the variable is unset are not visible in this excerpt.
 */
276 void init_cpu_features(void)
280 cpu_features = get_cpu_features();
283 * Allow disabling CPU features via an environmental variable for
284 * testing purposes. Syntax is comma-separated list of feature names.
286 p = getenv("WIMLIB_DISABLE_CPU_FEATURES");
287 if (likely(p == NULL))
/* Handle each name that is followed by a comma ... */
289 for (; (sep = strchr(p, ',')) != NULL; p = sep + 1)
290 cpu_features &= ~find_cpu_feature(p, sep - p);
/* ... then the last name (or the whole string when it has no comma). */
291 cpu_features &= ~find_cpu_feature(p, strlen(p));
294 #endif /* CPU_FEATURES_ENABLED */