From: Eric Biggers Date: Sat, 18 Mar 2023 07:17:54 +0000 (-0700) Subject: Improve runtime CPU feature detection X-Git-Tag: v1.14.0~95 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=ad1e3889667e6d98c90c031b63a1e899bb0010d2 Improve runtime CPU feature detection - Make wimlib_global_init() do the CPU feature detection, so that it doesn't have to be done on-demand later. - Add support for detecting the x86 SHA extensions. - Add support for detecting ARMv8 SHA1 instructions on Linux, Windows, and macOS. (64-bit only for now.) - Allow disabling features via an environment variable for testing. - Remove some unused functionality. --- diff --git a/Makefile.am b/Makefile.am index c868bb72..fd2ec083 100644 --- a/Makefile.am +++ b/Makefile.am @@ -40,6 +40,7 @@ libwim_la_SOURCES = \ src/compress_common.c \ src/compress_parallel.c \ src/compress_serial.c \ + src/cpu_features.c \ src/decompress.c \ src/decompress_common.c \ src/delete_image.c \ @@ -88,7 +89,6 @@ libwim_la_SOURCES = \ src/verify.c \ src/wim.c \ src/write.c \ - src/x86_cpu_features.c \ src/xml.c \ src/xml_windows.c \ src/xpress_compress.c \ @@ -105,6 +105,7 @@ libwim_la_SOURCES = \ include/wimlib/compressor_ops.h \ include/wimlib/compress_common.h \ include/wimlib/chunk_compressor.h \ + include/wimlib/cpu_features.h \ include/wimlib/decompressor_ops.h \ include/wimlib/decompress_common.h \ include/wimlib/dentry.h \ @@ -150,7 +151,6 @@ libwim_la_SOURCES = \ include/wimlib/util.h \ include/wimlib/wim.h \ include/wimlib/write.h \ - include/wimlib/x86_cpu_features.h \ include/wimlib/xattr.h \ include/wimlib/xml.h \ include/wimlib/xml_windows.h \ diff --git a/include/wimlib/cpu_features.h b/include/wimlib/cpu_features.h new file mode 100644 index 00000000..ee6ef485 --- /dev/null +++ b/include/wimlib/cpu_features.h @@ -0,0 +1,37 @@ +#ifndef _WIMLIB_CPU_FEATURES_H +#define _WIMLIB_CPU_FEATURES_H + +#include "wimlib/types.h" + +#define X86_CPU_FEATURE_SSSE3 0x00000001 +#define X86_CPU_FEATURE_SSE4_1 
0x00000002 +#define X86_CPU_FEATURE_SSE4_2 0x00000004 +#define X86_CPU_FEATURE_AVX 0x00000008 +#define X86_CPU_FEATURE_BMI2 0x00000010 +#define X86_CPU_FEATURE_SHA 0x00000020 + +#define ARM_CPU_FEATURE_SHA1 0x00000001 + +#if (defined(__i386__) || defined(__x86_64__)) || \ + (defined(__aarch64__) && defined(__linux__)) || \ + (defined(__aarch64__) && defined(__APPLE__)) || \ + (defined(__aarch64__) && defined(_WIN32)) + +#define CPU_FEATURES_ENABLED 1 +extern u32 cpu_features; + +void init_cpu_features(void); + +#else + +#define CPU_FEATURES_ENABLED 0 +#define cpu_features 0 + +static inline void +init_cpu_features(void) +{ +} + +#endif + +#endif /* _WIMLIB_CPU_FEATURES_H */ diff --git a/include/wimlib/x86_cpu_features.h b/include/wimlib/x86_cpu_features.h deleted file mode 100644 index e57742b1..00000000 --- a/include/wimlib/x86_cpu_features.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _WIMLIB_X86_CPU_FEATURES_H -#define _WIMLIB_X86_CPU_FEATURES_H - -#include "wimlib/types.h" - -#if defined(__i386__) || defined(__x86_64__) - -#define X86_CPU_FEATURE_SSE 0x00000001 -#define X86_CPU_FEATURE_SSE2 0x00000002 -#define X86_CPU_FEATURE_SSE3 0x00000004 -#define X86_CPU_FEATURE_SSSE3 0x00000008 -#define X86_CPU_FEATURE_SSE4_1 0x00000010 -#define X86_CPU_FEATURE_SSE4_2 0x00000020 -#define X86_CPU_FEATURE_AVX 0x00000040 -#define X86_CPU_FEATURE_BMI 0x00000080 -#define X86_CPU_FEATURE_AVX2 0x00000100 -#define X86_CPU_FEATURE_BMI2 0x00000200 - -#define X86_CPU_FEATURES_KNOWN 0x80000000 - -extern u32 _x86_cpu_features; - -extern void -x86_setup_cpu_features(void); - -/* Does the processor has the specified feature? 
*/ -static inline bool -x86_have_cpu_feature(u32 feature) -{ - if (!(_x86_cpu_features & X86_CPU_FEATURES_KNOWN)) - x86_setup_cpu_features(); - return _x86_cpu_features & feature; -} - -#else - -static inline bool -x86_have_cpu_feature(u32 feature) -{ - return false; -} - -#endif /* __i386__ || __x86_64__ */ - -#endif /* _WIMLIB_X86_CPU_FEATURES_H */ diff --git a/src/cpu_features.c b/src/cpu_features.c new file mode 100644 index 00000000..9aae638c --- /dev/null +++ b/src/cpu_features.c @@ -0,0 +1,294 @@ +/* + * cpu_features.c - runtime CPU feature detection + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/cpu_features.h"
+
+#if CPU_FEATURES_ENABLED
+
+#include "wimlib/util.h"
+
+#include <stdlib.h>	/* getenv() */
+#include <string.h>	/* memcmp(), strchr(), strlen() */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * With old GCC versions we have to manually save and restore the x86_32 PIC
+ * register (ebx).  See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
+ */
+#if defined(__i386__) && defined(__PIC__)
+#  define EBX_CONSTRAINT "=&r"	/* some other register; ebx is swapped in and out by hand */
+#else
+#  define EBX_CONSTRAINT "=b"	/* ebx itself */
+#endif
+
+/* Execute CPUID for (leaf, subleaf); eax, ebx, ecx, edx are stored to *a, *b, *c and *d. */
+static inline void
+cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+	asm(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"	/* if %1 is not ebx: stash ebx in %1 */
+	    "cpuid \n"
+	    ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"	/* swap back: ebx restored, %1 holds CPUID's ebx */
+	    : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
+	    : "a" (leaf), "c" (subleaf));
+}
+
+/* Read extended control register 'index'; the result is returned as (edx << 32) | eax. */
+static inline u64
+read_xcr(u32 index)
+{
+	u32 d, a;
+
+	/*
+	 * Execute the "xgetbv" instruction.  Old versions of binutils do not
+	 * recognize this instruction, so list the raw bytes instead.
+	 */
+	asm(".byte 0x0f, 0x01, 0xd0" : "=d" (d), "=a" (a) : "c" (index));
+
+	return ((u64)d << 32) | a;	/* edx supplies the high half, eax the low half */
+}
+
+static u32
+get_cpu_features(void)	/* x86: build the X86_CPU_FEATURE_* mask from CPUID (plus XCR0 for AVX) */
+{
+	u32 max_leaf, a, b, c, d;
+	u64 xcr0 = 0;	/* stays 0, so AVX is not reported, unless bit 27 of ecx is set below */
+	u32 features = 0;
+
+	/* EAX=0: Highest Function Parameter and Manufacturer ID */
+	cpuid(0, 0, &max_leaf, &b, &c, &d);
+	if (max_leaf < 1)	/* leaf 1 not available: report no features */
+		return features;
+
+	/* EAX=1: Processor Info and Feature Bits */
+	cpuid(1, 0, &a, &b, &c, &d);
+	if (c & (1 << 9))
+		features |= X86_CPU_FEATURE_SSSE3;
+	if (c & (1 << 19))
+		features |= X86_CPU_FEATURE_SSE4_1;
+	if (c & (1 << 20))
+		features |= X86_CPU_FEATURE_SSE4_2;
+	if (c & (1 << 27))	/* OSXSAVE */
+		xcr0 = read_xcr(0);
+	if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))	/* AVX bit set and the OS saves the ymm registers */
+		features |= X86_CPU_FEATURE_AVX;
+
+	if (max_leaf < 7)	/* leaf 7 not available: return what leaf 1 gave us */
+		return features;
+
+	/* EAX=7, ECX=0: Extended Features */
+	cpuid(7, 0, &a, &b, &c, &d);
+	if (b & (1 << 8))
+		features |= X86_CPU_FEATURE_BMI2;
+	if (b & (1 << 29))
+		features |= X86_CPU_FEATURE_SHA;
+
+	return features;
+}
+
+#elif defined(__aarch64__) && defined(__linux__)
+
+/*
+ * On Linux, arm32 and arm64 CPU features can be detected by reading the
+ * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
+ *
+ * Ideally we'd use the C library function getauxval(), but it's not guaranteed
+ * to be available: it was only added to glibc in 2.16, and in Android it was
+ * added to API level 18 for arm32 and level 21 for arm64.
+ */
+
+#include <errno.h>	/* errno, EINTR */
+#include <fcntl.h>	/* open(), O_RDONLY */
+#include <string.h>	/* memmove() */
+#include <unistd.h>	/* read(), close() */
+
+#define AT_HWCAP	16
+#define AT_HWCAP2	26
+
+static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
+{	/* Store the AT_HWCAP / AT_HWCAP2 values found in /proc/self/auxv; outputs are left untouched if absent or unreadable. */
+	int fd;
+	unsigned long auxbuf[32];
+	int filled = 0;	/* number of bytes in auxbuf not yet parsed */
+	int i;
+
+	fd = open("/proc/self/auxv", O_RDONLY);
+	if (fd < 0)
+		return;
+
+	for (;;) {
+		do {	/* read until at least one whole (type, value) pair is buffered */
+			int ret = read(fd, &((char *)auxbuf)[filled],
+				       sizeof(auxbuf) - filled);
+			if (ret <= 0) {
+				if (ret < 0 && errno == EINTR)
+					continue;	/* interrupted: try the read again */
+				goto out;	/* end of file or read error: stop scanning */
+			}
+			filled += ret;
+		} while (filled < 2 * sizeof(long));
+
+		i = 0;
+		do {	/* consume every complete pair currently in the buffer */
+			unsigned long type = auxbuf[i];
+			unsigned long value = auxbuf[i + 1];
+
+			if (type == AT_HWCAP)
+				*hwcap = value;
+			else if (type == AT_HWCAP2)
+				*hwcap2 = value;
+			i += 2;
+			filled -= 2 * sizeof(long);
+		} while (filled >= 2 * sizeof(long));
+
+		memmove(auxbuf, &auxbuf[i], filled);	/* move any partial pair to the front for the next read */
+	}
+out:
+	close(fd);
+}
+
+static u32
+get_cpu_features(void)	/* Linux/arm64: derive the ARM_CPU_FEATURE_* mask from the auxv hwcap word */
+{
+	unsigned long hwcap = 0;
+	unsigned long hwcap2 = 0;	/* filled in by scan_auxv() but not consulted below */
+	u32 features = 0;
+
+	scan_auxv(&hwcap, &hwcap2);
+
+	if (hwcap & (1 << 5))	/* HWCAP_SHA1 */
+		features |= ARM_CPU_FEATURE_SHA1;
+
+	return features;
+}
+
+#elif defined(__aarch64__) && defined(__APPLE__)
+
+/* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). 
*/
+
+#include <sys/types.h>
+#include <sys/sysctl.h>	/* sysctlbyname() */
+
+static const struct {
+	const char *name;	/* sysctl name to query */
+	u32 feature;	/* ARM_CPU_FEATURE_* bit reported when that sysctl reads 1 */
+} feature_sysctls[] = {
+	{ "hw.optional.arm.FEAT_SHA1",	ARM_CPU_FEATURE_SHA1 },
+};
+
+static u32
+get_cpu_features(void)	/* Apple/arm64: OR together the feature bit of every sysctl that reads 1 */
+{
+	u32 features = 0;
+
+	for (size_t i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
+		const char *name = feature_sysctls[i].name;
+		u32 val = 0;
+		size_t valsize = sizeof(val);
+
+		if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
+		    valsize == sizeof(val) && val == 1)	/* only a u32-sized result equal to 1 counts */
+			features |= feature_sysctls[i].feature;
+	}
+	return features;
+}
+
+#elif defined(__aarch64__) && defined(_WIN32)
+
+#include <windows.h>	/* IsProcessorFeaturePresent() */
+
+static u32
+get_cpu_features(void)	/* Windows/arm64: map the processor-feature query onto ARM_CPU_FEATURE_SHA1 */
+{
+	u32 features = 0;
+
+	if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))	/* NOTE(review): flag name suggests the ARMv8 crypto extension as a whole - confirm it implies SHA1 */
+		features |= ARM_CPU_FEATURE_SHA1;
+
+	return features;
+}
+
+#else
+#  error "CPU_FEATURES_ENABLED was set but no implementation is available!"
+#endif
+
+static const struct {
+	const char *name;	/* name accepted in WIMLIB_DISABLE_CPU_FEATURES */
+	u32 feature;	/* bit(s) cleared from cpu_features when that name is listed */
+} feature_table[] = {
+#if defined(__i386__) || defined(__x86_64__)
+	{"ssse3",	X86_CPU_FEATURE_SSSE3},
+	{"sse4.1",	X86_CPU_FEATURE_SSE4_1},
+	{"sse4.2",	X86_CPU_FEATURE_SSE4_2},
+	{"avx",		X86_CPU_FEATURE_AVX},
+	{"bmi2",	X86_CPU_FEATURE_BMI2},
+	{"sha",		X86_CPU_FEATURE_SHA},
+	{"sha1",	X86_CPU_FEATURE_SHA},	/* second name for the same bit as "sha" */
+#elif defined(__aarch64__)
+	{"sha1",	ARM_CPU_FEATURE_SHA1},
+#else
+#  error "CPU_FEATURES_ENABLED was set but no features are defined!"
+#endif
+	{"*",		0xFFFFFFFF},	/* wildcard: every feature bit */
+};
+
+static u32
+find_cpu_feature(const char *name, size_t namelen)	/* mask for the namelen-byte name (need not be NUL-terminated); 0 if unknown */
+{
+	for (size_t i = 0; i < ARRAY_LEN(feature_table); i++) {
+		if (namelen == strlen(feature_table[i].name) &&
+		    memcmp(name, feature_table[i].name, namelen) == 0)	/* exact, case-sensitive match */
+			return feature_table[i].feature;
+	}
+	return 0;
+}
+
+u32 cpu_features;	/* detected feature mask; assigned by init_cpu_features() */
+
+void init_cpu_features(void)
+{
+	char *p, *sep;
+
+	cpu_features = get_cpu_features();
+
+	/*
+	 * Allow disabling CPU features via an environment variable for
+	 * testing purposes.  Syntax is a comma-separated list of feature names.
+ */ + p = getenv("WIMLIB_DISABLE_CPU_FEATURES"); + if (likely(p == NULL)) + return; + for (; (sep = strchr(p, ',')) != NULL; p = sep + 1) + cpu_features &= ~find_cpu_feature(p, sep - p); + cpu_features &= ~find_cpu_feature(p, strlen(p)); +} + +#endif /* CPU_FEATURES_ENABLED */ diff --git a/src/lzms_common.c b/src/lzms_common.c index fb13a435..03d31c95 100644 --- a/src/lzms_common.c +++ b/src/lzms_common.c @@ -23,9 +23,9 @@ # include "config.h" #endif +#include "wimlib/cpu_features.h" #include "wimlib/lzms_common.h" #include "wimlib/unaligned.h" -#include "wimlib/x86_cpu_features.h" #ifdef __x86_64__ # include @@ -614,7 +614,7 @@ lzms_x86_filter(u8 data[restrict], s32 size, tail_ptr = &data[size - 16]; #ifdef __x86_64__ - if (x86_have_cpu_feature(X86_CPU_FEATURE_SSE4_2)) { + if (cpu_features & X86_CPU_FEATURE_SSE4_2) { u8 saved_byte = *tail_ptr; *tail_ptr = 0xE8; for (;;) { diff --git a/src/wim.c b/src/wim.c index 08b0c1c0..69001122 100644 --- a/src/wim.c +++ b/src/wim.c @@ -32,6 +32,7 @@ #include "wimlib.h" #include "wimlib/assert.h" #include "wimlib/blob_table.h" +#include "wimlib/cpu_features.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/file_io.h" @@ -977,6 +978,7 @@ wimlib_global_init(int init_flags) WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE)) goto out_unlock; + init_cpu_features(); xml_global_init(); #ifdef __WIN32__ ret = win32_global_init(init_flags); diff --git a/src/x86_cpu_features.c b/src/x86_cpu_features.c deleted file mode 100644 index 11722622..00000000 --- a/src/x86_cpu_features.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * x86_cpu_features.c - feature detection for x86 processors - * - * Copyright 2022 Eric Biggers - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, 
sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "wimlib/x86_cpu_features.h" - -#if defined(__i386__) || defined(__x86_64__) - -#define DEBUG 0 - -#if DEBUG -# include -#endif - -u32 _x86_cpu_features = 0; - -/* With old GCC versions we have to manually save and restore the x86_32 PIC - * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */ -#if defined(__i386__) && defined(__PIC__) -# define EBX_CONSTRAINT "=r" -#else -# define EBX_CONSTRAINT "=b" -#endif - -/* Execute the CPUID instruction. */ -static inline void -cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d) -{ - __asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n" - "cpuid \n" - ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n" - : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d) - : "a" (leaf), "c" (subleaf)); -} - -/* Read an extended control register. */ -static inline u64 -read_xcr(u32 index) -{ - u32 edx, eax; - - /* Execute the "xgetbv" instruction. Old versions of binutils do not - * recognize this instruction, so list the raw bytes instead. 
*/ - __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index)); - - return ((u64)edx << 32) | eax; -} - -#define IS_SET(reg, bit) ((reg) & ((u32)1 << (bit))) - -/* Initialize _x86_cpu_features with bits for interesting processor features. */ -void -x86_setup_cpu_features(void) -{ - u32 features = 0; - u32 dummy1, dummy2, dummy3, dummy4; - u32 max_function; - u32 features_1, features_2, features_3, features_4; - bool os_saves_ymm_regs = false; - - /* Get maximum supported function */ - cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4); - if (max_function < 1) - goto out; - - /* Standard feature flags */ - cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1); - - if (IS_SET(features_1, 25)) - features |= X86_CPU_FEATURE_SSE; - - if (IS_SET(features_1, 26)) - features |= X86_CPU_FEATURE_SSE2; - - if (IS_SET(features_2, 0)) - features |= X86_CPU_FEATURE_SSE3; - - if (IS_SET(features_2, 9)) - features |= X86_CPU_FEATURE_SSSE3; - - if (IS_SET(features_2, 19)) - features |= X86_CPU_FEATURE_SSE4_1; - - if (IS_SET(features_2, 20)) - features |= X86_CPU_FEATURE_SSE4_2; - - if (IS_SET(features_2, 27)) /* OSXSAVE set? 
*/ - if ((read_xcr(0) & 0x6) == 0x6) - os_saves_ymm_regs = true; - - if (os_saves_ymm_regs && IS_SET(features_2, 28)) - features |= X86_CPU_FEATURE_AVX; - - if (max_function < 7) - goto out; - - /* Extended feature flags */ - cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4); - - if (IS_SET(features_3, 3)) - features |= X86_CPU_FEATURE_BMI; - - if (os_saves_ymm_regs && IS_SET(features_3, 5)) - features |= X86_CPU_FEATURE_AVX2; - - if (IS_SET(features_3, 8)) - features |= X86_CPU_FEATURE_BMI2; - -out: - -#if DEBUG - printf("Detected x86 CPU features: "); - if (features & X86_CPU_FEATURE_SSE) - printf("SSE "); - if (features & X86_CPU_FEATURE_SSE2) - printf("SSE2 "); - if (features & X86_CPU_FEATURE_SSE3) - printf("SSE3 "); - if (features & X86_CPU_FEATURE_SSSE3) - printf("SSSE3 "); - if (features & X86_CPU_FEATURE_SSE4_1) - printf("SSE4.1 "); - if (features & X86_CPU_FEATURE_SSE4_2) - printf("SSE4.2 "); - if (features & X86_CPU_FEATURE_BMI) - printf("BMI "); - if (features & X86_CPU_FEATURE_AVX) - printf("AVX "); - if (features & X86_CPU_FEATURE_BMI2) - printf("BMI2 "); - if (features & X86_CPU_FEATURE_AVX2) - printf("AVX2 "); - printf("\n"); -#endif /* DEBUG */ - - _x86_cpu_features = features | X86_CPU_FEATURES_KNOWN; -} - -#endif /* __i386__ || __x86_64__ */