]> wimlib.net Git - wimlib/commitdiff
Improve runtime CPU feature detection
author Eric Biggers <ebiggers3@gmail.com>
Sat, 18 Mar 2023 07:17:54 +0000 (00:17 -0700)
committer Eric Biggers <ebiggers3@gmail.com>
Sat, 18 Mar 2023 07:17:54 +0000 (00:17 -0700)
- Make wimlib_global_init() do the CPU feature detection, so that it
  doesn't have to be done on-demand later.

- Add support for detecting the x86 SHA extensions.

- Add support for detecting ARMv8 SHA1 instructions on Linux, Windows,
  and macOS.  (64-bit only for now.)

- Allow disabling features via an environment variable for testing.

- Remove some unused functionality.

Makefile.am
include/wimlib/cpu_features.h [new file with mode: 0644]
include/wimlib/x86_cpu_features.h [deleted file]
src/cpu_features.c [new file with mode: 0644]
src/lzms_common.c
src/wim.c
src/x86_cpu_features.c [deleted file]

index c868bb7207453c05b08f45d8c01899066303ef72..fd2ec0831ec2ce1ba661b39ad96cd82bef662d4d 100644 (file)
@@ -40,6 +40,7 @@ libwim_la_SOURCES =           \
        src/compress_common.c   \
        src/compress_parallel.c \
        src/compress_serial.c   \
+       src/cpu_features.c      \
        src/decompress.c        \
        src/decompress_common.c \
        src/delete_image.c      \
@@ -88,7 +89,6 @@ libwim_la_SOURCES =           \
        src/verify.c            \
        src/wim.c               \
        src/write.c             \
-       src/x86_cpu_features.c  \
        src/xml.c               \
        src/xml_windows.c       \
        src/xpress_compress.c   \
@@ -105,6 +105,7 @@ libwim_la_SOURCES =         \
        include/wimlib/compressor_ops.h \
        include/wimlib/compress_common.h        \
        include/wimlib/chunk_compressor.h       \
+       include/wimlib/cpu_features.h   \
        include/wimlib/decompressor_ops.h       \
        include/wimlib/decompress_common.h      \
        include/wimlib/dentry.h         \
@@ -150,7 +151,6 @@ libwim_la_SOURCES =         \
        include/wimlib/util.h           \
        include/wimlib/wim.h            \
        include/wimlib/write.h          \
-       include/wimlib/x86_cpu_features.h       \
        include/wimlib/xattr.h          \
        include/wimlib/xml.h            \
        include/wimlib/xml_windows.h    \
diff --git a/include/wimlib/cpu_features.h b/include/wimlib/cpu_features.h
new file mode 100644 (file)
index 0000000..ee6ef48
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef _WIMLIB_CPU_FEATURES_H
+#define _WIMLIB_CPU_FEATURES_H
+
+#include "wimlib/types.h"
+
+#define X86_CPU_FEATURE_SSSE3          0x00000001
+#define X86_CPU_FEATURE_SSE4_1         0x00000002
+#define X86_CPU_FEATURE_SSE4_2         0x00000004
+#define X86_CPU_FEATURE_AVX            0x00000008
+#define X86_CPU_FEATURE_BMI2           0x00000010
+#define X86_CPU_FEATURE_SHA            0x00000020
+
+#define ARM_CPU_FEATURE_SHA1           0x00000001
+
+#if (defined(__i386__) || defined(__x86_64__)) || \
+    (defined(__aarch64__) && defined(__linux__)) || \
+    (defined(__aarch64__) && defined(__APPLE__)) || \
+    (defined(__aarch64__) && defined(_WIN32))
+
+#define CPU_FEATURES_ENABLED   1       /* runtime detection is compiled in for this target */
+extern u32 cpu_features;       /* mask of *_CPU_FEATURE_* bits; assigned by init_cpu_features() */
+
+void init_cpu_features(void);
+
+#else
+
+#define CPU_FEATURES_ENABLED   0       /* no runtime detection for this target */
+#define cpu_features 0         /* any "cpu_features & FLAG" test evaluates to 0 */
+
+static inline void
+init_cpu_features(void)
+{      /* nothing to detect; provided so callers need no #ifdef */
+}
+
+#endif
+
+#endif /* _WIMLIB_CPU_FEATURES_H */
diff --git a/include/wimlib/x86_cpu_features.h b/include/wimlib/x86_cpu_features.h
deleted file mode 100644 (file)
index e57742b..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef _WIMLIB_X86_CPU_FEATURES_H
-#define _WIMLIB_X86_CPU_FEATURES_H
-
-#include "wimlib/types.h"
-
-#if defined(__i386__) || defined(__x86_64__)
-
-#define X86_CPU_FEATURE_SSE            0x00000001
-#define X86_CPU_FEATURE_SSE2           0x00000002
-#define X86_CPU_FEATURE_SSE3           0x00000004
-#define X86_CPU_FEATURE_SSSE3          0x00000008
-#define X86_CPU_FEATURE_SSE4_1         0x00000010
-#define X86_CPU_FEATURE_SSE4_2         0x00000020
-#define X86_CPU_FEATURE_AVX            0x00000040
-#define X86_CPU_FEATURE_BMI            0x00000080
-#define X86_CPU_FEATURE_AVX2           0x00000100
-#define X86_CPU_FEATURE_BMI2           0x00000200
-
-#define X86_CPU_FEATURES_KNOWN         0x80000000
-
-extern u32 _x86_cpu_features;
-
-extern void
-x86_setup_cpu_features(void);
-
-/* Does the processor has the specified feature?  */
-static inline bool
-x86_have_cpu_feature(u32 feature)
-{
-       if (!(_x86_cpu_features & X86_CPU_FEATURES_KNOWN))
-               x86_setup_cpu_features();
-       return _x86_cpu_features & feature;
-}
-
-#else
-
-static inline bool
-x86_have_cpu_feature(u32 feature)
-{
-       return false;
-}
-
-#endif /* __i386__ || __x86_64__ */
-
-#endif /* _WIMLIB_X86_CPU_FEATURES_H */
diff --git a/src/cpu_features.c b/src/cpu_features.c
new file mode 100644 (file)
index 0000000..9aae638
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * cpu_features.c - runtime CPU feature detection
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/cpu_features.h"
+
+#if CPU_FEATURES_ENABLED
+
+#include "wimlib/util.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * With old GCC versions we have to manually save and restore the x86_32 PIC
+ * register (ebx).  See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
+ */
+#if defined(__i386__) && defined(__PIC__)
+#  define EBX_CONSTRAINT "=&r"
+#else
+#  define EBX_CONSTRAINT "=b"
+#endif
+
+/*
+ * Execute the CPUID instruction.
+ *
+ * 'leaf' is loaded into eax and 'subleaf' into ecx; the resulting eax, ebx,
+ * ecx, and edx values are stored to *a, *b, *c, and *d respectively.
+ *
+ * When operand %1 (the output for *b) is not %ebx itself, the ".ifnc" lines
+ * copy %ebx into %1 before CPUID and exchange the two afterwards, which
+ * restores the original %ebx and leaves the CPUID result in %1.
+ */
+static inline void
+cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+       asm(".ifnc %%ebx, %1; mov  %%ebx, %1; .endif\n"
+           "cpuid                                  \n"
+           ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
+           : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
+           : "a" (leaf), "c" (subleaf));
+}
+
+/*
+ * Read an extended control register.
+ *
+ * 'index' is passed in ecx.  The result is returned as a 64-bit value built
+ * from edx (upper 32 bits) and eax (lower 32 bits).
+ */
+static inline u64
+read_xcr(u32 index)
+{
+       u32 d, a;       /* receive edx and eax respectively */
+
+       /*
+        * Execute the "xgetbv" instruction.  Old versions of binutils do not
+        * recognize this instruction, so list the raw bytes instead.
+        */
+       asm(".byte 0x0f, 0x01, 0xd0" : "=d" (d), "=a" (a) : "c" (index));
+
+       return ((u64)d << 32) | a;
+}
+
+static u32
+get_cpu_features(void)
+{
+       u32 max_leaf, a, b, c, d;
+       u64 xcr0 = 0;           /* stays 0 unless the OSXSAVE bit is set below */
+       u32 features = 0;       /* X86_CPU_FEATURE_* bits detected so far; the return value */
+
+       /*
+        * EAX=0: Highest Function Parameter and Manufacturer ID.  Only the
+        * highest supported leaf (returned in eax) is used here; if leaf 1 is
+        * not available, no features are reported.
+        */
+       cpuid(0, 0, &max_leaf, &b, &c, &d);
+       if (max_leaf < 1)
+               return features;
+
+       /* EAX=1: Processor Info and Feature Bits (the bits tested are in ecx) */
+       cpuid(1, 0, &a, &b, &c, &d);
+       if (c & (1 << 9))
+               features |= X86_CPU_FEATURE_SSSE3;
+       if (c & (1 << 19))
+               features |= X86_CPU_FEATURE_SSE4_1;
+       if (c & (1 << 20))
+               features |= X86_CPU_FEATURE_SSE4_2;
+       if (c & (1 << 27))      /* OSXSAVE: only then is XCR0 read */
+               xcr0 = read_xcr(0);
+       if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))   /* AVX bit, and XCR0 bits 1-2 both set */
+               features |= X86_CPU_FEATURE_AVX;
+
+       if (max_leaf < 7)       /* leaf 7 unavailable: BMI2 and SHA are not checked */
+               return features;
+
+       /* EAX=7, ECX=0: Extended Features (the bits tested are in ebx) */
+       cpuid(7, 0, &a, &b, &c, &d);
+       if (b & (1 << 8))
+               features |= X86_CPU_FEATURE_BMI2;
+       if (b & (1 << 29))
+               features |= X86_CPU_FEATURE_SHA;
+
+       return features;
+}
+
+#elif defined(__aarch64__) && defined(__linux__)
+
+/*
+ * On Linux, arm32 and arm64 CPU features can be detected by reading the
+ * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
+ *
+ * Ideally we'd use the C library function getauxval(), but it's not guaranteed
+ * to be available: it was only added to glibc in 2.16, and in Android it was
+ * added to API level 18 for arm32 and level 21 for arm64.
+ *
+ * scan_auxv() reads the file as a sequence of (type, value) pairs of unsigned
+ * longs and stores the values of the AT_HWCAP and AT_HWCAP2 entries to *hwcap
+ * and *hwcap2.  If the file cannot be opened, or an entry is never seen, the
+ * corresponding output is left unmodified.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#define AT_HWCAP       16
+#define AT_HWCAP2      26
+
+static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
+{
+       int fd;
+       unsigned long auxbuf[32];       /* staging buffer for (type, value) pairs */
+       int filled = 0;                 /* bytes in auxbuf not yet processed */
+       int i;
+
+       fd = open("/proc/self/auxv", O_RDONLY);
+       if (fd < 0)
+               return;         /* outputs keep whatever the caller stored in them */
+
+       for (;;) {
+               do {            /* read until at least one complete pair is buffered */
+                       int ret = read(fd, &((char *)auxbuf)[filled],
+                                      sizeof(auxbuf) - filled);
+                       if (ret <= 0) {
+                               if (ret < 0 && errno == EINTR)
+                                       continue;       /* interrupted: try the read again */
+                               goto out;       /* end of file or a real error ends the scan */
+                       }
+                       filled += ret;
+               } while (filled < 2 * sizeof(long));
+
+               i = 0;
+               do {            /* consume every complete pair currently buffered */
+                       unsigned long type = auxbuf[i];
+                       unsigned long value = auxbuf[i + 1];
+
+                       if (type == AT_HWCAP)
+                               *hwcap = value;
+                       else if (type == AT_HWCAP2)
+                               *hwcap2 = value;
+                       i += 2;
+                       filled -= 2 * sizeof(long);
+               } while (filled >= 2 * sizeof(long));
+
+               memmove(auxbuf, &auxbuf[i], filled);    /* move any partial pair to the front */
+       }
+out:
+       close(fd);
+}
+
+static u32
+get_cpu_features(void)
+{
+       unsigned long hwcap = 0;        /* remains 0 if scan_auxv() finds no AT_HWCAP entry */
+       unsigned long hwcap2 = 0;       /* collected, but no check below uses it */
+       u32 features = 0;               /* ARM_CPU_FEATURE_* bits to return */
+
+       scan_auxv(&hwcap, &hwcap2);
+
+       if (hwcap & (1 << 5))   /* HWCAP_SHA1 */
+               features |= ARM_CPU_FEATURE_SHA1;
+
+       return features;
+}
+
+#elif defined(__aarch64__) && defined(__APPLE__)
+
+/*
+ * On Apple platforms, arm64 CPU features can be detected via sysctlbyname().
+ *
+ * feature_sysctls maps each sysctl name to the ARM_CPU_FEATURE_* bit it
+ * controls.  get_cpu_features() sets a bit only when the sysctlbyname() call
+ * returns 0, reports exactly sizeof(u32) bytes, and yields the value 1.
+ */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static const struct {
+       const char *name;       /* sysctl name to query */
+       u32 feature;            /* bit to set when that sysctl reads as 1 */
+} feature_sysctls[] = {
+       { "hw.optional.arm.FEAT_SHA1",  ARM_CPU_FEATURE_SHA1 },
+};
+
+static u32
+get_cpu_features(void)
+{
+       u32 features = 0;
+
+       for (size_t i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
+               const char *name = feature_sysctls[i].name;
+               u32 val = 0;
+               size_t valsize = sizeof(val);   /* in: buffer size; out: size of the value returned */
+
+               if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
+                   valsize == sizeof(val) && val == 1)
+                       features |= feature_sysctls[i].feature;
+       }
+       return features;
+}
+
+#elif defined(__aarch64__) && defined(_WIN32)
+
+#include <windows.h>
+
+static u32
+get_cpu_features(void)
+{
+       u32 features = 0;       /* ARM_CPU_FEATURE_* bits to return */
+
+       if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+               features |= ARM_CPU_FEATURE_SHA1;       /* the crypto flag is taken to imply SHA1 */
+
+       return features;
+}
+
+#else
+#  error "CPU_FEATURES_ENABLED was set but no implementation is available!"
+#endif
+
+static const struct {
+       const char *name;       /* name accepted in WIMLIB_DISABLE_CPU_FEATURES */
+       u32 feature;            /* bit(s) of cpu_features that the name refers to */
+} feature_table[] = {
+#if defined(__i386__) || defined(__x86_64__)
+       {"ssse3",       X86_CPU_FEATURE_SSSE3},
+       {"sse4.1",      X86_CPU_FEATURE_SSE4_1},
+       {"sse4.2",      X86_CPU_FEATURE_SSE4_2},
+       {"avx",         X86_CPU_FEATURE_AVX},
+       {"bmi2",        X86_CPU_FEATURE_BMI2},
+       {"sha",         X86_CPU_FEATURE_SHA},
+       {"sha1",        X86_CPU_FEATURE_SHA},   /* second name for the same bit as "sha" */
+#elif defined(__aarch64__)
+       {"sha1",        ARM_CPU_FEATURE_SHA1},
+#else
+#  error "CPU_FEATURES_ENABLED was set but no features are defined!"
+#endif
+       {"*",           0xFFFFFFFF},    /* wildcard: covers every feature bit */
+};
+
+static u32
+find_cpu_feature(const char *name, size_t namelen)
+{
+       for (size_t i = 0; i < ARRAY_LEN(feature_table); i++) {
+               if (namelen == strlen(feature_table[i].name) && /* 'name' need not be NUL-terminated */
+                   memcmp(name, feature_table[i].name, namelen) == 0)
+                       return feature_table[i].feature;
+       }
+       return 0;       /* no table entry has this name: no bits */
+}
+
+u32 cpu_features;      /* detected feature bits, minus any disabled via the environment */
+
+void init_cpu_features(void)
+{
+       char *p, *sep;
+
+       cpu_features = get_cpu_features();
+
+       /*
+        * Allow disabling CPU features via an environment variable for
+        * testing purposes.  Syntax is comma-separated list of feature names.
+        * Each name is looked up with find_cpu_feature() and the bits it
+        * returns are cleared; a name with no table entry clears nothing.
+        */
+       p = getenv("WIMLIB_DISABLE_CPU_FEATURES");
+       if (likely(p == NULL))
+               return;
+       for (; (sep = strchr(p, ',')) != NULL; p = sep + 1)
+               cpu_features &= ~find_cpu_feature(p, sep - p);
+       cpu_features &= ~find_cpu_feature(p, strlen(p));        /* final (or only) name in the list */
+}
+
+#endif /* CPU_FEATURES_ENABLED */
index fb13a4358c5469a79d0c4c5e101c03ee2f9e59db..03d31c95f59d45ac08d67165687f0d24523d6597 100644 (file)
@@ -23,9 +23,9 @@
 #  include "config.h"
 #endif
 
+#include "wimlib/cpu_features.h"
 #include "wimlib/lzms_common.h"
 #include "wimlib/unaligned.h"
-#include "wimlib/x86_cpu_features.h"
 
 #ifdef __x86_64__
 #  include <emmintrin.h>
@@ -614,7 +614,7 @@ lzms_x86_filter(u8 data[restrict], s32 size,
        tail_ptr = &data[size - 16];
 
 #ifdef __x86_64__
-       if (x86_have_cpu_feature(X86_CPU_FEATURE_SSE4_2)) {
+       if (cpu_features & X86_CPU_FEATURE_SSE4_2) {
                u8 saved_byte = *tail_ptr;
                *tail_ptr = 0xE8;
                for (;;) {
index 08b0c1c0e6df49be04f97da7afd2a570866a897c..69001122e1c147f58c261b94bcffbe5b6bf52f52 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -32,6 +32,7 @@
 #include "wimlib.h"
 #include "wimlib/assert.h"
 #include "wimlib/blob_table.h"
+#include "wimlib/cpu_features.h"
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
 #include "wimlib/file_io.h"
@@ -977,6 +978,7 @@ wimlib_global_init(int init_flags)
                            WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE))
                goto out_unlock;
 
+       init_cpu_features();
        xml_global_init();
 #ifdef __WIN32__
        ret = win32_global_init(init_flags);
diff --git a/src/x86_cpu_features.c b/src/x86_cpu_features.c
deleted file mode 100644 (file)
index 1172262..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * x86_cpu_features.c - feature detection for x86 processors
- *
- * Copyright 2022 Eric Biggers
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
-
-#include "wimlib/x86_cpu_features.h"
-
-#if defined(__i386__) || defined(__x86_64__)
-
-#define DEBUG 0
-
-#if DEBUG
-#  include <stdio.h>
-#endif
-
-u32 _x86_cpu_features = 0;
-
-/* With old GCC versions we have to manually save and restore the x86_32 PIC
- * register (ebx).  See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602  */
-#if defined(__i386__) && defined(__PIC__)
-#  define EBX_CONSTRAINT "=r"
-#else
-#  define EBX_CONSTRAINT "=b"
-#endif
-
-/* Execute the CPUID instruction.  */
-static inline void
-cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
-{
-       __asm__(".ifnc %%ebx, %1; mov  %%ebx, %1; .endif\n"
-               "cpuid                                  \n"
-               ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
-               : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
-               : "a" (leaf), "c" (subleaf));
-}
-
-/* Read an extended control register.  */
-static inline u64
-read_xcr(u32 index)
-{
-       u32 edx, eax;
-
-       /* Execute the "xgetbv" instruction.  Old versions of binutils do not
-        * recognize this instruction, so list the raw bytes instead.  */
-       __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index));
-
-       return ((u64)edx << 32) | eax;
-}
-
-#define IS_SET(reg, bit) ((reg) & ((u32)1 << (bit)))
-
-/* Initialize _x86_cpu_features with bits for interesting processor features. */
-void
-x86_setup_cpu_features(void)
-{
-       u32 features = 0;
-       u32 dummy1, dummy2, dummy3, dummy4;
-       u32 max_function;
-       u32 features_1, features_2, features_3, features_4;
-       bool os_saves_ymm_regs = false;
-
-       /* Get maximum supported function  */
-       cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
-       if (max_function < 1)
-               goto out;
-
-       /* Standard feature flags  */
-       cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
-
-       if (IS_SET(features_1, 25))
-               features |= X86_CPU_FEATURE_SSE;
-
-       if (IS_SET(features_1, 26))
-               features |= X86_CPU_FEATURE_SSE2;
-
-       if (IS_SET(features_2, 0))
-               features |= X86_CPU_FEATURE_SSE3;
-
-       if (IS_SET(features_2, 9))
-               features |= X86_CPU_FEATURE_SSSE3;
-
-       if (IS_SET(features_2, 19))
-               features |= X86_CPU_FEATURE_SSE4_1;
-
-       if (IS_SET(features_2, 20))
-               features |= X86_CPU_FEATURE_SSE4_2;
-
-       if (IS_SET(features_2, 27)) /* OSXSAVE set?  */
-               if ((read_xcr(0) & 0x6) == 0x6)
-                       os_saves_ymm_regs = true;
-
-       if (os_saves_ymm_regs && IS_SET(features_2, 28))
-               features |= X86_CPU_FEATURE_AVX;
-
-       if (max_function < 7)
-               goto out;
-
-       /* Extended feature flags  */
-       cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);
-
-       if (IS_SET(features_3, 3))
-               features |= X86_CPU_FEATURE_BMI;
-
-       if (os_saves_ymm_regs && IS_SET(features_3, 5))
-               features |= X86_CPU_FEATURE_AVX2;
-
-       if (IS_SET(features_3, 8))
-               features |= X86_CPU_FEATURE_BMI2;
-
-out:
-
-#if DEBUG
-       printf("Detected x86 CPU features: ");
-       if (features & X86_CPU_FEATURE_SSE)
-               printf("SSE ");
-       if (features & X86_CPU_FEATURE_SSE2)
-               printf("SSE2 ");
-       if (features & X86_CPU_FEATURE_SSE3)
-               printf("SSE3 ");
-       if (features & X86_CPU_FEATURE_SSSE3)
-               printf("SSSE3 ");
-       if (features & X86_CPU_FEATURE_SSE4_1)
-               printf("SSE4.1 ");
-       if (features & X86_CPU_FEATURE_SSE4_2)
-               printf("SSE4.2 ");
-       if (features & X86_CPU_FEATURE_BMI)
-               printf("BMI ");
-       if (features & X86_CPU_FEATURE_AVX)
-               printf("AVX ");
-       if (features & X86_CPU_FEATURE_BMI2)
-               printf("BMI2 ");
-       if (features & X86_CPU_FEATURE_AVX2)
-               printf("AVX2 ");
-       printf("\n");
-#endif /* DEBUG */
-
-       _x86_cpu_features = features | X86_CPU_FEATURES_KNOWN;
-}
-
-#endif /* __i386__ || __x86_64__ */