Add support for runtime x86 CPU feature detection
authorEric Biggers <ebiggers3@gmail.com>
Fri, 7 Aug 2015 02:55:42 +0000 (21:55 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Fri, 7 Aug 2015 03:24:20 +0000 (22:24 -0500)
Makefile.am
include/wimlib/x86_cpu_features.h [new file with mode: 0644]
src/x86_cpu_features.c [new file with mode: 0644]

index 647521e..6c41eb5 100644 (file)
@@ -88,6 +88,7 @@ libwim_la_SOURCES =           \
        src/verify.c            \
        src/wim.c               \
        src/write.c             \
+       src/x86_cpu_features.c  \
        src/xml.c               \
        src/xpress_compress.c   \
        src/xpress_decompress.c \
@@ -150,6 +151,7 @@ libwim_la_SOURCES =         \
        include/wimlib/util.h           \
        include/wimlib/wim.h            \
        include/wimlib/write.h          \
+       include/wimlib/x86_cpu_features.h       \
        include/wimlib/xml.h            \
        include/wimlib/xpress_constants.h
 
diff --git a/include/wimlib/x86_cpu_features.h b/include/wimlib/x86_cpu_features.h
new file mode 100644 (file)
index 0000000..e57742b
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef _WIMLIB_X86_CPU_FEATURES_H
+#define _WIMLIB_X86_CPU_FEATURES_H
+
+#include "wimlib/types.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Feature bits that may be set in _x86_cpu_features.  Each one is a distinct
+ * single-bit mask, so they can be OR'ed together and tested with '&'.
+ */
+#define X86_CPU_FEATURE_SSE            0x00000001
+#define X86_CPU_FEATURE_SSE2           0x00000002
+#define X86_CPU_FEATURE_SSE3           0x00000004
+#define X86_CPU_FEATURE_SSSE3          0x00000008
+#define X86_CPU_FEATURE_SSE4_1         0x00000010
+#define X86_CPU_FEATURE_SSE4_2         0x00000020
+#define X86_CPU_FEATURE_AVX            0x00000040
+#define X86_CPU_FEATURE_BMI            0x00000080
+#define X86_CPU_FEATURE_AVX2           0x00000100
+#define X86_CPU_FEATURE_BMI2           0x00000200
+
+/*
+ * Marker bit, not a processor feature: it is set in _x86_cpu_features once
+ * x86_setup_cpu_features() has run, so that "not probed yet" can be told apart
+ * from "probed, but no features found".
+ */
+#define X86_CPU_FEATURES_KNOWN         0x80000000
+
+/*
+ * Bitmask of X86_CPU_FEATURE_* bits, plus X86_CPU_FEATURES_KNOWN once feature
+ * detection has been performed.  Use x86_have_cpu_feature() rather than
+ * reading this directly.
+ */
+extern u32 _x86_cpu_features;
+
+/* Detect the processor's features and record them in _x86_cpu_features.  */
+extern void
+x86_setup_cpu_features(void);
+
+/*
+ * Does the processor have the specified feature?
+ *
+ * 'feature' is an X86_CPU_FEATURE_* mask; the result is true if any bit of the
+ * mask is set in _x86_cpu_features.  Feature detection is run on demand the
+ * first time this is called (i.e. while X86_CPU_FEATURES_KNOWN is not yet set).
+ */
+static inline bool
+x86_have_cpu_feature(u32 feature)
+{
+       const bool already_probed =
+               (_x86_cpu_features & X86_CPU_FEATURES_KNOWN) != 0;
+
+       if (!already_probed)
+               x86_setup_cpu_features();
+
+       return (_x86_cpu_features & feature) != 0;
+}
+
+#else
+
+/* Non-x86 build: no x86 processor feature is ever reported as available.  */
+static inline bool
+x86_have_cpu_feature(u32 feature)
+{
+       (void)feature;  /* intentionally unused */
+       return false;
+}
+
+#endif /* __i386__ || __x86_64__ */
+
+#endif /* _WIMLIB_X86_CPU_FEATURES_H */
diff --git a/src/x86_cpu_features.c b/src/x86_cpu_features.c
new file mode 100644 (file)
index 0000000..f834757
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * x86_cpu_features.c
+ *
+ * Feature detection for x86 processors.
+ *
+ * Author:     Eric Biggers
+ * Year:       2015
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#include "wimlib/x86_cpu_features.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Set to 1 to make x86_setup_cpu_features() print the detected features to
+ * standard output.  */
+#define DEBUG 0
+
+#if DEBUG
+#  include <stdio.h>
+#endif
+
+/* Detected X86_CPU_FEATURE_* bits.  Starts out as 0; x86_setup_cpu_features()
+ * stores the detected bits together with X86_CPU_FEATURES_KNOWN.  */
+u32 _x86_cpu_features = 0;
+
+/*
+ * Execute the CPUID instruction.
+ *
+ * 'leaf' is placed in EAX and 'subleaf' in ECX before the instruction runs.
+ * Afterwards the contents of EAX, EBX, ECX, and EDX are stored through the
+ * 'eax', 'ebx', 'ecx', and 'edx' pointers, respectively.  All four output
+ * pointers are written unconditionally, so none of them may be NULL.
+ */
+static inline void
+cpuid(u32 leaf, u32 subleaf, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+       __asm__ ("cpuid"
+                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+                : "a" (leaf), "c" (subleaf));
+}
+
+/*
+ * Read an extended control register.
+ *
+ * 'index' selects the register and is passed in ECX.  The 64-bit result is
+ * assembled from EDX (high 32 bits) and EAX (low 32 bits).
+ *
+ * The only caller in this file checks the CPUID OSXSAVE bit before calling
+ * this function; keep it that way when adding new callers.
+ */
+static inline u64
+read_xcr(u32 index)
+{
+       u32 edx, eax;
+
+       /* This is the "xgetbv" instruction (opcode 0F 01 D0).  It is spelled
+        * as raw bytes because old versions of binutils do not recognize the
+        * mnemonic and would fail to assemble this file.  */
+       __asm__ (".byte 0x0f, 0x01, 0xd0"
+                : "=d" (edx), "=a" (eax)
+                : "c" (index));
+
+       return ((u64)edx << 32) | eax;
+}
+
+/* Evaluates to nonzero if bit number 'bit' (counting from 0 at the least
+ * significant end) is set in the 32-bit value 'reg', and to 0 otherwise.  */
+#define IS_SET(reg, bit) ((reg) & ((u32)1 << (bit)))
+
+/*
+ * Probe the processor and publish the result in _x86_cpu_features.
+ *
+ * After this returns, _x86_cpu_features contains one X86_CPU_FEATURE_* bit for
+ * every feature that was detected, plus X86_CPU_FEATURES_KNOWN to record that
+ * detection has been done.  AVX and AVX2 are reported only when the OS saves
+ * the YMM registers (OSXSAVE set and both bits of mask 0x6 set in XCR0).
+ */
+void
+x86_setup_cpu_features(void)
+{
+       u32 detected = 0;
+       u32 max_leaf;
+       u32 unused_a, unused_b, unused_c, unused_d;
+       u32 leaf1_ecx, leaf1_edx;
+       u32 leaf7_ebx, leaf7_ecx;
+       bool ymm_state_saved = false;
+
+       /* Leaf 0: EAX gives the highest supported standard leaf.  */
+       cpuid(0, 0, &max_leaf, &unused_b, &unused_c, &unused_d);
+
+       if (max_leaf >= 1) {
+               /* Leaf 1: standard feature flags, in EDX and ECX.  */
+               cpuid(1, 0, &unused_a, &unused_b, &leaf1_ecx, &leaf1_edx);
+
+               detected |= IS_SET(leaf1_edx, 25) ? X86_CPU_FEATURE_SSE : 0;
+               detected |= IS_SET(leaf1_edx, 26) ? X86_CPU_FEATURE_SSE2 : 0;
+               detected |= IS_SET(leaf1_ecx, 0) ? X86_CPU_FEATURE_SSE3 : 0;
+               detected |= IS_SET(leaf1_ecx, 9) ? X86_CPU_FEATURE_SSSE3 : 0;
+               detected |= IS_SET(leaf1_ecx, 19) ? X86_CPU_FEATURE_SSE4_1 : 0;
+               detected |= IS_SET(leaf1_ecx, 20) ? X86_CPU_FEATURE_SSE4_2 : 0;
+
+               /* Consult XCR0 only when OSXSAVE (ECX bit 27) is set; the
+                * short-circuit '&&' keeps read_xcr() from running otherwise. */
+               if (IS_SET(leaf1_ecx, 27) && (read_xcr(0) & 0x6) == 0x6)
+                       ymm_state_saved = true;
+
+               if (ymm_state_saved && IS_SET(leaf1_ecx, 28))
+                       detected |= X86_CPU_FEATURE_AVX;
+
+               if (max_leaf >= 7) {
+                       /* Leaf 7, subleaf 0: extended feature flags.  Only
+                        * EBX is examined; ECX is fetched but not used.  */
+                       cpuid(7, 0, &unused_a, &leaf7_ebx, &leaf7_ecx,
+                             &unused_d);
+
+                       detected |= IS_SET(leaf7_ebx, 3) ?
+                                       X86_CPU_FEATURE_BMI : 0;
+
+                       if (ymm_state_saved && IS_SET(leaf7_ebx, 5))
+                               detected |= X86_CPU_FEATURE_AVX2;
+
+                       detected |= IS_SET(leaf7_ebx, 8) ?
+                                       X86_CPU_FEATURE_BMI2 : 0;
+               }
+       }
+
+#if DEBUG
+       printf("Detected x86 CPU features: ");
+       if (detected & X86_CPU_FEATURE_SSE)
+               printf("SSE ");
+       if (detected & X86_CPU_FEATURE_SSE2)
+               printf("SSE2 ");
+       if (detected & X86_CPU_FEATURE_SSE3)
+               printf("SSE3 ");
+       if (detected & X86_CPU_FEATURE_SSSE3)
+               printf("SSSE3 ");
+       if (detected & X86_CPU_FEATURE_SSE4_1)
+               printf("SSE4.1 ");
+       if (detected & X86_CPU_FEATURE_SSE4_2)
+               printf("SSE4.2 ");
+       if (detected & X86_CPU_FEATURE_BMI)
+               printf("BMI ");
+       if (detected & X86_CPU_FEATURE_AVX)
+               printf("AVX ");
+       if (detected & X86_CPU_FEATURE_BMI2)
+               printf("BMI2 ");
+       if (detected & X86_CPU_FEATURE_AVX2)
+               printf("AVX2 ");
+       printf("\n");
+#endif /* DEBUG */
+
+       /* Always mark detection as done, even if nothing was found.  */
+       _x86_cpu_features = detected | X86_CPU_FEATURES_KNOWN;
+}
+
+#endif /* __i386__ || __x86_64__ */