You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1522 lines
51 KiB
1522 lines
51 KiB
We add back Prefer_SSE_for_memop since we still need it for all of the |
|
existing era implementations for RHEL 7.3. To remove it would require |
|
a more wholesale backport of optimized routines. |
|
|
|
commit e2e4f56056adddc3c1efe676b40a4b4f2453103b |
|
Author: H.J. Lu <hjl.tools@gmail.com> |
|
Date: Thu Aug 13 03:37:47 2015 -0700 |
|
|
|
Add _dl_x86_cpu_features to rtld_global |
|
|
|
This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so |
|
and initializes it early before __libc_start_main is called so that |
|
cpu_features is always available when it is used and we can avoid |
|
calling __init_cpu_features in IFUNC selectors. |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/i386/dl-machine.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/dl-machine.h |
|
+++ glibc-2.17-c758a686/sysdeps/i386/dl-machine.h |
|
@@ -25,6 +25,7 @@ |
|
#include <sysdep.h> |
|
#include <tls.h> |
|
#include <dl-tlsdesc.h> |
|
+#include <cpu-features.c> |
|
|
|
/* Return nonzero iff ELF header is compatible with the running host. */ |
|
static inline int __attribute__ ((unused)) |
|
@@ -266,6 +267,8 @@ dl_platform_init (void) |
|
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') |
|
/* Avoid an empty string which would disturb us. */ |
|
GLRO(dl_platform) = NULL; |
|
+ |
|
+ init_cpu_features (&GLRO(dl_x86_cpu_features)); |
|
} |
|
|
|
static inline Elf32_Addr |
|
Index: glibc-2.17-c758a686/sysdeps/i386/dl-procinfo.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/dl-procinfo.c |
|
+++ glibc-2.17-c758a686/sysdeps/i386/dl-procinfo.c |
|
@@ -43,6 +43,22 @@ |
|
# define PROCINFO_CLASS |
|
#endif |
|
|
|
+#if !IS_IN (ldconfig) |
|
+# if !defined PROCINFO_DECL && defined SHARED |
|
+ ._dl_x86_cpu_features |
|
+# else |
|
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features |
|
+# endif |
|
+# ifndef PROCINFO_DECL |
|
+= { } |
|
+# endif |
|
+# if !defined SHARED || defined PROCINFO_DECL |
|
+; |
|
+# else |
|
+, |
|
+# endif |
|
+#endif |
|
+ |
|
#if !defined PROCINFO_DECL && defined SHARED |
|
._dl_x86_cap_flags |
|
#else |
|
Index: glibc-2.17-c758a686/sysdeps/i386/i686/cacheinfo.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/cacheinfo.c |
|
+++ glibc-2.17-c758a686/sysdeps/i386/i686/cacheinfo.c |
|
@@ -8,6 +8,5 @@ |
|
#define __x86_64_raw_shared_cache_size_half __x86_raw_shared_cache_size_half |
|
|
|
#define DISABLE_PREFETCHW |
|
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION |
|
|
|
#include <sysdeps/x86_64/cacheinfo.c> |
|
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/Makefile |
|
+++ glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile |
|
@@ -1,5 +1,4 @@ |
|
ifeq ($(subdir),csu) |
|
-aux += init-arch |
|
tests += test-multiarch |
|
gen-as-const-headers += ifunc-defines.sym |
|
endif |
|
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Versions |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/Versions |
|
+++ /dev/null |
|
@@ -1,5 +0,0 @@ |
|
-libc { |
|
- GLIBC_PRIVATE { |
|
- __get_cpu_features; |
|
- } |
|
-} |
|
Index: glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-defines.sym |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/i686/multiarch/ifunc-defines.sym |
|
+++ glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-defines.sym |
|
@@ -4,7 +4,6 @@ |
|
-- |
|
|
|
CPU_FEATURES_SIZE sizeof (struct cpu_features) |
|
-KIND_OFFSET offsetof (struct cpu_features, kind) |
|
CPUID_OFFSET offsetof (struct cpu_features, cpuid) |
|
CPUID_SIZE sizeof (struct cpuid_registers) |
|
CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) |
|
Index: glibc-2.17-c758a686/sysdeps/i386/ldsodefs.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/i386/ldsodefs.h |
|
+++ glibc-2.17-c758a686/sysdeps/i386/ldsodefs.h |
|
@@ -20,6 +20,7 @@ |
|
#define _I386_LDSODEFS_H 1 |
|
|
|
#include <elf.h> |
|
+#include <cpu-features.h> |
|
|
|
struct La_i86_regs; |
|
struct La_i86_retval; |
|
Index: glibc-2.17-c758a686/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c |
|
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c |
|
@@ -1,5 +1,5 @@ |
|
#if IS_IN (ldconfig) |
|
# include <sysdeps/i386/dl-procinfo.c> |
|
#else |
|
-# include <sysdeps/generic/dl-procinfo.c> |
|
+# include <sysdeps/x86_64/dl-procinfo.c> |
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/x86/Makefile |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86/Makefile |
|
+++ glibc-2.17-c758a686/sysdeps/x86/Makefile |
|
@@ -7,3 +7,14 @@ $(objpfx)tst-xmmymmzmm.out: ../sysdeps/x |
|
@echo "Checking ld.so for SSE register use. This will take a few seconds..." |
|
$(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@ |
|
endif |
|
+ |
|
+ifeq ($(subdir),csu) |
|
+gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym |
|
+endif |
|
+ |
|
+ifeq ($(subdir),elf) |
|
+sysdep-dl-routines += dl-get-cpu-features |
|
+ |
|
+tests += tst-get-cpu-features |
|
+tests-static += tst-get-cpu-features-static |
|
+endif |
|
Index: glibc-2.17-c758a686/sysdeps/x86/Versions |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/Versions |
|
@@ -0,0 +1,5 @@ |
|
+ld { |
|
+ GLIBC_PRIVATE { |
|
+ __get_cpu_features; |
|
+ } |
|
+} |
|
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features-offsets.sym |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features-offsets.sym |
|
@@ -0,0 +1,7 @@ |
|
+#define SHARED 1 |
|
+ |
|
+#include <ldsodefs.h> |
|
+ |
|
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) |
|
+ |
|
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) |
|
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.c |
|
@@ -0,0 +1,213 @@ |
|
+/* Initialize CPU feature data. |
|
+ This file is part of the GNU C Library. |
|
+ Copyright (C) 2008-2015 Free Software Foundation, Inc. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <cpuid.h> |
|
+#include <cpu-features.h> |
|
+ |
|
+static inline void |
|
+get_common_indeces (struct cpu_features *cpu_features, |
|
+ unsigned int *family, unsigned int *model) |
|
+{ |
|
+ unsigned int eax; |
|
+ __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); |
|
+ GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; |
|
+ *family = (eax >> 8) & 0x0f; |
|
+ *model = (eax >> 4) & 0x0f; |
|
+} |
|
+ |
|
+static inline void |
|
+init_cpu_features (struct cpu_features *cpu_features) |
|
+{ |
|
+ unsigned int ebx, ecx, edx; |
|
+ unsigned int family = 0; |
|
+ unsigned int model = 0; |
|
+ enum cpu_features_kind kind; |
|
+ |
|
+ __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); |
|
+ |
|
+ /* This spells out "GenuineIntel". */ |
|
+ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) |
|
+ { |
|
+ kind = arch_kind_intel; |
|
+ |
|
+ get_common_indeces (cpu_features, &family, &model); |
|
+ |
|
+ /* Intel processors prefer SSE instruction for memory/string |
|
+ routines if they are available. */ |
|
+ cpu_features->feature[index_Prefer_SSE_for_memop] |
|
+ |= bit_Prefer_SSE_for_memop; |
|
+ |
|
+ unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax; |
|
+ unsigned int extended_family = (eax >> 20) & 0xff; |
|
+ unsigned int extended_model = (eax >> 12) & 0xf0; |
|
+ if (family == 0x0f) |
|
+ { |
|
+ family += extended_family; |
|
+ model += extended_model; |
|
+ } |
|
+ else if (family == 0x06) |
|
+ { |
|
+ ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
+ model += extended_model; |
|
+ switch (model) |
|
+ { |
|
+ case 0x1c: |
|
+ case 0x26: |
|
+ /* BSF is slow on Atom. */ |
|
+ cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; |
|
+ break; |
|
+ |
|
+ case 0x37: |
|
+ case 0x4a: |
|
+ case 0x4d: |
|
+ case 0x5a: |
|
+ case 0x5d: |
|
+ /* Unaligned load versions are faster than SSSE3 |
|
+ on Silvermont. */ |
|
+#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop |
|
+# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop |
|
+#endif |
|
+#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 |
|
+# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 |
|
+#endif |
|
+ cpu_features->feature[index_Fast_Unaligned_Load] |
|
+ |= (bit_Fast_Unaligned_Load |
|
+ | bit_Prefer_PMINUB_for_stringop |
|
+ | bit_Slow_SSE4_2); |
|
+ break; |
|
+ |
|
+ default: |
|
+ /* Unknown family 0x06 processors. Assuming this is one |
|
+ of Core i3/i5/i7 processors if AVX is available. */ |
|
+ if ((ecx & bit_AVX) == 0) |
|
+ break; |
|
+ |
|
+ case 0x1a: |
|
+ case 0x1e: |
|
+ case 0x1f: |
|
+ case 0x25: |
|
+ case 0x2c: |
|
+ case 0x2e: |
|
+ case 0x2f: |
|
+ /* Rep string instructions, copy backward, unaligned loads |
|
+ and pminub are fast on Intel Core i3, i5 and i7. */ |
|
+#if index_Fast_Rep_String != index_Fast_Copy_Backward |
|
+# error index_Fast_Rep_String != index_Fast_Copy_Backward |
|
+#endif |
|
+#if index_Fast_Rep_String != index_Fast_Unaligned_Load |
|
+# error index_Fast_Rep_String != index_Fast_Unaligned_Load |
|
+#endif |
|
+#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop |
|
+# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop |
|
+#endif |
|
+ cpu_features->feature[index_Fast_Rep_String] |
|
+ |= (bit_Fast_Rep_String |
|
+ | bit_Fast_Copy_Backward |
|
+ | bit_Fast_Unaligned_Load |
|
+ | bit_Prefer_PMINUB_for_stringop); |
|
+ break; |
|
+ } |
|
+ } |
|
+ } |
|
+ /* This spells out "AuthenticAMD". */ |
|
+ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) |
|
+ { |
|
+ kind = arch_kind_amd; |
|
+ |
|
+ get_common_indeces (cpu_features, &family, &model); |
|
+ |
|
+ ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
+ |
|
+ /* AMD processors prefer SSE instructions for memory/string routines |
|
+ if they are available, otherwise they prefer integer instructions. */ |
|
+ if ((ecx & 0x200)) |
|
+ cpu_features->feature[index_Prefer_SSE_for_memop] |
|
+ |= bit_Prefer_SSE_for_memop; |
|
+ |
|
+ unsigned int eax; |
|
+ __cpuid (0x80000000, eax, ebx, ecx, edx); |
|
+ if (eax >= 0x80000001) |
|
+ __cpuid (0x80000001, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); |
|
+ } |
|
+ else |
|
+ kind = arch_kind_other; |
|
+ |
|
+ if (cpu_features->max_cpuid >= 7) |
|
+ __cpuid_count (7, 0, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, |
|
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); |
|
+ |
|
+ /* Can we call xgetbv? */ |
|
+ if (HAS_CPU_FEATURE (OSXSAVE)) |
|
+ { |
|
+ unsigned int xcrlow; |
|
+ unsigned int xcrhigh; |
|
+ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); |
|
+ /* Is YMM and XMM state usable? */ |
|
+ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == |
|
+ (bit_YMM_state | bit_XMM_state)) |
|
+ { |
|
+ /* Determine if AVX is usable. */ |
|
+ if (HAS_CPU_FEATURE (AVX)) |
|
+ cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; |
|
+#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load |
|
+# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load |
|
+#endif |
|
+ /* Determine if AVX2 is usable. Unaligned load with 256-bit |
|
+ AVX registers are faster on processors with AVX2. */ |
|
+ if (HAS_CPU_FEATURE (AVX2)) |
|
+ cpu_features->feature[index_AVX2_Usable] |
|
+ |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; |
|
+ /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and |
|
+ ZMM16-ZMM31 state are enabled. */ |
|
+ if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state |
|
+ | bit_ZMM16_31_state)) == |
|
+ (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) |
|
+ { |
|
+ /* Determine if AVX512F is usable. */ |
|
+ if (HAS_CPU_FEATURE (AVX512F)) |
|
+ { |
|
+ cpu_features->feature[index_AVX512F_Usable] |
|
+ |= bit_AVX512F_Usable; |
|
+ /* Determine if AVX512DQ is usable. */ |
|
+ if (HAS_CPU_FEATURE (AVX512DQ)) |
|
+ cpu_features->feature[index_AVX512DQ_Usable] |
|
+ |= bit_AVX512DQ_Usable; |
|
+ } |
|
+ } |
|
+ /* Determine if FMA is usable. */ |
|
+ if (HAS_CPU_FEATURE (FMA)) |
|
+ cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable; |
|
+ /* Determine if FMA4 is usable. */ |
|
+ if (HAS_CPU_FEATURE (FMA4)) |
|
+ cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; |
|
+ } |
|
+ } |
|
+ |
|
+ cpu_features->family = family; |
|
+ cpu_features->model = model; |
|
+ cpu_features->kind = kind; |
|
+} |
|
Index: glibc-2.17-c758a686/sysdeps/x86/cpu-features.h |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/cpu-features.h |
|
@@ -0,0 +1,273 @@ |
|
+/* This file is part of the GNU C Library. |
|
+ Copyright (C) 2008-2015 Free Software Foundation, Inc. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#ifndef cpu_features_h |
|
+#define cpu_features_h |
|
+ |
|
+#define bit_Fast_Rep_String (1 << 0) |
|
+#define bit_Fast_Copy_Backward (1 << 1) |
|
+#define bit_Slow_BSF (1 << 2) |
|
+#define bit_Prefer_SSE_for_memop (1 << 3) |
|
+#define bit_Fast_Unaligned_Load (1 << 4) |
|
+#define bit_Prefer_PMINUB_for_stringop (1 << 5) |
|
+#define bit_AVX_Usable (1 << 6) |
|
+#define bit_FMA_Usable (1 << 7) |
|
+#define bit_FMA4_Usable (1 << 8) |
|
+#define bit_Slow_SSE4_2 (1 << 9) |
|
+#define bit_AVX2_Usable (1 << 10) |
|
+#define bit_AVX_Fast_Unaligned_Load (1 << 11) |
|
+#define bit_AVX512F_Usable (1 << 12) |
|
+#define bit_AVX512DQ_Usable (1 << 13) |
|
+ |
|
+/* CPUID Feature flags. */ |
|
+ |
|
+/* COMMON_CPUID_INDEX_1. */ |
|
+#define bit_SSE2 (1 << 26) |
|
+#define bit_SSSE3 (1 << 9) |
|
+#define bit_SSE4_1 (1 << 19) |
|
+#define bit_SSE4_2 (1 << 20) |
|
+#define bit_OSXSAVE (1 << 27) |
|
+#define bit_AVX (1 << 28) |
|
+#define bit_POPCOUNT (1 << 23) |
|
+#define bit_FMA (1 << 12) |
|
+#define bit_FMA4 (1 << 16) |
|
+ |
|
+/* COMMON_CPUID_INDEX_7. */ |
|
+#define bit_RTM (1 << 11) |
|
+#define bit_AVX2 (1 << 5) |
|
+#define bit_AVX512F (1 << 16) |
|
+#define bit_AVX512DQ (1 << 17) |
|
+ |
|
+/* XCR0 Feature flags. */ |
|
+#define bit_XMM_state (1 << 1) |
|
+#define bit_YMM_state (2 << 1) |
|
+#define bit_Opmask_state (1 << 5) |
|
+#define bit_ZMM0_15_state (1 << 6) |
|
+#define bit_ZMM16_31_state (1 << 7) |
|
+ |
|
+/* The integer bit array index for the first set of internal feature bits. */ |
|
+#define FEATURE_INDEX_1 0 |
|
+ |
|
+/* The current maximum size of the feature integer bit array. */ |
|
+#define FEATURE_INDEX_MAX 1 |
|
+ |
|
+#ifdef __ASSEMBLER__ |
|
+ |
|
+# include <ifunc-defines.h> |
|
+# include <rtld-global-offsets.h> |
|
+ |
|
+# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET |
|
+# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
+# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
+# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
+# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
+# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET |
|
+ |
|
+# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
+ |
|
+# if defined (_LIBC) && !IS_IN (nonlib) |
|
+# ifdef __x86_64__ |
|
+# ifdef SHARED |
|
+# if IS_IN (rtld) |
|
+# define LOAD_RTLD_GLOBAL_RO_RDX |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip) |
|
+# else |
|
+# define LOAD_RTLD_GLOBAL_RO_RDX \ |
|
+ mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), \ |
|
+ RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx) |
|
+# endif |
|
+# else /* SHARED */ |
|
+# define LOAD_RTLD_GLOBAL_RO_RDX |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip) |
|
+# endif /* !SHARED */ |
|
+# else /* __x86_64__ */ |
|
+# ifdef SHARED |
|
+# define LOAD_FUNC_GOT_EAX(func) \ |
|
+ leal func@GOTOFF(%edx), %eax |
|
+# if IS_IN (rtld) |
|
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ |
|
+ LOAD_PIC_REG(dx) |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx) |
|
+# else |
|
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ |
|
+ LOAD_PIC_REG(dx); \ |
|
+ mov _rtld_global_ro@GOT(%edx), %ecx |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), \ |
|
+ RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx) |
|
+# endif |
|
+# else /* SHARED */ |
|
+# define LOAD_FUNC_GOT_EAX(func) \ |
|
+ leal func, %eax |
|
+# define LOAD_GOT_AND_RTLD_GLOBAL_RO |
|
+# define HAS_FEATURE(offset, name) \ |
|
+ testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name) |
|
+# endif /* !SHARED */ |
|
+# endif /* !__x86_64__ */ |
|
+# else /* _LIBC && !nonlib */ |
|
+# error "Sorry, <cpu-features.h> is unimplemented for assembler" |
|
+# endif /* !_LIBC || nonlib */ |
|
+ |
|
+/* HAS_* evaluates to true if we may use the feature at runtime. */ |
|
+# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name) |
|
+# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name) |
|
+ |
|
+#else /* __ASSEMBLER__ */ |
|
+ |
|
+# include <sys/param.h> |
|
+# include <sys/types.h> |
|
+# include <sysdep.h> |
|
+# include <stdbool.h> |
|
+ |
|
+/* Ugly hack to make it possible to select a strstr and strcasestr |
|
+ implementation that avoids using the stack for 16-byte aligned |
|
+ SSE temporaries. Doing so makes it possible to call the functions |
|
+ with a stack that's not 16-byte aligned as can happen, for example, |
|
+ as a result of compiling the functions' callers with the GCC |
|
+ -mpreferred-stack-boundary=2 or =3 option, or with the ICC |
|
+ -falign-stack=assume-4-byte option. See rhbz 1150282 for details. |
|
+ |
|
+ The ifunc selector uses the unaligned version by default if this |
|
+ file exists and is accessible. */ |
|
+# define ENABLE_STRSTR_UNALIGNED_PATHNAME \ |
|
+ "/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned" |
|
+ |
|
+static bool __attribute__ ((unused)) |
|
+use_unaligned_strstr (void) |
|
+{ |
|
+ struct stat unaligned_strstr_etc_sysconfig_file; |
|
+ |
|
+ /* TLS may not have been set up yet, so avoid using stat since it tries to |
|
+ set errno. */ |
|
+ return INTERNAL_SYSCALL (stat, , 2, |
|
+ ENABLE_STRSTR_UNALIGNED_PATHNAME, |
|
+ &unaligned_strstr_etc_sysconfig_file) == 0; |
|
+} |
|
+ |
|
+enum |
|
+ { |
|
+ COMMON_CPUID_INDEX_1 = 0, |
|
+ COMMON_CPUID_INDEX_7, |
|
+ COMMON_CPUID_INDEX_80000001, /* for AMD */ |
|
+ /* Keep the following line at the end. */ |
|
+ COMMON_CPUID_INDEX_MAX |
|
+ }; |
|
+ |
|
+struct cpu_features |
|
+{ |
|
+ enum cpu_features_kind |
|
+ { |
|
+ arch_kind_unknown = 0, |
|
+ arch_kind_intel, |
|
+ arch_kind_amd, |
|
+ arch_kind_other |
|
+ } kind; |
|
+ int max_cpuid; |
|
+ struct cpuid_registers |
|
+ { |
|
+ unsigned int eax; |
|
+ unsigned int ebx; |
|
+ unsigned int ecx; |
|
+ unsigned int edx; |
|
+ } cpuid[COMMON_CPUID_INDEX_MAX]; |
|
+ unsigned int family; |
|
+ unsigned int model; |
|
+ unsigned int feature[FEATURE_INDEX_MAX]; |
|
+}; |
|
+ |
|
+/* Used from outside of glibc to get access to the CPU features |
|
+ structure. */ |
|
+extern const struct cpu_features *__get_cpu_features (void) |
|
+ __attribute__ ((const)); |
|
+ |
|
+# if defined (_LIBC) && !IS_IN (nonlib) |
|
+/* Unused for x86. */ |
|
+# define INIT_ARCH() |
|
+# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) |
|
+# endif |
|
+ |
|
+ |
|
+/* HAS_* evaluates to true if we may use the feature at runtime. */ |
|
+# define HAS_CPU_FEATURE(name) \ |
|
+ ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0) |
|
+# define HAS_ARCH_FEATURE(name) \ |
|
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) |
|
+ |
|
+# define index_SSE2 COMMON_CPUID_INDEX_1 |
|
+# define index_SSSE3 COMMON_CPUID_INDEX_1 |
|
+# define index_SSE4_1 COMMON_CPUID_INDEX_1 |
|
+# define index_SSE4_2 COMMON_CPUID_INDEX_1 |
|
+# define index_AVX COMMON_CPUID_INDEX_1 |
|
+# define index_AVX2 COMMON_CPUID_INDEX_7 |
|
+# define index_AVX512F COMMON_CPUID_INDEX_7 |
|
+# define index_AVX512DQ COMMON_CPUID_INDEX_7 |
|
+# define index_RTM COMMON_CPUID_INDEX_7 |
|
+# define index_FMA COMMON_CPUID_INDEX_1 |
|
+# define index_FMA4 COMMON_CPUID_INDEX_80000001 |
|
+# define index_POPCOUNT COMMON_CPUID_INDEX_1 |
|
+# define index_OSXSAVE COMMON_CPUID_INDEX_1 |
|
+ |
|
+# define reg_SSE2 edx |
|
+# define reg_SSSE3 ecx |
|
+# define reg_SSE4_1 ecx |
|
+# define reg_SSE4_2 ecx |
|
+# define reg_AVX ecx |
|
+# define reg_AVX2 ebx |
|
+# define reg_AVX512F ebx |
|
+# define reg_AVX512DQ ebx |
|
+# define reg_RTM ebx |
|
+# define reg_FMA ecx |
|
+# define reg_FMA4 ecx |
|
+# define reg_POPCOUNT ecx |
|
+# define reg_OSXSAVE ecx |
|
+ |
|
+# define index_Fast_Rep_String FEATURE_INDEX_1 |
|
+# define index_Fast_Copy_Backward FEATURE_INDEX_1 |
|
+# define index_Slow_BSF FEATURE_INDEX_1 |
|
+# define index_Prefer_SSE_for_memop FEATURE_INDEX_1 |
|
+# define index_Fast_Unaligned_Load FEATURE_INDEX_1 |
|
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 |
|
+# define index_AVX_Usable FEATURE_INDEX_1 |
|
+# define index_FMA_Usable FEATURE_INDEX_1 |
|
+# define index_FMA4_Usable FEATURE_INDEX_1 |
|
+# define index_Slow_SSE4_2 FEATURE_INDEX_1 |
|
+# define index_AVX2_Usable FEATURE_INDEX_1 |
|
+# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 |
|
+# define index_AVX512F_Usable FEATURE_INDEX_1 |
|
+# define index_AVX512DQ_Usable FEATURE_INDEX_1 |
|
+ |
|
+#endif /* !__ASSEMBLER__ */ |
|
+ |
|
+#endif /* cpu_features_h */ |
|
Index: glibc-2.17-c758a686/sysdeps/x86/dl-get-cpu-features.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/dl-get-cpu-features.c |
|
@@ -0,0 +1,27 @@ |
|
+/* This file is part of the GNU C Library. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+ |
|
+#include <ldsodefs.h> |
|
+ |
|
+#undef __get_cpu_features |
|
+ |
|
+const struct cpu_features * |
|
+__get_cpu_features (void) |
|
+{ |
|
+ return &GLRO(dl_x86_cpu_features); |
|
+} |
|
Index: glibc-2.17-c758a686/sysdeps/x86/libc-start.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/libc-start.c |
|
@@ -0,0 +1,41 @@ |
|
+/* Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#ifdef SHARED |
|
+# include <csu/libc-start.c> |
|
+# else |
|
+/* The main work is done in the generic function. */ |
|
+# define LIBC_START_DISABLE_INLINE |
|
+# define LIBC_START_MAIN generic_start_main |
|
+# include <csu/libc-start.c> |
|
+# include <cpu-features.h> |
|
+# include <cpu-features.c> |
|
+ |
|
+extern struct cpu_features _dl_x86_cpu_features; |
|
+ |
|
+int |
|
+__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), |
|
+ int argc, char **argv, |
|
+ __typeof (main) init, |
|
+ void (*fini) (void), |
|
+ void (*rtld_fini) (void), void *stack_end) |
|
+{ |
|
+ init_cpu_features (&_dl_x86_cpu_features); |
|
+ return generic_start_main (main, argc, argv, init, fini, rtld_fini, |
|
+ stack_end); |
|
+} |
|
+#endif |
|
Index: glibc-2.17-c758a686/sysdeps/x86/rtld-global-offsets.sym |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/rtld-global-offsets.sym |
|
@@ -0,0 +1,7 @@ |
|
+#define SHARED 1 |
|
+ |
|
+#include <ldsodefs.h> |
|
+ |
|
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) |
|
+ |
|
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) |
|
Index: glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features-static.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features-static.c |
|
@@ -0,0 +1 @@ |
|
+#include "tst-get-cpu-features.c" |
|
Index: glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86/tst-get-cpu-features.c |
|
@@ -0,0 +1,31 @@ |
|
+/* Test case for x86 __get_cpu_features interface |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <stdlib.h> |
|
+#include <cpu-features.h> |
|
+ |
|
+static int |
|
+do_test (void) |
|
+{ |
|
+ if (__get_cpu_features ()->kind == arch_kind_unknown) |
|
+ abort (); |
|
+ return 0; |
|
+} |
|
+ |
|
+#define TEST_FUNCTION do_test () |
|
+#include "../../test-skeleton.c" |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/cacheinfo.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/cacheinfo.c |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/cacheinfo.c |
|
@@ -21,40 +21,11 @@ |
|
#include <stdlib.h> |
|
#include <unistd.h> |
|
#include <cpuid.h> |
|
+#include "multiarch/init-arch.h" |
|
|
|
-#ifndef __cpuid_count |
|
-/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc |
|
- 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */ |
|
-# if defined(__i386__) && defined(__PIC__) |
|
-/* %ebx may be the PIC register. */ |
|
-# define __cpuid_count(level, count, a, b, c, d) \ |
|
- __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ |
|
- "cpuid\n\t" \ |
|
- "xchg{l}\t{%%}ebx, %1\n\t" \ |
|
- : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ |
|
- : "0" (level), "2" (count)) |
|
-# else |
|
-# define __cpuid_count(level, count, a, b, c, d) \ |
|
- __asm__ ("cpuid\n\t" \ |
|
- : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ |
|
- : "0" (level), "2" (count)) |
|
-# endif |
|
-#endif |
|
- |
|
-#ifdef USE_MULTIARCH |
|
-# include "multiarch/init-arch.h" |
|
- |
|
-# define is_intel __cpu_features.kind == arch_kind_intel |
|
-# define is_amd __cpu_features.kind == arch_kind_amd |
|
-# define max_cpuid __cpu_features.max_cpuid |
|
-#else |
|
- /* This spells out "GenuineIntel". */ |
|
-# define is_intel \ |
|
- ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69 |
|
- /* This spells out "AuthenticAMD". */ |
|
-# define is_amd \ |
|
- ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65 |
|
-#endif |
|
+#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel |
|
+#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd |
|
+#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid |
|
|
|
static const struct intel_02_cache_info |
|
{ |
|
@@ -237,21 +208,8 @@ intel_check_word (int name, unsigned int |
|
/* Intel reused this value. For family 15, model 6 it |
|
specifies the 3rd level cache. Otherwise the 2nd |
|
level cache. */ |
|
- unsigned int family; |
|
- unsigned int model; |
|
-#ifdef USE_MULTIARCH |
|
- family = __cpu_features.family; |
|
- model = __cpu_features.model; |
|
-#else |
|
- unsigned int eax; |
|
- unsigned int ebx; |
|
- unsigned int ecx; |
|
- unsigned int edx; |
|
- __cpuid (1, eax, ebx, ecx, edx); |
|
- |
|
- family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); |
|
- model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf); |
|
-#endif |
|
+ unsigned int family = GLRO(dl_x86_cpu_features).family; |
|
+ unsigned int model = GLRO(dl_x86_cpu_features).model; |
|
|
|
if (family == 15 && model == 6) |
|
{ |
|
@@ -478,18 +436,6 @@ long int |
|
attribute_hidden |
|
__cache_sysconf (int name) |
|
{ |
|
-#ifdef USE_MULTIARCH |
|
- if (__cpu_features.kind == arch_kind_unknown) |
|
- __init_cpu_features (); |
|
-#else |
|
- /* Find out what brand of processor. */ |
|
- unsigned int max_cpuid; |
|
- unsigned int ebx; |
|
- unsigned int ecx; |
|
- unsigned int edx; |
|
- __cpuid (0, max_cpuid, ebx, ecx, edx); |
|
-#endif |
|
- |
|
if (is_intel) |
|
return handle_intel (name, max_cpuid); |
|
|
|
@@ -525,18 +471,6 @@ long int __x86_64_raw_shared_cache_size |
|
int __x86_64_prefetchw attribute_hidden; |
|
#endif |
|
|
|
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION |
|
-/* Instructions preferred for memory and string routines. |
|
- |
|
- 0: Regular instructions |
|
- 1: MMX instructions |
|
- 2: SSE2 instructions |
|
- 3: SSSE3 instructions |
|
- |
|
- */ |
|
-int __x86_64_preferred_memory_instruction attribute_hidden; |
|
-#endif |
|
- |
|
|
|
static void |
|
__attribute__((constructor)) |
|
@@ -553,14 +487,6 @@ init_cacheinfo (void) |
|
unsigned int level; |
|
unsigned int threads = 0; |
|
|
|
-#ifdef USE_MULTIARCH |
|
- if (__cpu_features.kind == arch_kind_unknown) |
|
- __init_cpu_features (); |
|
-#else |
|
- int max_cpuid; |
|
- __cpuid (0, max_cpuid, ebx, ecx, edx); |
|
-#endif |
|
- |
|
if (is_intel) |
|
{ |
|
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); |
|
@@ -576,34 +502,13 @@ init_cacheinfo (void) |
|
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); |
|
} |
|
|
|
- unsigned int ebx_1; |
|
- |
|
-#ifdef USE_MULTIARCH |
|
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; |
|
- ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; |
|
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; |
|
-#else |
|
- __cpuid (1, eax, ebx_1, ecx, edx); |
|
-#endif |
|
- |
|
- unsigned int family = (eax >> 8) & 0x0f; |
|
- unsigned int model = (eax >> 4) & 0x0f; |
|
- unsigned int extended_model = (eax >> 12) & 0xf0; |
|
- |
|
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION |
|
- /* Intel prefers SSSE3 instructions for memory/string routines |
|
- if they are available. */ |
|
- if ((ecx & 0x200)) |
|
- __x86_64_preferred_memory_instruction = 3; |
|
- else |
|
- __x86_64_preferred_memory_instruction = 2; |
|
-#endif |
|
- |
|
/* Figure out the number of logical threads that share the |
|
highest cache level. */ |
|
if (max_cpuid >= 4) |
|
{ |
|
+ unsigned int family = GLRO(dl_x86_cpu_features).family; |
|
+ unsigned int model = GLRO(dl_x86_cpu_features).model; |
|
+ |
|
int i = 0; |
|
|
|
/* Query until desired cache level is enumerated. */ |
|
@@ -655,7 +560,6 @@ init_cacheinfo (void) |
|
threads += 1; |
|
if (threads > 2 && level == 2 && family == 6) |
|
{ |
|
- model += extended_model; |
|
switch (model) |
|
{ |
|
case 0x57: |
|
@@ -678,7 +582,9 @@ init_cacheinfo (void) |
|
intel_bug_no_cache_info: |
|
/* Assume that all logical threads share the highest cache level. */ |
|
|
|
- threads = (ebx_1 >> 16) & 0xff; |
|
+ threads |
|
+ = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx |
|
+ >> 16) & 0xff); |
|
} |
|
|
|
/* Cap usage of highest cache level to the number of supported |
|
@@ -693,25 +599,6 @@ init_cacheinfo (void) |
|
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); |
|
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); |
|
|
|
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION |
|
-# ifdef USE_MULTIARCH |
|
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; |
|
- ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; |
|
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; |
|
-# else |
|
- __cpuid (1, eax, ebx, ecx, edx); |
|
-# endif |
|
- |
|
- /* AMD prefers SSSE3 instructions for memory/string routines |
|
- if they are avaiable, otherwise it prefers integer |
|
- instructions. */ |
|
- if ((ecx & 0x200)) |
|
- __x86_64_preferred_memory_instruction = 3; |
|
- else |
|
- __x86_64_preferred_memory_instruction = 0; |
|
-#endif |
|
- |
|
/* Get maximum extended function. */ |
|
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/dl-machine.h |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-machine.h |
|
@@ -26,6 +26,7 @@ |
|
#include <sysdep.h> |
|
#include <tls.h> |
|
#include <dl-tlsdesc.h> |
|
+#include <cpu-features.c> |
|
|
|
/* Return nonzero iff ELF header is compatible with the running host. */ |
|
static inline int __attribute__ ((unused)) |
|
@@ -200,6 +201,8 @@ dl_platform_init (void) |
|
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') |
|
/* Avoid an empty string which would disturb us. */ |
|
GLRO(dl_platform) = NULL; |
|
+ |
|
+ init_cpu_features (&GLRO(dl_x86_cpu_features)); |
|
} |
|
|
|
static inline ElfW(Addr) |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/dl-procinfo.c |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-procinfo.c |
|
@@ -0,0 +1,57 @@ |
|
+/* Data for x86-64 version of processor capability information. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+/* If anything should be added here check whether the size of each string |
|
+ is still ok with the given array size. |
|
+ |
|
+ All the #ifdefs in the definitions are quite irritating but |
|
+ necessary if we want to avoid duplicating the information. There |
|
+ are three different modes: |
|
+ |
|
+ - PROCINFO_DECL is defined. This means we are only interested in |
|
+ declarations. |
|
+ |
|
+ - PROCINFO_DECL is not defined: |
|
+ |
|
+ + if SHARED is defined the file is included in an array |
|
+ initializer. The .element = { ... } syntax is needed. |
|
+ |
|
+ + if SHARED is not defined a normal array initialization is |
|
+ needed. |
|
+ */ |
|
+ |
|
+#ifndef PROCINFO_CLASS |
|
+# define PROCINFO_CLASS |
|
+#endif |
|
+ |
|
+#if !defined PROCINFO_DECL && defined SHARED |
|
+ ._dl_x86_cpu_features |
|
+#else |
|
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features |
|
+#endif |
|
+#ifndef PROCINFO_DECL |
|
+= { } |
|
+#endif |
|
+#if !defined SHARED || defined PROCINFO_DECL |
|
+; |
|
+#else |
|
+, |
|
+#endif |
|
+ |
|
+#undef PROCINFO_DECL |
|
+#undef PROCINFO_CLASS |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/ldsodefs.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/ldsodefs.h |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/ldsodefs.h |
|
@@ -20,6 +20,7 @@ |
|
#define _X86_64_LDSODEFS_H 1 |
|
|
|
#include <elf.h> |
|
+#include <cpu-features.h> |
|
|
|
struct La_x86_64_regs; |
|
struct La_x86_64_retval; |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/Makefile |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile |
|
@@ -1,5 +1,4 @@ |
|
ifeq ($(subdir),csu) |
|
-aux += init-arch |
|
tests += test-multiarch |
|
gen-as-const-headers += ifunc-defines.sym |
|
endif |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Versions |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/Versions |
|
+++ /dev/null |
|
@@ -1,5 +0,0 @@ |
|
-libc { |
|
- GLIBC_PRIVATE { |
|
- __get_cpu_features; |
|
- } |
|
-} |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/cacheinfo.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/cacheinfo.c |
|
+++ /dev/null |
|
@@ -1,2 +0,0 @@ |
|
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION |
|
-#include "../cacheinfo.c" |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-defines.sym |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/ifunc-defines.sym |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-defines.sym |
|
@@ -4,7 +4,6 @@ |
|
-- |
|
|
|
CPU_FEATURES_SIZE sizeof (struct cpu_features) |
|
-KIND_OFFSET offsetof (struct cpu_features, kind) |
|
CPUID_OFFSET offsetof (struct cpu_features, cpuid) |
|
CPUID_SIZE sizeof (struct cpuid_registers) |
|
CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/init-arch.c |
|
+++ /dev/null |
|
@@ -1,183 +0,0 @@ |
|
-/* Initialize CPU feature data. |
|
- This file is part of the GNU C Library. |
|
- Copyright (C) 2008-2012 Free Software Foundation, Inc. |
|
- Contributed by Ulrich Drepper <drepper@redhat.com>. |
|
- |
|
- The GNU C Library is free software; you can redistribute it and/or |
|
- modify it under the terms of the GNU Lesser General Public |
|
- License as published by the Free Software Foundation; either |
|
- version 2.1 of the License, or (at your option) any later version. |
|
- |
|
- The GNU C Library is distributed in the hope that it will be useful, |
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
- Lesser General Public License for more details. |
|
- |
|
- You should have received a copy of the GNU Lesser General Public |
|
- License along with the GNU C Library; if not, see |
|
- <http://www.gnu.org/licenses/>. */ |
|
- |
|
-#include <atomic.h> |
|
-#include <cpuid.h> |
|
-#include "init-arch.h" |
|
- |
|
- |
|
-struct cpu_features __cpu_features attribute_hidden; |
|
- |
|
- |
|
-static void |
|
-get_common_indeces (unsigned int *family, unsigned int *model) |
|
-{ |
|
- __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); |
|
- |
|
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; |
|
- *family = (eax >> 8) & 0x0f; |
|
- *model = (eax >> 4) & 0x0f; |
|
-} |
|
- |
|
- |
|
-void |
|
-__init_cpu_features (void) |
|
-{ |
|
- unsigned int ebx; |
|
- unsigned int ecx; |
|
- unsigned int edx; |
|
- unsigned int family = 0; |
|
- unsigned int model = 0; |
|
- enum cpu_features_kind kind; |
|
- |
|
- __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); |
|
- |
|
- /* This spells out "GenuineIntel". */ |
|
- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) |
|
- { |
|
- kind = arch_kind_intel; |
|
- |
|
- get_common_indeces (&family, &model); |
|
- |
|
- /* Intel processors prefer SSE instruction for memory/string |
|
- routines if they are available. */ |
|
- __cpu_features.feature[index_Prefer_SSE_for_memop] |
|
- |= bit_Prefer_SSE_for_memop; |
|
- |
|
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; |
|
- unsigned int extended_family = (eax >> 20) & 0xff; |
|
- unsigned int extended_model = (eax >> 12) & 0xf0; |
|
- if (family == 0x0f) |
|
- { |
|
- family += extended_family; |
|
- model += extended_model; |
|
- } |
|
- else if (family == 0x06) |
|
- { |
|
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
- model += extended_model; |
|
- switch (model) |
|
- { |
|
- case 0x1c: |
|
- case 0x26: |
|
- /* BSF is slow on Atom. */ |
|
- __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; |
|
- break; |
|
- |
|
- default: |
|
- /* Unknown family 0x06 processors. Assuming this is one |
|
- of Core i3/i5/i7 processors if AVX is available. */ |
|
- if ((ecx & bit_AVX) == 0) |
|
- break; |
|
- |
|
- case 0x1a: |
|
- case 0x1e: |
|
- case 0x1f: |
|
- case 0x25: |
|
- case 0x2c: |
|
- case 0x2e: |
|
- case 0x2f: |
|
- /* Rep string instructions, copy backward, unaligned loads |
|
- and pminub are fast on Intel Core i3, i5 and i7. */ |
|
-#if index_Fast_Rep_String != index_Fast_Copy_Backward |
|
-# error index_Fast_Rep_String != index_Fast_Copy_Backward |
|
-#endif |
|
-#if index_Fast_Rep_String != index_Fast_Unaligned_Load |
|
-# error index_Fast_Rep_String != index_Fast_Unaligned_Load |
|
-#endif |
|
-#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop |
|
-# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop |
|
-#endif |
|
- __cpu_features.feature[index_Fast_Rep_String] |
|
- |= (bit_Fast_Rep_String |
|
- | bit_Fast_Copy_Backward |
|
- | bit_Fast_Unaligned_Load |
|
- | bit_Prefer_PMINUB_for_stringop); |
|
- break; |
|
- } |
|
- } |
|
- } |
|
- /* This spells out "AuthenticAMD". */ |
|
- else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) |
|
- { |
|
- kind = arch_kind_amd; |
|
- |
|
- get_common_indeces (&family, &model); |
|
- |
|
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; |
|
- |
|
- /* AMD processors prefer SSE instructions for memory/string routines |
|
- if they are available, otherwise they prefer integer instructions. */ |
|
- if ((ecx & 0x200)) |
|
- __cpu_features.feature[index_Prefer_SSE_for_memop] |
|
- |= bit_Prefer_SSE_for_memop; |
|
- |
|
- unsigned int eax; |
|
- __cpuid (0x80000000, eax, ebx, ecx, edx); |
|
- if (eax >= 0x80000001) |
|
- __cpuid (0x80000001, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, |
|
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); |
|
- } |
|
- else |
|
- kind = arch_kind_other; |
|
- |
|
- /* Can we call xgetbv? */ |
|
- if (CPUID_OSXSAVE) |
|
- { |
|
- unsigned int xcrlow; |
|
- unsigned int xcrhigh; |
|
- asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); |
|
- /* Is YMM and XMM state usable? */ |
|
- if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == |
|
- (bit_YMM_state | bit_XMM_state)) |
|
- { |
|
- /* Determine if AVX is usable. */ |
|
- if (CPUID_AVX) |
|
- __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; |
|
- /* Determine if FMA is usable. */ |
|
- if (CPUID_FMA) |
|
- __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; |
|
- /* Determine if FMA4 is usable. */ |
|
- if (CPUID_FMA4) |
|
- __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; |
|
- } |
|
- } |
|
- |
|
- __cpu_features.family = family; |
|
- __cpu_features.model = model; |
|
- atomic_write_barrier (); |
|
- __cpu_features.kind = kind; |
|
-} |
|
- |
|
-#undef __get_cpu_features |
|
- |
|
-const struct cpu_features * |
|
-__get_cpu_features (void) |
|
-{ |
|
- if (__cpu_features.kind == arch_kind_unknown) |
|
- __init_cpu_features (); |
|
- |
|
- return &__cpu_features; |
|
-} |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/multiarch/init-arch.h |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/multiarch/init-arch.h |
|
@@ -15,183 +15,8 @@ |
|
License along with the GNU C Library; if not, see |
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
-#define bit_Fast_Rep_String (1 << 0) |
|
-#define bit_Fast_Copy_Backward (1 << 1) |
|
-#define bit_Slow_BSF (1 << 2) |
|
-#define bit_Prefer_SSE_for_memop (1 << 3) |
|
-#define bit_Fast_Unaligned_Load (1 << 4) |
|
-#define bit_Prefer_PMINUB_for_stringop (1 << 5) |
|
-#define bit_AVX_Usable (1 << 6) |
|
-#define bit_FMA_Usable (1 << 7) |
|
-#define bit_FMA4_Usable (1 << 8) |
|
- |
|
-/* CPUID Feature flags. */ |
|
-#define bit_SSE2 (1 << 26) |
|
-#define bit_SSSE3 (1 << 9) |
|
-#define bit_SSE4_1 (1 << 19) |
|
-#define bit_SSE4_2 (1 << 20) |
|
-#define bit_OSXSAVE (1 << 27) |
|
-#define bit_AVX (1 << 28) |
|
-#define bit_POPCOUNT (1 << 23) |
|
-#define bit_FMA (1 << 12) |
|
-#define bit_FMA4 (1 << 16) |
|
- |
|
-/* XCR0 Feature flags. */ |
|
-#define bit_XMM_state (1 << 1) |
|
-#define bit_YMM_state (2 << 1) |
|
- |
|
-#ifdef __ASSEMBLER__ |
|
- |
|
-# include <ifunc-defines.h> |
|
- |
|
-# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET |
|
-# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
-# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
-# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
-# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
|
- |
|
-# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
-# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE |
|
- |
|
-#else /* __ASSEMBLER__ */ |
|
- |
|
-# include <sys/param.h> |
|
-# include <sys/types.h> |
|
-# include <sysdep.h> |
|
-# include <stdbool.h> |
|
- |
|
-/* Ugly hack to make it possible to select a strstr and strcasestr |
|
- implementation that avoids using the stack for 16-byte aligned |
|
- SSE temporaries. Doing so makes it possible to call the functions |
|
- with a stack that's not 16-byte aligned as can happen, for example, |
|
- as a result of compiling the functions' callers with the GCC |
|
- -mpreferred-stack-boubdary=2 or =3 option, or with the ICC |
|
- -falign-stack=assume-4-byte option. See rhbz 1150282 for details. |
|
- |
|
- The ifunc selector uses the unaligned version by default if this |
|
- file exists and is accessible. */ |
|
-# define ENABLE_STRSTR_UNALIGNED_PATHNAME \ |
|
- "/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned" |
|
- |
|
-static bool __attribute__ ((unused)) |
|
-use_unaligned_strstr (void) |
|
-{ |
|
- struct stat unaligned_strstr_etc_sysconfig_file; |
|
- |
|
- /* TLS may not have been set up yet, so avoid using stat since it tries to |
|
- set errno. */ |
|
- return INTERNAL_SYSCALL (stat, , 2, |
|
- ENABLE_STRSTR_UNALIGNED_PATHNAME, |
|
- &unaligned_strstr_etc_sysconfig_file) == 0; |
|
-} |
|
- |
|
-enum |
|
- { |
|
- COMMON_CPUID_INDEX_1 = 0, |
|
- COMMON_CPUID_INDEX_80000001, /* for AMD */ |
|
- /* Keep the following line at the end. */ |
|
- COMMON_CPUID_INDEX_MAX |
|
- }; |
|
- |
|
-enum |
|
- { |
|
- FEATURE_INDEX_1 = 0, |
|
- /* Keep the following line at the end. */ |
|
- FEATURE_INDEX_MAX |
|
- }; |
|
- |
|
-extern struct cpu_features |
|
-{ |
|
- enum cpu_features_kind |
|
- { |
|
- arch_kind_unknown = 0, |
|
- arch_kind_intel, |
|
- arch_kind_amd, |
|
- arch_kind_other |
|
- } kind; |
|
- int max_cpuid; |
|
- struct cpuid_registers |
|
- { |
|
- unsigned int eax; |
|
- unsigned int ebx; |
|
- unsigned int ecx; |
|
- unsigned int edx; |
|
- } cpuid[COMMON_CPUID_INDEX_MAX]; |
|
- unsigned int family; |
|
- unsigned int model; |
|
- unsigned int feature[FEATURE_INDEX_MAX]; |
|
-} __cpu_features attribute_hidden; |
|
- |
|
- |
|
-extern void __init_cpu_features (void) attribute_hidden; |
|
-# define INIT_ARCH() \ |
|
- do \ |
|
- if (__cpu_features.kind == arch_kind_unknown) \ |
|
- __init_cpu_features (); \ |
|
- while (0) |
|
- |
|
-/* Used from outside libc.so to get access to the CPU features structure. */ |
|
-extern const struct cpu_features *__get_cpu_features (void) |
|
- __attribute__ ((const)); |
|
- |
|
-# if IS_IN (libc) |
|
-# define __get_cpu_features() (&__cpu_features) |
|
-# endif |
|
- |
|
-# define HAS_CPU_FEATURE(idx, reg, bit) \ |
|
- ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0) |
|
- |
|
-/* Following are the feature tests used throughout libc. */ |
|
- |
|
-/* CPUID_* evaluates to true if the feature flag is enabled. |
|
- We always use &__cpu_features because the HAS_CPUID_* macros |
|
- are called only within __init_cpu_features, where we can't |
|
- call __get_cpu_features without infinite recursion. */ |
|
-# define HAS_CPUID_FLAG(idx, reg, bit) \ |
|
- (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0) |
|
- |
|
-# define CPUID_OSXSAVE \ |
|
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE) |
|
-# define CPUID_AVX \ |
|
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX) |
|
-# define CPUID_FMA \ |
|
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA) |
|
-# define CPUID_FMA4 \ |
|
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) |
|
- |
|
-/* HAS_* evaluates to true if we may use the feature at runtime. */ |
|
-# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) |
|
-# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT) |
|
-# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3) |
|
-# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) |
|
-# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) |
|
- |
|
-# define index_Fast_Rep_String FEATURE_INDEX_1 |
|
-# define index_Fast_Copy_Backward FEATURE_INDEX_1 |
|
-# define index_Slow_BSF FEATURE_INDEX_1 |
|
-# define index_Prefer_SSE_for_memop FEATURE_INDEX_1 |
|
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1 |
|
-# define index_AVX_Usable FEATURE_INDEX_1 |
|
-# define index_FMA_Usable FEATURE_INDEX_1 |
|
-# define index_FMA4_Usable FEATURE_INDEX_1 |
|
- |
|
-# define HAS_ARCH_FEATURE(name) \ |
|
- ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) |
|
- |
|
-# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) |
|
-# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) |
|
-# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) |
|
-# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) |
|
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) |
|
-# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) |
|
-# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) |
|
-# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) |
|
- |
|
-#endif /* __ASSEMBLER__ */ |
|
+#ifdef __ASSEMBLER__ |
|
+# include <cpu-features.h> |
|
+#else |
|
+# include <ldsodefs.h> |
|
+#endif
|
|
|