You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1158 lines
36 KiB
1158 lines
36 KiB
# |
|
# AVX-512 support for glibc: |
|
# |
|
# Notes: Renamed configure.ac changes to configure.in. |
|
# |
|
# commit aa4de9cea5c07d43caeaca9722c2d417e9a2919c |
|
# Author: H.J. Lu <hjl.tools@gmail.com> |
|
# Date: Fri Mar 14 08:51:25 2014 -0700 |
|
# |
|
# Check AVX-512 assembler support first |
|
# |
|
# It checks AVX-512 assembler support first and sets libc_cv_cc_avx512 to |
|
# $libc_cv_asm_avx512, instead of yes. GCC won't support AVX-512 if |
|
# assembler doesn't support it. |
|
# |
|
# * sysdeps/x86_64/configure.ac: Check AVX-512 assembler support |
|
# first. Disable AVX-512 GCC support if assembler doesn't support |
|
# it. |
|
# * sysdeps/x86_64/configure: Regenerated. |
|
# |
|
# commit 2d63a517e4084ec80403cd9f278690fa8b676cc4 |
|
# Author: Igor Zamyatin <igor.zamyatin@intel.com> |
|
# Date: Thu Mar 13 11:10:22 2014 -0700 |
|
# |
|
# Save and restore AVX-512 zmm registers to x86-64 ld.so |
|
# |
|
# AVX-512 ISA adds 512-bit zmm registers. This patch updates |
|
# _dl_runtime_profile to pass zmm registers to run-time audit. It also |
|
# changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to support zmm |
|
# registers, which are called only when RTLD_PREPARE_FOREIGN_CALL |
|
# is used. Its performance impact is minimal. |
|
# |
|
# * config.h.in (HAVE_AVX512_SUPPORT): New #undef. |
|
# (HAVE_AVX512_ASM_SUPPORT): Likewise. |
|
# * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New. |
|
# (La_x86_64_vector): Add zmm. |
|
# * sysdeps/x86_64/Makefile (tests): Add tst-audit10. |
|
# (modules-names): Add tst-auditmod10a and tst-auditmod10b. |
|
# ($(objpfx)tst-audit10): New target. |
|
# ($(objpfx)tst-audit10.out): Likewise. |
|
# (tst-audit10-ENV): New. |
|
# (AVX512-CFLAGS): Likewise. |
|
# (CFLAGS-tst-audit10.c): Likewise. |
|
# (CFLAGS-tst-auditmod10a.c): Likewise. |
|
# (CFLAGS-tst-auditmod10b.c): Likewise. |
|
# * sysdeps/x86_64/configure.ac: Set config-cflags-avx512, |
|
# HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT. |
|
# * sysdeps/x86_64/configure: Regenerated. |
|
# * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add |
|
# AVX-512 zmm register support. |
|
# (_dl_x86_64_save_sse): Likewise. |
|
# (_dl_x86_64_restore_sse): Likewise. |
|
# * sysdeps/x86_64/dl-trampoline.h: Updated to support different |
|
# size vector registers. |
|
# * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New. |
|
# (ZMM_SIZE): Likewise. |
|
# * sysdeps/x86_64/tst-audit10.c: New file. |
|
# * sysdeps/x86_64/tst-auditmod10a.c: Likewise. |
|
# * sysdeps/x86_64/tst-auditmod10b.c: Likewise. |
|
# |
|
# In addition adds: |
|
# https://sourceware.org/ml/libc-alpha/2014-09/msg00228.html |
|
# To extend zmm register checking. |
|
# |
|
diff -urN glibc-2.17-c758a686/config.h.in glibc-2.17-c758a686/config.h.in |
|
--- glibc-2.17-c758a686/config.h.in 2014-09-10 23:11:14.605787816 -0400 |
|
+++ glibc-2.17-c758a686/config.h.in 2014-09-10 23:16:36.331167056 -0400 |
|
@@ -101,6 +101,12 @@ |
|
/* Define if gcc supports VEX encoding. */ |
|
#undef HAVE_SSE2AVX_SUPPORT |
|
|
|
+/* Define if compiler supports AVX512. */ |
|
+#undef HAVE_AVX512_SUPPORT |
|
+ |
|
+/* Define if assembler supports AVX512. */ |
|
+#undef HAVE_AVX512_ASM_SUPPORT |
|
+ |
|
/* Define if gcc supports FMA4. */ |
|
#undef HAVE_FMA4_SUPPORT |
|
|
|
diff -urN glibc-2.17-c758a686/sysdeps/x86/bits/link.h glibc-2.17-c758a686/sysdeps/x86/bits/link.h |
|
--- glibc-2.17-c758a686/sysdeps/x86/bits/link.h 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86/bits/link.h 2014-09-10 23:16:36.331167056 -0400 |
|
@@ -66,6 +66,8 @@ |
|
typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16))); |
|
typedef float La_x86_64_ymm |
|
__attribute__ ((__vector_size__ (32), __aligned__ (16))); |
|
+typedef double La_x86_64_zmm |
|
+ __attribute__ ((__vector_size__ (64), __aligned__ (16))); |
|
# else |
|
typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__))); |
|
# endif |
|
@@ -74,6 +76,7 @@ |
|
{ |
|
# if __GNUC_PREREQ (4,0) |
|
La_x86_64_ymm ymm[2]; |
|
+ La_x86_64_zmm zmm[1]; |
|
# endif |
|
La_x86_64_xmm xmm[4]; |
|
} La_x86_64_vector __attribute__ ((__aligned__ (16))); |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure glibc-2.17-c758a686/sysdeps/x86_64/configure |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/configure 2014-09-10 23:11:15.000787061 -0400 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/configure 2014-09-10 23:16:36.338167042 -0400 |
|
@@ -91,6 +91,59 @@ |
|
|
|
fi |
|
|
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5 |
|
+$as_echo_n "checking for AVX512 support in assembler... " >&6; } |
|
+if ${libc_cv_asm_avx512+:} false; then : |
|
+ $as_echo_n "(cached) " >&6 |
|
+else |
|
+ cat > conftest.s <<\EOF |
|
+ vmovdqu64 %zmm0, (%rsp) |
|
+EOF |
|
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' |
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 |
|
+ (eval $ac_try) 2>&5 |
|
+ ac_status=$? |
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 |
|
+ test $ac_status = 0; }; }; then |
|
+ libc_cv_asm_avx512=yes |
|
+else |
|
+ libc_cv_asm_avx512=no |
|
+fi |
|
+rm -f conftest* |
|
+fi |
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5 |
|
+$as_echo "$libc_cv_asm_avx512" >&6; } |
|
+if test $libc_cv_asm_avx512 = yes; then |
|
+ $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h |
|
+ |
|
+fi |
|
+ |
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5 |
|
+$as_echo_n "checking for AVX512 support... " >&6; } |
|
+if ${libc_cv_cc_avx512+:} false; then : |
|
+ $as_echo_n "(cached) " >&6 |
|
+else |
|
+ if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null' |
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 |
|
+ (eval $ac_try) 2>&5 |
|
+ ac_status=$? |
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 |
|
+ test $ac_status = 0; }; }; then : |
|
+ libc_cv_cc_avx512=$libc_cv_asm_avx512 |
|
+else |
|
+ libc_cv_cc_avx512=no |
|
+fi |
|
+ |
|
+fi |
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5 |
|
+$as_echo "$libc_cv_cc_avx512" >&6; } |
|
+if test $libc_cv_cc_avx512 = yes; then |
|
+ $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h |
|
+ |
|
+fi |
|
+config_vars="$config_vars |
|
+config-cflags-avx512 = $libc_cv_cc_avx512" |
|
+ |
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 |
|
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } |
|
if ${libc_cv_cc_sse2avx+:} false; then : |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/configure.in glibc-2.17-c758a686/sysdeps/x86_64/configure.in |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/configure.in 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/configure.in 2014-09-10 23:16:36.338167042 -0400 |
|
@@ -21,6 +21,30 @@ |
|
AC_DEFINE(HAVE_AVX_SUPPORT) |
|
fi |
|
|
|
+dnl Check if asm supports AVX512. |
|
+AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl |
|
+cat > conftest.s <<\EOF |
|
+ vmovdqu64 %zmm0, (%rsp) |
|
+EOF |
|
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then |
|
+ libc_cv_asm_avx512=yes |
|
+else |
|
+ libc_cv_asm_avx512=no |
|
+fi |
|
+rm -f conftest*]) |
|
+if test $libc_cv_asm_avx512 = yes; then |
|
+ AC_DEFINE(HAVE_AVX512_ASM_SUPPORT) |
|
+fi |
|
+ |
|
+dnl Check if -mavx512f works. |
|
+AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl |
|
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no]) |
|
+]) |
|
+if test $libc_cv_cc_avx512 = yes; then |
|
+ AC_DEFINE(HAVE_AVX512_SUPPORT) |
|
+fi |
|
+LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512]) |
|
+ |
|
dnl Check if -msse2avx works. |
|
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl |
|
LIBC_TRY_CC_OPTION([-msse2avx], |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.h 2014-09-10 23:16:36.334167050 -0400 |
|
@@ -19,14 +19,14 @@ |
|
|
|
#ifdef RESTORE_AVX |
|
/* This is to support AVX audit modules. */ |
|
- vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) |
|
- vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) |
|
- vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) |
|
- vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) |
|
- vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) |
|
- vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) |
|
- vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) |
|
- vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) |
|
+ VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) |
|
+ VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) |
|
+ VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) |
|
+ VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) |
|
+ VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) |
|
+ VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) |
|
+ VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) |
|
+ VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) |
|
|
|
/* Save xmm0-xmm7 registers to detect if any of them are |
|
changed by audit module. */ |
|
@@ -72,7 +72,7 @@ |
|
je 2f |
|
vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 |
|
+2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) |
|
vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 |
|
@@ -81,7 +81,7 @@ |
|
je 2f |
|
vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) |
|
vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 |
|
@@ -90,7 +90,7 @@ |
|
je 2f |
|
vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) |
|
vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 |
|
@@ -99,7 +99,7 @@ |
|
je 2f |
|
vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) |
|
vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 |
|
@@ -108,7 +108,7 @@ |
|
je 2f |
|
vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) |
|
vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 |
|
@@ -117,7 +117,7 @@ |
|
je 2f |
|
vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) |
|
vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 |
|
@@ -126,7 +126,7 @@ |
|
je 2f |
|
vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) |
|
vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) |
|
|
|
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 |
|
@@ -135,7 +135,7 @@ |
|
je 2f |
|
vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) |
|
jmp 1f |
|
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 |
|
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) |
|
vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) |
|
|
|
1: |
|
@@ -213,8 +213,8 @@ |
|
|
|
#ifdef RESTORE_AVX |
|
/* This is to support AVX audit modules. */ |
|
- vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) |
|
- vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) |
|
+ VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) |
|
+ VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) |
|
|
|
/* Save xmm0/xmm1 registers to detect if they are changed |
|
by audit module. */ |
|
@@ -243,13 +243,13 @@ |
|
vpmovmskb %xmm2, %esi |
|
cmpl $0xffff, %esi |
|
jne 1f |
|
- vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 |
|
+ VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) |
|
|
|
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 |
|
vpmovmskb %xmm2, %esi |
|
cmpl $0xffff, %esi |
|
jne 1f |
|
- vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 |
|
+ VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) |
|
|
|
1: |
|
#endif |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/dl-trampoline.S 2014-09-10 23:16:36.334167050 -0400 |
|
@@ -96,7 +96,7 @@ |
|
|
|
/* Actively align the La_x86_64_regs structure. */ |
|
andq $0xfffffffffffffff0, %rsp |
|
-# ifdef HAVE_AVX_SUPPORT |
|
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT |
|
/* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers |
|
to detect if any xmm0-xmm7 registers are changed by audit |
|
module. */ |
|
@@ -130,7 +130,7 @@ |
|
movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) |
|
movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) |
|
|
|
-# ifdef HAVE_AVX_SUPPORT |
|
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT |
|
.data |
|
L(have_avx): |
|
.zero 4 |
|
@@ -138,7 +138,7 @@ |
|
.previous |
|
|
|
cmpl $0, L(have_avx)(%rip) |
|
- jne 1f |
|
+ jne L(defined) |
|
movq %rbx, %r11 # Save rbx |
|
movl $1, %eax |
|
cpuid |
|
@@ -147,18 +147,54 @@ |
|
// AVX and XSAVE supported? |
|
andl $((1 << 28) | (1 << 27)), %ecx |
|
cmpl $((1 << 28) | (1 << 27)), %ecx |
|
- jne 2f |
|
+ jne 10f |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ // AVX512 supported in processor? |
|
+ movq %rbx, %r11 # Save rbx |
|
+ xorl %ecx, %ecx |
|
+ mov $0x7, %eax |
|
+ cpuid |
|
+ andl $(1 << 16), %ebx |
|
+# endif |
|
xorl %ecx, %ecx |
|
// Get XFEATURE_ENABLED_MASK |
|
xgetbv |
|
- andl $0x6, %eax |
|
-2: subl $0x5, %eax |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ test %ebx, %ebx |
|
+ movq %r11, %rbx # Restore rbx |
|
+ je 20f |
|
+ // Verify that XCR0[7:5] = '111b' and |
|
+ // XCR0[2:1] = '11b' which means |
|
+ // that zmm state is enabled |
|
+ andl $0xe6, %eax |
|
+ cmpl $0xe6, %eax |
|
+ jne 20f |
|
+ movl %eax, L(have_avx)(%rip) |
|
+L(avx512): |
|
+# define RESTORE_AVX |
|
+# define VMOV vmovdqu64 |
|
+# define VEC(i) zmm##i |
|
+# define MORE_CODE |
|
+# include "dl-trampoline.h" |
|
+# undef VMOV |
|
+# undef VEC |
|
+# undef RESTORE_AVX |
|
+# endif |
|
+20: andl $0x6, %eax |
|
+10: subl $0x5, %eax |
|
movl %eax, L(have_avx)(%rip) |
|
cmpl $0, %eax |
|
|
|
-1: js L(no_avx) |
|
+L(defined): |
|
+ js L(no_avx) |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ cmpl $0xe6, L(have_avx)(%rip) |
|
+ je L(avx512) |
|
+# endif |
|
|
|
# define RESTORE_AVX |
|
+# define VMOV vmovdqu |
|
+# define VEC(i) ymm##i |
|
# define MORE_CODE |
|
# include "dl-trampoline.h" |
|
|
|
@@ -180,9 +216,9 @@ |
|
.align 16 |
|
cfi_startproc |
|
_dl_x86_64_save_sse: |
|
-# ifdef HAVE_AVX_SUPPORT |
|
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT |
|
cmpl $0, L(have_avx)(%rip) |
|
- jne 1f |
|
+ jne L(defined_5) |
|
movq %rbx, %r11 # Save rbx |
|
movl $1, %eax |
|
cpuid |
|
@@ -191,21 +227,43 @@ |
|
// AVX and XSAVE supported? |
|
andl $((1 << 28) | (1 << 27)), %ecx |
|
cmpl $((1 << 28) | (1 << 27)), %ecx |
|
- jne 2f |
|
+ jne 1f |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ // AVX512 supported in a processor? |
|
+ movq %rbx, %r11 # Save rbx |
|
+ xorl %ecx,%ecx |
|
+ mov $0x7,%eax |
|
+ cpuid |
|
+ andl $(1 << 16), %ebx |
|
+# endif |
|
xorl %ecx, %ecx |
|
// Get XFEATURE_ENABLED_MASK |
|
xgetbv |
|
- andl $0x6, %eax |
|
- cmpl $0x6, %eax |
|
- // Nonzero if SSE and AVX state saving is enabled. |
|
- sete %al |
|
-2: leal -1(%eax,%eax), %eax |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ test %ebx, %ebx |
|
+ movq %r11, %rbx # Restore rbx |
|
+ je 2f |
|
+ // Verify that XCR0[7:5] = '111b' and |
|
+ // XCR0[2:1] = '11b' which means |
|
+ // that zmm state is enabled |
|
+ andl $0xe6, %eax |
|
+ movl %eax, L(have_avx)(%rip) |
|
+ cmpl $0xe6, %eax |
|
+ je L(avx512_5) |
|
+# endif |
|
+ |
|
+2: andl $0x6, %eax |
|
+1: subl $0x5, %eax |
|
movl %eax, L(have_avx)(%rip) |
|
cmpl $0, %eax |
|
|
|
-1: js L(no_avx5) |
|
+L(defined_5): |
|
+ js L(no_avx5) |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ cmpl $0xe6, L(have_avx)(%rip) |
|
+ je L(avx512_5) |
|
+# endif |
|
|
|
-# define YMM_SIZE 32 |
|
vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE |
|
vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE |
|
vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE |
|
@@ -215,6 +273,18 @@ |
|
vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE |
|
vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE |
|
ret |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+L(avx512_5): |
|
+ vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE |
|
+ vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE |
|
+ vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE |
|
+ vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE |
|
+ vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE |
|
+ vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE |
|
+ vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE |
|
+ vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE |
|
+ ret |
|
+# endif |
|
L(no_avx5): |
|
# endif |
|
movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE |
|
@@ -235,9 +305,13 @@ |
|
.align 16 |
|
cfi_startproc |
|
_dl_x86_64_restore_sse: |
|
-# ifdef HAVE_AVX_SUPPORT |
|
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT |
|
cmpl $0, L(have_avx)(%rip) |
|
js L(no_avx6) |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+ cmpl $0xe6, L(have_avx)(%rip) |
|
+ je L(avx512_6) |
|
+# endif |
|
|
|
vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 |
|
vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 |
|
@@ -248,6 +322,18 @@ |
|
vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 |
|
vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 |
|
ret |
|
+# ifdef HAVE_AVX512_ASM_SUPPORT |
|
+L(avx512_6): |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6 |
|
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7 |
|
+ ret |
|
+# endif |
|
L(no_avx6): |
|
# endif |
|
movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/link-defines.sym 2014-09-10 23:16:36.335167048 -0400 |
|
@@ -4,6 +4,8 @@ |
|
-- |
|
VECTOR_SIZE sizeof (La_x86_64_vector) |
|
XMM_SIZE sizeof (La_x86_64_xmm) |
|
+YMM_SIZE sizeof (La_x86_64_ymm) |
|
+ZMM_SIZE sizeof (La_x86_64_zmm) |
|
|
|
LR_SIZE sizeof (struct La_x86_64_regs) |
|
LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx) |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/Makefile glibc-2.17-c758a686/sysdeps/x86_64/Makefile |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/Makefile 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/Makefile 2014-09-10 23:22:04.269518711 -0400 |
|
@@ -37,6 +37,20 @@ |
|
|
|
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o |
|
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o |
|
+ |
|
+tests += tst-audit10 |
|
+modules-names += tst-auditmod10a tst-auditmod10b |
|
+ |
|
+$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so |
|
+$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so |
|
+tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so |
|
+ |
|
+ifeq (yes,$(config-cflags-avx512)) |
|
+AVX512-CFLAGS = -mavx512f |
|
+CFLAGS-tst-audit10.c += $(AVX512-CFLAGS) |
|
+CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) |
|
+CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) |
|
+endif |
|
endif |
|
|
|
ifeq ($(subdir),csu) |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c 1969-12-31 19:00:00.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-audit10.c 2014-09-10 23:16:36.335167048 -0400 |
|
@@ -0,0 +1,70 @@ |
|
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+/* Test case for x86-64 preserved registers in dynamic linker. */ |
|
+ |
|
+#ifdef __AVX512F__ |
|
+#include <stdlib.h> |
|
+#include <string.h> |
|
+#include <cpuid.h> |
|
+#include <immintrin.h> |
|
+ |
|
+static int |
|
+avx512_enabled (void) |
|
+{ |
|
+ unsigned int eax, ebx, ecx, edx; |
|
+ |
|
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 |
|
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) |
|
+ return 0; |
|
+ |
|
+ __cpuid_count (7, 0, eax, ebx, ecx, edx); |
|
+ if (!(ebx & bit_AVX512F)) |
|
+ return 0; |
|
+ |
|
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); |
|
+ |
|
+ /* Verify that ZMM, YMM and XMM states are enabled. */ |
|
+ return (eax & 0xe6) == 0xe6; |
|
+} |
|
+ |
|
+ |
|
+extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i, |
|
+ __m512i, __m512i, __m512i, __m512i); |
|
+int |
|
+main (void) |
|
+{ |
|
+ /* Run AVX512 test only if AVX512 is supported. */ |
|
+ if (avx512_enabled ()) |
|
+ { |
|
+ __m512i zmm = _mm512_setzero_si512 (); |
|
+ __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (0x12349876); |
|
+ |
|
+ if (memcmp (&zmm, &ret, sizeof (ret))) |
|
+ abort (); |
|
+ } |
|
+ return 0; |
|
+} |
|
+#else |
|
+int |
|
+main (void) |
|
+{ |
|
+ return 0; |
|
+} |
|
+#endif |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c 1969-12-31 19:00:00.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10a.c 2014-09-10 23:16:36.335167048 -0400 |
|
@@ -0,0 +1,65 @@ |
|
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+/* Test case for x86-64 preserved registers in dynamic linker. */ |
|
+ |
|
+#ifdef __AVX512F__ |
|
+#include <stdlib.h> |
|
+#include <string.h> |
|
+#include <immintrin.h> |
|
+ |
|
+__m512i |
|
+audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3, |
|
+ __m512i x4, __m512i x5, __m512i x6, __m512i x7) |
|
+{ |
|
+ __m512i zmm; |
|
+ |
|
+ zmm = _mm512_set1_epi64 (1); |
|
+ if (memcmp (&zmm, &x0, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (2); |
|
+ if (memcmp (&zmm, &x1, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (3); |
|
+ if (memcmp (&zmm, &x2, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (4); |
|
+ if (memcmp (&zmm, &x3, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (5); |
|
+ if (memcmp (&zmm, &x4, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (6); |
|
+ if (memcmp (&zmm, &x5, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (7); |
|
+ if (memcmp (&zmm, &x6, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ zmm = _mm512_set1_epi64 (8); |
|
+ if (memcmp (&zmm, &x7, sizeof (zmm))) |
|
+ abort (); |
|
+ |
|
+ return _mm512_setzero_si512 (); |
|
+} |
|
+#endif |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c |
|
--- glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c 1969-12-31 19:00:00.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/tst-auditmod10b.c 2014-09-10 23:16:36.336167046 -0400 |
|
@@ -0,0 +1,219 @@ |
|
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+/* Verify that changing AVX512 registers in audit library won't affect |
|
+ function parameter passing/return. */ |
|
+ |
|
+#include <dlfcn.h> |
|
+#include <stdint.h> |
|
+#include <stdio.h> |
|
+#include <stdlib.h> |
|
+#include <string.h> |
|
+#include <unistd.h> |
|
+#include <bits/wordsize.h> |
|
+#include <gnu/lib-names.h> |
|
+ |
|
+unsigned int |
|
+la_version (unsigned int v) |
|
+{ |
|
+ setlinebuf (stdout); |
|
+ |
|
+ printf ("version: %u\n", v); |
|
+ |
|
+ char buf[20]; |
|
+ sprintf (buf, "%u", v); |
|
+ |
|
+ return v; |
|
+} |
|
+ |
|
+void |
|
+la_activity (uintptr_t *cookie, unsigned int flag) |
|
+{ |
|
+ if (flag == LA_ACT_CONSISTENT) |
|
+ printf ("activity: consistent\n"); |
|
+ else if (flag == LA_ACT_ADD) |
|
+ printf ("activity: add\n"); |
|
+ else if (flag == LA_ACT_DELETE) |
|
+ printf ("activity: delete\n"); |
|
+ else |
|
+ printf ("activity: unknown activity %u\n", flag); |
|
+} |
|
+ |
|
+char * |
|
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) |
|
+{ |
|
+ char buf[100]; |
|
+ const char *flagstr; |
|
+ if (flag == LA_SER_ORIG) |
|
+ flagstr = "LA_SET_ORIG"; |
|
+ else if (flag == LA_SER_LIBPATH) |
|
+ flagstr = "LA_SER_LIBPATH"; |
|
+ else if (flag == LA_SER_RUNPATH) |
|
+ flagstr = "LA_SER_RUNPATH"; |
|
+ else if (flag == LA_SER_CONFIG) |
|
+ flagstr = "LA_SER_CONFIG"; |
|
+ else if (flag == LA_SER_DEFAULT) |
|
+ flagstr = "LA_SER_DEFAULT"; |
|
+ else if (flag == LA_SER_SECURE) |
|
+ flagstr = "LA_SER_SECURE"; |
|
+ else |
|
+ { |
|
+ sprintf (buf, "unknown flag %d", flag); |
|
+ flagstr = buf; |
|
+ } |
|
+ printf ("objsearch: %s, %s\n", name, flagstr); |
|
+ |
|
+ return (char *) name; |
|
+} |
|
+ |
|
+unsigned int |
|
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) |
|
+{ |
|
+ printf ("objopen: %ld, %s\n", lmid, l->l_name); |
|
+ |
|
+ return 3; |
|
+} |
|
+ |
|
+void |
|
+la_preinit (uintptr_t *cookie) |
|
+{ |
|
+ printf ("preinit\n"); |
|
+} |
|
+ |
|
+unsigned int |
|
+la_objclose (uintptr_t *cookie) |
|
+{ |
|
+ printf ("objclose\n"); |
|
+ return 0; |
|
+} |
|
+ |
|
+uintptr_t |
|
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, |
|
+ uintptr_t *defcook, unsigned int *flags, const char *symname) |
|
+{ |
|
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", |
|
+ symname, (long int) sym->st_value, ndx, *flags); |
|
+ |
|
+ return sym->st_value; |
|
+} |
|
+ |
|
+#include <tst-audit.h> |
|
+ |
|
+#ifdef __AVX512F__ |
|
+#include <immintrin.h> |
|
+#include <cpuid.h> |
|
+ |
|
+static int |
|
+check_avx512 (void) |
|
+{ |
|
+ unsigned int eax, ebx, ecx, edx; |
|
+ |
|
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0 |
|
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) |
|
+ return 0; |
|
+ |
|
+ __cpuid_count (7, 0, eax, ebx, ecx, edx); |
|
+ if (!(ebx & bit_AVX512F)) |
|
+ return 0; |
|
+ |
|
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); |
|
+ |
|
+ /* Verify that ZMM, YMM and XMM states are enabled. */ |
|
+ return (eax & 0xe6) == 0xe6; |
|
+} |
|
+ |
|
+#else |
|
+#include <emmintrin.h> |
|
+#endif |
|
+ |
|
+ElfW(Addr) |
|
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, |
|
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags, |
|
+ const char *symname, long int *framesizep) |
|
+{ |
|
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", |
|
+ symname, (long int) sym->st_value, ndx, *flags); |
|
+ |
|
+#ifdef __AVX512F__ |
|
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0) |
|
+ { |
|
+ __m512i zero = _mm512_setzero_si512 (); |
|
+ if (memcmp (&regs->lr_vector[0], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[1], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[2], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[3], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[4], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[5], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[6], &zero, sizeof (zero)) |

+ || memcmp (&regs->lr_vector[7], &zero, sizeof (zero))) |
|
+ abort (); |
|
+ |
|
+ for (int i = 0; i < 8; i++) |
|
+ regs->lr_vector[i].zmm[0] |
|
+ = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1); |
|
+ |
|
+ __m512i zmm = _mm512_set1_epi64 (-1); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" ); |
|
+ |
|
+ *framesizep = 1024; |
|
+ } |
|
+#endif |
|
+ |
|
+ return sym->st_value; |
|
+} |
|
+ |
|
+unsigned int |
|
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, |
|
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, |
|
+ const char *symname) |
|
+{ |
|
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", |
|
+ symname, (long int) sym->st_value, ndx, |
|
+ (ptrdiff_t) outregs->int_retval); |
|
+ |
|
+#ifdef __AVX512F__ |
|
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0) |
|
+ { |
|
+ __m512i zero = _mm512_setzero_si512 (); |
|
+ if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero))) |
|
+ abort (); |
|
+ |
|
+ for (int i = 0; i < 8; i++) |
|
+ { |
|
+ __m512i zmm = _mm512_set1_epi64 (i + 1); |
|
+ if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0) |
|
+ abort (); |
|
+ } |
|
+ |
|
+ outregs->lrv_vector0.zmm[0] |
|
+ = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876); |
|
+ |
|
+ __m512i zmm = _mm512_set1_epi64 (-1); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); |
|
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); |
|
+ } |
|
+#endif |
|
+ |
|
+ return 0; |
|
+} |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86/Makefile glibc-2.17-c758a686/sysdeps/x86/Makefile |
|
--- glibc-2.17-c758a686/sysdeps/x86/Makefile 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86/Makefile 2014-09-11 16:06:03.121319867 -0400 |
|
@@ -2,8 +2,8 @@ |
|
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ |
|
-mno-sse -mno-mmx) |
|
|
|
-tests: $(objpfx)tst-xmmymm.out |
|
-$(objpfx)tst-xmmymm.out: ../sysdeps/x86/tst-xmmymm.sh $(objpfx)ld.so |
|
+tests: $(objpfx)tst-xmmymmzmm.out |
|
+$(objpfx)tst-xmmymmzmm.out: ../sysdeps/x86/tst-xmmymmzmm.sh $(objpfx)ld.so |
|
@echo "Checking ld.so for SSE register use. This will take a few seconds..." |
|
$(SHELL) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@ |
|
endif |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh |
|
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh 2012-12-24 22:02:13.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymm.sh 1969-12-31 19:00:00.000000000 -0500 |
|
@@ -1,103 +0,0 @@ |
|
-#! /bin/bash |
|
-# Make sure no code in ld.so uses xmm/ymm registers on x86-64. |
|
-# Copyright (C) 2009-2012 Free Software Foundation, Inc. |
|
-# This file is part of the GNU C Library. |
|
- |
|
-# The GNU C Library is free software; you can redistribute it and/or |
|
-# modify it under the terms of the GNU Lesser General Public |
|
-# License as published by the Free Software Foundation; either |
|
-# version 2.1 of the License, or (at your option) any later version. |
|
- |
|
-# The GNU C Library is distributed in the hope that it will be useful, |
|
-# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
-# Lesser General Public License for more details. |
|
- |
|
-# You should have received a copy of the GNU Lesser General Public |
|
-# License along with the GNU C Library; if not, see |
|
-# <http://www.gnu.org/licenses/>. |
|
- |
|
-set -e |
|
- |
|
-objpfx="$1" |
|
-NM="$2" |
|
-OBJDUMP="$3" |
|
-READELF="$4" |
|
- |
|
-tmp=$(mktemp ${objpfx}tst-xmmymm.XXXXXX) |
|
-trap 'rm -f "$tmp"' 1 2 3 15 |
|
- |
|
-# List of object files we have to test |
|
-rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | |
|
- awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' | |
|
- sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,') |
|
-rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)" |
|
- |
|
-# OBJECT symbols can be ignored. |
|
-$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a | |
|
-egrep " OBJECT *GLOBAL " | |
|
-awk '{if ($7 != "ABS") print $8 }' | |
|
-sort -u > "$tmp" |
|
-declare -a objects |
|
-objects=($(cat "$tmp")) |
|
- |
|
-objs="dl-runtime.os" |
|
-tocheck="dl-runtime.os" |
|
- |
|
-while test -n "$objs"; do |
|
- this="$objs" |
|
- objs="" |
|
- |
|
- for f in $this; do |
|
- undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') |
|
- if test -n "$undef"; then |
|
- for s in $undef; do |
|
- for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do |
|
- if test "$obj" = "$s"; then |
|
- continue 2 |
|
- fi |
|
- done |
|
- for o in $rtldobjs; do |
|
- ro=$(echo "$objpfx"../*/"$o") |
|
- if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then |
|
- if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then |
|
- echo "$o needed for $s" |
|
- objs="$objs $o" |
|
- fi |
|
- break; |
|
- fi |
|
- done |
|
- done |
|
- fi |
|
- done |
|
- tocheck="$tocheck$objs" |
|
-done |
|
- |
|
-echo |
|
-echo |
|
-echo "object files needed: $tocheck" |
|
- |
|
-cp /dev/null "$tmp" |
|
-for f in $tocheck; do |
|
- $OBJDUMP -d "$objpfx"../*/"$f" | |
|
- awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | |
|
- while read fct; do |
|
- if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then |
|
- continue; |
|
- fi |
|
- echo "function $fct in $f modifies xmm/ymm" >> "$tmp" |
|
- result=1 |
|
- done |
|
-done |
|
- |
|
-if test -s "$tmp"; then |
|
- echo |
|
- echo |
|
- cat "$tmp" |
|
- result=1 |
|
-else |
|
- result=0 |
|
-fi |
|
- |
|
-rm "$tmp" |
|
-exit $result |
|
diff -urN glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh |
|
--- glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh 1969-12-31 19:00:00.000000000 -0500 |
|
+++ glibc-2.17-c758a686/sysdeps/x86/tst-xmmymmzmm.sh 2014-09-11 16:05:10.073426623 -0400 |
|
@@ -0,0 +1,103 @@ |
|
+#! /bin/bash |
|
+# Make sure no code in ld.so uses xmm/ymm/zmm registers on x86-64. |
|
+# Copyright (C) 2009-2012 Free Software Foundation, Inc. |
|
+# This file is part of the GNU C Library. |
|
+ |
|
+# The GNU C Library is free software; you can redistribute it and/or |
|
+# modify it under the terms of the GNU Lesser General Public |
|
+# License as published by the Free Software Foundation; either |
|
+# version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+# The GNU C Library is distributed in the hope that it will be useful, |
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+# Lesser General Public License for more details. |
|
+ |
|
+# You should have received a copy of the GNU Lesser General Public |
|
+# License along with the GNU C Library; if not, see |
|
+# <http://www.gnu.org/licenses/>. |
|
+ |
|
+set -e |
|
+# Arguments: object file prefix, then the nm, objdump and readelf commands. |
|
+objpfx="$1" |
|
+NM="$2" |
|
+OBJDUMP="$3" |
|
+READELF="$4" |
|
+# Scratch file.  Trap 0 (EXIT) as well, so a set -e abort does not leave it behind. |
|
+tmp=$(mktemp ${objpfx}tst-xmmymmzmm.XXXXXX) |
|
+trap 'rm -f "$tmp"' 0 1 2 3 15 |
|
+ |
|
+# List of object files we have to test |
|
+rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | |
|
+  awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' | |
|
+  sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,') |
|
+rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)" |
|
+ |
|
+# OBJECT symbols can be ignored. |
|
+$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a | |
|
+grep -E " OBJECT *GLOBAL " | |
|
+awk '{if ($7 != "ABS") print $8 }' | |
|
+sort -u > "$tmp" |
|
+declare -a objects |
|
+objects=($(cat "$tmp")) |
|
+# Starting from dl-runtime.os, queue every object that defines one of its undefined symbols, transitively. |
|
+objs="dl-runtime.os" |
|
+tocheck="dl-runtime.os" |
|
+ |
|
+while test -n "$objs"; do |
|
+  this="$objs" |
|
+  objs="" |
|
+ |
|
+  for f in $this; do |
|
+    undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') |
|
+    if test -n "$undef"; then |
|
+      for s in $undef; do |
|
+        for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do |
|
+          if test "$obj" = "$s"; then |
|
+            continue 2 |
|
+          fi |
|
+        done |
|
+        for o in $rtldobjs; do |
|
+          ro=$(echo "$objpfx"../*/"$o") |
|
+          if $NM -g --defined-only "$ro" | grep -E -qs " $s\$"; then |
|
+            if ! (echo "$tocheck $objs" | grep -F -qs "$o"); then |
|
+              echo "$o needed for $s" |
|
+              objs="$objs $o" |
|
+            fi |
|
+            break; |
|
+          fi |
|
+        done |
|
+      done |
|
+    fi |
|
+  done |
|
+  tocheck="$tocheck$objs" |
|
+done |
|
+ |
|
+echo |
|
+echo |
|
+echo "object files needed: $tocheck" |
|
+# Reuse the scratch file as the report: one line per function whose disassembly has a line ending in an xmm/ymm/zmm operand. |
|
+cp /dev/null "$tmp" |
|
+for f in $tocheck; do |
|
+  $OBJDUMP -d "$objpfx"../*/"$f" | |
|
+  awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | |
|
+  while read fct; do |
|
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then |
|
+      continue; |
|
+    fi |
|
+    echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp" |
|
+    # No flag is set here: this loop runs in a pipeline subshell, so "$tmp" carries the verdict. |
|
+  done |
|
+done |
|
+# A non-empty report means failure. |
|
+if test -s "$tmp"; then |
|
+  echo |
|
+  echo |
|
+  cat "$tmp" |
|
+  result=1 |
|
+else |
|
+  result=0 |
|
+fi |
|
+ |
|
+rm "$tmp" |
|
+exit $result
|
|
|