Backport of this upstream patch: commit 031e519c95c069abe4e4c7c59e2b4b67efccdee5 Author: H.J. Lu Date: Thu Jul 6 04:43:06 2017 -0700 x86-64: Align the stack in __tls_get_addr [BZ #21609] This change forces realignment of the stack pointer in __tls_get_addr, so that binaries compiled by GCCs older than GCC 4.9: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 continue to work even if vector instructions are used in glibc which require the ABI stack realignment. __tls_get_addr_slow is added to handle the slow paths in the default implementation of__tls_get_addr in elf/dl-tls.c. The new __tls_get_addr calls __tls_get_addr_slow after realigning the stack. Internal calls within ld.so go directly to the default implementation of __tls_get_addr because they do not need stack realignment. [BZ #21609] * sysdeps/x86_64/Makefile (sysdep-dl-routines): Add tls_get_addr. (gen-as-const-headers): Add rtld-offsets.sym. * sysdeps/x86_64/dl-tls.c: New file. * sysdeps/x86_64/rtld-offsets.sym: Likwise. * sysdeps/x86_64/tls_get_addr.S: Likewise. * sysdeps/x86_64/dl-tls.h: Add multiple inclusion guards. * sysdeps/x86_64/tlsdesc.sym (TI_MODULE_OFFSET): New. (TI_OFFSET_OFFSET): Likwise. Adjusted for drift in sysdeps/x86_64/Makefile and lack of TLS_DTV_UNALLOCATED consolidation (upstream commit aca1daef298b43bd7b1987b31f5aabcf6c2f6021). diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 6c03a89fb3ea3063..c6766bb2b443a28a 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -19,7 +19,7 @@ gen-as-const-headers += locale-defines.sym endif ifeq ($(subdir),elf) -sysdep-dl-routines += tlsdesc dl-tlsdesc +sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr tests += ifuncmain8 modules-names += ifuncmod8 @@ -69,5 +69,5 @@ endif endif ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym +gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c new file mode 100644 index 0000000000000000..3584805c8ecca59a --- /dev/null +++ b/sysdeps/x86_64/dl-tls.c @@ -0,0 +1,53 @@ +/* Thread-local storage handling in the ELF dynamic linker. x86-64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef SHARED +/* Work around GCC PR58066, due to which __tls_get_addr may be called + with an unaligned stack. The compat implementation is in + tls_get_addr-compat.S. */ + +# include + +/* Define __tls_get_addr within elf/dl-tls.c under a different + name. */ +extern __typeof__ (__tls_get_addr) ___tls_get_addr; + +# define __tls_get_addr ___tls_get_addr +# include +# undef __tls_get_addr + +hidden_ver (___tls_get_addr, __tls_get_addr) + +/* Only handle slow paths for __tls_get_addr. */ +attribute_hidden +void * +__tls_get_addr_slow (GET_ADDR_ARGS) +{ + dtv_t *dtv = THREAD_DTV (); + + if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation))) + return update_get_addr (GET_ADDR_PARAM); + + return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL); +} +#else + +/* No compatibility symbol needed. */ +# include + +#endif diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h index 56162ee64a4aae7d..0b4a6b3b634a83f4 100644 --- a/sysdeps/x86_64/dl-tls.h +++ b/sysdeps/x86_64/dl-tls.h @@ -16,6 +16,10 @@ License along with the GNU C Library; if not, see . */ +#ifndef _X86_64_DL_TLS_H +#define _X86_64_DL_TLS_H + +#include /* Type used for the representation of TLS information in the GOT. */ typedef struct dl_tls_index @@ -29,3 +33,5 @@ extern void *__tls_get_addr (tls_index *ti); /* Value used for dtv entries for which the allocation is delayed. */ #define TLS_DTV_UNALLOCATED ((void *) -1l) + +#endif /* _X86_64_DL_TLS_H */ diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym new file mode 100644 index 0000000000000000..fd41b51521ac80bd --- /dev/null +++ b/sysdeps/x86_64/rtld-offsets.sym @@ -0,0 +1,6 @@ +#define SHARED +#include + +-- + +GL_TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation) diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S new file mode 100644 index 0000000000000000..9d38fb3be54fbcb5 --- /dev/null +++ b/sysdeps/x86_64/tls_get_addr.S @@ -0,0 +1,61 @@ +/* Stack-aligning implementation of __tls_get_addr. x86-64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef SHARED + +# include +# include "tlsdesc.h" +# include "rtld-offsets.h" + +/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c. This function + call __tls_get_addr_slow on both slow paths. It realigns the stack + before the call to work around GCC PR58066. */ + +ENTRY (__tls_get_addr) + mov %fs:DTV_OFFSET, %RDX_LP + mov GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP + /* GL(dl_tls_generation) == dtv[0].counter */ + cmp %RAX_LP, (%rdx) + jne 1f + mov TI_MODULE_OFFSET(%rdi), %RAX_LP + /* dtv[ti->ti_module] */ +# ifdef __LP64__ + salq $4, %rax + movq (%rdx,%rax), %rax +# else + movl (%rdx,%rax, 8), %eax +# endif + cmp $-1, %RAX_LP + je 1f + add TI_OFFSET_OFFSET(%rdi), %RAX_LP + ret +1: + /* On the slow path, align the stack. */ + pushq %rbp + cfi_def_cfa_offset (16) + cfi_offset (%rbp, -16) + mov %RSP_LP, %RBP_LP + cfi_def_cfa_register (%rbp) + and $-16, %RSP_LP + call __tls_get_addr_slow + mov %RBP_LP, %RSP_LP + popq %rbp + cfi_def_cfa (%rsp, 8) + ret +END (__tls_get_addr) +#endif /* SHARED */ diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym index 33854975d04184b2..fc897ab4b522b1a9 100644 --- a/sysdeps/x86_64/tlsdesc.sym +++ b/sysdeps/x86_64/tlsdesc.sym @@ -15,3 +15,6 @@ TLSDESC_ARG offsetof(struct tlsdesc, arg) TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) + +TI_MODULE_OFFSET offsetof(tls_index, ti_module) +TI_OFFSET_OFFSET offsetof(tls_index, ti_offset)