You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
3.6 KiB
75 lines
3.6 KiB
4 years ago
|
Note: the context of this patch differs from upstream slightly,
|
||
|
to accommodate the lack of ILP32 in RHEL.
|
||
|
|
||
|
commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
|
||
|
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
|
||
|
Date: Wed Sep 27 16:55:14 2017 +0100
|
||
|
|
||
|
aarch64: Disable lazy symbol binding of TLSDESC
|
||
|
|
||
|
Always do TLS descriptor initialization at load time during relocation
|
||
|
processing to avoid barriers at every TLS access. In non-dlopened shared
|
||
|
libraries the overhead of tls access vs static global access is > 3x
|
||
|
bigger when lazy initialization is used (_dl_tlsdesc_return_lazy)
|
||
|
compared to bind-now (_dl_tlsdesc_return) so the barriers dominate tls
|
||
|
access performance.
|
||
|
|
||
|
TLSDESC relocs are in DT_JMPREL which are processed at load time using
|
||
|
elf_machine_lazy_rel which is only supposed to do lightweight
|
||
|
initialization using the DT_TLSDESC_PLT trampoline (the trampoline code
|
||
|
jumps to the entry point in DT_TLSDESC_GOT which does the lazy tlsdesc
|
||
|
initialization at runtime). This patch changes elf_machine_lazy_rel
|
||
|
in aarch64 to do the symbol binding and initialization as if DF_BIND_NOW
|
||
|
was set, so the non-lazy code path of elf/do-rel.h was replicated.
|
||
|
|
||
|
The static linker could be changed to emit TLSDESC relocs in DT_REL*,
|
||
|
which are processed non-lazily, but the goal of this patch is to always
|
||
|
guarantee bind-now semantics, even if the binary was produced with an
|
||
|
old linker, so the barriers can be dropped in tls descriptor functions.
|
||
|
|
||
|
After this change the synchronizing ldar instructions can be dropped
|
||
|
as well as the lazy initialization machinery including the DT_TLSDESC_GOT
|
||
|
setup.
|
||
|
|
||
|
I believe this should be done on all targets, including ones where no
|
||
|
barrier is needed for lazy initialization. There is very little gain in
|
||
|
optimizing for a large number of symbolic tlsdesc relocations, which is an
|
||
|
extremely uncommon case. And currently the tlsdesc entries are only
|
||
|
read-only protected with -z now and some hardenings against writable
|
||
|
JUMPSLOT relocs don't work for TLSDESC so they are a security hazard.
|
||
|
(But to fix that the static linker has to be changed.)
|
||
|
|
||
|
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
|
||
|
binding and initialization non-lazily for R_AARCH64_TLSDESC.
|
||
|
|
||
|
diff -rup a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
|
||
|
--- a/sysdeps/aarch64/dl-machine.h 2018-10-16 12:07:31.588149003 -0400
|
||
|
+++ b/sysdeps/aarch64/dl-machine.h 2018-10-16 12:18:46.214078837 -0400
|
||
|
@@ -376,12 +376,21 @@ elf_machine_lazy_rel (struct link_map *m
|
||
|
}
|
||
|
else if (__builtin_expect (r_type == R_AARCH64_TLSDESC, 1))
|
||
|
{
|
||
|
- struct tlsdesc volatile *td =
|
||
|
- (struct tlsdesc volatile *)reloc_addr;
|
||
|
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
|
||
|
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
|
||
|
+ const ElfW (Sym) *sym = &symtab[symndx];
|
||
|
+ const struct r_found_version *version = NULL;
|
||
|
|
||
|
- td->arg = (void*)reloc;
|
||
|
- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
|
||
|
- + map->l_addr);
|
||
|
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
|
||
|
+ {
|
||
|
+ const ElfW (Half) *vernum =
|
||
|
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
|
||
|
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Always initialize TLS descriptors completely, because lazy
|
||
|
+ initialization requires synchronization at every TLS access. */
|
||
|
+ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
|
||
|
}
|
||
|
else if (__glibc_unlikely (r_type == R_AARCH64_IRELATIVE))
|
||
|
{
|