You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
385 lines
12 KiB
385 lines
12 KiB
7 years ago
|
commit 5cdd1989d1d2f135d02e66250f37ba8e767f9772
|
||
|
Author: Stefan Liebler <stli@linux.vnet.ibm.com>
|
||
|
Date: Thu Mar 31 17:37:16 2016 +0200
|
||
|
|
||
|
S390: Extend structs La_s390_regs / La_s390_retval with vector-registers.
|
||
|
|
||
|
Starting with z13, vector registers can also occur as argument registers.
|
||
|
Thus the passed input/output register structs for
|
||
|
la_s390_[32|64]_gnu_plt[enter|exit] functions should reflect those new
|
||
|
registers. This patch extends these structs La_s390_regs and La_s390_retval
|
||
|
and adjusts _dl_runtime_profile() to handle those fields in case of
|
||
|
running on a z13 machine.
|
||
|
|
||
|
ChangeLog:
|
||
|
|
||
|
* sysdeps/s390/bits/link.h (La_s390_vr): New typedef.
|
||
|
(La_s390_32_regs): Append vector register lr_v24-lr_v31.
|
||
|
(La_s390_64_regs): Likewise.
|
||
|
(La_s390_32_retval): Append vector register lrv_v24.
|
||
|
(La_s390_64_retval): Likewise.
|
||
|
* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_profile):
|
||
|
Handle extended structs La_s390_32_regs and La_s390_32_retval.
|
||
|
* sysdeps/s390/s390-64/dl-trampoline.h (_dl_runtime_profile):
|
||
|
Handle extended structs La_s390_64_regs and La_s390_64_retval.
|
||
|
|
||
|
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h
|
||
|
index 2ef7f44..e27ed67 100644
|
||
|
--- a/sysdeps/s390/bits/link.h
|
||
|
+++ b/sysdeps/s390/bits/link.h
|
||
|
@@ -19,6 +19,9 @@
|
||
|
# error "Never include <bits/link.h> directly; use <link.h> instead."
|
||
|
#endif
|
||
|
|
||
|
+#if defined HAVE_S390_VX_ASM_SUPPORT
|
||
|
+typedef char La_s390_vr[16];
|
||
|
+#endif
|
||
|
|
||
|
#if __ELF_NATIVE_CLASS == 32
|
||
|
|
||
|
@@ -32,6 +35,16 @@ typedef struct La_s390_32_regs
|
||
|
uint32_t lr_r6;
|
||
|
double lr_fp0;
|
||
|
double lr_fp2;
|
||
|
+# if defined HAVE_S390_VX_ASM_SUPPORT
|
||
|
+ La_s390_vr lr_v24;
|
||
|
+ La_s390_vr lr_v25;
|
||
|
+ La_s390_vr lr_v26;
|
||
|
+ La_s390_vr lr_v27;
|
||
|
+ La_s390_vr lr_v28;
|
||
|
+ La_s390_vr lr_v29;
|
||
|
+ La_s390_vr lr_v30;
|
||
|
+ La_s390_vr lr_v31;
|
||
|
+# endif
|
||
|
} La_s390_32_regs;
|
||
|
|
||
|
/* Return values for calls from PLT on s390-32. */
|
||
|
@@ -40,6 +53,9 @@ typedef struct La_s390_32_retval
|
||
|
uint32_t lrv_r2;
|
||
|
uint32_t lrv_r3;
|
||
|
double lrv_fp0;
|
||
|
+# if defined HAVE_S390_VX_ASM_SUPPORT
|
||
|
+ La_s390_vr lrv_v24;
|
||
|
+# endif
|
||
|
} La_s390_32_retval;
|
||
|
|
||
|
|
||
|
@@ -77,6 +93,16 @@ typedef struct La_s390_64_regs
|
||
|
double lr_fp2;
|
||
|
double lr_fp4;
|
||
|
double lr_fp6;
|
||
|
+# if defined HAVE_S390_VX_ASM_SUPPORT
|
||
|
+ La_s390_vr lr_v24;
|
||
|
+ La_s390_vr lr_v25;
|
||
|
+ La_s390_vr lr_v26;
|
||
|
+ La_s390_vr lr_v27;
|
||
|
+ La_s390_vr lr_v28;
|
||
|
+ La_s390_vr lr_v29;
|
||
|
+ La_s390_vr lr_v30;
|
||
|
+ La_s390_vr lr_v31;
|
||
|
+# endif
|
||
|
} La_s390_64_regs;
|
||
|
|
||
|
/* Return values for calls from PLT on s390-64. */
|
||
|
@@ -84,6 +110,9 @@ typedef struct La_s390_64_retval
|
||
|
{
|
||
|
uint64_t lrv_r2;
|
||
|
double lrv_fp0;
|
||
|
+# if defined HAVE_S390_VX_ASM_SUPPORT
|
||
|
+ La_s390_vr lrv_v24;
|
||
|
+# endif
|
||
|
} La_s390_64_retval;
|
||
|
|
||
|
|
||
|
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
|
||
|
index a152a7b..bb74d27 100644
|
||
|
--- a/sysdeps/s390/s390-32/dl-trampoline.h
|
||
|
+++ b/sysdeps/s390/s390-32/dl-trampoline.h
|
||
|
@@ -112,28 +112,31 @@ _dl_runtime_resolve:
|
||
|
cfi_startproc
|
||
|
.align 16
|
||
|
_dl_runtime_profile:
|
||
|
- stm %r2,%r6,32(%r15) # save registers
|
||
|
- cfi_offset (r2, -64) # + r6 needed as arg for
|
||
|
- cfi_offset (r3, -60) # _dl_profile_fixup
|
||
|
- cfi_offset (r4, -56)
|
||
|
- cfi_offset (r5, -52)
|
||
|
- cfi_offset (r6, -48)
|
||
|
- std %f0,56(%r15)
|
||
|
- cfi_offset (f0, -40)
|
||
|
- std %f2,64(%r15)
|
||
|
- cfi_offset (f2, -32)
|
||
|
st %r12,12(%r15) # r12 is used as backup of r15
|
||
|
cfi_offset (r12, -84)
|
||
|
st %r14,16(%r15)
|
||
|
cfi_offset (r14, -80)
|
||
|
lr %r12,%r15 # backup stack pointer
|
||
|
cfi_def_cfa_register (12)
|
||
|
+ ahi %r15,-264 # create stack frame:
|
||
|
+ # 96 + sizeof(La_s390_32_regs)
|
||
|
+ st %r12,0(%r15) # save backchain
|
||
|
+
|
||
|
+ stm %r2,%r6,96(%r15) # save registers
|
||
|
+ cfi_offset (r2, -264) # + r6 needed as arg for
|
||
|
+ cfi_offset (r3, -260) # _dl_profile_fixup
|
||
|
+ cfi_offset (r4, -256)
|
||
|
+ cfi_offset (r5, -252)
|
||
|
+ cfi_offset (r6, -248)
|
||
|
+ std %f0,120(%r15)
|
||
|
+ cfi_offset (f0, -240)
|
||
|
+ std %f2,128(%r15)
|
||
|
+ cfi_offset (f2, -232)
|
||
|
#ifdef RESTORE_VRS
|
||
|
- ahi %r15,-224 # create stack frame
|
||
|
.machine push
|
||
|
.machine "z13"
|
||
|
.machinemode "zarch_nohighgprs"
|
||
|
- vstm %v24,%v31,96(%r15) # store call-clobbered vr arguments
|
||
|
+ vstm %v24,%v31,136(%r15) # store call-clobbered vr arguments
|
||
|
cfi_offset (v24, -224)
|
||
|
cfi_offset (v25, -208)
|
||
|
cfi_offset (v26, -192)
|
||
|
@@ -143,31 +146,31 @@ _dl_runtime_profile:
|
||
|
cfi_offset (v30, -128)
|
||
|
cfi_offset (v31, -112)
|
||
|
.machine pop
|
||
|
-#else
|
||
|
- ahi %r15,-96 # create stack frame
|
||
|
#endif
|
||
|
- st %r12,0(%r15) # save backchain
|
||
|
+
|
||
|
lm %r2,%r3,24(%r12) # load arguments saved by PLT
|
||
|
lr %r4,%r14 # return address as third parameter
|
||
|
basr %r1,0
|
||
|
0: l %r14,6f-0b(%r1)
|
||
|
- la %r5,32(%r12) # pointer to struct La_s390_32_regs
|
||
|
+ la %r5,96(%r15) # pointer to struct La_s390_32_regs
|
||
|
la %r6,20(%r12) # long int * framesize
|
||
|
bas %r14,0(%r14,%r1) # call resolver
|
||
|
lr %r1,%r2 # function addr returned in r2
|
||
|
- ld %f0,56(%r12) # restore call-clobbered arg fprs
|
||
|
- ld %f2,64(%r12)
|
||
|
+ ld %f0,120(%r15) # restore call-clobbered arg fprs
|
||
|
+ ld %f2,128(%r15)
|
||
|
#ifdef RESTORE_VRS
|
||
|
.machine push
|
||
|
.machine "z13"
|
||
|
.machinemode "zarch_nohighgprs"
|
||
|
- vlm %v24,%v31,96(%r15) # restore call-clobbered arg vrs
|
||
|
+ vlm %v24,%v31,136(%r15) # restore call-clobbered arg vrs
|
||
|
.machine pop
|
||
|
#endif
|
||
|
icm %r0,15,20(%r12) # load & test framesize
|
||
|
jnm 2f
|
||
|
|
||
|
- lm %r2,%r6,32(%r12)
|
||
|
+ lm %r2,%r6,96(%r15) # framesize < 0 means no pltexit call
|
||
|
+ # so we can do a tail call without
|
||
|
+ # copying the arg overflow area
|
||
|
lr %r15,%r12 # remove stack frame
|
||
|
cfi_def_cfa_register (15)
|
||
|
l %r14,16(%r15) # restore registers
|
||
|
@@ -175,7 +178,9 @@ _dl_runtime_profile:
|
||
|
br %r1 # tail-call to the resolved function
|
||
|
|
||
|
cfi_def_cfa_register (12)
|
||
|
-2: jz 4f # framesize == 0 ?
|
||
|
+2: la %r4,96(%r15) # pointer to struct La_s390_32_regs
|
||
|
+ st %r4,32(%r12)
|
||
|
+ jz 4f # framesize == 0 ?
|
||
|
ahi %r0,7 # align framesize to 8
|
||
|
lhi %r2,-8
|
||
|
nr %r0,%r2
|
||
|
@@ -188,24 +193,35 @@ _dl_runtime_profile:
|
||
|
la %r2,8(%r2)
|
||
|
la %r3,8(%r3)
|
||
|
brct %r0,3b
|
||
|
-4: lm %r2,%r6,32(%r12) # load register parameters
|
||
|
+4: lm %r2,%r6,0(%r4) # load register parameters
|
||
|
basr %r14,%r1 # call resolved function
|
||
|
- stm %r2,%r3,72(%r12) # store return values r2, r3, f0
|
||
|
- std %f0,80(%r12) # to struct La_s390_32_retval
|
||
|
- lm %r2,%r3,24(%r12) # load arguments saved by PLT
|
||
|
+ stm %r2,%r3,40(%r12) # store return values r2, r3, f0
|
||
|
+ std %f0,48(%r12) # to struct La_s390_32_retval
|
||
|
+#ifdef RESTORE_VRS
|
||
|
+ .machine push
|
||
|
+ .machine "z13"
|
||
|
+ vst %v24,56(%r12) # store return value v24
|
||
|
+ .machine pop
|
||
|
+#endif
|
||
|
+ lm %r2,%r4,24(%r12) # r2, r3: load arguments saved by PLT
|
||
|
+ # r4: pointer to struct La_s390_32_regs
|
||
|
basr %r1,0
|
||
|
5: l %r14,7f-5b(%r1)
|
||
|
- la %r4,32(%r12) # pointer to struct La_s390_32_regs
|
||
|
- la %r5,72(%r12) # pointer to struct La_s390_32_retval
|
||
|
+ la %r5,40(%r12) # pointer to struct La_s390_32_retval
|
||
|
bas %r14,0(%r14,%r1) # call _dl_call_pltexit
|
||
|
|
||
|
lr %r15,%r12 # remove stack frame
|
||
|
cfi_def_cfa_register (15)
|
||
|
l %r14,16(%r15) # restore registers
|
||
|
l %r12,12(%r15)
|
||
|
- l %r2,72(%r15) # restore return values
|
||
|
- l %r3,76(%r15)
|
||
|
- ld %f0,80(%r15)
|
||
|
+ lm %r2,%r3,40(%r15) # restore return values
|
||
|
+ ld %f0,48(%r15)
|
||
|
+#ifdef RESTORE_VRS
|
||
|
+ .machine push
|
||
|
+ .machine "z13"
|
||
|
+ vl %v24,56(%r15) # restore return value v24
|
||
|
+ .machine pop
|
||
|
+#endif
|
||
|
br %r14
|
||
|
|
||
|
6: .long _dl_profile_fixup - 0b
|
||
|
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h
|
||
|
index 658e3a3..33ea3de 100644
|
||
|
--- a/sysdeps/s390/s390-64/dl-trampoline.h
|
||
|
+++ b/sysdeps/s390/s390-64/dl-trampoline.h
|
||
|
@@ -109,31 +109,34 @@ _dl_runtime_resolve:
|
||
|
cfi_startproc
|
||
|
.align 16
|
||
|
_dl_runtime_profile:
|
||
|
- stmg %r2,%r6,64(%r15) # save call-clobbered arg regs
|
||
|
- cfi_offset (r2, -96) # + r6 needed as arg for
|
||
|
- cfi_offset (r3, -88) # _dl_profile_fixup
|
||
|
- cfi_offset (r4, -80)
|
||
|
- cfi_offset (r5, -72)
|
||
|
- cfi_offset (r6, -64)
|
||
|
- std %f0,104(%r15)
|
||
|
- cfi_offset (f0, -56)
|
||
|
- std %f2,112(%r15)
|
||
|
- cfi_offset (f2, -48)
|
||
|
- std %f4,120(%r15)
|
||
|
- cfi_offset (f4, -40)
|
||
|
- std %f6,128(%r15)
|
||
|
- cfi_offset (f6, -32)
|
||
|
stg %r12,24(%r15) # r12 is used as backup of r15
|
||
|
cfi_offset (r12, -136)
|
||
|
stg %r14,32(%r15)
|
||
|
cfi_offset (r14, -128)
|
||
|
lgr %r12,%r15 # backup stack pointer
|
||
|
cfi_def_cfa_register (12)
|
||
|
+ aghi %r15,-360 # create stack frame:
|
||
|
+ # 160 + sizeof(La_s390_64_regs)
|
||
|
+ stg %r12,0(%r15) # save backchain
|
||
|
+
|
||
|
+ stmg %r2,%r6,160(%r15) # save call-clobbered arg regs
|
||
|
+ cfi_offset (r2, -360) # + r6 needed as arg for
|
||
|
+ cfi_offset (r3, -352) # _dl_profile_fixup
|
||
|
+ cfi_offset (r4, -344)
|
||
|
+ cfi_offset (r5, -336)
|
||
|
+ cfi_offset (r6, -328)
|
||
|
+ std %f0,200(%r15)
|
||
|
+ cfi_offset (f0, -320)
|
||
|
+ std %f2,208(%r15)
|
||
|
+ cfi_offset (f2, -312)
|
||
|
+ std %f4,216(%r15)
|
||
|
+ cfi_offset (f4, -304)
|
||
|
+ std %f6,224(%r15)
|
||
|
+ cfi_offset (f6, -296)
|
||
|
#ifdef RESTORE_VRS
|
||
|
- aghi %r15,-288 # create stack frame
|
||
|
.machine push
|
||
|
.machine "z13"
|
||
|
- vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers
|
||
|
+ vstm %v24,%v31,232(%r15) # store call-clobbered vector arguments
|
||
|
cfi_offset (v24, -288)
|
||
|
cfi_offset (v25, -272)
|
||
|
cfi_offset (v26, -256)
|
||
|
@@ -143,31 +146,28 @@ _dl_runtime_profile:
|
||
|
cfi_offset (v30, -192)
|
||
|
cfi_offset (v31, -176)
|
||
|
.machine pop
|
||
|
-#else
|
||
|
- aghi %r15,-160 # create stack frame
|
||
|
#endif
|
||
|
- stg %r12,0(%r15) # save backchain
|
||
|
lmg %r2,%r3,48(%r12) # load arguments saved by PLT
|
||
|
lgr %r4,%r14 # return address as third parameter
|
||
|
- la %r5,64(%r12) # pointer to struct La_s390_64_regs
|
||
|
+ la %r5,160(%r15) # pointer to struct La_s390_64_regs
|
||
|
la %r6,40(%r12) # long int * framesize
|
||
|
brasl %r14,_dl_profile_fixup # call resolver
|
||
|
lgr %r1,%r2 # function addr returned in r2
|
||
|
- ld %f0,104(%r12) # restore call-clobbered arg fprs
|
||
|
- ld %f2,112(%r12)
|
||
|
- ld %f4,120(%r12)
|
||
|
- ld %f6,128(%r12)
|
||
|
+ ld %f0,200(%r15) # restore call-clobbered arg fprs
|
||
|
+ ld %f2,208(%r15)
|
||
|
+ ld %f4,216(%r15)
|
||
|
+ ld %f6,224(%r15)
|
||
|
#ifdef RESTORE_VRS
|
||
|
.machine push
|
||
|
.machine "z13"
|
||
|
- vlm %v24,%v31,160(%r15) # restore call-clobbered arg vrs
|
||
|
+ vlm %v24,%v31,232(%r15) # restore call-clobbered arg vrs
|
||
|
.machine pop
|
||
|
#endif
|
||
|
lg %r0,40(%r12) # load framesize
|
||
|
ltgr %r0,%r0
|
||
|
jnm 1f
|
||
|
|
||
|
- lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call
|
||
|
+ lmg %r2,%r6,160(%r15) # framesize < 0 means no pltexit call
|
||
|
# so we can do a tail call without
|
||
|
# copying the arg overflow area
|
||
|
lgr %r15,%r12 # remove stack frame
|
||
|
@@ -177,7 +177,9 @@ _dl_runtime_profile:
|
||
|
br %r1 # tail-call to resolved function
|
||
|
|
||
|
cfi_def_cfa_register (12)
|
||
|
-1: jz 4f # framesize == 0 ?
|
||
|
+1: la %r4,160(%r15) # pointer to struct La_s390_64_regs
|
||
|
+ stg %r4,64(%r12)
|
||
|
+ jz 4f # framesize == 0 ?
|
||
|
aghi %r0,7 # align framesize to 8
|
||
|
nill %r0,0xfff8
|
||
|
slgr %r15,%r0 # make room for framesize bytes
|
||
|
@@ -189,21 +191,33 @@ _dl_runtime_profile:
|
||
|
la %r2,8(%r2) # depending on framesize
|
||
|
la %r3,8(%r3)
|
||
|
brctg %r0,3b
|
||
|
-4: lmg %r2,%r6,64(%r12) # restore call-clobbered arg gprs
|
||
|
+4: lmg %r2,%r6,0(%r4) # restore call-clobbered arg gprs
|
||
|
basr %r14,%r1 # call resolved function
|
||
|
- stg %r2,136(%r12) # store return values r2, f0
|
||
|
- std %f0,144(%r12) # to struct La_s390_64_retval
|
||
|
- lmg %r2,%r3,48(%r12) # load arguments saved by PLT
|
||
|
- la %r4,64(%r12) # pointer to struct La_s390_64_regs
|
||
|
- la %r5,136(%r12) # pointer to struct La_s390_64_retval
|
||
|
+ stg %r2,72(%r12) # store return values r2, f0
|
||
|
+ std %f0,80(%r12) # to struct La_s390_64_retval
|
||
|
+#ifdef RESTORE_VRS
|
||
|
+ .machine push
|
||
|
+ .machine "z13"
|
||
|
+ vst %v24,88(%r12) # store return value v24
|
||
|
+ .machine pop
|
||
|
+#endif
|
||
|
+ lmg %r2,%r4,48(%r12) # r2, r3: load arguments saved by PLT
|
||
|
+ # r4: pointer to struct La_s390_64_regs
|
||
|
+ la %r5,72(%r12) # pointer to struct La_s390_64_retval
|
||
|
brasl %r14,_dl_call_pltexit
|
||
|
|
||
|
lgr %r15,%r12 # remove stack frame
|
||
|
cfi_def_cfa_register (15)
|
||
|
lg %r14,32(%r15) # restore registers
|
||
|
lg %r12,24(%r15)
|
||
|
- lg %r2,136(%r15) # restore return values
|
||
|
- ld %f0,144(%r15)
|
||
|
+ lg %r2,72(%r15) # restore return values
|
||
|
+ ld %f0,80(%r15)
|
||
|
+#ifdef RESTORE_VRS
|
||
|
+ .machine push
|
||
|
+ .machine "z13"
|
||
|
+ vl %v24,88(%r15) # restore return value v24
|
||
|
+ .machine pop
|
||
|
+#endif
|
||
|
br %r14 # Jump back to caller
|
||
|
|
||
|
cfi_endproc
|