You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
384 lines
12 KiB
384 lines
12 KiB
commit 5cdd1989d1d2f135d02e66250f37ba8e767f9772 |
|
Author: Stefan Liebler <stli@linux.vnet.ibm.com> |
|
Date: Thu Mar 31 17:37:16 2016 +0200 |
|
|
|
S390: Extend structs La_s390_regs / La_s390_retval with vector-registers. |
|
|
|
Starting with z13, vector registers can also occur as argument registers. |
|
Thus the passed input/output register structs for |
|
la_s390_[32|64]_gnu_plt[enter|exit] functions should reflect those new |
|
registers. This patch extends these structs La_s390_regs and La_s390_retval |
|
and adjusts _dl_runtime_profile() to handle those fields in case of |
|
running on a z13 machine. |
|
|
|
ChangeLog: |
|
|
|
* sysdeps/s390/bits/link.h (La_s390_vr): New typedef. |
|
(La_s390_32_regs): Append vector registers lr_v24-lr_v31. |
|
(La_s390_64_regs): Likewise. |
|
(La_s390_32_retval): Append vector register lrv_v24. |
|
(La_s390_64_retval): Likewise. |
|
* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_profile): |
|
Handle extended structs La_s390_32_regs and La_s390_32_retval. |
|
* sysdeps/s390/s390-64/dl-trampoline.h (_dl_runtime_profile): |
|
Handle extended structs La_s390_64_regs and La_s390_64_retval. |
|
|
|
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h |
|
index 2ef7f44..e27ed67 100644 |
|
--- a/sysdeps/s390/bits/link.h |
|
+++ b/sysdeps/s390/bits/link.h |
|
@@ -19,6 +19,9 @@ |
|
# error "Never include <bits/link.h> directly; use <link.h> instead." |
|
#endif |
|
|
|
+#if defined HAVE_S390_VX_ASM_SUPPORT |
|
+typedef char La_s390_vr[16]; |
|
+#endif |
|
|
|
#if __ELF_NATIVE_CLASS == 32 |
|
|
|
@@ -32,6 +35,16 @@ typedef struct La_s390_32_regs |
|
uint32_t lr_r6; |
|
double lr_fp0; |
|
double lr_fp2; |
|
+# if defined HAVE_S390_VX_ASM_SUPPORT |
|
+ La_s390_vr lr_v24; |
|
+ La_s390_vr lr_v25; |
|
+ La_s390_vr lr_v26; |
|
+ La_s390_vr lr_v27; |
|
+ La_s390_vr lr_v28; |
|
+ La_s390_vr lr_v29; |
|
+ La_s390_vr lr_v30; |
|
+ La_s390_vr lr_v31; |
|
+# endif |
|
} La_s390_32_regs; |
|
|
|
/* Return values for calls from PLT on s390-32. */ |
|
@@ -40,6 +53,9 @@ typedef struct La_s390_32_retval |
|
uint32_t lrv_r2; |
|
uint32_t lrv_r3; |
|
double lrv_fp0; |
|
+# if defined HAVE_S390_VX_ASM_SUPPORT |
|
+ La_s390_vr lrv_v24; |
|
+# endif |
|
} La_s390_32_retval; |
|
|
|
|
|
@@ -77,6 +93,16 @@ typedef struct La_s390_64_regs |
|
double lr_fp2; |
|
double lr_fp4; |
|
double lr_fp6; |
|
+# if defined HAVE_S390_VX_ASM_SUPPORT |
|
+ La_s390_vr lr_v24; |
|
+ La_s390_vr lr_v25; |
|
+ La_s390_vr lr_v26; |
|
+ La_s390_vr lr_v27; |
|
+ La_s390_vr lr_v28; |
|
+ La_s390_vr lr_v29; |
|
+ La_s390_vr lr_v30; |
|
+ La_s390_vr lr_v31; |
|
+# endif |
|
} La_s390_64_regs; |
|
|
|
/* Return values for calls from PLT on s390-64. */ |
|
@@ -84,6 +110,9 @@ typedef struct La_s390_64_retval |
|
{ |
|
uint64_t lrv_r2; |
|
double lrv_fp0; |
|
+# if defined HAVE_S390_VX_ASM_SUPPORT |
|
+ La_s390_vr lrv_v24; |
|
+# endif |
|
} La_s390_64_retval; |
|
|
|
|
|
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h |
|
index a152a7b..bb74d27 100644 |
|
--- a/sysdeps/s390/s390-32/dl-trampoline.h |
|
+++ b/sysdeps/s390/s390-32/dl-trampoline.h |
|
@@ -112,28 +112,31 @@ _dl_runtime_resolve: |
|
cfi_startproc |
|
.align 16 |
|
_dl_runtime_profile: |
|
- stm %r2,%r6,32(%r15) # save registers |
|
- cfi_offset (r2, -64) # + r6 needed as arg for |
|
- cfi_offset (r3, -60) # _dl_profile_fixup |
|
- cfi_offset (r4, -56) |
|
- cfi_offset (r5, -52) |
|
- cfi_offset (r6, -48) |
|
- std %f0,56(%r15) |
|
- cfi_offset (f0, -40) |
|
- std %f2,64(%r15) |
|
- cfi_offset (f2, -32) |
|
st %r12,12(%r15) # r12 is used as backup of r15 |
|
cfi_offset (r12, -84) |
|
st %r14,16(%r15) |
|
cfi_offset (r14, -80) |
|
lr %r12,%r15 # backup stack pointer |
|
cfi_def_cfa_register (12) |
|
+ ahi %r15,-264 # create stack frame: |
|
+ # 96 + sizeof(La_s390_32_regs) |
|
+ st %r12,0(%r15) # save backchain |
|
+ |
|
+ stm %r2,%r6,96(%r15) # save registers |
|
+ cfi_offset (r2, -264) # + r6 needed as arg for |
|
+ cfi_offset (r3, -260) # _dl_profile_fixup |
|
+ cfi_offset (r4, -256) |
|
+ cfi_offset (r5, -252) |
|
+ cfi_offset (r6, -248) |
|
+ std %f0,120(%r15) |
|
+ cfi_offset (f0, -240) |
|
+ std %f2,128(%r15) |
|
+ cfi_offset (f2, -232) |
|
#ifdef RESTORE_VRS |
|
- ahi %r15,-224 # create stack frame |
|
.machine push |
|
.machine "z13" |
|
.machinemode "zarch_nohighgprs" |
|
- vstm %v24,%v31,96(%r15) # store call-clobbered vr arguments |
|
+ vstm %v24,%v31,136(%r15) # store call-clobbered vr arguments |
|
cfi_offset (v24, -224) |
|
cfi_offset (v25, -208) |
|
cfi_offset (v26, -192) |
|
@@ -143,31 +146,31 @@ _dl_runtime_profile: |
|
cfi_offset (v30, -128) |
|
cfi_offset (v31, -112) |
|
.machine pop |
|
-#else |
|
- ahi %r15,-96 # create stack frame |
|
#endif |
|
- st %r12,0(%r15) # save backchain |
|
+ |
|
lm %r2,%r3,24(%r12) # load arguments saved by PLT |
|
lr %r4,%r14 # return address as third parameter |
|
basr %r1,0 |
|
0: l %r14,6f-0b(%r1) |
|
- la %r5,32(%r12) # pointer to struct La_s390_32_regs |
|
+ la %r5,96(%r15) # pointer to struct La_s390_32_regs |
|
la %r6,20(%r12) # long int * framesize |
|
bas %r14,0(%r14,%r1) # call resolver |
|
lr %r1,%r2 # function addr returned in r2 |
|
- ld %f0,56(%r12) # restore call-clobbered arg fprs |
|
- ld %f2,64(%r12) |
|
+ ld %f0,120(%r15) # restore call-clobbered arg fprs |
|
+ ld %f2,128(%r15) |
|
#ifdef RESTORE_VRS |
|
.machine push |
|
.machine "z13" |
|
.machinemode "zarch_nohighgprs" |
|
- vlm %v24,%v31,96(%r15) # restore call-clobbered arg vrs |
|
+ vlm %v24,%v31,136(%r15) # restore call-clobbered arg vrs |
|
.machine pop |
|
#endif |
|
icm %r0,15,20(%r12) # load & test framesize |
|
jnm 2f |
|
|
|
- lm %r2,%r6,32(%r12) |
|
+ lm %r2,%r6,96(%r15) # framesize < 0 means no pltexit call |
|
+ # so we can do a tail call without |
|
+ # copying the arg overflow area |
|
lr %r15,%r12 # remove stack frame |
|
cfi_def_cfa_register (15) |
|
l %r14,16(%r15) # restore registers |
|
@@ -175,7 +178,9 @@ _dl_runtime_profile: |
|
br %r1 # tail-call to the resolved function |
|
|
|
cfi_def_cfa_register (12) |
|
-2: jz 4f # framesize == 0 ? |
|
+2: la %r4,96(%r15) # pointer to struct La_s390_32_regs |
|
+ st %r4,32(%r12) |
|
+ jz 4f # framesize == 0 ? |
|
ahi %r0,7 # align framesize to 8 |
|
lhi %r2,-8 |
|
nr %r0,%r2 |
|
@@ -188,24 +193,35 @@ _dl_runtime_profile: |
|
la %r2,8(%r2) |
|
la %r3,8(%r3) |
|
brct %r0,3b |
|
-4: lm %r2,%r6,32(%r12) # load register parameters |
|
+4: lm %r2,%r6,0(%r4) # load register parameters |
|
basr %r14,%r1 # call resolved function |
|
- stm %r2,%r3,72(%r12) # store return values r2, r3, f0 |
|
- std %f0,80(%r12) # to struct La_s390_32_retval |
|
- lm %r2,%r3,24(%r12) # load arguments saved by PLT |
|
+ stm %r2,%r3,40(%r12) # store return values r2, r3, f0 |
|
+ std %f0,48(%r12) # to struct La_s390_32_retval |
|
+#ifdef RESTORE_VRS |
|
+ .machine push |
|
+ .machine "z13" |
|
+ vst %v24,56(%r12) # store return value v24 |
|
+ .machine pop |
|
+#endif |
|
+ lm %r2,%r4,24(%r12) # r2, r3: load arguments saved by PLT |
|
+ # r4: pointer to struct La_s390_32_regs |
|
basr %r1,0 |
|
5: l %r14,7f-5b(%r1) |
|
- la %r4,32(%r12) # pointer to struct La_s390_32_regs |
|
- la %r5,72(%r12) # pointer to struct La_s390_32_retval |
|
+ la %r5,40(%r12) # pointer to struct La_s390_32_retval |
|
bas %r14,0(%r14,%r1) # call _dl_call_pltexit |
|
|
|
lr %r15,%r12 # remove stack frame |
|
cfi_def_cfa_register (15) |
|
l %r14,16(%r15) # restore registers |
|
l %r12,12(%r15) |
|
- l %r2,72(%r15) # restore return values |
|
- l %r3,76(%r15) |
|
- ld %f0,80(%r15) |
|
+ lm %r2,%r3,40(%r15) # restore return values |
|
+ ld %f0,48(%r15) |
|
+#ifdef RESTORE_VRS |
|
+ .machine push |
|
+ .machine "z13" |
|
+ vl %v24,56(%r15) # restore return value v24 |
|
+ .machine pop |
|
+#endif |
|
br %r14 |
|
|
|
6: .long _dl_profile_fixup - 0b |
|
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h |
|
index 658e3a3..33ea3de 100644 |
|
--- a/sysdeps/s390/s390-64/dl-trampoline.h |
|
+++ b/sysdeps/s390/s390-64/dl-trampoline.h |
|
@@ -109,31 +109,34 @@ _dl_runtime_resolve: |
|
cfi_startproc |
|
.align 16 |
|
_dl_runtime_profile: |
|
- stmg %r2,%r6,64(%r15) # save call-clobbered arg regs |
|
- cfi_offset (r2, -96) # + r6 needed as arg for |
|
- cfi_offset (r3, -88) # _dl_profile_fixup |
|
- cfi_offset (r4, -80) |
|
- cfi_offset (r5, -72) |
|
- cfi_offset (r6, -64) |
|
- std %f0,104(%r15) |
|
- cfi_offset (f0, -56) |
|
- std %f2,112(%r15) |
|
- cfi_offset (f2, -48) |
|
- std %f4,120(%r15) |
|
- cfi_offset (f4, -40) |
|
- std %f6,128(%r15) |
|
- cfi_offset (f6, -32) |
|
stg %r12,24(%r15) # r12 is used as backup of r15 |
|
cfi_offset (r12, -136) |
|
stg %r14,32(%r15) |
|
cfi_offset (r14, -128) |
|
lgr %r12,%r15 # backup stack pointer |
|
cfi_def_cfa_register (12) |
|
+ aghi %r15,-360 # create stack frame: |
|
+ # 160 + sizeof(La_s390_64_regs) |
|
+ stg %r12,0(%r15) # save backchain |
|
+ |
|
+ stmg %r2,%r6,160(%r15) # save call-clobbered arg regs |
|
+ cfi_offset (r2, -360) # + r6 needed as arg for |
|
+ cfi_offset (r3, -352) # _dl_profile_fixup |
|
+ cfi_offset (r4, -344) |
|
+ cfi_offset (r5, -336) |
|
+ cfi_offset (r6, -328) |
|
+ std %f0,200(%r15) |
|
+ cfi_offset (f0, -320) |
|
+ std %f2,208(%r15) |
|
+ cfi_offset (f2, -312) |
|
+ std %f4,216(%r15) |
|
+ cfi_offset (f4, -304) |
|
+ std %f6,224(%r15) |
|
+ cfi_offset (f6, -296) |
|
#ifdef RESTORE_VRS |
|
- aghi %r15,-288 # create stack frame |
|
.machine push |
|
.machine "z13" |
|
- vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers |
|
+ vstm %v24,%v31,232(%r15) # store call-clobbered vector arguments |
|
cfi_offset (v24, -288) |
|
cfi_offset (v25, -272) |
|
cfi_offset (v26, -256) |
|
@@ -143,31 +146,28 @@ _dl_runtime_profile: |
|
cfi_offset (v30, -192) |
|
cfi_offset (v31, -176) |
|
.machine pop |
|
-#else |
|
- aghi %r15,-160 # create stack frame |
|
#endif |
|
- stg %r12,0(%r15) # save backchain |
|
lmg %r2,%r3,48(%r12) # load arguments saved by PLT |
|
lgr %r4,%r14 # return address as third parameter |
|
- la %r5,64(%r12) # pointer to struct La_s390_64_regs |
|
+ la %r5,160(%r15) # pointer to struct La_s390_64_regs |
|
la %r6,40(%r12) # long int * framesize |
|
brasl %r14,_dl_profile_fixup # call resolver |
|
lgr %r1,%r2 # function addr returned in r2 |
|
- ld %f0,104(%r12) # restore call-clobbered arg fprs |
|
- ld %f2,112(%r12) |
|
- ld %f4,120(%r12) |
|
- ld %f6,128(%r12) |
|
+ ld %f0,200(%r15) # restore call-clobbered arg fprs |
|
+ ld %f2,208(%r15) |
|
+ ld %f4,216(%r15) |
|
+ ld %f6,224(%r15) |
|
#ifdef RESTORE_VRS |
|
.machine push |
|
.machine "z13" |
|
- vlm %v24,%v31,160(%r15) # restore call-clobbered arg vrs |
|
+ vlm %v24,%v31,232(%r15) # restore call-clobbered arg vrs |
|
.machine pop |
|
#endif |
|
lg %r0,40(%r12) # load framesize |
|
ltgr %r0,%r0 |
|
jnm 1f |
|
|
|
- lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call |
|
+ lmg %r2,%r6,160(%r15) # framesize < 0 means no pltexit call |
|
# so we can do a tail call without |
|
# copying the arg overflow area |
|
lgr %r15,%r12 # remove stack frame |
|
@@ -177,7 +177,9 @@ _dl_runtime_profile: |
|
br %r1 # tail-call to resolved function |
|
|
|
cfi_def_cfa_register (12) |
|
-1: jz 4f # framesize == 0 ? |
|
+1: la %r4,160(%r15) # pointer to struct La_s390_64_regs |
|
+ stg %r4,64(%r12) |
|
+ jz 4f # framesize == 0 ? |
|
aghi %r0,7 # align framesize to 8 |
|
nill %r0,0xfff8 |
|
slgr %r15,%r0 # make room for framesize bytes |
|
@@ -189,21 +191,33 @@ _dl_runtime_profile: |
|
la %r2,8(%r2) # depending on framesize |
|
la %r3,8(%r3) |
|
brctg %r0,3b |
|
-4: lmg %r2,%r6,64(%r12) # restore call-clobbered arg gprs |
|
+4: lmg %r2,%r6,0(%r4) # restore call-clobbered arg gprs |
|
basr %r14,%r1 # call resolved function |
|
- stg %r2,136(%r12) # store return values r2, f0 |
|
- std %f0,144(%r12) # to struct La_s390_64_retval |
|
- lmg %r2,%r3,48(%r12) # load arguments saved by PLT |
|
- la %r4,64(%r12) # pointer to struct La_s390_64_regs |
|
- la %r5,136(%r12) # pointer to struct La_s390_64_retval |
|
+ stg %r2,72(%r12) # store return values r2, f0 |
|
+ std %f0,80(%r12) # to struct La_s390_64_retval |
|
+#ifdef RESTORE_VRS |
|
+ .machine push |
|
+ .machine "z13" |
|
+ vst %v24,88(%r12) # store return value v24 |
|
+ .machine pop |
|
+#endif |
|
+ lmg %r2,%r4,48(%r12) # r2, r3: load arguments saved by PLT |
|
+ # r4: pointer to struct La_s390_64_regs |
|
+ la %r5,72(%r12) # pointer to struct La_s390_64_retval |
|
brasl %r14,_dl_call_pltexit |
|
|
|
lgr %r15,%r12 # remove stack frame |
|
cfi_def_cfa_register (15) |
|
lg %r14,32(%r15) # restore registers |
|
lg %r12,24(%r15) |
|
- lg %r2,136(%r15) # restore return values |
|
- ld %f0,144(%r15) |
|
+ lg %r2,72(%r15) # restore return values |
|
+ ld %f0,80(%r15) |
|
+#ifdef RESTORE_VRS |
|
+ .machine push |
|
+ .machine "z13" |
|
+ vl %v24,88(%r15) # restore return value v24 |
|
+ .machine pop |
|
+#endif |
|
br %r14 # Jump back to caller |
|
|
|
cfi_endproc
|
|
|