diff -urp libffi-3.0.13/src/powerpc/ffi.c libffi-current/src/powerpc/ffi.c --- libffi-3.0.13/src/powerpc/ffi.c 2013-03-16 22:46:20.000000000 +1030 +++ libffi-current/src/powerpc/ffi.c 2013-11-18 00:48:55.218044221 +1030 @@ -48,12 +48,8 @@ enum { FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */ - FLAG_SYSV_SMST_R4 = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte - structs. */ - FLAG_SYSV_SMST_R3 = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte - structs. */ - FLAG_ARG_NEEDS_COPY = 1 << (31- 7), + FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by ELFv2 */ #ifndef __NO_FPRS__ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */ #endif @@ -132,6 +128,9 @@ ffi_prep_args_SYSV (extended_cif *ecif, int i; ffi_type **ptr; +#ifndef __NO_FPRS__ + double double_tmp; +#endif union { void **v; char **c; @@ -151,7 +150,6 @@ ffi_prep_args_SYSV (extended_cif *ecif, gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS; intarg_count = 0; #ifndef __NO_FPRS__ - double double_tmp; fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS; fparg_count = 0; copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c); @@ -374,7 +372,7 @@ ffi_prep_args_SYSV (extended_cif *ecif, FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS); /* The assert below is testing that the number of integer arguments agrees with the number found in ffi_prep_cif_machdep(). However, intarg_count - is incremeneted whenever we place an FP arg on the stack, so account for + is incremented whenever we place an FP arg on the stack, so account for that before our assert test. */ #ifndef __NO_FPRS__ if (fparg_count > NUM_FPR_ARG_REGISTERS) @@ -392,6 +390,45 @@ enum { }; enum { ASM_NEEDS_REGISTERS64 = 4 }; +#if _CALL_ELF == 2 +static unsigned int +discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum) +{ + switch (t->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + *elnum = 1; + return (int) t->type; + + case FFI_TYPE_STRUCT:; + { + unsigned int base_elt = 0, total_elnum = 0; + ffi_type **el = t->elements; + while (*el) + { + unsigned int el_elt, el_elnum = 0; + el_elt = discover_homogeneous_aggregate (*el, &el_elnum); + if (el_elt == 0 + || (base_elt && base_elt != el_elt)) + return 0; + base_elt = el_elt; + total_elnum += el_elnum; + if (total_elnum > 8) + return 0; + el++; + } + *elnum = total_elnum; + return base_elt; + } + + default: + return 0; + } +} +#endif + + /* ffi_prep_args64 is called by the assembly routine once stack space has been allocated for the function's arguments. @@ -437,6 +474,7 @@ ffi_prep_args64 (extended_cif *ecif, uns unsigned long *ul; float *f; double *d; + size_t p; } valp; /* 'stacktop' points at the previous backchain pointer. */ @@ -452,9 +490,9 @@ ffi_prep_args64 (extended_cif *ecif, uns /* 'fpr_base' points at the space for fpr3, and grows upwards as we use FPR registers. 
*/ valp fpr_base; - int fparg_count; + unsigned int fparg_count; - int i, words; + unsigned int i, words, nargs, nfixedargs; ffi_type **ptr; double double_tmp; union { @@ -471,11 +509,18 @@ ffi_prep_args64 (extended_cif *ecif, uns double **d; } p_argv; unsigned long gprvalue; +#ifdef __STRUCT_PARM_ALIGN__ + unsigned long align; +#endif stacktop.c = (char *) stack + bytes; gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64; gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64; +#if _CALL_ELF == 2 + rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64; +#else rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64; +#endif fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64; fparg_count = 0; next_arg.ul = gpr_base.ul; @@ -491,30 +536,36 @@ ffi_prep_args64 (extended_cif *ecif, uns /* Now for the arguments. */ p_argv.v = ecif->avalue; - for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs; - i > 0; - i--, ptr++, p_argv.v++) + nargs = ecif->cif->nargs; + nfixedargs = ecif->cif->nfixedargs; + for (ptr = ecif->cif->arg_types, i = 0; + i < nargs; + i++, ptr++, p_argv.v++) { + unsigned int elt, elnum; + switch ((*ptr)->type) { case FFI_TYPE_FLOAT: double_tmp = **p_argv.f; - *next_arg.f = (float) double_tmp; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.f = (float) double_tmp; if (++next_arg.ul == gpr_end.ul) next_arg.ul = rest.ul; - if (fparg_count < NUM_FPR_ARG_REGISTERS64) - *fpr_base.d++ = double_tmp; fparg_count++; FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); break; case FFI_TYPE_DOUBLE: double_tmp = **p_argv.d; - *next_arg.d = double_tmp; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.d = double_tmp; if (++next_arg.ul == gpr_end.ul) next_arg.ul = rest.ul; - if (fparg_count < NUM_FPR_ARG_REGISTERS64) - *fpr_base.d++ = double_tmp; fparg_count++; FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); break; @@ -522,18 +573,20 @@ ffi_prep_args64 (extended_cif *ecif, uns #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: double_tmp = (*p_argv.d)[0]; - *next_arg.d = double_tmp; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.d = double_tmp; if (++next_arg.ul == gpr_end.ul) next_arg.ul = rest.ul; - if (fparg_count < NUM_FPR_ARG_REGISTERS64) - *fpr_base.d++ = double_tmp; fparg_count++; double_tmp = (*p_argv.d)[1]; - *next_arg.d = double_tmp; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.d = double_tmp; if (++next_arg.ul == gpr_end.ul) next_arg.ul = rest.ul; - if (fparg_count < NUM_FPR_ARG_REGISTERS64) - *fpr_base.d++ = double_tmp; fparg_count++; FFI_ASSERT (__LDBL_MANT_DIG__ == 106); FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); @@ -541,27 +594,86 @@ ffi_prep_args64 (extended_cif *ecif, uns #endif case FFI_TYPE_STRUCT: - words = ((*ptr)->size + 7) / 8; - if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) - { - size_t first = gpr_end.c - next_arg.c; - memcpy (next_arg.c, *p_argv.c, first); - memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); - next_arg.c = rest.c + words * 8 - first; +#ifdef __STRUCT_PARM_ALIGN__ + align = (*ptr)->alignment; + if (align > __STRUCT_PARM_ALIGN__) + align = __STRUCT_PARM_ALIGN__; + if (align > 1) + next_arg.p = ALIGN (next_arg.p, align); +#endif + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (*ptr, &elnum); +#endif + if (elt) + { + union { + void *v; + float *f; + double 
*d; + } arg; + + arg.v = *p_argv.v; + if (elt == FFI_TYPE_FLOAT) + { + do + { + double_tmp = *arg.f++; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 + && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.f = (float) double_tmp; + if (++next_arg.f == gpr_end.f) + next_arg.f = rest.f; + fparg_count++; + } + while (--elnum != 0); + if ((next_arg.p & 3) != 0) + { + if (++next_arg.f == gpr_end.f) + next_arg.f = rest.f; + } + } + else + do + { + double_tmp = *arg.d++; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.d = double_tmp; + if (++next_arg.d == gpr_end.d) + next_arg.d = rest.d; + fparg_count++; + } + while (--elnum != 0); } else { - char *where = next_arg.c; - - /* Structures with size less than eight bytes are passed - left-padded. */ - if ((*ptr)->size < 8) - where += 8 - (*ptr)->size; + words = ((*ptr)->size + 7) / 8; + if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) + { + size_t first = gpr_end.c - next_arg.c; + memcpy (next_arg.c, *p_argv.c, first); + memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); + next_arg.c = rest.c + words * 8 - first; + } + else + { + char *where = next_arg.c; - memcpy (where, *p_argv.c, (*ptr)->size); - next_arg.ul += words; - if (next_arg.ul == gpr_end.ul) - next_arg.ul = rest.ul; +#ifndef __LITTLE_ENDIAN__ + /* Structures with size less than eight bytes are passed + left-padded. */ + if ((*ptr)->size < 8) + where += 8 - (*ptr)->size; +#endif + memcpy (where, *p_argv.c, (*ptr)->size); + next_arg.ul += words; + if (next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + } } break; @@ -605,24 +717,22 @@ ffi_prep_args64 (extended_cif *ecif, uns /* Perform machine dependent cif processing */ -ffi_status -ffi_prep_cif_machdep (ffi_cif *cif) +static ffi_status +ffi_prep_cif_machdep_core (ffi_cif *cif) { /* All this is for the SYSV and LINUX64 ABI. */ - int i; ffi_type **ptr; unsigned bytes; - int fparg_count = 0, intarg_count = 0; - unsigned flags = 0; + unsigned i, fparg_count = 0, intarg_count = 0; + unsigned flags = cif->flags; unsigned struct_copy_size = 0; unsigned type = cif->rtype->type; unsigned size = cif->rtype->size; + /* The machine-independent calculation of cif->bytes doesn't work + for us. Redo the calculation. */ if (cif->abi != FFI_LINUX64) { - /* All the machine-independent calculation of cif->bytes will be wrong. - Redo the calculation for SYSV. */ - /* Space for the frame pointer, callee's LR, and the asm's temp regs. */ bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int); @@ -632,13 +742,20 @@ ffi_prep_cif_machdep (ffi_cif *cif) else { /* 64-bit ABI. */ +#if _CALL_ELF == 2 + /* Space for backchain, CR, LR, TOC and the asm's temp regs. */ + bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long); + /* Space for the general registers. */ + bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long); +#else /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp regs. */ bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long); /* Space for the mandatory parm save area and general registers. */ bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long); +#endif } /* Return value handling. The rules for SYSV are as follows: @@ -658,19 +775,23 @@ ffi_prep_cif_machdep (ffi_cif *cif) - soft-float float/doubles are treated as UINT32/UINT64 respectivley. - soft-float long doubles are returned in gpr3-gpr6. 
*/ /* First translate for softfloat/nonlinux */ - if (cif->abi == FFI_LINUX_SOFT_FLOAT) { - if (type == FFI_TYPE_FLOAT) - type = FFI_TYPE_UINT32; - if (type == FFI_TYPE_DOUBLE) - type = FFI_TYPE_UINT64; - if (type == FFI_TYPE_LONGDOUBLE) - type = FFI_TYPE_UINT128; - } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) { + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + { + if (type == FFI_TYPE_FLOAT) + type = FFI_TYPE_UINT32; + if (type == FFI_TYPE_DOUBLE) + type = FFI_TYPE_UINT64; + if (type == FFI_TYPE_LONGDOUBLE) + type = FFI_TYPE_UINT128; + } + else if (cif->abi != FFI_LINUX + && cif->abi != FFI_LINUX64) + { #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE - if (type == FFI_TYPE_LONGDOUBLE) - type = FFI_TYPE_STRUCT; + if (type == FFI_TYPE_LONGDOUBLE) + type = FFI_TYPE_STRUCT; #endif - } + } switch (type) { @@ -697,35 +818,40 @@ ffi_prep_cif_machdep (ffi_cif *cif) break; case FFI_TYPE_STRUCT: - if (cif->abi == FFI_SYSV) + /* + * The final SYSV ABI says that structures smaller or equal 8 bytes + * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them + * in memory. + * + * NOTE: The assembly code can safely assume that it just needs to + * store both r3 and r4 into a 8-byte word-aligned buffer, as + * we allocate a temporary buffer in ffi_call() if this flag is + * set. + */ + if (cif->abi == FFI_SYSV && size <= 8) { - /* The final SYSV ABI says that structures smaller or equal 8 bytes - are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them - in memory. */ - - /* Treat structs with size <= 8 bytes. */ - if (size <= 8) + flags |= FLAG_RETURNS_SMST; + break; + } +#if _CALL_ELF == 2 + if (cif->abi == FFI_LINUX64) + { + unsigned int elt, elnum; + elt = discover_homogeneous_aggregate (cif->rtype, &elnum); + if (elt) + { + if (elt == FFI_TYPE_DOUBLE) + flags |= FLAG_RETURNS_64BITS; + flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST; + break; + } + if (size <= 16) { flags |= FLAG_RETURNS_SMST; - /* These structs are returned in r3. We pack the type and the - precalculated shift value (needed in the sysv.S) into flags. - The same applies for the structs returned in r3/r4. */ - if (size <= 4) - { - flags |= FLAG_SYSV_SMST_R3; - flags |= 8 * (4 - size) << 8; - break; - } - /* These structs are returned in r3 and r4. See above. */ - if (size <= 8) - { - flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4; - flags |= 8 * (8 - size) << 8; - break; - } + break; } } - +#endif intarg_count++; flags |= FLAG_RETVAL_REFERENCE; /* Fall through. 
*/ @@ -841,27 +967,54 @@ ffi_prep_cif_machdep (ffi_cif *cif) else for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) { + unsigned int elt, elnum; +#ifdef __STRUCT_PARM_ALIGN__ + unsigned int align; +#endif + switch ((*ptr)->type) { #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: - if (cif->abi == FFI_LINUX_SOFT_FLOAT) - intarg_count += 4; - else - { - fparg_count += 2; - intarg_count += 2; - } + fparg_count += 2; + intarg_count += 2; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; #endif case FFI_TYPE_FLOAT: case FFI_TYPE_DOUBLE: fparg_count++; intarg_count++; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; case FFI_TYPE_STRUCT: +#ifdef __STRUCT_PARM_ALIGN__ + align = (*ptr)->alignment; + if (align > __STRUCT_PARM_ALIGN__) + align = __STRUCT_PARM_ALIGN__; + align = align / 8; + if (align > 1) + intarg_count = ALIGN (intarg_count, align); +#endif intarg_count += ((*ptr)->size + 7) / 8; + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (*ptr, &elnum); +#endif + if (elt) + { + fparg_count += elnum; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; + } + else + { + if (intarg_count > NUM_GPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; + } break; case FFI_TYPE_POINTER: @@ -877,9 +1030,11 @@ ffi_prep_cif_machdep (ffi_cif *cif) /* Everything else is passed as a 8-byte word in a GPR, either the object itself or a pointer to it. */ intarg_count++; + if (intarg_count > NUM_GPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; default: - FFI_ASSERT (0); + FFI_ASSERT (0); } } @@ -917,8 +1072,13 @@ ffi_prep_cif_machdep (ffi_cif *cif) #endif /* Stack space. */ +#if _CALL_ELF == 2 + if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0) + bytes += intarg_count * sizeof (long); +#else if (intarg_count > NUM_GPR_ARG_REGISTERS64) bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long); +#endif } /* The stack space allocated needs to be a multiple of 16 bytes. */ @@ -933,6 +1093,26 @@ ffi_prep_cif_machdep (ffi_cif *cif) return FFI_OK; } +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + cif->nfixedargs = cif->nargs; + return ffi_prep_cif_machdep_core (cif); +} + +ffi_status +ffi_prep_cif_machdep_var (ffi_cif *cif, + unsigned int nfixedargs, + unsigned int ntotalargs MAYBE_UNUSED) +{ + cif->nfixedargs = nfixedargs; +#if _CALL_ELF == 2 + if (cif->abi == FFI_LINUX64) + cif->flags |= FLAG_ARG_NEEDS_PSAVE; +#endif + return ffi_prep_cif_machdep_core (cif); +} + extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *, void (*fn)(void)); extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long, @@ -944,30 +1124,28 @@ ffi_call(ffi_cif *cif, void (*fn)(void), { /* * The final SYSV ABI says that structures smaller or equal 8 bytes - * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them + * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them * in memory. * - * Just to keep things simple for the assembly code, we will always - * bounce-buffer struct return values less than or equal to 8 bytes. - * This allows the ASM to handle SYSV small structures by directly - * writing r3 and r4 to memory without worrying about struct size. + * We bounce-buffer SYSV small struct return values so that sysv.S + * can write r3 and r4 to memory without worrying about struct size. + * + * For ELFv2 ABI, use a bounce buffer for homogeneous structs too, + * for similar reasons. 
*/ - unsigned int smst_buffer[2]; + unsigned long smst_buffer[8]; extended_cif ecif; - unsigned int rsize = 0; ecif.cif = cif; ecif.avalue = avalue; - /* Ensure that we have a valid struct return value */ ecif.rvalue = rvalue; - if (cif->rtype->type == FFI_TYPE_STRUCT) { - rsize = cif->rtype->size; - if (rsize <= 8) - ecif.rvalue = smst_buffer; - else if (!rvalue) - ecif.rvalue = alloca(rsize); - } + if ((cif->flags & FLAG_RETURNS_SMST) != 0) + ecif.rvalue = smst_buffer; + /* Ensure that we have a valid struct return value. + FIXME: Isn't this just papering over a user problem? */ + else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT) + ecif.rvalue = alloca (cif->rtype->size); switch (cif->abi) { @@ -992,11 +1170,26 @@ ffi_call(ffi_cif *cif, void (*fn)(void), /* Check for a bounce-buffered return value */ if (rvalue && ecif.rvalue == smst_buffer) - memcpy(rvalue, smst_buffer, rsize); + { + unsigned int rsize = cif->rtype->size; +#ifndef __LITTLE_ENDIAN__ + /* The SYSV ABI returns a structure of up to 4 bytes in size + left-padded in r3. */ + if (cif->abi == FFI_SYSV && rsize <= 4) + memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize); + /* The SYSV ABI returns a structure of up to 8 bytes in size + left-padded in r3/r4, and the ELFv2 ABI similarly returns a + structure of up to 8 bytes in size left-padded in r3. */ + else if (rsize <= 8) + memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize); + else +#endif + memcpy (rvalue, smst_buffer, rsize); + } } -#ifndef POWERPC64 +#if !defined POWERPC64 || _CALL_ELF == 2 #define MIN_CACHE_LINE_SIZE 8 static void @@ -1020,6 +1213,22 @@ ffi_prep_closure_loc (ffi_closure *closu void *codeloc) { #ifdef POWERPC64 +# if _CALL_ELF == 2 + unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + + if (cif->abi != FFI_LINUX64) + return FFI_BAD_ABI; + + tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */ + tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */ + tramp[2] = 0x7d8903a6; /* mtctr 12 */ + tramp[3] = 0x4e800420; /* bctr */ + /* 1: .quad function_addr */ + /* 2: .quad context */ + *(void **) &tramp[4] = (void *) ffi_closure_LINUX64; + *(void **) &tramp[6] = codeloc; + flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE); +# else void **tramp = (void **) &closure->tramp[0]; if (cif->abi != FFI_LINUX64) @@ -1027,6 +1236,7 @@ ffi_prep_closure_loc (ffi_closure *closu /* Copy function address and TOC from ffi_closure_LINUX64. */ memcpy (tramp, (char *) ffi_closure_LINUX64, 16); tramp[2] = codeloc; +# endif #else unsigned int *tramp; @@ -1236,6 +1446,7 @@ ffi_closure_helper_SYSV (ffi_closure *cl case FFI_TYPE_SINT8: case FFI_TYPE_UINT8: +#ifndef __LITTLE_ENDIAN__ /* there are 8 gpr registers used to pass values */ if (ng < 8) { @@ -1249,9 +1460,11 @@ ffi_closure_helper_SYSV (ffi_closure *cl pst++; } break; +#endif case FFI_TYPE_SINT16: case FFI_TYPE_UINT16: +#ifndef __LITTLE_ENDIAN__ /* there are 8 gpr registers used to pass values */ if (ng < 8) { @@ -1265,6 +1478,7 @@ ffi_closure_helper_SYSV (ffi_closure *cl pst++; } break; +#endif case FFI_TYPE_SINT32: case FFI_TYPE_UINT32: @@ -1369,16 +1583,20 @@ ffi_closure_helper_LINUX64 (ffi_closure void **avalue; ffi_type **arg_types; - long i, avn; + unsigned long i, avn, nfixedargs; ffi_cif *cif; ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64; +#ifdef __STRUCT_PARM_ALIGN__ + unsigned long align; +#endif cif = closure->cif; avalue = alloca (cif->nargs * sizeof (void *)); - /* Copy the caller's structure return value address so that the closure - returns the data directly to the caller. 
*/ - if (cif->rtype->type == FFI_TYPE_STRUCT) + /* Copy the caller's structure return value address so that the + closure returns the data directly to the caller. */ + if (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & FLAG_RETURNS_SMST) == 0) { rvalue = (void *) *pst; pst++; @@ -1386,30 +1604,39 @@ ffi_closure_helper_LINUX64 (ffi_closure i = 0; avn = cif->nargs; + nfixedargs = cif->nfixedargs; arg_types = cif->arg_types; /* Grab the addresses of the arguments from the stack frame. */ while (i < avn) { + unsigned int elt, elnum; + switch (arg_types[i]->type) { case FFI_TYPE_SINT8: case FFI_TYPE_UINT8: +#ifndef __LITTLE_ENDIAN__ avalue[i] = (char *) pst + 7; pst++; break; +#endif case FFI_TYPE_SINT16: case FFI_TYPE_UINT16: +#ifndef __LITTLE_ENDIAN__ avalue[i] = (char *) pst + 6; pst++; break; +#endif case FFI_TYPE_SINT32: case FFI_TYPE_UINT32: +#ifndef __LITTLE_ENDIAN__ avalue[i] = (char *) pst + 4; pst++; break; +#endif case FFI_TYPE_SINT64: case FFI_TYPE_UINT64: @@ -1419,12 +1646,82 @@ ffi_closure_helper_LINUX64 (ffi_closure break; case FFI_TYPE_STRUCT: - /* Structures with size less than eight bytes are passed - left-padded. */ - if (arg_types[i]->size < 8) - avalue[i] = (char *) pst + 8 - arg_types[i]->size; +#ifdef __STRUCT_PARM_ALIGN__ + align = arg_types[i]->alignment; + if (align > __STRUCT_PARM_ALIGN__) + align = __STRUCT_PARM_ALIGN__; + if (align > 1) + pst = (unsigned long *) ALIGN ((size_t) pst, align); +#endif + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (arg_types[i], &elnum); +#endif + if (elt) + { + union { + void *v; + unsigned long *ul; + float *f; + double *d; + size_t p; + } to, from; + + /* Repackage the aggregate from its parts. The + aggregate size is not greater than the space taken by + the registers so store back to the register/parameter + save arrays. */ + if (pfr + elnum <= end_pfr) + to.v = pfr; + else + to.v = pst; + + avalue[i] = to.v; + from.ul = pst; + if (elt == FFI_TYPE_FLOAT) + { + do + { + if (pfr < end_pfr && i < nfixedargs) + { + *to.f = (float) pfr->d; + pfr++; + } + else + *to.f = *from.f; + to.f++; + from.f++; + } + while (--elnum != 0); + } + else + { + do + { + if (pfr < end_pfr && i < nfixedargs) + { + *to.d = pfr->d; + pfr++; + } + else + *to.d = *from.d; + to.d++; + from.d++; + } + while (--elnum != 0); + } + } else - avalue[i] = pst; + { +#ifndef __LITTLE_ENDIAN__ + /* Structures with size less than eight bytes are passed + left-padded. */ + if (arg_types[i]->size < 8) + avalue[i] = (char *) pst + 8 - arg_types[i]->size; + else +#endif + avalue[i] = pst; + } pst += (arg_types[i]->size + 7) / 8; break; @@ -1436,7 +1733,7 @@ ffi_closure_helper_LINUX64 (ffi_closure /* there are 13 64bit floating point registers */ - if (pfr < end_pfr) + if (pfr < end_pfr && i < nfixedargs) { double temp = pfr->d; pfr->f = (float) temp; @@ -1452,7 +1749,7 @@ ffi_closure_helper_LINUX64 (ffi_closure /* On the outgoing stack all values are aligned to 8 */ /* there are 13 64bit floating point registers */ - if (pfr < end_pfr) + if (pfr < end_pfr && i < nfixedargs) { avalue[i] = pfr; pfr++; @@ -1464,14 +1761,14 @@ ffi_closure_helper_LINUX64 (ffi_closure #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: - if (pfr + 1 < end_pfr) + if (pfr + 1 < end_pfr && i + 1 < nfixedargs) { avalue[i] = pfr; pfr += 2; } else { - if (pfr < end_pfr) + if (pfr < end_pfr && i < nfixedargs) { /* Passed partly in f13 and partly on the stack. Move it all to the stack. 
*/ @@ -1495,5 +1792,14 @@ ffi_closure_helper_LINUX64 (ffi_closure (closure->fun) (cif, rvalue, avalue, closure->user_data); /* Tell ffi_closure_LINUX64 how to perform return type promotions. */ + if ((cif->flags & FLAG_RETURNS_SMST) != 0) + { + if ((cif->flags & FLAG_RETURNS_FP) == 0) + return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1; + else if ((cif->flags & FLAG_RETURNS_64BITS) != 0) + return FFI_V2_TYPE_DOUBLE_HOMOG; + else + return FFI_V2_TYPE_FLOAT_HOMOG; + } return cif->rtype->type; } diff -urp libffi-3.0.13/src/powerpc/ffitarget.h libffi-current/src/powerpc/ffitarget.h --- libffi-3.0.13/src/powerpc/ffitarget.h 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/src/powerpc/ffitarget.h 2013-11-17 09:07:45.433681274 +1030 @@ -106,6 +106,10 @@ typedef enum ffi_abi { #define FFI_CLOSURES 1 #define FFI_NATIVE_RAW_API 0 +#if defined (POWERPC) || defined (POWERPC_FREEBSD) +# define FFI_TARGET_SPECIFIC_VARIADIC 1 +# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs +#endif /* For additional types like the below, take care about the order in ppc_closures.S. They must follow after the FFI_TYPE_LAST. */ @@ -118,14 +122,23 @@ typedef enum ffi_abi { defined in ffi.c, to determine the exact return type and its size. */ #define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2) -#if defined(POWERPC64) || defined(POWERPC_AIX) +/* Used by ELFv2 for homogenous structure returns. */ +#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1) +#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2) +#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3) + +#if _CALL_ELF == 2 +# define FFI_TRAMPOLINE_SIZE 32 +#else +# if defined(POWERPC64) || defined(POWERPC_AIX) # if defined(POWERPC_DARWIN64) # define FFI_TRAMPOLINE_SIZE 48 # else # define FFI_TRAMPOLINE_SIZE 24 # endif -#else /* POWERPC || POWERPC_AIX */ +# else /* POWERPC || POWERPC_AIX */ # define FFI_TRAMPOLINE_SIZE 40 +# endif #endif #ifndef LIBFFI_ASM diff -urp libffi-3.0.13/src/powerpc/linux64_closure.S libffi-current/src/powerpc/linux64_closure.S --- libffi-3.0.13/src/powerpc/linux64_closure.S 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/src/powerpc/linux64_closure.S 2013-11-17 09:11:54.267742403 +1030 @@ -33,15 +33,22 @@ #ifdef __powerpc64__ FFI_HIDDEN (ffi_closure_LINUX64) .globl ffi_closure_LINUX64 +# if _CALL_ELF == 2 + .text +ffi_closure_LINUX64: + addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha + addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l + .localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64 +# else .section ".opd","aw" .align 3 ffi_closure_LINUX64: -#ifdef _CALL_LINUX +# ifdef _CALL_LINUX .quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0 .type ffi_closure_LINUX64,@function .text .L.ffi_closure_LINUX64: -#else +# else FFI_HIDDEN (.ffi_closure_LINUX64) .globl .ffi_closure_LINUX64 .quad .ffi_closure_LINUX64,.TOC.@tocbase,0 @@ -49,61 +56,103 @@ ffi_closure_LINUX64: .type .ffi_closure_LINUX64,@function .text .ffi_closure_LINUX64: -#endif +# endif +# endif + +# if _CALL_ELF == 2 +# 32 byte special reg save area + 64 byte parm save area and retval +# + 13*8 fpr save area + round to 16 +# define STACKFRAME 208 +# define PARMSAVE 32 +# No parameter save area is needed for the call to ffi_closure_helper_LINUX64, +# so return value can start there. 
+# define RETVAL PARMSAVE +# else +# 48 bytes special reg save area + 64 bytes parm save area +# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16 +# define STACKFRAME 240 +# define PARMSAVE 48 +# define RETVAL PARMSAVE+64 +# endif + .LFB1: - # save general regs into parm save area - std %r3, 48(%r1) - std %r4, 56(%r1) - std %r5, 64(%r1) - std %r6, 72(%r1) +# if _CALL_ELF == 2 + ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif + mflr %r0 + lwz %r12, 28(%r12) # cif->flags + mtcrf 0x40, %r12 + addi %r12, %r1, PARMSAVE + bt 7, .Lparmsave + # Our caller has not allocated a parameter save area. + # We need to allocate one here and use it to pass gprs to + # ffi_closure_helper_LINUX64. The return value area will do. + addi %r12, %r1, -STACKFRAME+RETVAL +.Lparmsave: + std %r0, 16(%r1) + # Save general regs into parm save area + std %r3, 0(%r12) + std %r4, 8(%r12) + std %r5, 16(%r12) + std %r6, 24(%r12) + std %r7, 32(%r12) + std %r8, 40(%r12) + std %r9, 48(%r12) + std %r10, 56(%r12) + + # load up the pointer to the parm save area + mr %r5, %r12 +# else mflr %r0 + # Save general regs into parm save area + # This is the parameter save area set up by our caller. + std %r3, PARMSAVE+0(%r1) + std %r4, PARMSAVE+8(%r1) + std %r5, PARMSAVE+16(%r1) + std %r6, PARMSAVE+24(%r1) + std %r7, PARMSAVE+32(%r1) + std %r8, PARMSAVE+40(%r1) + std %r9, PARMSAVE+48(%r1) + std %r10, PARMSAVE+56(%r1) - std %r7, 80(%r1) - std %r8, 88(%r1) - std %r9, 96(%r1) - std %r10, 104(%r1) std %r0, 16(%r1) - # mandatory 48 bytes special reg save area + 64 bytes parm save area - # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16 - stdu %r1, -240(%r1) -.LCFI0: + # load up the pointer to the parm save area + addi %r5, %r1, PARMSAVE +# endif # next save fpr 1 to fpr 13 - stfd %f1, 128+(0*8)(%r1) - stfd %f2, 128+(1*8)(%r1) - stfd %f3, 128+(2*8)(%r1) - stfd %f4, 128+(3*8)(%r1) - stfd %f5, 128+(4*8)(%r1) - stfd %f6, 128+(5*8)(%r1) - stfd %f7, 128+(6*8)(%r1) - stfd %f8, 128+(7*8)(%r1) - stfd %f9, 128+(8*8)(%r1) - stfd %f10, 128+(9*8)(%r1) - stfd %f11, 128+(10*8)(%r1) - stfd %f12, 128+(11*8)(%r1) - stfd %f13, 128+(12*8)(%r1) + stfd %f1, -104+(0*8)(%r1) + stfd %f2, -104+(1*8)(%r1) + stfd %f3, -104+(2*8)(%r1) + stfd %f4, -104+(3*8)(%r1) + stfd %f5, -104+(4*8)(%r1) + stfd %f6, -104+(5*8)(%r1) + stfd %f7, -104+(6*8)(%r1) + stfd %f8, -104+(7*8)(%r1) + stfd %f9, -104+(8*8)(%r1) + stfd %f10, -104+(9*8)(%r1) + stfd %f11, -104+(10*8)(%r1) + stfd %f12, -104+(11*8)(%r1) + stfd %f13, -104+(12*8)(%r1) - # set up registers for the routine that actually does the work - # get the context pointer from the trampoline - mr %r3, %r11 + # load up the pointer to the saved fpr registers */ + addi %r6, %r1, -104 - # now load up the pointer to the result storage - addi %r4, %r1, 112 + # load up the pointer to the result storage + addi %r4, %r1, -STACKFRAME+RETVAL - # now load up the pointer to the parameter save area - # in the previous frame - addi %r5, %r1, 240 + 48 + stdu %r1, -STACKFRAME(%r1) +.LCFI0: - # now load up the pointer to the saved fpr registers */ - addi %r6, %r1, 128 + # get the context pointer from the trampoline + mr %r3, %r11 # make the call -#ifdef _CALL_LINUX +# if defined _CALL_LINUX || _CALL_ELF == 2 bl ffi_closure_helper_LINUX64 -#else +# else bl .ffi_closure_helper_LINUX64 -#endif +# endif .Lret: # now r3 contains the return type @@ -112,10 +161,12 @@ ffi_closure_LINUX64: # look up the proper starting point in table # by using return type as offset + ld %r0, STACKFRAME+16(%r1) + cmpldi %r3, 
FFI_V2_TYPE_SMALL_STRUCT + bge .Lsmall mflr %r4 # move address of .Lret to r4 sldi %r3, %r3, 4 # now multiply return type by 16 addi %r4, %r4, .Lret_type0 - .Lret - ld %r0, 240+16(%r1) add %r3, %r3, %r4 # add contents of table to table address mtctr %r3 bctr # jump to it @@ -128,89 +179,175 @@ ffi_closure_LINUX64: .Lret_type0: # case FFI_TYPE_VOID mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr nop # case FFI_TYPE_INT - lwa %r3, 112+4(%r1) +# ifdef __LITTLE_ENDIAN__ + lwa %r3, RETVAL+0(%r1) +# else + lwa %r3, RETVAL+4(%r1) +# endif mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_FLOAT - lfs %f1, 112+0(%r1) + lfs %f1, RETVAL+0(%r1) mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_DOUBLE - lfd %f1, 112+0(%r1) + lfd %f1, RETVAL+0(%r1) mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_LONGDOUBLE - lfd %f1, 112+0(%r1) + lfd %f1, RETVAL+0(%r1) mtlr %r0 - lfd %f2, 112+8(%r1) + lfd %f2, RETVAL+8(%r1) b .Lfinish # case FFI_TYPE_UINT8 - lbz %r3, 112+7(%r1) +# ifdef __LITTLE_ENDIAN__ + lbz %r3, RETVAL+0(%r1) +# else + lbz %r3, RETVAL+7(%r1) +# endif mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_SINT8 - lbz %r3, 112+7(%r1) +# ifdef __LITTLE_ENDIAN__ + lbz %r3, RETVAL+0(%r1) +# else + lbz %r3, RETVAL+7(%r1) +# endif extsb %r3,%r3 mtlr %r0 b .Lfinish # case FFI_TYPE_UINT16 - lhz %r3, 112+6(%r1) +# ifdef __LITTLE_ENDIAN__ + lhz %r3, RETVAL+0(%r1) +# else + lhz %r3, RETVAL+6(%r1) +# endif mtlr %r0 .Lfinish: - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_SINT16 - lha %r3, 112+6(%r1) +# ifdef __LITTLE_ENDIAN__ + lha %r3, RETVAL+0(%r1) +# else + lha %r3, RETVAL+6(%r1) +# endif mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_UINT32 - lwz %r3, 112+4(%r1) +# ifdef __LITTLE_ENDIAN__ + lwz %r3, RETVAL+0(%r1) +# else + lwz %r3, RETVAL+4(%r1) +# endif mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_SINT32 - lwa %r3, 112+4(%r1) +# ifdef __LITTLE_ENDIAN__ + lwa %r3, RETVAL+0(%r1) +# else + lwa %r3, RETVAL+4(%r1) +# endif mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_UINT64 - ld %r3, 112+0(%r1) + ld %r3, RETVAL+0(%r1) mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_SINT64 - ld %r3, 112+0(%r1) + ld %r3, RETVAL+0(%r1) mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr # case FFI_TYPE_STRUCT mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr nop # case FFI_TYPE_POINTER - ld %r3, 112+0(%r1) + ld %r3, RETVAL+0(%r1) mtlr %r0 - addi %r1, %r1, 240 + addi %r1, %r1, STACKFRAME blr -# esac +# case FFI_V2_TYPE_FLOAT_HOMOG + lfs %f1, RETVAL+0(%r1) + lfs %f2, RETVAL+4(%r1) + lfs %f3, RETVAL+8(%r1) + b .Lmorefloat +# case FFI_V2_TYPE_DOUBLE_HOMOG + lfd %f1, RETVAL+0(%r1) + lfd %f2, RETVAL+8(%r1) + lfd %f3, RETVAL+16(%r1) + lfd %f4, RETVAL+24(%r1) + mtlr %r0 + lfd %f5, RETVAL+32(%r1) + lfd %f6, RETVAL+40(%r1) + lfd %f7, RETVAL+48(%r1) + lfd %f8, RETVAL+56(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lmorefloat: + lfs %f4, RETVAL+12(%r1) + mtlr %r0 + lfs %f5, RETVAL+16(%r1) + lfs %f6, RETVAL+20(%r1) + lfs %f7, RETVAL+24(%r1) + lfs %f8, RETVAL+28(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lsmall: +# ifdef __LITTLE_ENDIAN__ + ld %r3,RETVAL+0(%r1) + mtlr %r0 + ld %r4,RETVAL+8(%r1) + addi %r1, %r1, STACKFRAME + blr +# else + # A struct smaller than a dword is returned in the low bits of r3 + # ie. right justified. 
Larger structs are passed left justified + # in r3 and r4. The return value area on the stack will have + # the structs as they are usually stored in memory. + cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes? + neg %r5, %r3 + ld %r3,RETVAL+0(%r1) + blt .Lsmalldown + mtlr %r0 + ld %r4,RETVAL+8(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lsmalldown: + addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7 + mtlr %r0 + sldi %r5, %r5, 3 + addi %r1, %r1, STACKFRAME + srd %r3, %r3, %r5 + blr +# endif + .LFE1: .long 0 .byte 0,12,0,1,128,0,0,0 -#ifdef _CALL_LINUX +# if _CALL_ELF == 2 + .size ffi_closure_LINUX64,.-ffi_closure_LINUX64 +# else +# ifdef _CALL_LINUX .size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64 -#else +# else .size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64 -#endif +# endif +# endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: @@ -239,14 +376,14 @@ ffi_closure_LINUX64: .byte 0x2 # DW_CFA_advance_loc1 .byte .LCFI0-.LFB1 .byte 0xe # DW_CFA_def_cfa_offset - .uleb128 240 + .uleb128 STACKFRAME .byte 0x11 # DW_CFA_offset_extended_sf .uleb128 0x41 .sleb128 -2 .align 3 .LEFDE1: -#endif -#if defined __ELF__ && defined __linux__ +# if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits +# endif #endif diff -urp libffi-3.0.13/src/powerpc/linux64.S libffi-current/src/powerpc/linux64.S --- libffi-3.0.13/src/powerpc/linux64.S 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/src/powerpc/linux64.S 2013-11-17 09:09:09.742314090 +1030 @@ -32,15 +32,22 @@ #ifdef __powerpc64__ .hidden ffi_call_LINUX64 .globl ffi_call_LINUX64 +# if _CALL_ELF == 2 + .text +ffi_call_LINUX64: + addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha + addi %r2, %r2, .TOC.-ffi_call_LINUX64@l + .localentry ffi_call_LINUX64, . - ffi_call_LINUX64 +# else .section ".opd","aw" .align 3 ffi_call_LINUX64: -#ifdef _CALL_LINUX +# ifdef _CALL_LINUX .quad .L.ffi_call_LINUX64,.TOC.@tocbase,0 .type ffi_call_LINUX64,@function .text .L.ffi_call_LINUX64: -#else +# else .hidden .ffi_call_LINUX64 .globl .ffi_call_LINUX64 .quad .ffi_call_LINUX64,.TOC.@tocbase,0 @@ -48,7 +55,8 @@ ffi_call_LINUX64: .type .ffi_call_LINUX64,@function .text .ffi_call_LINUX64: -#endif +# endif +# endif .LFB1: mflr %r0 std %r28, -32(%r1) @@ -63,26 +71,35 @@ ffi_call_LINUX64: mr %r31, %r5 /* flags, */ mr %r30, %r6 /* rvalue, */ mr %r29, %r7 /* function address. */ +/* Save toc pointer, not for the ffi_prep_args64 call, but for the later + bctrl function call. */ +# if _CALL_ELF == 2 + std %r2, 24(%r1) +# else std %r2, 40(%r1) +# endif /* Call ffi_prep_args64. */ mr %r4, %r1 -#ifdef _CALL_LINUX +# if defined _CALL_LINUX || _CALL_ELF == 2 bl ffi_prep_args64 -#else +# else bl .ffi_prep_args64 -#endif +# endif - ld %r0, 0(%r29) +# if _CALL_ELF == 2 + mr %r12, %r29 +# else + ld %r12, 0(%r29) ld %r2, 8(%r29) ld %r11, 16(%r29) - +# endif /* Now do the call. */ /* Set up cr1 with bits 4-7 of the flags. */ mtcrf 0x40, %r31 /* Get the address to call into CTR. */ - mtctr %r0 + mtctr %r12 /* Load all those argument registers. */ ld %r3, -32-(8*8)(%r28) ld %r4, -32-(7*8)(%r28) @@ -117,12 +134,17 @@ ffi_call_LINUX64: /* This must follow the call immediately, the unwinder uses this to find out if r2 has been saved or not. */ +# if _CALL_ELF == 2 + ld %r2, 24(%r1) +# else ld %r2, 40(%r1) +# endif /* Now, deal with the return value. */ mtcrf 0x01, %r31 - bt- 30, .Ldone_return_value - bt- 29, .Lfp_return_value + bt 31, .Lstruct_return_value + bt 30, .Ldone_return_value + bt 29, .Lfp_return_value std %r3, 0(%r30) /* Fall through... 
*/ @@ -130,7 +152,7 @@ ffi_call_LINUX64: /* Restore the registers we used and return. */ mr %r1, %r28 ld %r0, 16(%r28) - ld %r28, -32(%r1) + ld %r28, -32(%r28) mtlr %r0 ld %r29, -24(%r1) ld %r30, -16(%r1) @@ -147,14 +169,48 @@ ffi_call_LINUX64: .Lfloat_return_value: stfs %f1, 0(%r30) b .Ldone_return_value + +.Lstruct_return_value: + bf 29, .Lsmall_struct + bf 28, .Lfloat_homog_return_value + stfd %f1, 0(%r30) + stfd %f2, 8(%r30) + stfd %f3, 16(%r30) + stfd %f4, 24(%r30) + stfd %f5, 32(%r30) + stfd %f6, 40(%r30) + stfd %f7, 48(%r30) + stfd %f8, 56(%r30) + b .Ldone_return_value + +.Lfloat_homog_return_value: + stfs %f1, 0(%r30) + stfs %f2, 4(%r30) + stfs %f3, 8(%r30) + stfs %f4, 12(%r30) + stfs %f5, 16(%r30) + stfs %f6, 20(%r30) + stfs %f7, 24(%r30) + stfs %f8, 28(%r30) + b .Ldone_return_value + +.Lsmall_struct: + std %r3, 0(%r30) + std %r4, 8(%r30) + b .Ldone_return_value + .LFE1: .long 0 .byte 0,12,0,1,128,4,0,0 -#ifdef _CALL_LINUX +# if _CALL_ELF == 2 + .size ffi_call_LINUX64,.-ffi_call_LINUX64 +# else +# ifdef _CALL_LINUX .size ffi_call_LINUX64,.-.L.ffi_call_LINUX64 -#else +# else .size .ffi_call_LINUX64,.-.ffi_call_LINUX64 -#endif +# endif +# endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: @@ -197,8 +253,8 @@ ffi_call_LINUX64: .uleb128 0x4 .align 3 .LEFDE1: -#endif -#if defined __ELF__ && defined __linux__ +# if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2 .section .note.GNU-stack,"",@progbits +# endif #endif diff -urp libffi-3.0.13/src/powerpc/ppc_closure.S libffi-current/src/powerpc/ppc_closure.S --- libffi-3.0.13/src/powerpc/ppc_closure.S 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/src/powerpc/ppc_closure.S 2013-11-17 13:06:22.569393369 +1030 @@ -159,25 +159,41 @@ ENTRY(ffi_closure_SYSV) #endif # case FFI_TYPE_UINT8 +#ifdef __LITTLE_ENDIAN__ + lbz %r3,112+0(%r1) +#else lbz %r3,112+3(%r1) +#endif mtlr %r0 addi %r1,%r1,144 blr # case FFI_TYPE_SINT8 +#ifdef __LITTLE_ENDIAN__ + lbz %r3,112+0(%r1) +#else lbz %r3,112+3(%r1) +#endif extsb %r3,%r3 mtlr %r0 b .Lfinish # case FFI_TYPE_UINT16 +#ifdef __LITTLE_ENDIAN__ + lhz %r3,112+0(%r1) +#else lhz %r3,112+2(%r1) +#endif mtlr %r0 addi %r1,%r1,144 blr # case FFI_TYPE_SINT16 +#ifdef __LITTLE_ENDIAN__ + lha %r3,112+0(%r1) +#else lha %r3,112+2(%r1) +#endif mtlr %r0 addi %r1,%r1,144 blr @@ -222,7 +238,7 @@ ENTRY(ffi_closure_SYSV) lwz %r3,112+0(%r1) lwz %r4,112+4(%r1) lwz %r5,112+8(%r1) - bl .Luint128 + b .Luint128 # The return types below are only used when the ABI type is FFI_SYSV. # case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct. @@ -239,9 +255,15 @@ ENTRY(ffi_closure_SYSV) # case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct. lwz %r3,112+0(%r1) +#ifdef __LITTLE_ENDIAN__ + mtlr %r0 + addi %r1,%r1,144 + blr +#else srwi %r3,%r3,8 mtlr %r0 b .Lfinish +#endif # case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct. lwz %r3,112+0(%r1) @@ -252,20 +274,35 @@ ENTRY(ffi_closure_SYSV) # case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct. lwz %r3,112+0(%r1) lwz %r4,112+4(%r1) +#ifdef __LITTLE_ENDIAN__ + mtlr %r0 + b .Lfinish +#else li %r5,24 b .Lstruct567 +#endif # case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct. lwz %r3,112+0(%r1) lwz %r4,112+4(%r1) +#ifdef __LITTLE_ENDIAN__ + mtlr %r0 + b .Lfinish +#else li %r5,16 b .Lstruct567 +#endif # case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct. lwz %r3,112+0(%r1) lwz %r4,112+4(%r1) +#ifdef __LITTLE_ENDIAN__ + mtlr %r0 + b .Lfinish +#else li %r5,8 b .Lstruct567 +#endif # case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct. 
lwz %r3,112+0(%r1) @@ -273,6 +310,7 @@ ENTRY(ffi_closure_SYSV) mtlr %r0 b .Lfinish +#ifndef __LITTLE_ENDIAN__ .Lstruct567: subfic %r6,%r5,32 srw %r4,%r4,%r5 @@ -282,6 +320,7 @@ ENTRY(ffi_closure_SYSV) mtlr %r0 addi %r1,%r1,144 blr +#endif .Luint128: lwz %r6,112+12(%r1) diff -urp libffi-3.0.13/src/powerpc/sysv.S libffi-current/src/powerpc/sysv.S --- libffi-3.0.13/src/powerpc/sysv.S 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/src/powerpc/sysv.S 2013-11-13 22:36:35.222994628 +1030 @@ -142,19 +142,14 @@ L(float_return_value): #endif L(small_struct_return_value): - extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */ - mtcrf 0x02,%r31 /* copy flags to cr[24:27] (cr6) */ - extrwi %r5,%r31,5,19 /* r5 <- number of bits of padding */ - subfic %r6,%r6,4 /* r6 <- number of useful bytes in r3 */ - bf- 25,L(done_return_value) /* struct in r3 ? if not, done. */ -/* smst_one_register: */ - slw %r3,%r3,%r5 /* Left-justify value in r3 */ - mtxer %r6 /* move byte count to XER ... */ - stswx %r3,0,%r30 /* ... and store that many bytes */ - bf+ 26,L(done_return_value) /* struct in r3:r4 ? */ - add %r6,%r6,%r30 /* adjust pointer */ - stswi %r4,%r6,4 /* store last four bytes */ - b L(done_return_value) + /* + * The C code always allocates a properly-aligned 8-byte bounce + * buffer to make this assembly code very simple. Just write out + * r3 and r4 to the buffer to allow the C code to handle the rest. + */ + stw %r3, 0(%r30) + stw %r4, 4(%r30) + b L(done_return_value) .LFE1: END(ffi_call_SYSV) diff -urp libffi-3.0.13/testsuite/libffi.call/cls_double_va.c libffi-current/testsuite/libffi.call/cls_double_va.c --- libffi-3.0.13/testsuite/libffi.call/cls_double_va.c 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/testsuite/libffi.call/cls_double_va.c 2013-11-13 22:37:13.437459229 +1030 @@ -38,7 +38,7 @@ int main (void) /* This printf call is variadic */ CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint, - arg_types) == FFI_OK); + arg_types) == FFI_OK); args[0] = &format; args[1] = &doubleArg; @@ -49,12 +49,10 @@ int main (void) printf("res: %d\n", (int) res); /* { dg-output "\nres: 4" } */ - /* The call to cls_double_va_fn is static, so have to use a normal prep_cif */ - CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, arg_types) == FFI_OK); + CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL, + code) == FFI_OK); - CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL, code) == FFI_OK); - - res = ((int(*)(char*, double))(code))(format, doubleArg); + res = ((int(*)(char*, ...))(code))(format, doubleArg); /* { dg-output "\n7.0" } */ printf("res: %d\n", (int) res); /* { dg-output "\nres: 4" } */ diff -urp libffi-3.0.13/testsuite/libffi.call/cls_longdouble_va.c libffi-current/testsuite/libffi.call/cls_longdouble_va.c --- libffi-3.0.13/testsuite/libffi.call/cls_longdouble_va.c 2013-03-16 21:49:39.000000000 +1030 +++ libffi-current/testsuite/libffi.call/cls_longdouble_va.c 2013-11-13 22:37:13.437459229 +1030 @@ -38,7 +38,7 @@ int main (void) /* This printf call is variadic */ CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint, - arg_types) == FFI_OK); + arg_types) == FFI_OK); args[0] = &format; args[1] = &ldArg; @@ -49,13 +49,10 @@ int main (void) printf("res: %d\n", (int) res); /* { dg-output "\nres: 4" } */ - /* The call to cls_longdouble_va_fn is static, so have to use a normal prep_cif */ - CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, - arg_types) == FFI_OK); + CHECK(ffi_prep_closure_loc(pcl, &cif, 
cls_longdouble_va_fn, NULL, + code) == FFI_OK); - CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL, code) == FFI_OK); - - res = ((int(*)(char*, long double))(code))(format, ldArg); + res = ((int(*)(char*, ...))(code))(format, ldArg); /* { dg-output "\n7.0" } */ printf("res: %d\n", (int) res); /* { dg-output "\nres: 4" } */ diff -urp libffi-3.0.13/doc/libffi.texi libffi-current/doc/libffi.texi --- libffi-3.0.13/doc/libffi.texi 2013-03-16 22:41:19.000000000 +1030 +++ libffi-current/doc/libffi.texi 2013-11-17 09:06:03.209763612 +1030 @@ -184,11 +184,11 @@ This calls the function @var{fn} accordi @var{rvalue} is a pointer to a chunk of memory that will hold the result of the function call. This must be large enough to hold the -result and must be suitably aligned; it is the caller's responsibility +result, no smaller than the system register size (generally 32 or 64 +bits), and must be suitably aligned; it is the caller's responsibility to ensure this. If @var{cif} declares that the function returns @code{void} (using @code{ffi_type_void}), then @var{rvalue} is -ignored. If @var{rvalue} is @samp{NULL}, then the return value is -discarded. +ignored. @var{avalues} is a vector of @code{void *} pointers that point to the memory locations holding the argument values for a call. If @var{cif} @@ -214,7 +214,7 @@ int main() ffi_type *args[1]; void *values[1]; char *s; - int rc; + ffi_arg rc; /* Initialize the argument info vectors */ args[0] = &ffi_type_pointer; @@ -222,7 +222,7 @@ int main() /* Initialize the cif */ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, - &ffi_type_uint, args) == FFI_OK) + &ffi_type_sint, args) == FFI_OK) @{ s = "Hello World!"; ffi_call(&cif, puts, &rc, values); @@ -414,6 +414,7 @@ Here is the corresponding code to descri int i; tm_type.size = tm_type.alignment = 0; + tm_type.type = FFI_TYPE_STRUCT; tm_type.elements = &tm_type_elements; for (i = 0; i < 9; i++) @@ -540,21 +541,23 @@ A trivial example that creates a new @co #include /* Acts like puts with the file given at time of enclosure. */ -void puts_binding(ffi_cif *cif, unsigned int *ret, void* args[], - FILE *stream) +void puts_binding(ffi_cif *cif, void *ret, void* args[], + void *stream) @{ - *ret = fputs(*(char **)args[0], stream); + *(ffi_arg *)ret = fputs(*(char **)args[0], (FILE *)stream); @} +typedef int (*puts_t)(char *); + int main() @{ ffi_cif cif; ffi_type *args[1]; ffi_closure *closure; - int (*bound_puts)(char *); + void *bound_puts; int rc; - + /* Allocate closure and bound_puts */ closure = ffi_closure_alloc(sizeof(ffi_closure), &bound_puts); @@ -565,13 +568,13 @@ int main() /* Initialize the cif */ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, - &ffi_type_uint, args) == FFI_OK) + &ffi_type_sint, args) == FFI_OK) @{ /* Initialize the closure, setting stream to stdout */ - if (ffi_prep_closure_loc(closure, &cif, puts_binding, + if (ffi_prep_closure_loc(closure, &cif, puts_binding, stdout, bound_puts) == FFI_OK) @{ - rc = bound_puts("Hello World!"); + rc = ((puts_t)bound_puts)("Hello World!"); /* rc now holds the result of the call to fputs */ @} @}
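
Note (not part of the patch): the ELFv2 changes above revolve around homogeneous floating-point aggregates -- structs whose members, recursively, are all float or all double, up to eight elements -- which discover_homogeneous_aggregate() detects so they can be passed and returned in FPRs instead of the parameter save area. Below is a minimal, hedged sketch of how a caller exercises that path through the public libffi API. The vec4/vscale names are illustrative only (they do not appear in the patch), and the example is portable: on non-ELFv2 targets the same code simply takes the ordinary by-value struct path.

#include <stdio.h>
#include <ffi.h>

/* A homogeneous float aggregate: four floats.  Under the ELFv2 ABI this
   is passed and returned in f1..f4 when FPRs are available.  */
typedef struct { float x, y, z, w; } vec4;

static vec4
vscale (vec4 v, float s)
{
  vec4 r = { v.x * s, v.y * s, v.z * s, v.w * s };
  return r;
}

int
main (void)
{
  ffi_cif cif;
  ffi_type vec4_type;
  ffi_type *vec4_elements[5];
  ffi_type *args[2];
  void *values[2];
  vec4 in = { 1, 2, 3, 4 }, out;
  float s = 2.0f;
  int i;

  /* Describe the struct to libffi; size and alignment are filled in
     by ffi_prep_cif.  */
  vec4_type.size = vec4_type.alignment = 0;
  vec4_type.type = FFI_TYPE_STRUCT;
  vec4_type.elements = vec4_elements;
  for (i = 0; i < 4; i++)
    vec4_elements[i] = &ffi_type_float;
  vec4_elements[4] = NULL;

  args[0] = &vec4_type;
  args[1] = &ffi_type_float;
  values[0] = &in;
  values[1] = &s;

  /* Struct argument and struct return value; the patched backend
     classifies vec4 as a homogeneous aggregate and uses FPRs.  */
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &vec4_type, args) == FFI_OK)
    {
      ffi_call (&cif, FFI_FN (vscale), &out, values);
      printf ("%g %g %g %g\n", out.x, out.y, out.z, out.w);
    }
  return 0;
}

The testsuite changes to cls_double_va.c and cls_longdouble_va.c follow the same idea on the closure side: the cif prepared with ffi_prep_cif_var is reused for the closure and the closure is invoked through a variadic function pointer, so the new nfixedargs bookkeeping is exercised rather than bypassed with a second, non-variadic ffi_prep_cif.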