commit 27d2a2d27f3e0060ade9a1a82ce2292aad6c6931
Author: law <law@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Sep 25 23:13:55 2017 +0000

* config/rs6000/rs6000-protos.h (output_probe_stack_range): Update
prototype for new argument.
* config/rs6000/rs6000.c (rs6000_emit_allocate_stack_1): New function,
mostly extracted from rs6000_emit_allocate_stack.
(rs6000_emit_probe_stack_range_stack_clash): New function.
(rs6000_emit_allocate_stack): Call
rs6000_emit_probe_stack_range_stack_clash as needed.
(rs6000_emit_probe_stack_range): Add additional argument
to call to gen_probe_stack_range{si,di}.
(output_probe_stack_range): New.
(output_probe_stack_range_1): Renamed from output_probe_stack_range.
(output_probe_stack_range_stack_clash): New.
(rs6000_emit_prologue): Emit notes into dump file as requested.
* config/rs6000/rs6000.md (allocate_stack): Handle -fstack-clash-protection.
(probe_stack_range<P:mode>): Operand 0 is now early-clobbered.
Add additional operand and pass it to output_probe_stack_range.

* lib/target-supports.exp
(check_effective_target_supports_stack_clash_protection): Enable for
rs6000 and powerpc targets.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253179 138bc75d-0d04-0410-961f-82ee72b054a4
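
For illustration only, not part of the commit: a function whose frame is
larger than the stack-clash guard must now touch each new page as the
prologue allocates it.  A minimal C sketch; the function name and sizes
are hypothetical, and the 4 KiB probe interval is the usual default:

    /* Compiled with -O2 -fstack-clash-protection, the prologue should
       allocate this frame in probe-interval-sized chunks, each chunk a
       store-with-update insn ("stdu" on 64-bit, "stwu" on 32-bit) that
       writes the backchain into the new page, so the guard can never
       be jumped over in a single unprobed step.  */
    void
    large_frame (void)
    {
      volatile char buf[128 * 1024];   /* well beyond the guard size */
      buf[0] = 1;
    }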

diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index d4b93d9970d..cfb23ab80cc 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -114,7 +114,7 @@ extern void rs6000_emit_sCOND (enum machine_mode, rtx[]);
 extern void rs6000_emit_cbranch (enum machine_mode, rtx[]);
 extern char * output_cbranch (rtx, const char *, int, rtx);
 extern char * output_e500_flip_gt_bit (rtx, rtx);
-extern const char * output_probe_stack_range (rtx, rtx);
+extern const char * output_probe_stack_range (rtx, rtx, rtx);
 extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int);
 extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index a9052c6becf..c5d9988c1d9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -22320,6 +22320,220 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
   emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
 }
 
+/* Allocate SIZE_INT bytes on the stack using a store-with-update style insn
+   and set the appropriate attributes for the generated insn.  Return the
+   first insn which adjusts the stack pointer or the last insn before
+   the stack adjustment loop.
+
+   SIZE_INT is used to create the CFI note for the allocation.
+
+   SIZE_RTX is an rtx containing the size of the adjustment.  Note that
+   since stacks grow to lower addresses, its runtime value is -SIZE_INT.
+
+   ORIG_SP contains the backchain value that must be stored at *sp.  */
+
+static rtx
+rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
+{
+  rtx insn;
+
+  rtx size_rtx = GEN_INT (-size_int);
+  if (size_int > 32767)
+    {
+      rtx tmp_reg = gen_rtx_REG (Pmode, 0);
+      /* Need a note here so that try_split doesn't get confused.  */
+      if (get_last_insn () == NULL_RTX)
+        emit_note (NOTE_INSN_DELETED);
+      insn = emit_move_insn (tmp_reg, size_rtx);
+      try_split (PATTERN (insn), insn, 0);
+      size_rtx = tmp_reg;
+    }
+
+  if (Pmode == SImode)
+    insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
+                                              stack_pointer_rtx,
+                                              size_rtx,
+                                              orig_sp));
+  else
+    insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
+                                                 stack_pointer_rtx,
+                                                 size_rtx,
+                                                 orig_sp));
+  rtx par = PATTERN (insn);
+  gcc_assert (GET_CODE (par) == PARALLEL);
+  rtx set = XVECEXP (par, 0, 0);
+  gcc_assert (GET_CODE (set) == SET);
+  rtx mem = SET_DEST (set);
+  gcc_assert (MEM_P (mem));
+  MEM_NOTRAP_P (mem) = 1;
+  set_mem_alias_set (mem, get_frame_alias_set ());
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+                gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+                             gen_rtx_PLUS (Pmode,
+                                           stack_pointer_rtx,
+                                           GEN_INT (-size_int))));
+
+  /* Emit a blockage to ensure the allocation/probing insns are
+     not optimized, combined, removed, etc.  Add REG_STACK_CHECK
+     note for similar reasons.  */
+  if (flag_stack_clash_protection)
+    {
+      add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
+      emit_insn (gen_blockage ());
+    }
+
+  return insn;
+}
+
+static HOST_WIDE_INT
+get_stack_clash_protection_probe_interval (void)
+{
+  return (HOST_WIDE_INT_1U
+          << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
+}
+
+static HOST_WIDE_INT
+get_stack_clash_protection_guard_size (void)
+{
+  return (HOST_WIDE_INT_1U
+          << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
+}
+
+/* Allocate ORIG_SIZE bytes on the stack and probe the newly
+   allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
+
+   COPY_REG, if non-null, should contain a copy of the original
+   stack pointer at exit from this function.
+
+   This is subtly different from the Ada probing in that it tries hard to
+   prevent attacks that jump the stack guard.  Thus it is never allowed to
+   allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
+   space without a suitable probe.  */
+static rtx
+rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
+                                           rtx copy_reg)
+{
+  rtx orig_sp = copy_reg;
+
+  HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
+
+  /* Round the size down to a multiple of PROBE_INTERVAL.  */
+  HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
+
+  /* If explicitly requested,
+       or the rounded size is not the same as the original size,
+       or the rounded size is greater than the probe interval,
+     then we will need a copy of the original stack pointer.  */
+  if (rounded_size != orig_size
+      || rounded_size > probe_interval
+      || copy_reg)
+    {
+      /* If the caller did not request a copy of the incoming stack
+         pointer, then we use r0 to hold the copy.  */
+      if (!copy_reg)
+        orig_sp = gen_rtx_REG (Pmode, 0);
+      emit_move_insn (orig_sp, stack_pointer_rtx);
+    }
+
+  /* There are three cases here.
+
+     One is a single probe, which is the most common and the most
+     efficient: it needs no copy of the original stack pointer if
+     there are no residuals.
+
+     Second is unrolled allocation/probe pairs, which we use when
+     there are just a few.  They need the original stack pointer saved
+     in a temporary for use as a source register in each allocation/probe.
+
+     Last is a loop.  This is the least common and least efficient case.  */
+  rtx retval = NULL;
+  if (rounded_size == probe_interval)
+    {
+      retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
+
+      dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
+    }
+  else if (rounded_size <= 8 * probe_interval)
+    {
+      /* The ABI requires using the store-with-update insns to allocate
+         space and store the backchain into the stack.
+
+         So we save the current stack pointer into a temporary, then
+         emit the store-with-update insns to store the saved stack pointer
+         into the right location in each new page.  */
+      for (int i = 0; i < rounded_size; i += probe_interval)
+        {
+          rtx insn = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
+
+          /* Save the first stack adjustment in RETVAL.  */
+          if (i == 0)
+            retval = insn;
+        }
+
+      dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
+    }
+  else
+    {
+      /* Compute the ending address.  */
+      rtx end_addr
+        = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
+      rtx rs = GEN_INT (-rounded_size);
+      rtx insn;
+      if (add_operand (rs, Pmode))
+        insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
+      else
+        {
+          emit_move_insn (end_addr, GEN_INT (-rounded_size));
+          insn = emit_insn (gen_add3_insn (end_addr, end_addr,
+                                           stack_pointer_rtx));
+          /* Describe the effect of INSN to the CFI engine.  */
+          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+                        gen_rtx_SET (VOIDmode, end_addr,
+                                     gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+                                                   rs)));
+        }
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      /* Emit the loop.  */
+      if (TARGET_64BIT)
+        retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
+                                                     stack_pointer_rtx, orig_sp,
+                                                     end_addr));
+      else
+        retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
+                                                     stack_pointer_rtx, orig_sp,
+                                                     end_addr));
+      RTX_FRAME_RELATED_P (retval) = 1;
+      /* Describe the effect of INSN to the CFI engine.  */
+      add_reg_note (retval, REG_FRAME_RELATED_EXPR,
+                    gen_rtx_SET (VOIDmode, stack_pointer_rtx, end_addr));
+
+      /* Emit a blockage to ensure the allocation/probing insns are
+         not optimized, combined, removed, etc.  Other cases handle this
+         within their call to rs6000_emit_allocate_stack_1.  */
+      emit_insn (gen_blockage ());
+
+      dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
+    }
+
+  if (orig_size != rounded_size)
+    {
+      /* Allocate (and implicitly probe) any residual space.  */
+      HOST_WIDE_INT residual = orig_size - rounded_size;
+
+      rtx insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
+
+      /* If the residual was the only allocation, then we can return the
+         allocating insn.  */
+      if (!retval)
+        retval = insn;
+    }
+
+  return retval;
+}
+
 /* Emit the correct code for allocating stack space, as insns.
    If COPY_REG, make sure a copy of the old frame is left there.
    The generated code may use hard register 0 as a temporary.  */
@@ -22331,7 +22545,6 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   rtx tmp_reg = gen_rtx_REG (Pmode, 0);
   rtx todec = gen_int_mode (-size, Pmode);
-  rtx par, set, mem;
 
   if (INTVAL (todec) != -size)
     {
@@ -22368,6 +22581,22 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
       warning (0, "stack limit expression is not supported");
     }
 
+  if (flag_stack_clash_protection)
+    {
+      if (size < get_stack_clash_protection_guard_size ())
+        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+      else
+        {
+          rtx insn = rs6000_emit_probe_stack_range_stack_clash (size, copy_reg);
+
+          /* If we asked for a copy with an offset, then we still need to
+             add in the offset.  */
+          if (copy_reg && copy_off)
+            emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
+          return;
+        }
+    }
+
   if (copy_reg)
     {
       if (copy_off != 0)
@@ -22376,39 +22605,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
         emit_move_insn (copy_reg, stack_reg);
     }
 
-  if (size > 32767)
-    {
-      /* Need a note here so that try_split doesn't get confused.  */
-      if (get_last_insn () == NULL_RTX)
-        emit_note (NOTE_INSN_DELETED);
-      insn = emit_move_insn (tmp_reg, todec);
-      try_split (PATTERN (insn), insn, 0);
-      todec = tmp_reg;
-    }
-
-  insn = emit_insn (TARGET_32BIT
-                    ? gen_movsi_update_stack (stack_reg, stack_reg,
-                                              todec, stack_reg)
-                    : gen_movdi_di_update_stack (stack_reg, stack_reg,
-                                                 todec, stack_reg));
   /* Since we didn't use gen_frame_mem to generate the MEM, grab
      it now and set the alias set/attributes.  The above gen_*_update
      calls will generate a PARALLEL with the MEM set being the first
      operation.  */
-  par = PATTERN (insn);
-  gcc_assert (GET_CODE (par) == PARALLEL);
-  set = XVECEXP (par, 0, 0);
-  gcc_assert (GET_CODE (set) == SET);
-  mem = SET_DEST (set);
-  gcc_assert (MEM_P (mem));
-  MEM_NOTRAP_P (mem) = 1;
-  set_mem_alias_set (mem, get_frame_alias_set ());
-
-  RTX_FRAME_RELATED_P (insn) = 1;
-  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-                gen_rtx_SET (VOIDmode, stack_reg,
-                             gen_rtx_PLUS (Pmode, stack_reg,
-                                           GEN_INT (-size))));
+  insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
+  return;
 }
 
 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
@@ -22490,9 +22692,9 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
      until it is equal to ROUNDED_SIZE.  */
 
   if (TARGET_64BIT)
-    emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
+    emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
   else
-    emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
+    emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
 
 
   /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
@@ -22504,10 +22706,10 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
 }
 
 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
-   absolute addresses.  */
+   addresses, not offsets.  */
 
-const char *
-output_probe_stack_range (rtx reg1, rtx reg2)
+static const char *
+output_probe_stack_range_1 (rtx reg1, rtx reg2)
 {
   static int labelno = 0;
   char loop_lab[32], end_lab[32];
@@ -22546,6 +22748,63 @@ output_probe_stack_range (rtx reg1, rtx reg2)
   return "";
 }
 
+/* Probe a range of stack addresses from REG1 to REG3 inclusive.  These are
+   addresses, not offsets.
+
+   REG2 contains the backchain that must be stored into *sp at each allocation.
+
+   This is subtly different from the Ada probing above in that it tries hard
+   to prevent attacks that jump the stack guard.  Thus, it is never allowed
+   to allocate more than PROBE_INTERVAL bytes of stack space without a
+   suitable probe.  */
+
+static const char *
+output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
+{
+  static int labelno = 0;
+  char loop_lab[32];
+  rtx xops[3];
+
+  HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
+
+  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
+
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+  /* This allocates and probes.  */
+  xops[0] = reg1;
+  xops[1] = reg2;
+  xops[2] = GEN_INT (-probe_interval);
+  if (TARGET_64BIT)
+    output_asm_insn ("stdu %1,%2(%0)", xops);
+  else
+    output_asm_insn ("stwu %1,%2(%0)", xops);
+
+  /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR.  */
+  xops[0] = reg1;
+  xops[1] = reg3;
+  if (TARGET_64BIT)
+    output_asm_insn ("cmpd 0,%0,%1", xops);
+  else
+    output_asm_insn ("cmpw 0,%0,%1", xops);
+
+  fputs ("\tbne 0,", asm_out_file);
+  assemble_name_raw (asm_out_file, loop_lab);
+  fputc ('\n', asm_out_file);
+
+  return "";
+}
+
+/* Wrapper around the output_probe_stack_range routines.  */
+const char *
+output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
+{
+  if (flag_stack_clash_protection)
+    return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
+  else
+    return output_probe_stack_range_1 (reg1, reg3);
+}
+
 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
    with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
    is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
@@ -23857,6 +24116,13 @@ rs6000_emit_prologue (void)
         }
     }
 
+  /* If we are emitting stack probes but allocating no stack, just
+     note that in the dump file.  */
+  if (flag_stack_clash_protection
+      && dump_file
+      && !info->push_p)
+    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
+
   /* Update stack and set back pointer unless this is V.4,
      for which it was done previously.  */
   if (!WORLD_SAVE_P (info) && info->push_p
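
For illustration only, not part of the commit: in the loop case above,
rs6000_emit_probe_stack_range_stack_clash emits a probe_stack_range insn
whose template (output_probe_stack_range_stack_clash) expands, on a 64-bit
target with no copy_reg and a 4 KiB probe interval, to roughly the loop
below.  Register numbers follow the r0/r12 defaults in the code above and
are illustrative:

    .LPSRL0:
            stdu 0,-4096(1)    # allocate one interval; store backchain (r0) at *sp
            cmpd 0,1,12        # has the stack pointer reached end_addr (r12)?
            bne 0,.LPSRL0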

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cd197213480..3cd70e592c1 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -11822,10 +11822,20 @@
 ;;
 ;; First, an insn to allocate new stack space for dynamic use (e.g., alloca).
 ;; We move the back-chain and decrement the stack pointer.
-
+;;
+;; Operand 1 is more naturally reg_or_short_operand.  However, for a large
+;; constant alloca, using that predicate will force the generic code to put
+;; the constant size into a register before calling the expander.
+;;
+;; As a result the expander would not have the constant size information
+;; in those cases and would have to generate less efficient code.
+;;
+;; Thus we allow reg_or_cint_operand instead so that the expander can see
+;; the constant size.  The value is forced into a register if necessary.
+;;
 (define_expand "allocate_stack"
   [(set (match_operand 0 "gpc_reg_operand" "")
-        (minus (reg 1) (match_operand 1 "reg_or_short_operand" "")))
+        (minus (reg 1) (match_operand 1 "reg_or_cint_operand" "")))
    (set (reg 1)
         (minus (reg 1) (match_dup 1)))]
   ""
@@ -11835,6 +11845,15 @@
   rtx neg_op0;
   rtx insn, par, set, mem;
 
+  /* By allowing reg_or_cint_operand as the predicate we can get
+     better code for stack-clash-protection because we do not lose
+     size information.  But the rest of the code expects the operand
+     to be reg_or_short_operand.  If it isn't, then force it into
+     a register.  */
+  rtx orig_op1 = operands[1];
+  if (!reg_or_short_operand (operands[1], Pmode))
+    operands[1] = force_reg (Pmode, operands[1]);
+
   emit_move_insn (chain, stack_bot);
 
   /* Check stack bounds if necessary.  */
@@ -11847,6 +11866,51 @@
       emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx));
     }
 
+  /* Allocate and probe if requested.
+     This may look similar to the loop we use for prologue allocations,
+     but it is critically different.  For the former we know the loop
+     will iterate; here we generally do not.  The former uses that
+     knowledge to rotate the loop.  Combining them would be possible
+     with some performance cost.  */
+  if (flag_stack_clash_protection)
+    {
+      rtx rounded_size, last_addr, residual;
+      HOST_WIDE_INT probe_interval;
+      compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
+                                                &residual, &probe_interval,
+                                                orig_op1);
+
+      /* We do occasionally get in here with constant sizes; we might
+         as well do a reasonable job when we obviously can.  */
+      if (rounded_size != const0_rtx)
+        {
+          rtx loop_lab, end_loop;
+          bool rotated = CONST_INT_P (rounded_size);
+
+          emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
+                                                        last_addr, rotated);
+
+          if (Pmode == SImode)
+            emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
+                                               stack_pointer_rtx,
+                                               GEN_INT (-probe_interval),
+                                               chain));
+          else
+            emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
+                                                  stack_pointer_rtx,
+                                                  GEN_INT (-probe_interval),
+                                                  chain));
+          emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
+                                                      last_addr, rotated);
+        }
+
+      /* Now handle residuals.  We just have to set operands[1] correctly
+         and let the rest of the expander run.  */
+      operands[1] = residual;
+      if (!CONST_INT_P (residual))
+        operands[1] = force_reg (Pmode, operands[1]);
+    }
+
   if (GET_CODE (operands[1]) != CONST_INT
       || INTVAL (operands[1]) < -32767
       || INTVAL (operands[1]) > 32768)
@@ -12994,12 +13058,13 @@
    (set_attr "length" "4")])
 
 (define_insn "probe_stack_range<P:mode>"
-  [(set (match_operand:P 0 "register_operand" "=r")
+  [(set (match_operand:P 0 "register_operand" "=&r")
         (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
-                            (match_operand:P 2 "register_operand" "r")]
+                            (match_operand:P 2 "register_operand" "r")
+                            (match_operand:P 3 "register_operand" "r")]
                            UNSPECV_PROBE_STACK_RANGE))]
   ""
-  "* return output_probe_stack_range (operands[0], operands[2]);"
+  "* return output_probe_stack_range (operands[0], operands[2], operands[3]);"
   [(set_attr "type" "three")])
 
 ;; Compare insns are next.  Note that the RS/6000 has two types of compares,
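
For illustration only, not part of the commit: the allocate_stack expander
above is what a runtime-sized alloca reaches.  A minimal sketch; the
function names are hypothetical:

    /* With -fstack-clash-protection, the expander probes the rounded
       part of the size in a loop (rotated only when the size is a
       compile-time constant, since only then is the loop known to
       iterate) and then hands the residual to the normal allocation
       path, so at most one probe interval is ever left unprobed.  */
    extern void consume (char *);

    void
    dynamic_frame (unsigned long n)
    {
      char *p = __builtin_alloca (n);   /* size unknown at compile time */
      consume (p);
    }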

diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7c126e4122b..aba99513ed0 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5421,12 +5421,12 @@ proc check_effective_target_autoincdec { } {
 proc check_effective_target_supports_stack_clash_protection { } {
 
     # Temporary until the target bits are fully ACK'd.
-#    if { [istarget aarch*-*-*]
-#         || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } {
+#    if { [istarget aarch*-*-*] } {
 #        return 1
 #    }
 
     if { [istarget x86_64-*-*] || [istarget i?86-*-*]
+         || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
          || [istarget s390*-*-*] } {
         return 1
     }
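
For illustration only, not part of the commit: with the effective target
now enabled for powerpc/rs6000, a testcase can be gated as sketched below.
The file body and scan pattern are hypothetical; only the target selector
comes from the change above:

    /* { dg-do compile { target supports_stack_clash_protection } } */
    /* { dg-options "-O2 -fstack-clash-protection" } */

    void
    f (void)
    {
      volatile char buf[65536];
      buf[0] = 0;
    }

    /* { dg-final { scan-assembler "stdu|stwu" } } */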