commit 27d2a2d27f3e0060ade9a1a82ce2292aad6c6931 Author: law Date: Mon Sep 25 23:13:55 2017 +0000 * config/rs6000/rs6000-protos.h (output_probe_stack_range): Update prototype for new argument. * config/rs6000/rs6000.c (rs6000_emit_allocate_stack_1): New function, mostly extracted from rs6000_emit_allocate_stack. (rs6000_emit_probe_stack_range_stack_clash): New function. (rs6000_emit_allocate_stack): Call rs6000_emit_probe_stack_range_stack_clash as needed. (rs6000_emit_probe_stack_range): Add additional argument to call to gen_probe_stack_range{si,di}. (output_probe_stack_range): New. (output_probe_stack_range_1): Renamed from output_probe_stack_range. (output_probe_stack_range_stack_clash): New. (rs6000_emit_prologue): Emit notes into dump file as requested. * rs6000.md (allocate_stack): Handle -fstack-clash-protection. (probe_stack_range): Operand 0 is now early-clobbered. Add additional operand and pass it to output_probe_stack_range. * lib/target-supports.exp (check_effective_target_supports_stack_clash_protection): Enable for rs6000 and powerpc targets. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253179 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d4b93d9970d..cfb23ab80cc 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -114,7 +114,7 @@ extern void rs6000_emit_sCOND (enum machine_mode, rtx[]); extern void rs6000_emit_cbranch (enum machine_mode, rtx[]); extern char * output_cbranch (rtx, const char *, int, rtx); extern char * output_e500_flip_gt_bit (rtx, rtx); -extern const char * output_probe_stack_range (rtx, rtx); +extern const char * output_probe_stack_range (rtx, rtx, rtx); extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int); extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a9052c6becf..c5d9988c1d9 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -22320,6 +22320,220 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p))); } +/* Allocate SIZE_INT bytes on the stack using a store with update style insn + and set the appropriate attributes for the generated insn. Return the + first insn which adjusts the stack pointer or the last insn before + the stack adjustment loop. + + SIZE_INT is used to create the CFI note for the allocation. + + SIZE_RTX is an rtx containing the size of the adjustment. Note that + since stacks grow to lower addresses its runtime value is -SIZE_INT. + + ORIG_SP contains the backchain value that must be stored at *sp. */ + +static rtx +rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp) +{ + rtx insn; + + rtx size_rtx = GEN_INT (-size_int); + if (size_int > 32767) + { + rtx tmp_reg = gen_rtx_REG (Pmode, 0); + /* Need a note here so that try_split doesn't get confused. */ + if (get_last_insn () == NULL_RTX) + emit_note (NOTE_INSN_DELETED); + insn = emit_move_insn (tmp_reg, size_rtx); + try_split (PATTERN (insn), insn, 0); + size_rtx = tmp_reg; + } + + if (Pmode == SImode) + insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx, + stack_pointer_rtx, + size_rtx, + orig_sp)); + else + insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, + stack_pointer_rtx, + size_rtx, + orig_sp)); + rtx par = PATTERN (insn); + gcc_assert (GET_CODE (par) == PARALLEL); + rtx set = XVECEXP (par, 0, 0); + gcc_assert (GET_CODE (set) == SET); + rtx mem = SET_DEST (set); + gcc_assert (MEM_P (mem)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, get_frame_alias_set ()); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + GEN_INT (-size_int)))); + + /* Emit a blockage to ensure the allocation/probing insns are + not optimized, combined, removed, etc. Add REG_STACK_CHECK + note for similar reasons. */ + if (flag_stack_clash_protection) + { + add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + emit_insn (gen_blockage ()); + } + + return insn; +} + +static HOST_WIDE_INT +get_stack_clash_protection_probe_interval (void) +{ + return (HOST_WIDE_INT_1U + << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); +} + +static HOST_WIDE_INT +get_stack_clash_protection_guard_size (void) +{ + return (HOST_WIDE_INT_1U + << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)); +} + +/* Allocate ORIG_SIZE bytes on the stack and probe the newly + allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes. + + COPY_REG, if non-null, should contain a copy of the original + stack pointer at exit from this function. + + This is subtly different than the Ada probing in that it tries hard to + prevent attacks that jump the stack guard. Thus it is never allowed to + allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack + space without a suitable probe. */ +static rtx +rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size, + rtx copy_reg) +{ + rtx orig_sp = copy_reg; + + HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); + + /* Round the size down to a multiple of PROBE_INTERVAL. */ + HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval); + + /* If explicitly requested, + or the rounded size is not the same as the original size + or the the rounded size is greater than a page, + then we will need a copy of the original stack pointer. */ + if (rounded_size != orig_size + || rounded_size > probe_interval + || copy_reg) + { + /* If the caller did not request a copy of the incoming stack + pointer, then we use r0 to hold the copy. */ + if (!copy_reg) + orig_sp = gen_rtx_REG (Pmode, 0); + emit_move_insn (orig_sp, stack_pointer_rtx); + } + + /* There's three cases here. + + One is a single probe which is the most common and most efficiently + implemented as it does not have to have a copy of the original + stack pointer if there are no residuals. + + Second is unrolled allocation/probes which we use if there's just + a few of them. It needs to save the original stack pointer into a + temporary for use as a source register in the allocation/probe. + + Last is a loop. This is the most uncommon case and least efficient. */ + rtx retval = NULL; + if (rounded_size == probe_interval) + { + retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx); + + dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); + } + else if (rounded_size <= 8 * probe_interval) + { + /* The ABI requires using the store with update insns to allocate + space and store the backchain into the stack + + So we save the current stack pointer into a temporary, then + emit the store-with-update insns to store the saved stack pointer + into the right location in each new page. */ + for (int i = 0; i < rounded_size; i += probe_interval) + { + rtx insn = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp); + + /* Save the first stack adjustment in RETVAL. */ + if (i == 0) + retval = insn; + } + + dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); + } + else + { + /* Compute the ending address. */ + rtx end_addr + = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12); + rtx rs = GEN_INT (-rounded_size); + rtx insn; + if (add_operand (rs, Pmode)) + insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs)); + else + { + emit_move_insn (end_addr, GEN_INT (-rounded_size)); + insn = emit_insn (gen_add3_insn (end_addr, end_addr, + stack_pointer_rtx)); + /* Describe the effect of INSN to the CFI engine. */ + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, end_addr, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + rs))); + } + RTX_FRAME_RELATED_P (insn) = 1; + + /* Emit the loop. */ + if (TARGET_64BIT) + retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx, + stack_pointer_rtx, orig_sp, + end_addr)); + else + retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx, + stack_pointer_rtx, orig_sp, + end_addr)); + RTX_FRAME_RELATED_P (retval) = 1; + /* Describe the effect of INSN to the CFI engine. */ + add_reg_note (retval, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, end_addr)); + + /* Emit a blockage to ensure the allocation/probing insns are + not optimized, combined, removed, etc. Other cases handle this + within their call to rs6000_emit_allocate_stack_1. */ + emit_insn (gen_blockage ()); + + dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size); + } + + if (orig_size != rounded_size) + { + /* Allocate (and implicitly probe) any residual space. */ + HOST_WIDE_INT residual = orig_size - rounded_size; + + rtx insn = rs6000_emit_allocate_stack_1 (residual, orig_sp); + + /* If the residual was the only allocation, then we can return the + allocating insn. */ + if (!retval) + retval = insn; + } + + return retval; +} + /* Emit the correct code for allocating stack space, as insns. If COPY_REG, make sure a copy of the old frame is left there. The generated code may use hard register 0 as a temporary. */ @@ -22331,7 +22545,6 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); rtx tmp_reg = gen_rtx_REG (Pmode, 0); rtx todec = gen_int_mode (-size, Pmode); - rtx par, set, mem; if (INTVAL (todec) != -size) { @@ -22368,6 +22581,22 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) warning (0, "stack limit expression is not supported"); } + if (flag_stack_clash_protection) + { + if (size < get_stack_clash_protection_guard_size ()) + dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); + else + { + rtx insn = rs6000_emit_probe_stack_range_stack_clash (size, copy_reg); + + /* If we asked for a copy with an offset, then we still need add in + the offset. */ + if (copy_reg && copy_off) + emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off))); + return; + } + } + if (copy_reg) { if (copy_off != 0) @@ -22376,39 +22605,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) emit_move_insn (copy_reg, stack_reg); } - if (size > 32767) - { - /* Need a note here so that try_split doesn't get confused. */ - if (get_last_insn () == NULL_RTX) - emit_note (NOTE_INSN_DELETED); - insn = emit_move_insn (tmp_reg, todec); - try_split (PATTERN (insn), insn, 0); - todec = tmp_reg; - } - - insn = emit_insn (TARGET_32BIT - ? gen_movsi_update_stack (stack_reg, stack_reg, - todec, stack_reg) - : gen_movdi_di_update_stack (stack_reg, stack_reg, - todec, stack_reg)); /* Since we didn't use gen_frame_mem to generate the MEM, grab it now and set the alias set/attributes. The above gen_*_update calls will generate a PARALLEL with the MEM set being the first operation. */ - par = PATTERN (insn); - gcc_assert (GET_CODE (par) == PARALLEL); - set = XVECEXP (par, 0, 0); - gcc_assert (GET_CODE (set) == SET); - mem = SET_DEST (set); - gcc_assert (MEM_P (mem)); - MEM_NOTRAP_P (mem) = 1; - set_mem_alias_set (mem, get_frame_alias_set ()); - - RTX_FRAME_RELATED_P (insn) = 1; - add_reg_note (insn, REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, stack_reg, - gen_rtx_PLUS (Pmode, stack_reg, - GEN_INT (-size)))); + insn = rs6000_emit_allocate_stack_1 (size, stack_reg); + return; } #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) @@ -22490,9 +22692,9 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) until it is equal to ROUNDED_SIZE. */ if (TARGET_64BIT) - emit_insn (gen_probe_stack_rangedi (r12, r12, r0)); + emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0)); else - emit_insn (gen_probe_stack_rangesi (r12, r12, r0)); + emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0)); /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time @@ -22504,10 +22706,10 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) } /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are - absolute addresses. */ + addresses, not offsets. */ -const char * -output_probe_stack_range (rtx reg1, rtx reg2) +static const char * +output_probe_stack_range_1 (rtx reg1, rtx reg2) { static int labelno = 0; char loop_lab[32], end_lab[32]; @@ -22546,6 +22748,63 @@ output_probe_stack_range (rtx reg1, rtx reg2) return ""; } +/* Probe a range of stack addresses from REG1 to REG3 inclusive. These are + addresses, not offsets. + + REG2 contains the backchain that must be stored into *sp at each allocation. + + This is subtly different than the Ada probing above in that it tries hard + to prevent attacks that jump the stack guard. Thus, it is never allowed + to allocate more than PROBE_INTERVAL bytes of stack space without a + suitable probe. */ + +static const char * +output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3) +{ + static int labelno = 0; + char loop_lab[32]; + rtx xops[3]; + + HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* This allocates and probes. */ + xops[0] = reg1; + xops[1] = reg2; + xops[2] = GEN_INT (-probe_interval); + if (TARGET_64BIT) + output_asm_insn ("stdu %1,%2(%0)", xops); + else + output_asm_insn ("stwu %1,%2(%0)", xops); + + /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */ + xops[0] = reg1; + xops[1] = reg3; + if (TARGET_64BIT) + output_asm_insn ("cmpd 0,%0,%1", xops); + else + output_asm_insn ("cmpw 0,%0,%1", xops); + + fputs ("\tbne 0,", asm_out_file); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; +} + +/* Wrapper around the output_probe_stack_range routines. */ +const char * +output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) +{ + if (flag_stack_clash_protection) + return output_probe_stack_range_stack_clash (reg1, reg2, reg3); + else + return output_probe_stack_range_1 (reg1, reg3); +} + /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2 is not NULL. It would be nice if dwarf2out_frame_debug_expr could @@ -23857,6 +24116,13 @@ rs6000_emit_prologue (void) } } + /* If we are emitting stack probes, but allocate no stack, then + just note that in the dump file. */ + if (flag_stack_clash_protection + && dump_file + && !info->push_p) + dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + /* Update stack and set back pointer unless this is V.4, for which it was done previously. */ if (!WORLD_SAVE_P (info) && info->push_p diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index cd197213480..3cd70e592c1 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -11822,10 +11822,20 @@ ;; ;; First, an insn to allocate new stack space for dynamic use (e.g., alloca). ;; We move the back-chain and decrement the stack pointer. - +;; +;; Operand1 is more naturally reg_or_short_operand. However, for a large +;; constant alloca, using that predicate will force the generic code to put +;; the constant size into a register before calling the expander. +;; +;; As a result the expander would not have the constant size information +;; in those cases and would have to generate less efficient code. +;; +;; Thus we allow reg_or_cint_operand instead so that the expander can see +;; the constant size. The value is forced into a register if necessary. +;; (define_expand "allocate_stack" [(set (match_operand 0 "gpc_reg_operand" "") - (minus (reg 1) (match_operand 1 "reg_or_short_operand" ""))) + (minus (reg 1) (match_operand 1 "reg_or_cint_operand" ""))) (set (reg 1) (minus (reg 1) (match_dup 1)))] "" @@ -11835,6 +11845,15 @@ rtx neg_op0; rtx insn, par, set, mem; + /* By allowing reg_or_cint_operand as the predicate we can get + better code for stack-clash-protection because we do not lose + size information. But the rest of the code expects the operand + to be reg_or_short_operand. If it isn't, then force it into + a register. */ + rtx orig_op1 = operands[1]; + if (!reg_or_short_operand (operands[1], Pmode)) + operands[1] = force_reg (Pmode, operands[1]); + emit_move_insn (chain, stack_bot); /* Check stack bounds if necessary. */ @@ -11847,6 +11866,51 @@ emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx)); } + /* Allocate and probe if requested. + This may look similar to the loop we use for prologue allocations, + but it is critically different. For the former we know the loop + will iterate, but do not know that generally here. The former + uses that knowledge to rotate the loop. Combining them would be + possible with some performance cost. */ + if (flag_stack_clash_protection) + { + rtx rounded_size, last_addr, residual; + HOST_WIDE_INT probe_interval; + compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, + &residual, &probe_interval, + orig_op1); + + /* We do occasionally get in here with constant sizes, we might + as well do a reasonable job when we obviously can. */ + if (rounded_size != const0_rtx) + { + rtx loop_lab, end_loop; + bool rotated = CONST_INT_P (rounded_size); + + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, + last_addr, rotated); + + if (Pmode == SImode) + emit_insn (gen_movsi_update_stack (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-probe_interval), + chain)); + else + emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-probe_interval), + chain)); + emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, + last_addr, rotated); + } + + /* Now handle residuals. We just have to set operands[1] correctly + and let the rest of the expander run. */ + operands[1] = residual; + if (!CONST_INT_P (residual)) + operands[1] = force_reg (Pmode, operands[1]); + } + if (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) < -32767 || INTVAL (operands[1]) > 32768) @@ -12994,12 +13058,13 @@ (set_attr "length" "4")]) (define_insn "probe_stack_range" - [(set (match_operand:P 0 "register_operand" "=r") + [(set (match_operand:P 0 "register_operand" "=&r") (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") - (match_operand:P 2 "register_operand" "r")] + (match_operand:P 2 "register_operand" "r") + (match_operand:P 3 "register_operand" "r")] UNSPECV_PROBE_STACK_RANGE))] "" - "* return output_probe_stack_range (operands[0], operands[2]);" + "* return output_probe_stack_range (operands[0], operands[2], operands[3]);" [(set_attr "type" "three")]) ;; Compare insns are next. Note that the RS/6000 has two types of compares, diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 7c126e4122b..aba99513ed0 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5421,12 +5421,12 @@ proc check_effective_target_autoincdec { } { proc check_effective_target_supports_stack_clash_protection { } { # Temporary until the target bits are fully ACK'd. -# if { [istarget aarch*-*-*] -# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { +# if { [istarget aarch*-*-*] } { # return 1 # } if { [istarget x86_64-*-*] || [istarget i?86-*-*] + || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] || [istarget s390*-*-*] } { return 1 }