On Mon, Jul 31, 2017 at 7:38 AM, Jeff Law <l...@redhat.com> wrote: > > This patch introduces generic mechanisms to protect the dynamically > allocated stack space against stack-clash attacks. > > Changes since V2: > > Dynamic allocations can be emitted as unrolled inlined probes or with a > rotated loop. Blockage insns are also properly emitted for the dynamic > area probes and the dynamic area probing now supports targets that may > make optimistic assumptions in their prologues. Finally it uses the new > param to control the probing interval. > > Tests were updated to explicitly specify the guard and probing interval. > New test to check inline/unrolled probes as well as rotated loop. > > > > * explow.c: Include "params.h". > (anti_adjust_stack_and_probe_stack_clash): New function. > (get_stack_check_protect): Likewise. > (compute_stack_clash_protection_loop_data): Likewise. > (emit_stack_clash_protection_loop_start): Likewise. > (emit_stack_clash_protection_loop_end): Likewise. > (allocate_dynamic_stack_space): Use get_stack_check_protect. > Use anti_adjust_stack_and_probe_stack_clash. > * explow.h (compute_stack_clash_protection_loop_data): Prototype. > (emit_stack_clash_protection_loop_start): Likewise. > (emit_stack_clash_protection_loop_end): Likewise. > * rtl.h (get_stack_check_protect): Prototype. > * defaults.h (STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE): > Define new default. > * doc/tm.texi.in (STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE): > Define.
Please make this a hook instead of a target macro. Besides this it looks good (I trust you on the RTL details). Thanks, Richard. > * doc/tm.texi: Rebuilt. > > * config/aarch64/aarch64.c (aarch64_expand_prologue): Use > get_stack_check_protect. > * config/alpha/alpha.c (alpha_expand_prologue): Likewise. > * config/arm/arm.c (arm_expand_prologue): Likewise. > * config/i386/i386.c (ix86_expand_prologue): Likewise. > * config/ia64/ia64.c (ia64_expand_prologue): Likewise. > * config/mips/mips.c (mips_expand_prologue): Likewise. > * config/powerpcspe/powerpcspe.c (rs6000_emit_prologue): Likewise. > * config/rs6000/rs6000.c (rs6000_emit_prologue): Likewise. > * config/sparc/sparc.c (sparc_expand_prologue): Likewise. > > > testsuite > > * gcc.dg/stack-check-3.c: New test. > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index ef1b5a8..0a8b40a 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -3676,12 +3676,14 @@ aarch64_expand_prologue (void) > { > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT) > - aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, > - frame_size - STACK_CHECK_PROTECT); > + if (frame_size > PROBE_INTERVAL > + && frame_size > get_stack_check_protect ()) > + aarch64_emit_probe_stack_range (get_stack_check_protect (), > + (frame_size > + - get_stack_check_protect ())); > } > else if (frame_size > 0) > - aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size); > + aarch64_emit_probe_stack_range (get_stack_check_protect (), > frame_size); > } > > aarch64_sub_sp (IP0_REGNUM, initial_adjust, true); > diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c > index 00a69c1..91f3d7c 100644 > --- a/gcc/config/alpha/alpha.c > +++ b/gcc/config/alpha/alpha.c > @@ -7741,7 +7741,7 @@ alpha_expand_prologue (void) > > probed_size = frame_size; > if (flag_stack_check) > - probed_size += STACK_CHECK_PROTECT; > + 
probed_size += get_stack_check_protect (); > > if (probed_size <= 32768) > { > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index c6101ef..9822ca7 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -21680,13 +21680,13 @@ arm_expand_prologue (void) > > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - arm_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT, > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + arm_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect (), > regno, live_regs_mask); > } > else if (size > 0) > - arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size, > + arm_emit_probe_stack_range (get_stack_check_protect (), size, > regno, live_regs_mask); > } > > @@ -27854,7 +27854,7 @@ arm_frame_pointer_required (void) > { > /* We don't have the final size of the frame so adjust. */ > size += 32 * UNITS_PER_WORD; > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > return true; > } > else > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 1a8a3a3..0947b3c 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -14638,7 +14638,7 @@ ix86_expand_prologue (void) > HOST_WIDE_INT size = allocate; > > if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) > - size = 0x80000000 - STACK_CHECK_PROTECT - 1; > + size = 0x80000000 - get_stack_check_protect () - 1; > > if (TARGET_STACK_PROBE) > { > @@ -14648,18 +14648,20 @@ ix86_expand_prologue (void) > ix86_emit_probe_stack_range (0, size); > } > else > - ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT); > + ix86_emit_probe_stack_range (0, > + size + get_stack_check_protect > ()); > } > else > { > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - 
ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL > + && size > get_stack_check_protect ()) > + ix86_emit_probe_stack_range (get_stack_check_protect (), > + size - > get_stack_check_protect ()); > } > else > - ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + ix86_emit_probe_stack_range (get_stack_check_protect (), > size); > } > } > } > diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c > index 617d188..70aef34 100644 > --- a/gcc/config/ia64/ia64.c > +++ b/gcc/config/ia64/ia64.c > @@ -3481,15 +3481,16 @@ ia64_expand_prologue (void) > > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT, > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + ia64_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect (), > bs_size); > - else if (size + bs_size > STACK_CHECK_PROTECT) > - ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size); > + else if (size + bs_size > get_stack_check_protect ()) > + ia64_emit_probe_stack_range (get_stack_check_protect (), > + 0, bs_size); > } > else if (size + bs_size > 0) > - ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size); > + ia64_emit_probe_stack_range (get_stack_check_protect (), size, > bs_size); > } > > if (dump_file) > diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c > index 6bfd86a..7d85ce7 100644 > --- a/gcc/config/mips/mips.c > +++ b/gcc/config/mips/mips.c > @@ -12081,12 +12081,12 @@ mips_expand_prologue (void) > { > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - mips_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + mips_emit_probe_stack_range (get_stack_check_protect 
(), > + size - get_stack_check_protect ()); > } > else if (size > 0) > - mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + mips_emit_probe_stack_range (get_stack_check_protect (), size); > } > > /* Save the registers. Allocate up to MIPS_MAX_FIRST_STACK_STEP > diff --git a/gcc/config/powerpcspe/powerpcspe.c > b/gcc/config/powerpcspe/powerpcspe.c > index 06d66d7..df5d3cd 100644 > --- a/gcc/config/powerpcspe/powerpcspe.c > +++ b/gcc/config/powerpcspe/powerpcspe.c > @@ -29597,12 +29597,12 @@ rs6000_emit_prologue (void) > > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + rs6000_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect ()); > } > else if (size > 0) > - rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + rs6000_emit_probe_stack_range (get_stack_check_protect (), size); > } > > if (TARGET_FIX_AND_CONTINUE) > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 63a6c80..aa70e30 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -26895,12 +26895,12 @@ rs6000_emit_prologue (void) > > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + rs6000_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect ()); > } > else if (size > 0) > - rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + rs6000_emit_probe_stack_range (get_stack_check_protect (), size); > } > > if (TARGET_FIX_AND_CONTINUE) > diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c > index 790a036..1da032a 100644 > --- 
a/gcc/config/sparc/sparc.c > +++ b/gcc/config/sparc/sparc.c > @@ -5552,12 +5552,12 @@ sparc_expand_prologue (void) > { > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + sparc_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect ()); > } > else if (size > 0) > - sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + sparc_emit_probe_stack_range (get_stack_check_protect (), size); > } > > if (size == 0) > @@ -5663,12 +5663,12 @@ sparc_flat_expand_prologue (void) > { > if (crtl->is_leaf && !cfun->calls_alloca) > { > - if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) > - sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, > - size - STACK_CHECK_PROTECT); > + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) > + sparc_emit_probe_stack_range (get_stack_check_protect (), > + size - get_stack_check_protect ()); > } > else if (size > 0) > - sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); > + sparc_emit_probe_stack_range (get_stack_check_protect (), size); > } > > if (sparc_save_local_in_regs_p) > diff --git a/gcc/defaults.h b/gcc/defaults.h > index 7ad92d9..e5ba317 100644 > --- a/gcc/defaults.h > +++ b/gcc/defaults.h > @@ -1459,6 +1459,13 @@ see the files COPYING3 and COPYING.RUNTIME > respectively. If not, see > #define STACK_CHECK_MAX_VAR_SIZE (STACK_CHECK_MAX_FRAME_SIZE / 100) > #endif > > +/* Whether or not the stack clash protection code must probe > + the last word in the dynamic stack space area. Most targets > + do not need this and thus we define it to zero. 
*/ > +#ifndef STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE > +#define STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE(RESIDUAL) 0 > +#endif > + > /* By default, the C++ compiler will use function addresses in the > vtable entries. Setting this nonzero tells the compiler to use > function descriptors instead. The value of this macro says how > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi > index 795e492..fa107de 100644 > --- a/gcc/doc/tm.texi > +++ b/gcc/doc/tm.texi > @@ -3419,6 +3419,15 @@ GCC computed the default from the values of the above > macros and you will > normally not need to override that default. > @end defmac > > +@defmac STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (@var{residual}) > +Some targets make optimistic assumptions about the state of stack > +probing when they emit their prologues. On such targets a probe into > +the end of any dynamically allocated space is likely required for > +safety against stack clash style attacks. Define this macro > +to return nonzero if such a probe is required or zero otherwise. You > +need not define this macro if it would always have the value zero. > +@end defmac > + > @need 2000 > @node Frame Registers > @subsection Registers That Address the Stack Frame > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in > index 98f2e6b..826bf04 100644 > --- a/gcc/doc/tm.texi.in > +++ b/gcc/doc/tm.texi.in > @@ -2999,6 +2999,15 @@ GCC computed the default from the values of the above > macros and you will > normally not need to override that default. > @end defmac > > +@defmac STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (@var{residual}) > +Some targets make optimistic assumptions about the state of stack > +probing when they emit their prologues. On such targets a probe into > +the end of any dynamically allocated space is likely required for > +safety against stack clash style attacks. Define this macro > +to return nonzero if such a probe is required or zero otherwise. 
You > +need not define this macro if it would always have the value zero. > +@end defmac > + > @need 2000 > @node Frame Registers > @subsection Registers That Address the Stack Frame > diff --git a/gcc/explow.c b/gcc/explow.c > index 50074e2..73018ec 100644 > --- a/gcc/explow.c > +++ b/gcc/explow.c > @@ -40,8 +40,10 @@ along with GCC; see the file COPYING3. If not see > #include "expr.h" > #include "common/common-target.h" > #include "output.h" > +#include "params.h" > > static rtx break_out_memory_refs (rtx); > +static void anti_adjust_stack_and_probe_stack_clash (rtx); > > > /* Truncate and perhaps sign-extend C as appropriate for MODE. */ > @@ -1272,6 +1274,29 @@ get_dynamic_stack_size (rtx *psize, unsigned > size_align, > *psize = size; > } > > +/* Return the number of bytes to "protect" on the stack for -fstack-check. > + > + "protect" in the context of -fstack-check means how many bytes we > + should always ensure are available on the stack. More importantly > + this is how many bytes are skipped when probing the stack. > + > + On some targets we want to reuse the -fstack-check prologue support > + to give a degree of protection against stack clashing style attacks. > + > + In that scenario we do not want to skip bytes before probing as that > + would render the stack clash protections useless. > + > + So we never use STACK_CHECK_PROTECT directly. Instead we indirect through > + this helper which allows us to provide different values for > + -fstack-check and -fstack-clash-protection. */ > +HOST_WIDE_INT > +get_stack_check_protect (void) > +{ > + if (flag_stack_clash_protection) > + return 0; > + return STACK_CHECK_PROTECT; > +} > + > /* Return an rtx representing the address of an area of memory dynamically > pushed on the stack. 
> > @@ -1430,7 +1455,7 @@ allocate_dynamic_stack_space (rtx size, unsigned > size_align, > probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE, > size); > else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) > - probe_stack_range (STACK_CHECK_PROTECT, size); > + probe_stack_range (get_stack_check_protect (), size); > > /* Don't let anti_adjust_stack emit notes. */ > suppress_reg_args_size = true; > @@ -1483,6 +1508,8 @@ allocate_dynamic_stack_space (rtx size, unsigned > size_align, > > if (flag_stack_check && STACK_CHECK_MOVING_SP) > anti_adjust_stack_and_probe (size, false); > + else if (flag_stack_clash_protection) > + anti_adjust_stack_and_probe_stack_clash (size); > else > anti_adjust_stack (size); > > @@ -1758,6 +1785,219 @@ probe_stack_range (HOST_WIDE_INT first, rtx size) > emit_insn (gen_blockage ()); > } > > +/* Compute parameters for stack clash probing a dynamic stack > + allocation of SIZE bytes. > + > + We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL. > + > + Additionally we conditionally dump the type of probing that will > + be needed given the values computed. */ > + > +void > +compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr, > + rtx *residual, > + HOST_WIDE_INT *probe_interval, > + rtx size) > +{ > + /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL */ > + *probe_interval > + = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); > + *rounded_size = simplify_gen_binary (AND, Pmode, size, > + GEN_INT (-*probe_interval)); > + > + /* Compute the value of the stack pointer for the last iteration. > + It's just SP + ROUNDED_SIZE. */ > + rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX); > + *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode, > + stack_pointer_rtx, > + rounded_size_op), > + NULL_RTX); > + > + /* Compute any residuals not allocated by the loop above. Residuals > + are just the SIZE - ROUNDED_SIZE. 
*/ > + *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size); > + > + /* Dump key information to make writing tests easy. */ > + if (dump_file) > + { > + if (*rounded_size == CONST0_RTX (Pmode)) > + fprintf (dump_file, > + "Stack clash skipped dynamic allocation and probing > loop.\n"); > + else if (GET_CODE (*rounded_size) == CONST_INT > + && INTVAL (*rounded_size) <= 4 * *probe_interval) > + fprintf (dump_file, > + "Stack clash dynamic allocation and probing inline.\n"); > + else if (GET_CODE (*rounded_size) == CONST_INT) > + fprintf (dump_file, > + "Stack clash dynamic allocation and probing in " > + "rotated loop.\n"); > + else > + fprintf (dump_file, > + "Stack clash dynamic allocation and probing in loop.\n"); > + > + if (*residual != CONST0_RTX (Pmode)) > + fprintf (dump_file, > + "Stack clash dynamic allocation and probing residuals.\n"); > + else > + fprintf (dump_file, > + "Stack clash skipped dynamic allocation and " > + "probing residuals.\n"); > + } > +} > + > +/* Emit the start of an allocate/probe loop for stack > + clash protection. > + > + LOOP_LAB and END_LAB are returned for use when we emit the > + end of the loop. > + > + LAST addr is the value for SP which stops the loop. */ > +void > +emit_stack_clash_protection_probe_loop_start (rtx *loop_lab, > + rtx *end_lab, > + rtx last_addr, > + bool rotated) > +{ > + /* Essentially we want to emit any setup code, the top of loop > + label and the comparison at the top of the loop. */ > + *loop_lab = gen_label_rtx (); > + *end_lab = gen_label_rtx (); > + > + emit_label (*loop_lab); > + if (!rotated) > + emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX, > + Pmode, 1, *end_lab); > +} > + > +/* Emit the end of a stack clash probing loop. > + > + This consists of just the jump back to LOOP_LAB and > + emitting END_LOOP after the loop. 
*/ > + > +void > +emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop, > + rtx last_addr, bool rotated) > +{ > + if (rotated) > + emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX, > + Pmode, 1, loop_lab); > + else > + emit_jump (loop_lab); > + > + emit_label (end_loop); > + > +} > + > +/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) > + while probing it. This pushes when SIZE is positive. SIZE need not > + be constant. > + > + This is subtly different than anti_adjust_stack_and_probe to try and > + prevent stack-clash attacks > + > + 1. It must assume no knowledge of the probing state, any allocation > + must probe. > + > + Consider the case of a 1 byte alloca in a loop. If the sum of the > + allocations is large, then this could be used to jump the guard if > + probes were not emitted. > + > + 2. It never skips probes, whereas anti_adjust_stack_and_probe will > + skip probes on the first couple PROBE_INTERVALs on the assumption > + they're done elsewhere. > + > + 3. It only allocates and probes SIZE bytes, it does not need to > + allocate/probe beyond that because this probing style does not > + guarantee signal handling capability if the guard is hit. */ > + > +static void > +anti_adjust_stack_and_probe_stack_clash (rtx size) > +{ > + /* First ensure SIZE is Pmode. */ > + if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode) > + size = convert_to_mode (Pmode, size, 1); > + > + /* We can get here with a constant size on some targets. 
*/ > + rtx rounded_size, last_addr, residual; > + HOST_WIDE_INT probe_interval; > + compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, > + &residual, &probe_interval, size); > + > + if (rounded_size != CONST0_RTX (Pmode)) > + { > + if (INTVAL (rounded_size) <= 4 * probe_interval) > + { > + for (HOST_WIDE_INT i = 0; > + i < INTVAL (rounded_size); > + i += probe_interval) > + { > + anti_adjust_stack (GEN_INT (probe_interval)); > + > + /* The prologue does not probe residuals. Thus the offset > + here to probe just beyond what the prologue had already > + allocated. */ > + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, > + (probe_interval > + - GET_MODE_SIZE > (word_mode)))); > + emit_insn (gen_blockage ()); > + } > + } > + else > + { > + rtx loop_lab, end_loop; > + bool rotate_loop = GET_CODE (rounded_size) == CONST_INT; > + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, > + last_addr, > rotate_loop); > + > + anti_adjust_stack (GEN_INT (probe_interval)); > + > + /* The prologue does not probe residuals. Thus the offset here > + to probe just beyond what the prologue had already allocated. */ > + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, > + (probe_interval > + - GET_MODE_SIZE (word_mode)))); > + > + emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, > + last_addr, rotate_loop); > + emit_insn (gen_blockage ()); > + } > + } > + > + if (residual != CONST0_RTX (Pmode)) > + { > + rtx x = force_reg (Pmode, plus_constant (Pmode, residual, > + -GET_MODE_SIZE (word_mode))); > + anti_adjust_stack (residual); > + emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x)); > + emit_insn (gen_blockage ()); > + } > + > + /* Some targets make optimistic assumptions in their prologues about > + how the caller may have probed the stack. Make sure we honor > + those assumptions when needed. 
*/ > + if (size != CONST0_RTX (Pmode) > + && STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (residual)) > + { > + /* Ideally we would just probe at *sp. However, if SIZE is not > + a compile-time constant, but is zero at runtime, then *sp > + might hold live data. So probe at *sp if we know that > + an allocation was made, otherwise probe into the red zone > + which is obviously undesirable. */ > + if (GET_CODE (size) == CONST_INT) > + { > + emit_stack_probe (stack_pointer_rtx); > + emit_insn (gen_blockage ()); > + } > + else > + { > + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, > + -GET_MODE_SIZE (word_mode))); > + emit_insn (gen_blockage ()); > + } > + } > +} > + > + > /* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) > while probing it. This pushes when SIZE is positive. SIZE need not > be constant. If ADJUST_BACK is true, adjust back the stack pointer > diff --git a/gcc/explow.h b/gcc/explow.h > index 217a322..b85c051 100644 > --- a/gcc/explow.h > +++ b/gcc/explow.h > @@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx); > /* Add some bytes to the stack while probing it. An rtx says how many. */ > extern void anti_adjust_stack_and_probe (rtx, bool); > > +/* Support for building allocation/probing loops for stack-clash > + protection of dynamically allocated stack space. */ > +extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *, > + HOST_WIDE_INT *, rtx); > +extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *, > + rtx, bool); > +extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx, > + rtx, bool); > + > /* This enum is used for the following two functions. 
*/ > enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL}; > > diff --git a/gcc/rtl.h b/gcc/rtl.h > index 59da995..24240fc 100644 > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -2703,6 +2703,7 @@ get_full_set_src_cost (rtx x, machine_mode mode, struct > full_rtx_costs *c) > /* In explow.c */ > extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode); > extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false); > +extern HOST_WIDE_INT get_stack_check_protect (void); > > /* In rtl.c */ > extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL); > diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c > b/gcc/testsuite/gcc.dg/stack-check-3.c > new file mode 100644 > index 0000000..58fb656 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/stack-check-3.c > @@ -0,0 +1,86 @@ > +/* The goal here is to ensure that dynamic allocations via vlas or > + alloca calls receive probing. > + > + Scanning the RTL or assembly code seems like insanity here as does > + checking for particular allocation sizes and probe offsets. For > + now we just verify that there's an allocation + probe loop and > + residual allocation + probe for f?. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand > -fno-optimize-sibling-calls --param > stack-clash-protection-probe-interval=4096 --param > stack-clash-protection-guard-size=4096" } */ > +/* { dg-require-effective-target supports_stack_clash_protection } */ > + > +__attribute__((noinline, noclone)) void > +foo (char *p) > +{ > + asm volatile ("" : : "r" (p) : "memory"); > +} > + > +/* Simple VLA, no other locals. */ > +__attribute__((noinline, noclone)) void > +f0 (int x) > +{ > + char vla[x]; > + foo (vla); > +} > + > +/* Simple VLA, small local frame. */ > +__attribute__((noinline, noclone)) void > +f1 (int x) > +{ > + char locals[128]; > + char vla[x]; > + foo (vla); > +} > + > +/* Small constant alloca, no other locals. 
*/ > +__attribute__((noinline, noclone)) void > +f2 (int x) > +{ > + char *vla = __builtin_alloca (128); > + foo (vla); > +} > + > +/* Big constant alloca, small local frame. */ > +__attribute__((noinline, noclone)) void > +f3 (int x) > +{ > + char locals[128]; > + char *vla = __builtin_alloca (16384); > + foo (vla); > +} > + > +/* Big constant alloca, small local frame. */ > +__attribute__((noinline, noclone)) void > +f3a (int x) > +{ > + char locals[128]; > + char *vla = __builtin_alloca (32768); > + foo (vla); > +} > + > +/* Nonconstant alloca, no other locals. */ > +__attribute__((noinline, noclone)) void > +f4 (int x) > +{ > + char *vla = __builtin_alloca (x); > + foo (vla); > +} > + > +/* Nonconstant alloca, small local frame. */ > +__attribute__((noinline, noclone)) void > +f5 (int x) > +{ > + char locals[128]; > + char *vla = __builtin_alloca (x); > + foo (vla); > +} > + > +/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 7 > "expand" } } */ > + > + > +/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 7 > "expand" { target callee_realigns_stack } } } */ > +/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 4 > "expand" { target { ! callee_realigns_stack } } } } */ > +/* { dg-final { scan-rtl-dump-times "allocation and probing in rotated loop" > 1 "expand" { target { ! callee_realigns_stack } } } } */ > +/* { dg-final { scan-rtl-dump-times "allocation and probing inline" 1 > "expand" { target { ! callee_realigns_stack } } } } */ > +/* { dg-final { scan-rtl-dump-times "skipped dynamic allocation and probing > loop" 1 "expand" { target { ! callee_realigns_stack } } } } */ >