[PATCH][RFA/RFC] Stack clash mitigation patch 08/08 - V3

2017-07-30 Thread Jeff Law

These are the s390 patches for stack clash mitigation.

Changes since V2:

Incorporate changes from IBM to address missing functionality.

Hopefully this no longer regresses Ada, as we no longer define
STACK_CHECK_STATIC_BUILTIN.

Jeff

* config/s390/s390.c (MIN_UNROLL_PROBES): Define.
(allocate_stack_space): New function, partially extracted from
s390_emit_prologue.
(s390_emit_prologue): Track offset to most recent stack probe.
Code to allocate space moved into allocate_stack_space.
Dump actions when no stack is allocated.
(s390_prologue_plus_offset): New function.
(s390_emit_stack_probe): Likewise.


testsuite/

* gcc.dg/stack-check-5.c:  Add argument for s390.

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 958ee3b..7d2481e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10999,6 +10999,178 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
+   - push too big immediates to the literal pool and annotate the refs
+   - emit frame related notes for stack pointer changes.  */
+
+static rtx
+s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
+{
+  rtx insn;
+  rtx orig_offset = offset;
+
+  gcc_assert (REG_P (target));
+  gcc_assert (REG_P (reg));
+  gcc_assert (CONST_INT_P (offset));
+
+  if (offset == const0_rtx)   /* lr/lgr */
+{
+  insn = emit_move_insn (target, reg);
+}
+  else if (DISP_IN_RANGE (INTVAL (offset)))   /* la */
+{
+  insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
+  offset));
+}
+  else
+{
+  if (!satisfies_constraint_K (offset)/* ahi/aghi */
+ && (!TARGET_EXTIMM
+ || (!satisfies_constraint_Op (offset)   /* alfi/algfi */
+ && !satisfies_constraint_On (offset))))  /* slfi/slgfi */
+   offset = force_const_mem (Pmode, offset);
+
+  if (target != reg)
+   {
+ insn = emit_move_insn (target, reg);
+ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+   }
+
+  insn = emit_insn (gen_add2_insn (target, offset));
+
+  if (!CONST_INT_P (offset))
+   {
+ annotate_constant_pool_refs (&PATTERN (insn));
+
+ if (frame_related_p)
+   add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (target,
+  gen_rtx_PLUS (Pmode, target,
+orig_offset)));
+   }
+}
+
+  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+
+  return insn;
+}
+
+/* Emit a compare instruction with a volatile memory access as stack
+   probe.  It does not waste store tags and does not clobber any
+   registers apart from the condition code.  */
+static void
+s390_emit_stack_probe (rtx addr)
+{
+  rtx tmp = gen_rtx_MEM (Pmode, addr);
+  MEM_VOLATILE_P (tmp) = 1;
+  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
+}
+
+/* Use a runtime loop if we have to emit more probes than this.  */
+#define MIN_UNROLL_PROBES 3
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative.
+
+   The return value is true if TEMP_REG has been clobbered.  */
+static bool
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+ rtx temp_reg)
+{
+  bool temp_reg_clobbered_p = false;
+  HOST_WIDE_INT probe_interval
+= PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+  HOST_WIDE_INT guard_size
+= PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+
+  if (flag_stack_clash_protection)
+{
+  if (last_probe_offset + -INTVAL (size) < guard_size)
+   dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else
+   {
+ rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
+ HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
+ HOST_WIDE_INT num_probes = rounded_size / probe_interval;
+ HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+
+ if (num_probes < MIN_UNROLL_PROBES)
+   {
+ /* Emit unrolled probe statements.  */
+
+ for (unsigned int i = 0; i < num_probes; i++)
+   {
+ s390_prologue_plus_offset (stack_pointer_rtx,
+stack_pointer_rtx,
+GEN_INT (-probe_interval), true);
+ s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
+  stack_pointer_rtx,
+  offset));
+   }
+ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);

Re: [PATCH][RFA/RFC] Stack clash mitigation patch 08/08 V2

2017-07-23 Thread Jeff Law
On 07/21/2017 07:23 AM, Andreas Krebbel wrote:
> Hi,
> 
> I've used your patch as the base and applied my changes on top.  The
> attached patch is the result, so it is supposed to replace your
> version.  It now also supports emitting a runtime loop.
> 
> It bootstraps fine but unfortunately I see an Ada regression which I
> haven't tracked down yet.
> 
>> FAIL: cb1010a
>> FAIL: gnat.dg/stack_check1.adb execution test
>> FAIL: gnat.dg/stack_check1.adb execution test
Ugh.  The s390 I'm using doesn't have Ada installed, nor is there a
usable version I can reasonably install.

I'm still reasonably confident this regression is a result of my
defining STACK_CHECK_STATIC_BUILTIN, which was needed in earlier versions
of the kit but shouldn't be needed anymore.

Your patches are definitely a step forward.   Thanks again!

jeff



Re: [PATCH][RFA/RFC] Stack clash mitigation patch 08/08 V2

2017-07-21 Thread Jeff Law
On 07/21/2017 07:23 AM, Andreas Krebbel wrote:
> Hi,
> 
> I've used your patch as the base and applied my changes on top.  The
> attached patch is the result, so it is supposed to replace your
> version.  It now also supports emitting a runtime loop.
Thanks a ton!  I'll roll your changes into the V3 patch.

> 
> It bootstraps fine but unfortunately I see an Ada regression which I
> haven't tracked down yet.
> 
>> FAIL: cb1010a
>> FAIL: gnat.dg/stack_check1.adb execution test
>> FAIL: gnat.dg/stack_check1.adb execution test
FWIW, due to time constraints I haven't been testing Ada.  An educated
guess is that this is related to the #define STACK_CHECK_STATIC_BUILTIN,
which was never right for s390 but was useful temporarily.  I bet
pulling that out should fix the Ada regression.  One of the design goals
of this work is that we're not supposed to affect Ada code generation at all.

There's one slight tweak I think we'll want to make as part of the V3
patch I'm about ready to post.  Specifically, I have introduced PARAMs to
control the size of the guard and the size of the probe interval.

The size of the guard affects whether we're going to need static probes.  For
s390 I think we just replace PROBE_INTERVAL with the PARAM_VALUE for
the guard size when we initialize last_probe_offset.

The remaining PROBE_INTERVAL uses turn into the PARAM_VALUE for the
probe interval size.

I can cobble those together easily, I think.
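To make that concrete, the substitution would look roughly like the sketch
below.  The PARAM names are the ones the V3 patch above ends up using; the
surrounding code (in particular the initialization of last_probe_offset and
the placeholder "alloc") is purely illustrative, not the actual change.

  /* Read the new --param values once; both are byte counts here,
     mirroring how the V3 hunk above consumes them.  */
  HOST_WIDE_INT probe_interval
    = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
  HOST_WIDE_INT guard_size
    = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);

  /* The guard size takes over PROBE_INTERVAL's old role in the
     conservative initialization of the distance to the last probe...  */
  HOST_WIDE_INT last_probe_offset = guard_size;

  /* ...while the probe interval takes over everywhere an allocation
     gets carved into probed chunks.  */
  HOST_WIDE_INT rounded_size = alloc & -probe_interval;
  HOST_WIDE_INT num_probes = rounded_size / probe_interval;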

Thanks again!

Jeff


Re: [PATCH][RFA/RFC] Stack clash mitigation patch 08/08 V2

2017-07-21 Thread Andreas Krebbel
Hi,

I've used your patch as the base and applied my changes on top.  The
attached patch is the result, so it is supposed to replace your
version.  It now also supports emitting a runtime loop.

It bootstraps fine but unfortunately I see an Ada regression which I
haven't tracked down yet.

> FAIL: cb1010a
> FAIL: gnat.dg/stack_check1.adb execution test
> FAIL: gnat.dg/stack_check1.adb execution test

Bye,

-Andreas-

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index bbae89b..796ca76 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -11040,6 +11040,179 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
+   - push too big immediates to the literal pool and annotate the refs
+   - emit frame related notes for stack pointer changes.  */
+
+static rtx
+s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
+{
+  rtx insn;
+  rtx orig_offset = offset;
+
+  gcc_assert (REG_P (target));
+  gcc_assert (REG_P (reg));
+  gcc_assert (CONST_INT_P (offset));
+
+  if (offset == const0_rtx)   /* lr/lgr */
+{
+  insn = emit_move_insn (target, reg);
+}
+  else if (DISP_IN_RANGE (INTVAL (offset)))   /* la */
+{
+  insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
+  offset));
+}
+  else
+{
+  if (!satisfies_constraint_K (offset)/* ahi/aghi */
+ && (!TARGET_EXTIMM
+ || (!satisfies_constraint_Op (offset)   /* alfi/algfi */
+ && !satisfies_constraint_On (offset))))  /* slfi/slgfi */
+   offset = force_const_mem (Pmode, offset);
+
+  if (target != reg)
+   {
+ insn = emit_move_insn (target, reg);
+ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+   }
+
+  insn = emit_insn (gen_add2_insn (target, offset));
+
+  if (!CONST_INT_P (offset))
+   {
+ annotate_constant_pool_refs (&PATTERN (insn));
+
+ if (frame_related_p)
+   add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (target,
+  gen_rtx_PLUS (Pmode, target,
+orig_offset)));
+   }
+}
+
+  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+
+  return insn;
+}
+
+/* Emit a compare instruction with a volatile memory access as stack
+   probe.  It does not waste store tags and does not clobber any
+   registers apart from the condition code.  */
+static void
+s390_emit_stack_probe (rtx addr)
+{
+  rtx tmp = gen_rtx_MEM (Pmode, addr);
+  MEM_VOLATILE_P (tmp) = 1;
+  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+#if (PROBE_INTERVAL - 4 > 4095)
+#error "S/390: stack probe offset must fit into short discplacement."
+#endif
+
+/* Use a runtime loop if we have to emit more probes than this.  */
+#define MIN_UNROLL_PROBES 3
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative.
+
+   The return value is true if TEMP_REG has been clobbered.  */
+static bool
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+ rtx temp_reg)
+{
+  bool temp_reg_clobbered_p = false;
+
+  if (flag_stack_clash_protection)
+{
+  if (last_probe_offset + -INTVAL (size) < PROBE_INTERVAL)
+   dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else
+   {
+ rtx offset = GEN_INT (PROBE_INTERVAL - UNITS_PER_LONG);
+ HOST_WIDE_INT rounded_size = -INTVAL (size) & -PROBE_INTERVAL;
+ HOST_WIDE_INT num_probes = rounded_size / PROBE_INTERVAL;
+ HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+
+ if (num_probes < MIN_UNROLL_PROBES)
+   {
+ /* Emit unrolled probe statements.  */
+
+ for (unsigned int i = 0; i < num_probes; i++)
+   {
+ s390_prologue_plus_offset (stack_pointer_rtx,
+stack_pointer_rtx,
+GEN_INT (-PROBE_INTERVAL), true);
+ s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
+  stack_pointer_rtx,
+  offset));
+   }
+ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
+   }
+ else
+   {
+ /* Emit a loop probing the pages.  */
+
+ rtx_code_label *loop_start_label = gen_label_rtx ();
+
+ /* From now on temp_reg will be the CFA register.  */
+ s390_prologue_plus_offset (temp_reg, st

[PATCH][RFA/RFC] Stack clash mitigation patch 08/08 V2

2017-07-18 Thread Jeff Law
I don't think this patch has changed in any significant way since the V1
patch.

I have tested a slightly different version which punts stack clash
protection for very large static stack frames -- otherwise tests which
have *huge* frames will timeout, run out of memory during compilation, etc.

--
s390's most interesting property is that the caller allocates space for
the callee to save registers into.

So we start with a very conservative assumption about the offset between
SP and the most recent stack probe.  As we encounter those register
saves, we may be able to decrease that offset.  And, as on aarch64, the
offset increases as we allocate space.  If the offset crosses
PROBE_INTERVAL, we must emit probes.
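Roughly, the bookkeeping being described is the following (a sketch only;
the variable names are made up for illustration and this is not code from
the patch):

  /* Start pessimistically: assume the most recent probe -- a write done
     by the caller -- could be a full probe interval above the incoming
     stack pointer.  */
  HOST_WIDE_INT last_probe_offset = PROBE_INTERVAL;

  /* Every register save into the caller-allocated save area acts as an
     implicit probe at SAVE_OFFSET from SP, so the tracked distance can
     only shrink.  */
  if (save_offset < last_probe_offset)
    last_probe_offset = save_offset;

  /* Allocating ALLOC bytes moves SP away from the last probed word; once
     the distance would reach PROBE_INTERVAL, explicit probes have to be
     emitted (that is what allocate_stack_space does in the patch).  */
  if (last_probe_offset + alloc >= PROBE_INTERVAL)
    /* emit probes as part of the allocation */;
  else
    last_probe_offset += alloc;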

For large frames, I did not implement an allocate/probe in a loop.
Someone with a better understanding of the architecture is better suited
for that work.  I'll note that you're going to need another scratch
register.  This is the cause of the xfail of one test which expects to
see a prologue allocate/probe loop.

s390 has a -mbackchain option.  I'm not sure where it's used, but we do
try to handle it in the initial offset computation.  However, we don't
handle it in the actual allocations that occur when
-fstack-clash-protection is enabled.

Other than the xfail noted above, the s390 port uses the same tests as the
x86, ppc and aarch64 ports.

I suspect we're going to need further iteration here.

* config/s390/s390.c (PROBE_INTERVAL): Define.
(allocate_stack_space): New function, partially extracted from
s390_emit_prologue.
(s390_emit_prologue): Track offset to most recent stack probe.
Code to allocate space moved into allocate_stack_space.
Dump actions when no stack is allocated.

testsuite/

* gcc.dg/stack-check-6.c: xfail for s390*-*-*.

commit 0d2fdca4d86238f2fc095c7d91013e927c6ecf0c
Author: Jeff Law 
Date:   Fri Jul 7 17:25:35 2017 +

S390 implementation

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 958ee3b..7d4020c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10999,6 +10999,107 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative. 
+
+   TEMP_REG_IS_LIVE indicates that TEMP_REG actually holds a live
+   value and must be restored if we clobber it.  */
+static void
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+ rtx temp_reg, bool temp_reg_is_live)
+{
+  rtx insn;
+
+  /* If we are emitting stack probes and a SIZE allocation would cross
+ the PROBE_INTERVAL boundary, then we need significantly different
+ sequences to allocate and probe the stack.  */
+  if (flag_stack_clash_protection
+  && last_probe_offset + -INTVAL (size) < PROBE_INTERVAL)
+dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else if (flag_stack_clash_protection
+  && last_probe_offset + -INTVAL (size) >= PROBE_INTERVAL)
+{
+  rtx memref;
+
+  HOST_WIDE_INT rounded_size = -INTVAL (size) & -PROBE_INTERVAL;
+
+  emit_move_insn (temp_reg, GEN_INT (PROBE_INTERVAL - 8));
+
+  /* We really should have a runtime loop version as well.  */
+  for (unsigned int i = 0; i < rounded_size; i += PROBE_INTERVAL)
+   {
+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+  GEN_INT (-PROBE_INTERVAL)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* We just allocated PROBE_INTERVAL bytes of stack space.  Thus,
+a probe is mandatory here, but LAST_PROBE_OFFSET does not
+change.  */
+ memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+stack_pointer_rtx));
+ MEM_VOLATILE_P (memref) = 1;
+ emit_move_insn (memref, temp_reg);
+   }
+
+  /* Handle any residual allocation request.  */
+  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+  GEN_INT (-residual)));
+  RTX_FRAME_RELATED_P (insn) = 1;
+  last_probe_offset += residual;
+  if (last_probe_offset >= PROBE_INTERVAL)
+   {
+ emit_move_insn (temp_reg, GEN_INT (residual
+- GET_MODE_SIZE (word_mode)));
+ memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+stack_pointer_rtx));
+ MEM_VOLATILE_P (memref) = 1;
+ emit_move_insn (memref, temp_reg);
+   }
+
+  /* We clobbered TEMP_REG, but it really isn't a temporary at this point;
+restore its value.  */
+  if (temp_reg_is_live

Re: [PATCH][RFA/RFC] Stack clash mitigation patch 08/08

2017-07-14 Thread Jeff Law
On 07/14/2017 08:29 AM, Andreas Krebbel wrote:
> On 07/11/2017 11:21 PM, Jeff Law wrote:
>> This patch adds s390 support for stack-clash mitigation.
>>
>> s390's most interesting property is that the caller allocates space for
>> the callee to save registers into.
>>
>> So much like aarch64, we start with a very conservative assumption about
>> the offset between SP and the most recent stack probe.  As we encounter
>> those register saves we may be able to decrease that offset.  And like
>> aarch64 as we allocate space, the offset increases.  If the offset
>> crosses PROBE_INTERVAL, we must emit probes.
>>
>> Because the register saves hit the caller's frame s390 in some ways
>> generates code more like x86/ppc.  Though if there aren't any register
>> saves, then the resulting code looks more like aarch64.
>>
>> For large frames, I did not implement an allocate/probe in a loop.
>> Someone with a better understanding of the architecture is better suited
>> for that work.  I'll note that you're going to need another scratch
>> register :-)  This is the cause of the xfail of one test which expects
>> to see a prologue allocate/probe loop.
>>
>> s390 has a -mbackchain option.  I'm not sure where it's used, but we do
>> try to handle it in the initial offset computation.   However, we don't
>> handle it in the actual allocations that occur when -fstack-check=clash
>> is enabled.
>>
>> s390 does not have a -fstack-check=specific implementation.  I have not
>> tried to add one.  But I have defined STACK_CHECK_STATIC_BUILTIN.  I
>> haven't investigated what side effects that might have.
>>
>> Other than the xfail noted above, the s390 uses the same tests as the
>> x86, ppc and aarch64 ports.
>>
>> I suspect we're going to need further iteration here.
>>
>> Thoughts/Comments?
> 
> I'll have a look and run some tests to come back with an answer next week.
Thanks.  That'd be greatly appreciated.

Jeff


Re: [PATCH][RFA/RFC] Stack clash mitigation patch 08/08

2017-07-14 Thread Andreas Krebbel
On 07/11/2017 11:21 PM, Jeff Law wrote:
> This patch adds s390 support for stack-clash mitigation.
> 
> s390's most interesting property is that the caller allocates space for
> the callee to save registers into.
> 
> So much like aarch64, we start with a very conservative assumption about
> the offset between SP and the most recent stack probe.  As we encounter
> those register saves we may be able to decrease that offset.  And like
> aarch64 as we allocate space, the offset increases.  If the offset
> crosses PROBE_INTERVAL, we must emit probes.
> 
> Because the register saves hit the caller's frame s390 in some ways
> generates code more like x86/ppc.  Though if there aren't any register
> saves, then the resulting code looks more like aarch64.
> 
> For large frames, I did not implement an allocate/probe in a loop.
> Someone with a better understanding of the architecture is better suited
> for that work.  I'll note that you're going to need another scratch
> register :-)  This is the cause of the xfail of one test which expects
> to see a prologue allocate/probe loop.
> 
> s390 has a -mbackchain option.  I'm not sure where it's used, but we do
> try to handle it in the initial offset computation.   However, we don't
> handle it in the actual allocations that occur when -fstack-check=clash
> is enabled.
> 
> s390 does not have a -fstack-check=specific implementation.  I have not
> tried to add one.  But I have defined STACK_CHECK_STATIC_BUILTIN.  I
> haven't investigated what side effects that might have.
> 
> Other than the xfail noted above, the s390 uses the same tests as the
> x86, ppc and aarch64 ports.
> 
> I suspect we're going to need further iteration here.
> 
> Thoughts/Comments?

I'll have a look and run some tests to come back with an answer next week.

Bye,

-Andreas-



[PATCH][RFA/RFC] Stack clash mitigation patch 08/08

2017-07-11 Thread Jeff Law
This patch adds s390 support for stack-clash mitigation.

s390's most interesting property is that the caller allocates space for
the callee to save registers into.

So much like aarch64, we start with a very conservative assumption about
the offset between SP and the most recent stack probe.  As we encounter
those register saves we may be able to decrease that offset.  And like
aarch64 as we allocate space, the offset increases.  If the offset
crosses PROBE_INTERVAL, we must emit probes.

Because the register saves hit the caller's frame, s390 in some ways
generates code more like x86/ppc.  Though if there aren't any register
saves, the resulting code looks more like aarch64.

For large frames, I did not implement an allocate/probe in a loop.
Someone with a better understanding of the architecture is better suited
for that work.  I'll note that you're going to need another scratch
register :-)  This is the cause of the xfail of one test which expects
to see a prologue allocate/probe loop.

s390 has a -mbackchain option.  I'm not sure where it's used, but we do
try to handle it in the initial offset computation.   However, we don't
handle it in the actual allocations that occur when -fstack-check=clash
is enabled.

s390 does not have a -fstack-check=specific implementation.  I have not
tried to add one.  But I have defined STACK_CHECK_STATIC_BUILTIN.  I
haven't investigated what side effects that might have.

Other than the xfail noted above, the s390 uses the same tests as the
x86, ppc and aarch64 ports.

I suspect we're going to need further iteration here.

Thoughts/Comments?

Jeff


* config/s390/s390.c (PROBE_INTERVAL): Define.
(allocate_stack_space): New function, partially extracted from
s390_emit_prologue.
(s390_emit_prologue): Track offset to most recent stack probe.
Code to allocate space moved into allocate_stack_space.
Dump actions when no stack is allocated.
* config/s390/s390.h (STACK_CHECK_STATIC_BUILTIN): Define.

testsuite/

* gcc.dg/stack-check-6.c: xfail for s390*-*-*.

commit 56523059d48f55991e7607dbde248f2aabe3e7e3
Author: Jeff Law 
Date:   Fri Jul 7 17:25:35 2017 +

S390 implementation

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 958ee3b..cddb393 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10999,6 +10999,107 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative. 
+
+   TMP_REG_IS_LIVE indicates that TEMP_REG actually holds a live
+   value and must be restored if we clobber it.  */
+static void
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+ rtx temp_reg, bool tmp_reg_is_live)
+{
+  rtx insn;
+
+  /* If we are emitting stack probes and a SIZE allocation would cross
+ the PROBE_INTERVAL boundary, then we need significantly different
+ sequences to allocate and probe the stack.  */
+  if (flag_stack_check == STACK_CLASH_BUILTIN_STACK_CHECK
+  && last_probe_offset + -INTVAL (size) < PROBE_INTERVAL)
+dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else if (flag_stack_check == STACK_CLASH_BUILTIN_STACK_CHECK
+  && last_probe_offset + -INTVAL (size) >= PROBE_INTERVAL)
+{
+  rtx memref;
+
+  HOST_WIDE_INT rounded_size = -INTVAL (size) & -PROBE_INTERVAL;
+
+  emit_move_insn (temp_reg, GEN_INT (PROBE_INTERVAL - 8));
+
+  /* We really should have a runtime loop version as well.  */
+  for (unsigned int i = 0; i < rounded_size; i += PROBE_INTERVAL)
+   {
+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+  GEN_INT (-PROBE_INTERVAL)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* We just allocated PROBE_INTERVAL bytes of stack space.  Thus,
+a probe is mandatory here, but LAST_PROBE_OFFSET does not
+change.  */
+ memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+stack_pointer_rtx));
+ MEM_VOLATILE_P (memref) = 1;
+ emit_move_insn (memref, temp_reg);
+   }
+
+  /* Handle any residual allocation request.  */
+  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+  GEN_INT (-residual)));
+  RTX_FRAME_RELATED_P (insn) = 1;
+  last_probe_offset += residual;
+  if (last_probe_offset >= PROBE_INTERVAL)
+   {
+ emit_move_insn (temp_reg, GEN_INT (residual
+- GET_MODE_SIZE (word_mode)));
+ memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS