Re: [ARM] Fix PR middle-end/65958

2015-12-04 Thread Eric Botcazou
> Minor nit.  Since this is used in an unspec_volatile, the name should be
> UNSPECV_ and defined in the unspecv enum.

Ouch.  It turns out that the ARM implementation has it too so I have installed 
the attached patchlet on top of the others after minimal retesting.


PR middle-end/65958
* config/arm/unspecs.md (unspec): Remove UNSPEC_PROBE_STACK_RANGE.
(unspecv): Add VUNSPEC_PROBE_STACK_RANGE.
* config/arm/arm.md (probe_stack_range): Adjust.
* config/aarch64/aarch64.md (unspec): Remove UNSPEC_PROBE_STACK_RANGE.
(unspecv): Add UNSPECV_PROBE_STACK_RANGE.
(probe_stack_range_): Adjust.

-- 
Eric Botcazou

Index: config/arm/arm.md
===
--- config/arm/arm.md	(revision 231250)
+++ config/arm/arm.md	(working copy)
@@ -8183,7 +8183,7 @@ (define_insn "probe_stack_range"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")
 			 (match_operand:SI 2 "register_operand" "r")]
-			 UNSPEC_PROBE_STACK_RANGE))]
+			 VUNSPEC_PROBE_STACK_RANGE))]
   "TARGET_32BIT"
 {
   return output_probe_stack_range (operands[0], operands[2]);
Index: config/arm/unspecs.md
===
--- config/arm/unspecs.md	(revision 231250)
+++ config/arm/unspecs.md	(working copy)
@@ -84,7 +84,6 @@ (define_c_enum "unspec" [
   UNSPEC_VRINTA ; Represent a float to integral float rounding
 ; towards nearest, ties away from zero.
   UNSPEC_PROBE_STACK; Probe stack memory reference
-  UNSPEC_PROBE_STACK_RANGE ; Probe stack range
 ])
 
 (define_c_enum "unspec" [
@@ -147,6 +146,7 @@ (define_c_enum "unspecv" [
   VUNSPEC_STL		; Represent a store-register-release.
   VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
   VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
+  VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.
 ])
 
 ;; Enumerators for NEON unspecs.
Index: config/aarch64/aarch64.md
===
--- config/aarch64/aarch64.md	(revision 231259)
+++ config/aarch64/aarch64.md	(working copy)
@@ -104,7 +104,6 @@ (define_c_enum "unspec" [
 UNSPEC_MB
 UNSPEC_NOP
 UNSPEC_PRLG_STK
-UNSPEC_PROBE_STACK_RANGE
 UNSPEC_RBIT
 UNSPEC_SISD_NEG
 UNSPEC_SISD_SSHL
@@ -139,6 +138,7 @@ (define_c_enum "unspecv" [
 UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
 UNSPECV_SET_FPSR		; Represent assign of FPSR content.
 UNSPECV_BLOCKAGE		; Represent a blockage
+UNSPECV_PROBE_STACK_RANGE	; Represent stack range probing.
   ]
 )
 
@@ -4968,7 +4968,7 @@ (define_insn "probe_stack_range_

Re: [ARM] Fix PR middle-end/65958

2015-12-04 Thread Richard Earnshaw
> + (unspec_volatile:PTR [(match_operand:PTR 1 "register_operand" "0")
> +   (match_operand:PTR 2 "register_operand" "r")]
> +UNSPEC_PROBE_STACK_RANGE))]

Minor nit.  Since this is used in an unspec_volatile, the name should be
UNSPECV_ and defined in the unspecv enum.

Otherwise OK once testing is complete.

R.


On 03/12/15 12:17, Eric Botcazou wrote:
>> I can understand this restriction, but...
>>
>>> +  /* See the same assertion on PROBE_INTERVAL above.  */
>>> +  gcc_assert ((first % 4096) == 0);
>>
>> ... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?
> 
> Because that isn't guaranteed, FIRST is related to the size of the protection 
> area while PROBE_INTERVAL is related to the page size.
> 
>> blank line between declarations and code. Also, can we come up with a
>> suitable define for 4096 here that expresses the context and then use
>> that consistently through the remainder of this function?
> 
> OK, let's use ARITH_BASE.
> 
>>> +(define_insn "probe_stack_range"
>>> +  [(set (match_operand:DI 0 "register_operand" "=r")
>>> +   (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
>>> +(match_operand:DI 2 "register_operand" "r")]
>>> +UNSPEC_PROBE_STACK_RANGE))]
>>
>> I think this should really use PTRmode, so that it's ILP32 ready (I'm
>> not going to ask you to make sure that works though, since I suspect
>> there are still other issues to resolve with ILP32 at this time).
> 
> Done.  Manually tested for now, I'll fully test it if approved.
> 
> 
> PR middle-end/65958
> * config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
> Declare.
> * config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
> UNSPEC_PROBE_STACK_RANGE.
> (blockage): New instruction.
> (probe_stack_range_): Likewise.
> * config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
> function.
> (aarch64_output_probe_stack_range): Likewise.
> (aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
> static builtin stack checking is enabled.
> * config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
> Define.
> 
> 
> pr65958-2c.diff
> 
> 
> Index: config/aarch64/aarch64-linux.h
> ===
> --- config/aarch64/aarch64-linux.h(revision 231206)
> +++ config/aarch64/aarch64-linux.h(working copy)
> @@ -88,4 +88,7 @@
>  #undef TARGET_BINDS_LOCAL_P
>  #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
>  
> +/* Define this to be nonzero if static stack checking is supported.  */
> +#define STACK_CHECK_STATIC_BUILTIN 1
> +
>  #endif  /* GCC_AARCH64_LINUX_H */
> Index: config/aarch64/aarch64-protos.h
> ===
> --- config/aarch64/aarch64-protos.h   (revision 231206)
> +++ config/aarch64/aarch64-protos.h   (working copy)
> @@ -340,6 +340,7 @@ void aarch64_asm_output_labelref (FILE *
>  void aarch64_cpu_cpp_builtins (cpp_reader *);
>  void aarch64_elf_asm_named_section (const char *, unsigned, tree);
>  const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
> +const char * aarch64_output_probe_stack_range (rtx, rtx);
>  void aarch64_err_no_fpadvsimd (machine_mode, const char *);
>  void aarch64_expand_epilogue (bool);
>  void aarch64_expand_mov_immediate (rtx, rtx);
> Index: config/aarch64/aarch64.c
> ===
> --- config/aarch64/aarch64.c  (revision 231206)
> +++ config/aarch64/aarch64.c  (working copy)
> @@ -62,6 +62,7 @@
>  #include "sched-int.h"
>  #include "cortex-a57-fma-steering.h"
>  #include "target-globals.h"
> +#include "common/common-target.h"
>  
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -2183,6 +2184,179 @@ aarch64_libgcc_cmp_return_mode (void)
>return SImode;
>  }
>  
> +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
> +
> +/* We use the 12-bit shifted immediate arithmetic instructions so values
> +   must be multiple of (1 << 12), i.e. 4096.  */
> +#define ARITH_BASE 4096
> +
> +#if (PROBE_INTERVAL % ARITH_BASE) != 0
> +#error Cannot use simple address calculation for stack probing
> +#endif
> +
> +/* The pair of scratch registers used for stack probing.  */
> +#define PROBE_STACK_FIRST_REG  9
> +#define PROBE_STACK_SECOND_REG 10
> +
> +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
> +   inclusive.  These are offsets from the current stack pointer.  */
> +
> +static void
> +aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
> +{
> +  rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
> +
> +  /* See the same assertion on PROBE_INTERVAL above.  */
> +  gcc_assert ((first % ARITH_BASE) == 0);
> +
> +  /* See if we have a constant small 

Re: [ARM] Fix PR middle-end/65958

2015-12-04 Thread Eric Botcazou
> Looks ok to me.  OK /Marcus

Thanks.  Testing was successful so I have installed it with a small change 
(s/ARITH_BASE/ARITH_FACTOR/, it's a bit more mathematically correct).

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-12-04 Thread Marcus Shawcroft
On 3 December 2015 at 12:17, Eric Botcazou  wrote:
>> I can understand this restriction, but...
>>
>> > +  /* See the same assertion on PROBE_INTERVAL above.  */
>> > +  gcc_assert ((first % 4096) == 0);
>>
>> ... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?
>
> Because that isn't guaranteed, FIRST is related to the size of the protection
> area while PROBE_INTERVAL is related to the page size.
>
>> blank line between declarations and code. Also, can we come up with a
>> suitable define for 4096 here that expresses the context and then use
>> that consistently through the remainder of this function?
>
> OK, let's use ARITH_BASE.
>
>> > +(define_insn "probe_stack_range"
>> > +  [(set (match_operand:DI 0 "register_operand" "=r")
>> > +   (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
>> > +(match_operand:DI 2 "register_operand" "r")]
>> > +UNSPEC_PROBE_STACK_RANGE))]
>>
>> I think this should really use PTRmode, so that it's ILP32 ready (I'm
>> not going to ask you to make sure that works though, since I suspect
>> there are still other issues to resolve with ILP32 at this time).
>
> Done.  Manually tested for now, I'll fully test it if approved.

Looks ok to me.  OK /Marcus


Re: [ARM] Fix PR middle-end/65958

2015-12-03 Thread Eric Botcazou
> I can understand this restriction, but...
> 
> > +  /* See the same assertion on PROBE_INTERVAL above.  */
> > +  gcc_assert ((first % 4096) == 0);
> 
> ... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?

Because that isn't guaranteed, FIRST is related to the size of the protection 
area while PROBE_INTERVAL is related to the page size.

> blank line between declarations and code. Also, can we come up with a
> suitable define for 4096 here that expresses the context and then use
> that consistently through the remainder of this function?

OK, let's use ARITH_BASE.

> > +(define_insn "probe_stack_range"
> > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > +   (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
> > +(match_operand:DI 2 "register_operand" "r")]
> > +UNSPEC_PROBE_STACK_RANGE))]
> 
> I think this should really use PTRmode, so that it's ILP32 ready (I'm
> not going to ask you to make sure that works though, since I suspect
> there are still other issues to resolve with ILP32 at this time).

Done.  Manually tested for now, I'll fully test it if approved.


PR middle-end/65958
* config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
Declare.
* config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
UNSPEC_PROBE_STACK_RANGE.
(blockage): New instruction.
(probe_stack_range_): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
function.
(aarch64_output_probe_stack_range): Likewise.
(aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
static builtin stack checking is enabled.
* config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
Define.

-- 
Eric Botcazou

Index: config/aarch64/aarch64-linux.h
===
--- config/aarch64/aarch64-linux.h	(revision 231206)
+++ config/aarch64/aarch64-linux.h	(working copy)
@@ -88,4 +88,7 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
 #endif  /* GCC_AARCH64_LINUX_H */
Index: config/aarch64/aarch64-protos.h
===
--- config/aarch64/aarch64-protos.h	(revision 231206)
+++ config/aarch64/aarch64-protos.h	(working copy)
@@ -340,6 +340,7 @@ void aarch64_asm_output_labelref (FILE *
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
+const char * aarch64_output_probe_stack_range (rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
Index: config/aarch64/aarch64.c
===
--- config/aarch64/aarch64.c	(revision 231206)
+++ config/aarch64/aarch64.c	(working copy)
@@ -62,6 +62,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "common/common-target.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2183,6 +2184,179 @@ aarch64_libgcc_cmp_return_mode (void)
   return SImode;
 }
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* We use the 12-bit shifted immediate arithmetic instructions so values
+   must be multiple of (1 << 12), i.e. 4096.  */
+#define ARITH_BASE 4096
+
+#if (PROBE_INTERVAL % ARITH_BASE) != 0
+#error Cannot use simple address calculation for stack probing
+#endif
+
+/* The pair of scratch registers used for stack probing.  */
+#define PROBE_STACK_FIRST_REG  9
+#define PROBE_STACK_SECOND_REG 10
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
+
+  /* See the same assertion on PROBE_INTERVAL above.  */
+  gcc_assert ((first % ARITH_BASE) == 0);
+
+  /* See if we have a constant small number of probes to generate.  If so,
+ that's the easy case.  */
+  if (size <= PROBE_INTERVAL)
+{
+  const HOST_WIDE_INT base = ROUND_UP (size, ARITH_BASE);
+
+  emit_set_insn (reg1,
+		 plus_constant (ptr_mode,
+stack_pointer_rtx, -(first + base)));
+  emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
+}
+
+  /* The run-time loop is made up of 8 insns in the generic case while the
+ compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
+  else if (size <= 4 * PROBE_INTERVAL)
+{
+  HOST_WIDE_INT i, rem;
+
+  emit_set_insn (reg1,
+		

Re: [ARM] Fix PR middle-end/65958

2015-12-03 Thread Richard Earnshaw
Sorry for the delay, very busy on other things these days...

On 16/11/15 20:00, Eric Botcazou wrote:
>> More comments inline.
>
> Revised version attached, which addresses all your comments and in
particular
> removes the
>
> +#if PROBE_INTERVAL > 4096
> +#error Cannot use indexed addressing mode for stack probing
> +#endif
>
> compile-time assertion.  It generates the same code for PROBE_INTERVAL
== 4096
> as before and it generates code that can be assembled for 8192.
>
> Tested on Aarch64/Linux, OK for the mainline?
>

> +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
> +
> +/* We use the 12-bit shifted immediate arithmetic instructions so values
> +   must be multiple of (1 << 12), i.e. 4096.  */
> +#if (PROBE_INTERVAL % 4096) != 0

I can understand this restriction, but...

> +  /* See the same assertion on PROBE_INTERVAL above.  */
> +  gcc_assert ((first % 4096) == 0);

... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?

> +  /* See if we have a constant small number of probes to generate.
If so,
> + that's the easy case.  */
> +  if (size <= PROBE_INTERVAL)
> +{
> +  const HOST_WIDE_INT base = ROUND_UP (size, 4096);
> +  emit_set_insn (reg1,

blank line between declarations and code. Also, can we come up with a
suitable define for 4096 here that expresses the context and then use
that consistently through the remainder of this function?

> +(define_insn "probe_stack_range"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
> +  (match_operand:DI 2 "register_operand" "r")]
> +  UNSPEC_PROBE_STACK_RANGE))]

I think this should really use PTRmode, so that it's ILP32 ready (I'm
not going to ask you to make sure that works though, since I suspect
there are still other issues to resolve with ILP32 at this time).

R.




Re: [ARM] Fix PR middle-end/65958

2015-11-24 Thread Eric Botcazou
Richard,

> Revised version attached, which addresses all your comments and in
> particular removes the
> 
> +#if PROBE_INTERVAL > 4096
> +#error Cannot use indexed addressing mode for stack probing
> +#endif
> 
> compile-time assertion.  

Any objection to me applying this revised version?  I'd like to close the PR.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-11-16 Thread Eric Botcazou
> More comments inline.

Revised version attached, which addresses all your comments and in particular 
removes the

+#if PROBE_INTERVAL > 4096
+#error Cannot use indexed addressing mode for stack probing
+#endif

compile-time assertion.  It generates the same code for PROBE_INTERVAL == 4096 
as before and it generates code that can be assembled for 8192.

Tested on Aarch64/Linux, OK for the mainline?


2015-11-16  Tristan Gingold  
Eric Botcazou  

PR middle-end/65958
* config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
Declare.
* config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
UNSPEC_PROBE_STACK_RANGE.
(blockage): New instruction.
(probe_stack_range): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
function.
(aarch64_output_probe_stack_range): Likewise.
(aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
static builtin stack checking is enabled.
* config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
Define.


2015-11-16  Eric Botcazou  

* gcc.target/aarch64/stack-checking.c: New test.

-- 
Eric Botcazou

/* { dg-do run { target { *-*-linux* } } } */
/* { dg-options "-fstack-check" } */

int main(void)
{
  char *p;
  if (1)
{
  char i[48];
  p = __builtin_alloca(8);
  p[0] = 1;
}

  if (1)
{
  char i[48], j[64];
  j[32] = 0;
}

  return !p[0];
}
Index: config/aarch64/aarch64-linux.h
===
--- config/aarch64/aarch64-linux.h	(revision 230397)
+++ config/aarch64/aarch64-linux.h	(working copy)
@@ -88,4 +88,7 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
 #endif  /* GCC_AARCH64_LINUX_H */
Index: config/aarch64/aarch64-protos.h
===
--- config/aarch64/aarch64-protos.h	(revision 230397)
+++ config/aarch64/aarch64-protos.h	(working copy)
@@ -340,6 +340,7 @@ void aarch64_asm_output_labelref (FILE *
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
+const char * aarch64_output_probe_stack_range (rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
Index: config/aarch64/aarch64.c
===
--- config/aarch64/aarch64.c	(revision 230397)
+++ config/aarch64/aarch64.c	(working copy)
@@ -62,6 +62,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "common/common-target.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2151,6 +2152,169 @@ aarch64_libgcc_cmp_return_mode (void)
   return SImode;
 }
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* We use the 12-bit shifted immediate arithmetic instructions so values
+   must be multiple of (1 << 12), i.e. 4096.  */
+#if (PROBE_INTERVAL % 4096) != 0
+#error Cannot use simple address calculation for stack probing
+#endif
+
+/* The pair of scratch registers used for stack probing.  */
+#define PROBE_STACK_FIRST_REG  9
+#define PROBE_STACK_SECOND_REG 10
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
+
+  /* See the same assertion on PROBE_INTERVAL above.  */
+  gcc_assert ((first % 4096) == 0);
+
+  /* See if we have a constant small number of probes to generate.  If so,
+ that's the easy case.  */
+  if (size <= PROBE_INTERVAL)
+{
+  const HOST_WIDE_INT base = ROUND_UP (size, 4096);
+  emit_set_insn (reg1,
+		 plus_constant (Pmode, stack_pointer_rtx,
+	   -(first + base)));
+  emit_stack_probe (plus_constant (Pmode, reg1, base - size));
+}
+
+  /* The run-time loop is made up of 8 insns in the generic case while the
+ compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
+  else if (size <= 4 * PROBE_INTERVAL)
+{
+  HOST_WIDE_INT i, rem;
+
+  emit_set_insn (reg1,
+		 plus_constant (Pmode, stack_pointer_rtx,
+	   -(first + PROBE_INTERVAL)));
+  emit_stack_probe (reg1);
+
+  /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
+	 it exceeds SIZE.  If only two probes are needed, this will not
+	 generate any code.  Then probe at FIRST + SIZE.  */
+  for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+	{
+	  emit

Re: [ARM] Fix PR middle-end/65958

2015-11-03 Thread Eric Botcazou
> Yes, but that's not usual, ARM and SPARC have it too, 4096 happens to be the
> limit of reg+off addressing mode on several architectures.

... not unusual...

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-11-03 Thread Eric Botcazou
> Unless there really is common code between the two patches, this should
> be separated out into two posts, one for ARM and one for AArch64.

The ARM bits were approved by Ramana and installed right away.

> Hmm, so if PROBE_INTERVAL != 4096 we barf!

Yes, but that's not usual, ARM and SPARC have it too, 4096 happens to be the 
limit of reg+off addressing mode on several architectures.

> While that's safe and probably right for Linux, on some OSes there might
> be a minimum page size of 16k or even 64k.  It would be nice if we could
> support that.

OK, but we cannot test anything at the moment.

> Ug!  Manifest constants should be moved to pre-defines.
> PROBE_STACK_BASE_REG?

OK.

> > +
> > +  /* The following code uses indexed address calculation on FIRST.  */
> > +  gcc_assert ((first % 4096) == 0);
> 
> where's 4096 come from?

It's the same constraint as above:

#if (PROBE_INTERVAL % 4096) != 0
#error Cannot use indexed address calculation for stack probing
#endif

to be able to use the 12-bit shifted immediate instructions. 

> More manifest constants.

Yeah, consistency first. ;-)

> This should be annotated with the sequence length.

OK, thanks for the review, I'll adjust.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-11-03 Thread Richard Earnshaw
On 06/10/15 11:11, Eric Botcazou wrote:
>> Thanks - I have no further comments on this patch. We probably need to
>> implement the same on AArch64 too in order to avoid similar problems.
> 
> Here's the implementation for aarch64, very similar but simpler since there 
> is 
> no shortage of scratch registers; the only thing to note is the new blockage 
> pattern.  This was tested on real hardware but not with Linux, instead with 
> Darwin (experimental port of the toolchain to iOS) and makes it possible to 
> pass ACATS (Ada conformance testsuite which requires stack checking).
> 
> There is also a couple of tweaks for the ARM implementation: a cosmetic one 
> for the probe_stack pattern and one for the output_probe_stack_range loop.
> 
> 
> 2015-10-06  Tristan Gingold  
> Eric Botcazou  
> 
> PR middle-end/65958
>   * config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
>   Declare.
>   * config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
>   UNSPEC_PROBE_STACK_RANGE.
>   (blockage): New instruction.
>   (probe_stack_range): Likewise.
>   * config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
>   function.
>   (aarch64_output_probe_stack_range): Likewise.
>   (aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
>   static builtin stack checking is enabled.
>   * config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
>   Define.
> 
>   * config/arm/arm.c (arm_emit_probe_stack_range): Adjust comment.
>   (output_probe_stack_range): Rotate the loop and simplify.
>   (thumb1_expand_prologue): Tweak sorry message.
>   * config/arm/arm.md (probe_stack): Use bare string.
> 
> 
> 2015-10-06  Eric Botcazou  
> 
> * gcc.target/aarch64/stack-checking.c: New test.
> 

Unless there really is common code between the two patches, this should
be separated out into two posts, one for ARM and one for AArch64.

More comments inline.

> 
> pr65958-2.diff
> 
> 
> Index: config/aarch64/aarch64-linux.h
> ===
> --- config/aarch64/aarch64-linux.h(revision 228512)
> +++ config/aarch64/aarch64-linux.h(working copy)
> @@ -88,4 +88,7 @@
>  #undef TARGET_BINDS_LOCAL_P
>  #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
>  
> +/* Define this to be nonzero if static stack checking is supported.  */
> +#define STACK_CHECK_STATIC_BUILTIN 1
> +
>  #endif  /* GCC_AARCH64_LINUX_H */
> Index: config/aarch64/aarch64-protos.h
> ===
> --- config/aarch64/aarch64-protos.h   (revision 228512)
> +++ config/aarch64/aarch64-protos.h   (working copy)
> @@ -316,6 +316,7 @@ void aarch64_asm_output_labelref (FILE *
>  void aarch64_cpu_cpp_builtins (cpp_reader *);
>  void aarch64_elf_asm_named_section (const char *, unsigned, tree);
>  const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
> +const char * aarch64_output_probe_stack_range (rtx, rtx);
>  void aarch64_err_no_fpadvsimd (machine_mode, const char *);
>  void aarch64_expand_epilogue (bool);
>  void aarch64_expand_mov_immediate (rtx, rtx);
> Index: config/aarch64/aarch64.c
> ===
> --- config/aarch64/aarch64.c  (revision 228512)
> +++ config/aarch64/aarch64.c  (working copy)
> @@ -76,6 +76,7 @@
>  #include "sched-int.h"
>  #include "cortex-a57-fma-steering.h"
>  #include "target-globals.h"
> +#include "common/common-target.h"
>  
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -2144,6 +2145,167 @@ aarch64_libgcc_cmp_return_mode (void)
>return SImode;
>  }
>  
> +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
> +
> +#if (PROBE_INTERVAL % 4096) != 0
> +#error Cannot use indexed address calculation for stack probing
> +#endif
> +
> +#if PROBE_INTERVAL > 4096
> +#error Cannot use indexed addressing mode for stack probing
> +#endif
> +

Hmm, so if PROBE_INTERVAL != 4096 we barf!

While that's safe and probably right for Linux, on some OSes there might
be a minimum page size of 16k or even 64k.  It would be nice if we could
support that.

> +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
> +   inclusive.  These are offsets from the current stack pointer.  */
> +
> +static void
> +aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
> +{
> +  rtx reg9 = gen_rtx_REG (Pmode, 9);

Ug!  Manifest constants should be moved to pre-defines.
PROBE_STACK_BASE_REG?

> +
> +  /* The following code uses indexed address calculation on FIRST.  */
> +  gcc_assert ((first % 4096) == 0);

where's 4096 come from?

> +
> +  /* See if we have a constant small number of probes to generate.  If so,
> + that's the easy case.  */
> +  if (size <= PROBE_INTERVAL)
> +{
> +  emit_set_insn (reg9,
> +  plus_constant (Pmode, stack_pointer_rtx,
> + 

Re: [ARM] Fix PR middle-end/65958

2015-10-28 Thread Eric Botcazou
> Thanks - the arm backend changes are ok - but you need to wait for an
> AArch64 maintainer to review the AArch64 changes.

I installed the ARM changes long ago but the Aarch64 ones are still pending.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-10-07 Thread Eric Botcazou
> If I read aarch64_emit_probe_stack_range correctly, these two
> instructions are generated when (size <= PROBE_INTERVAL).  If
> size <= 4 * PROBE_INTERVAL, more instructions are generated,
> 
>  sub x9, sp, #16384
>  str xzr, [x9]
> 
>  sub x9, x9, #PROBE_INTERVAL
>  str xzr, [x9]
>   ... /* At most two instances of these two insn. */
> 
>  either
>  sub x9, x9, #PROBE_INTERVAL
>  str xzr, [x9, #offset]
>  or
>  str xzr, [x9, -16]
> 
> > A probing loop uses both x9 and x10:
> > sub x9, sp, #12288
> > sub x10, sp, #36864
> > 
> > LPSRL0:
> > sub x9, x9, 4096
> > str xzr, [x9]
> > cmp x9, x10
> > b.neLPSRL0
> 
> The probing loop is used when size > 4 * PROBE_INTERVAL
> 
> > with an optional last probe:
> > str xzr, [x10,#-16]
> 
> and there can be an optional instruction before the probe,
> 
>   sub x10, x10, #PROBE_INTERVAL
> 
> Let me know if my understanding above is wrong.

That's correct, probes can come with a 'sub' instruction just before.

> s/aarch64_output_probe_stack-range/aarch64_output_probe_stack_range

Thanks, will fix.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-10-07 Thread Yao Qi

Hi Eric,
Thanks for the examples.  I am able to map these instructions in your
examples back to RTX in your patch.

On 07/10/15 10:09, Eric Botcazou wrote:

Yes, it will generate either individual probes or a probing loop before the
frame is established when -fstack-check is passed.  For aarch64, a probe is a
store based on x9 of the form:

str xzr, [x9, #offset]

with preceding instructions to compute x9 from sp, typically:

sub x9, sp, #16384



If I read aarch64_emit_probe_stack_range correctly, these two
instructions are generated when (size <= PROBE_INTERVAL).  If
size <= 4 * PROBE_INTERVAL, more instructions are generated,

sub x9, sp, #16384
str xzr, [x9]

sub x9, x9, #PROBE_INTERVAL
str xzr, [x9]
... /* At most two instances of these two insn. */

either
sub x9, x9, #PROBE_INTERVAL
str xzr, [x9, #offset]
or
str xzr, [x9, -16]


A probing loop uses both x9 and x10:

sub x9, sp, #12288
sub x10, sp, #36864
LPSRL0:
sub x9, x9, 4096
str xzr, [x9]
cmp x9, x10
b.neLPSRL0



The probing loop is used when size > 4 * PROBE_INTERVAL


with an optional last probe:

str xzr, [x10,#-16]


and there can be an optional instruction before the probe,

sub x10, x10, #PROBE_INTERVAL

Let me know if my understanding above is wrong.

When I read the examples and your patch, I happen to see a
nit in ChangeLog entry,


2015-10-06  Tristan Gingold  
Eric Botcazou  

PR middle-end/65958
* config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
Declare.


s/aarch64_output_probe_stack-range/aarch64_output_probe_stack_range

--
Yao (齐尧)


Re: [ARM] Fix PR middle-end/65958

2015-10-07 Thread Eric Botcazou
> I assume that this patch (and arm patch) will change the instruction
> sequences in prologue.  If so, do you have some examples about how
> prologue is changed with this patch?  I need to adapt GDB prologue
> analyser to these new instruction sequences.

Yes, it will generate either individual probes or a probing loop before the 
frame is established when -fstack-check is passed.  For aarch64, a probe is a 
store based on x9 of the form:

str xzr, [x9, #offset]

with preceding instructions to compute x9 from sp, typically:

sub x9, sp, #16384

A probing loop uses both x9 and x10:

sub x9, sp, #12288
sub x10, sp, #36864
LPSRL0:
sub x9, x9, 4096
str xzr, [x9]
cmp x9, x10
b.neLPSRL0

with an optional last probe:

str xzr, [x10,#-16]

For arm, it's more convoluted because the base register may vary but, in 
simple cases (in particular if the function is not nested), it's IP:

str r0, [ip, #offset]

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-10-07 Thread Yao Qi

Hi Eric,

On 06/10/15 11:11, Eric Botcazou wrote:

Here's the implementation for aarch64, very similar but simpler since there is
no shortage of scratch registers; the only thing to note is the new blockage
pattern.  This was tested on real hardware but not with Linux, instead with
Darwin (experimental port of the toolchain to iOS) and makes it possible to
pass ACATS (Ada conformance testsuite which requires stack checking).

There is also a couple of tweaks for the ARM implementation: a cosmetic one
for the probe_stack pattern and one for the output_probe_stack_range loop.


I assume that this patch (and arm patch) will change the instruction
sequences in prologue.  If so, do you have some examples about how
prologue is changed with this patch?  I need to adapt GDB prologue
analyser to these new instruction sequences.

--
Yao (齐尧)


Re: [ARM] Fix PR middle-end/65958

2015-10-06 Thread Ramana Radhakrishnan


On 06/10/15 11:11, Eric Botcazou wrote:
>> Thanks - I have no further comments on this patch. We probably need to
>> implement the same on AArch64 too in order to avoid similar problems.
> 
> Here's the implementation for aarch64, very similar but simpler since there 
> is 
> no shortage of scratch registers; the only thing to note is the new blockage 
> pattern.  This was tested on real hardware but not with Linux, instead with 
> Darwin (experimental port of the toolchain to iOS) and makes it possible to 
> pass ACATS (Ada conformance testsuite which requires stack checking).
> 
> There is also a couple of tweaks for the ARM implementation: a cosmetic one 
> for the probe_stack pattern and one for the output_probe_stack_range loop.
> 
> 
> 2015-10-06  Tristan Gingold  
> Eric Botcazou  
> 
> PR middle-end/65958
>   * config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
>   Declare.
>   * config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
>   UNSPEC_PROBE_STACK_RANGE.
>   (blockage): New instruction.
>   (probe_stack_range): Likewise.
>   * config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
>   function.
>   (aarch64_output_probe_stack_range): Likewise.
>   (aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
>   static builtin stack checking is enabled.
>   * config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
>   Define.
> 
>   * config/arm/arm.c (arm_emit_probe_stack_range): Adjust comment.
>   (output_probe_stack_range): Rotate the loop and simplify.
>   (thumb1_expand_prologue): Tweak sorry message.
>   * config/arm/arm.md (probe_stack): Use bare string.
> 
> 
> 2015-10-06  Eric Botcazou  
> 
> * gcc.target/aarch64/stack-checking.c: New test.
> 


Thanks - the arm backend changes are ok - but you need to wait for an AArch64 
maintainer to review the AArch64 changes.

I've CC'd a couple of them on this.

Ramana


Re: [ARM] Fix PR middle-end/65958

2015-10-06 Thread Eric Botcazou
> Thanks - I have no further comments on this patch. We probably need to
> implement the same on AArch64 too in order to avoid similar problems.

Here's the implementation for aarch64, very similar but simpler since there is 
no shortage of scratch registers; the only thing to note is the new blockage 
pattern.  This was tested on real hardware but not with Linux, instead with 
Darwin (experimental port of the toolchain to iOS) and makes it possible to 
pass ACATS (Ada conformance testsuite which requires stack checking).

There is also a couple of tweaks for the ARM implementation: a cosmetic one 
for the probe_stack pattern and one for the output_probe_stack_range loop.


2015-10-06  Tristan Gingold  
Eric Botcazou  

PR middle-end/65958
* config/aarch64/aarch64-protos.h (aarch64_output_probe_stack-range):
Declare.
* config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
UNSPEC_PROBE_STACK_RANGE.
(blockage): New instruction.
(probe_stack_range): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
function.
(aarch64_output_probe_stack_range): Likewise.
(aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
static builtin stack checking is enabled.
* config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
Define.

* config/arm/arm.c (arm_emit_probe_stack_range): Adjust comment.
(output_probe_stack_range): Rotate the loop and simplify.
(thumb1_expand_prologue): Tweak sorry message.
* config/arm/arm.md (probe_stack): Use bare string.


2015-10-06  Eric Botcazou  

* gcc.target/aarch64/stack-checking.c: New test.

-- 
Eric Botcazou

Index: config/aarch64/aarch64-linux.h
===
--- config/aarch64/aarch64-linux.h	(revision 228512)
+++ config/aarch64/aarch64-linux.h	(working copy)
@@ -88,4 +88,7 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
 #endif  /* GCC_AARCH64_LINUX_H */
Index: config/aarch64/aarch64-protos.h
===
--- config/aarch64/aarch64-protos.h	(revision 228512)
+++ config/aarch64/aarch64-protos.h	(working copy)
@@ -316,6 +316,7 @@ void aarch64_asm_output_labelref (FILE *
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
+const char * aarch64_output_probe_stack_range (rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
Index: config/aarch64/aarch64.c
===
--- config/aarch64/aarch64.c	(revision 228512)
+++ config/aarch64/aarch64.c	(working copy)
@@ -76,6 +76,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "common/common-target.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2144,6 +2145,167 @@ aarch64_libgcc_cmp_return_mode (void)
   return SImode;
 }
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+#if (PROBE_INTERVAL % 4096) != 0
+#error Cannot use indexed address calculation for stack probing
+#endif
+
+#if PROBE_INTERVAL > 4096
+#error Cannot use indexed addressing mode for stack probing
+#endif
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  rtx reg9 = gen_rtx_REG (Pmode, 9);
+
+  /* The following code uses indexed address calculation on FIRST.  */
+  gcc_assert ((first % 4096) == 0);
+
+  /* See if we have a constant small number of probes to generate.  If so,
+ that's the easy case.  */
+  if (size <= PROBE_INTERVAL)
+{
+  emit_set_insn (reg9,
+		 plus_constant (Pmode, stack_pointer_rtx,
+	   -(first + PROBE_INTERVAL)));
+  emit_stack_probe (plus_constant (Pmode, reg9, PROBE_INTERVAL - size));
+}
+
+  /* The run-time loop is made up of 8 insns in the generic case while the
+ compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
+  else if (size <= 4 * PROBE_INTERVAL)
+{
+  HOST_WIDE_INT i, rem;
+
+  emit_set_insn (reg9,
+		 plus_constant (Pmode, stack_pointer_rtx,
+	   -(first + PROBE_INTERVAL)));
+  emit_stack_probe (reg9);
+
+  /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
+	 it exceeds SIZE.  If only two probes are needed, this will not
+	 generate any code.  Then probe at FIRST + SIZE.  */
+  for (i = 2 * PROBE_INTE

Re: [ARM] Fix PR middle-end/65958

2015-09-21 Thread Eric Botcazou
> On targets using thumb1, I can see:
> - the new test failing (you should probably add a dg-skip or an
> effective-target directive)

OK, I have added:

/* { dg-skip-if "" { arm_thumb1 } } */

as in the ivopts-orig_biv-inc.c test.

> - gcc.dg/pr48134.c now fails, saying:
> sorry, unimplemented: -fstack-check=specific for THUMB1
> while the error message is the same as for your new test, I'm
> wondering why/how it actually passed before your patch?

Yeah, it's a bug, -fstack-check=specific should have been rejected before for 
all ARM targets since it was not implemented.  Will fix.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-09-20 Thread Christophe Lyon
Hi Eric,


On 6 July 2015 at 17:46, Ramana Radhakrishnan
 wrote:
>
>
> On 18/06/15 20:02, Eric Botcazou wrote:
>>> Please mark this pattern with (set_attr "type" "multiple").
>>
>> Done.
>>
>>> While I suspect that stack probing is done before any insns with invalid
>>> constants in the function, it would be better to model the length of
>>> this insn so that the minipool logic is not confused later in terms of
>>> placement of constant pools.
>>
>> OK, I can put an upper bound.
>>
>>> Shouldn't the pattern contain clobbers for the CC register or is that
>>> unnecessary for the same reason as above ?
>>
>> That's unnecessary, UNSPEC_VOLATILEs are optimization barriers so no CC-
>> related instructions can be moved up to before the instruction.
>>
>>> Additionally please add
>>>
>>> (set_attr "conds" "clob")
>>>
>>> to this pattern so that the CCFSM state machine doesn't go awry in any
>>> of these cases.
>>
>> Also done.
>>
>
> Thanks - I have no further comments on this patch. We probably need to 
> implement the same on AArch64 too in order to avoid similar problems.
>
>
> OK with the aforementioned changes.
>
> regards
> Ramana

On targets using thumb1, I can see:
- the new test failing (you should probably add a dg-skip or an
effective-target directive)
- gcc.dg/pr48134.c now fails, saying:
sorry, unimplemented: -fstack-check=specific for THUMB1
while the error message is the same as for your new test, I'm
wondering why/how it actually passed before your patch?

Christophe.


Re: [ARM] Fix PR middle-end/65958

2015-07-06 Thread Ramana Radhakrishnan


On 18/06/15 20:02, Eric Botcazou wrote:
>> Please mark this pattern with (set_attr "type" "multiple").
> 
> Done.
> 
>> While I suspect that stack probing is done before any insns with invalid
>> constants in the function, it would be better to model the length of
>> this insn so that the minipool logic is not confused later in terms of
>> placement of constant pools.
> 
> OK, I can put an upper bound.
> 
>> Shouldn't the pattern contain clobbers for the CC register or is that
>> unnecessary for the same reason as above ?
> 
> That's unnecessary, UNSPEC_VOLATILEs are optimization barriers so no CC-
> related instructions can be moved up to before the instruction.
> 
>> Additionally please add
>>
>> (set_attr "conds" "clob")
>>
>> to this pattern so that the CCFSM state machine doesn't go awry in any
>> of these cases.
> 
> Also done.
> 

Thanks - I have no further comments on this patch. We probably need to 
implement the same on AArch64 too in order to avoid similar problems.


OK with the aforementioned changes.

regards
Ramana


Re: [ARM] Fix PR middle-end/65958

2015-06-18 Thread Eric Botcazou
> Please mark this pattern with (set_attr "type" "multiple").

Done.

> While I suspect that stack probing is done before any insns with invalid
> constants in the function, it would be better to model the length of
> this insn so that the minipool logic is not confused later in terms of
> placement of constant pools.

OK, I can put an upper bound.

> Shouldn't the pattern contain clobbers for the CC register or is that
> unnecessary for the same reason as above ?

That's unnecessary, UNSPEC_VOLATILEs are optimization barriers so no CC-
related instructions can be moved up to before the instruction.

> Additionally please add
> 
> (set_attr "conds" "clob")
> 
> to this pattern so that the CCFSM state machine doesn't go awry in any
> of these cases.

Also done.

-- 
Eric Botcazou


Re: [ARM] Fix PR middle-end/65958

2015-06-17 Thread Ramana Radhakrishnan
I am not very familiar with this feature entirely so please bear with me 
during review and will find some time to do some experiments with the 
feature during this week, but a couple of things with respect to the 
patch immediately spring to mind.




+(define_insn "probe_stack_range"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")
+(match_operand:SI 2 "register_operand" "r")]
+UNSPEC_PROBE_STACK_RANGE))]
+  "TARGET_32BIT"
+{
+  return output_probe_stack_range (operands[0], operands[2]);
+})
+


Please mark this pattern with (set_attr "type" "multiple").

While I suspect that stack probing is done before any insns with invalid 
constants in the function, it would be better to model the length of 
this insn so that the minipool logic is not confused later in terms of 
placement of constant pools.


Shouldn't the pattern contain clobbers for the CC register or is that 
unnecessary for the same reason as above ?


Additionally please add

(set_attr "conds" "clob")

to this pattern so that the CCFSM state machine doesn't go awry in any 
of these cases.



regards
Ramana










[ARM] Fix PR middle-end/65958

2015-06-11 Thread Eric Botcazou
Hi,

this fixes PR middle-end/65958 on the ARM, the architecture for which it was 
reported, by implementing stack checking by means of probing in the back-end.  
Other mainstream back-ends (alpha, i386, ia64, mips, rs6000, sparc) already 
have such an implementation.  The middle-end contains a generic implementation 
but it has severe limitations and can generate wrong code in conjunction with 
dynamic allocation patterns (PR 65958 is an example with dynamic arrays).

As for the other architectures, the patch implements stack probing for the 
static part of the frame, i.e. it generates stack probes in the prologue of 
functions which have a large frame or are non-leaf before establishing the 
frame.  Unfortunately, as is the case for x86-32, the scarcity of registers 
coupled with the calling conventions make the implementation a bit convoluted, 
but it works for both APCS and non-APCS frames, ARM and Thumb-2 modes.  We 
have been using it at AdaCore for a couple of years on Linux and VxWorks.

Tested on arm-eabi and arm-linux-gnueabi, OK for the mainline?


2015-06-11  Eric Botcazou  

PR middle-end/65958
* config/arm/linux-elf.h (STACK_CHECK_STATIC_BUILTIN): Define.
* config/arm/arm-protos.h (output_probe_stack_range): Declare.
* config/arm/arm.c: Include common/common-target.h.
(use_return_insn): Return 0 if the static chain register was saved
above a non-APCS frame.
(arm_compute_static_chain_stack_bytes): Adjust for stack checking.
(struct scratch_reg): New.
(get_scratch_register_on_entry): New function.
(release_scratch_register_on_entry): Likewise.
(arm_emit_probe_stack_range): Likewise.
(output_probe_stack_range): Likewise.
(arm_expand_prologue): Factor out code dealing with the IP register
for nested function and adjust it for stack checking.
Invoke arm_emit_probe_stack_range if static builtin stack checking
is enabled.
(thumb1_expand_prologue): Sorry out if static builtin stack checking
is enabled.
(arm_expand_epilogue): Add the saved static chain register, if any, to
the amount of pre-pushed registers to pop.
(arm_frame_pointer_required): Return true if static stack checking is
enabled and we want to catch the exception with the EABI unwinder.
* config/arm/unspecs.md (UNSPEC_PROBE_STACK): New constant.
(UNSPEC_PROBE_STACK_RANGE): Likewise.
* config/arm/arm.md (probe_stack): New insn.
(probe_stack_range): Likewise.


2015-06-11  Eric Botcazou  

* gcc.target/arm/stack-checking.c: New test.


-- 
Eric Botcazou

Index: config/arm/linux-elf.h
===
--- config/arm/linux-elf.h	(revision 224264)
+++ config/arm/linux-elf.h	(working copy)
@@ -124,3 +124,6 @@
to COPY relocated symbol in the executable.  See PR65780.  */
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
+
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
Index: config/arm/arm.c
===
--- config/arm/arm.c	(revision 224264)
+++ config/arm/arm.c	(working copy)
@@ -72,6 +72,7 @@
 #include "target.h"
 #include "sched-int.h"
 #include "target-def.h"
+#include "common/common-target.h"
 #include "debug.h"
 #include "langhooks.h"
 #include "df.h"
@@ -3599,7 +3600,11 @@ use_return_insn (int iscond, rtx sibling
   /* Or if there is a stack adjustment.  However, if the stack pointer
 	 is saved on the stack, we can use a pre-incrementing stack load.  */
   || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
- && stack_adjust == 4)))
+ && stack_adjust == 4))
+  /* Or if the static chain register was saved above the frame, under the
+	 assumption that the stack pointer isn't saved on the stack.  */
+  || (!(TARGET_APCS_FRAME && frame_pointer_needed)
+  && arm_compute_static_chain_stack_bytes() != 0))
 return 0;
 
   saved_int_regs = offsets->saved_regs_mask;
@@ -19080,8 +19085,10 @@ static int
 arm_compute_static_chain_stack_bytes (void)
 {
   /* See the defining assertion in arm_expand_prologue.  */
-  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
-  && IS_NESTED (arm_current_func_type ())
+  if (IS_NESTED (arm_current_func_type ())
+  && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+	  || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+	  && !df_regs_ever_live_p (LR_REGNUM)))
   && arm_r3_live_at_start_p ()
   && crtl->args.pretend_args_size == 0)
 return 4;
@@ -19176,7 +19183,6 @@ arm_compute_save_reg_mask (void)
   return save_reg_mask;
 }
 
-
 /* Compute a bit mask of which registers need to be
saved on the stack for the current function.  */
 static unsigned long
@@ -21004,6 +21010,240 @@ thumb_set_frame_pointer