Hi,

due to the AAPCS parameter passing of 8-byte aligned structures, which happen to
be 8-byte aligned or only 4-byte aligned in the test case, ldrd instructions
are generated that may access 4-byte aligned stack slots, which will trap on 
ARMv5 and
ARMv6 according to the following document:


http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473m/dom1361290002364.html
says:

"In ARMv5TE, or in ARMv6 when SCTLR.U is 0, LDRD and STRD doubleword data 
transfers must be
eight-byte aligned.  Use ALIGN 8 before memory allocation directives such as 
DCQ if the data
is to be accessed using LDRD or STRD.  This is not required in ARMv6 when 
SCTLR.U is 1, or in
ARMv7, because in these versions, doubleword data transfers can be 
word-aligned."


The reason why the ldrd instruction is generated seems to be a missing 
alignment check in the
function output_move_double.  But when that is fixed, it turns out that if the 
parameter happens
to be 8-byte aligned by chance, they still have MEM_ALIGN = 4, which prevents 
the ldrd completely.

The reason for that is in function.c (assign_parm_find_stack_rtl), where values 
that happen to be
aligned to STACK_BOUNDARY, are only  aligned to PARM_BOUNDARY.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf with 
all languages.
Is it OK for trunk?


Thanks
Bernd.
2019-02-05  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* config/arm/arm.c (output_move_double): Check required memory
	alignment for ldrd/strd instructions.
	* function.c (assign_parm_find_stack_rtl): Use larger alignment
	when possible.

testsuite:
2019-02-05  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* gcc.target/arm/unaligned-argument-1.c: New test.
	* gcc.target/arm/unaligned-argument-2.c: New test.

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 268337)
+++ gcc/config/arm/arm.c	(working copy)
@@ -18303,6 +18303,8 @@ output_move_double (rtx *operands, bool emit, int
       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
 
       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
+      bool allow_ldrd = TARGET_LDRD
+			&& align_ok_ldrd_strd (MEM_ALIGN (operands[1]), 0);
 
       switch (GET_CODE (XEXP (operands[1], 0)))
 	{
@@ -18310,8 +18312,8 @@ output_move_double (rtx *operands, bool emit, int
 
 	  if (emit)
 	    {
-	      if (TARGET_LDRD
-		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
+	      if (allow_ldrd
+		  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
 	      else
 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
@@ -18319,7 +18321,7 @@ output_move_double (rtx *operands, bool emit, int
 	  break;
 
 	case PRE_INC:
-	  gcc_assert (TARGET_LDRD);
+	  gcc_assert (allow_ldrd);
 	  if (emit)
 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
 	  break;
@@ -18327,7 +18329,7 @@ output_move_double (rtx *operands, bool emit, int
 	case PRE_DEC:
 	  if (emit)
 	    {
-	      if (TARGET_LDRD)
+	      if (allow_ldrd)
 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
 	      else
 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
@@ -18337,7 +18339,7 @@ output_move_double (rtx *operands, bool emit, int
 	case POST_INC:
 	  if (emit)
 	    {
-	      if (TARGET_LDRD)
+	      if (allow_ldrd)
 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
 	      else
 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
@@ -18345,7 +18347,7 @@ output_move_double (rtx *operands, bool emit, int
 	  break;
 
 	case POST_DEC:
-	  gcc_assert (TARGET_LDRD);
+	  gcc_assert (allow_ldrd);
 	  if (emit)
 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
 	  break;
@@ -18483,7 +18485,7 @@ output_move_double (rtx *operands, bool emit, int
 		    }
 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
 		  operands[1] = otherops[0];
-		  if (TARGET_LDRD
+		  if (allow_ldrd
 		      && (REG_P (otherops[2])
 			  || TARGET_THUMB2
 			  || (CONST_INT_P (otherops[2])
@@ -18544,7 +18546,7 @@ output_move_double (rtx *operands, bool emit, int
 	      if (count)
 		*count = 2;
 
-	      if (TARGET_LDRD)
+	      if (allow_ldrd)
 		return "ldrd%?\t%0, [%1]";
 
 	      return "ldmia%?\t%1, %M0";
@@ -18589,7 +18591,8 @@ output_move_double (rtx *operands, bool emit, int
 	 values but user assembly constraints can force an odd
 	 starting register.  */
       bool allow_strd = TARGET_LDRD
-			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
+			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1)
+			 && align_ok_ldrd_strd (MEM_ALIGN (operands[0]), 0);
       switch (GET_CODE (XEXP (operands[0], 0)))
         {
 	case REG:
Index: gcc/function.c
===================================================================
--- gcc/function.c	(revision 268337)
+++ gcc/function.c	(working copy)
@@ -2698,8 +2698,20 @@ assign_parm_find_stack_rtl (tree parm, struct assi
      intentionally forcing upward padding.  Otherwise we have to come
      up with a guess at the alignment based on OFFSET_RTX.  */
   poly_int64 offset;
-  if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm)
+  if (data->locate.where_pad == PAD_NONE || data->entry_parm)
     align = boundary;
+  else if (data->locate.where_pad == PAD_UPWARD)
+    {
+      align = boundary;
+      if (poly_int_rtx_p (offset_rtx, &offset)
+	  && STACK_POINTER_OFFSET == 0)
+	{
+	  unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT;
+	  if (offset_align == 0 || offset_align > STACK_BOUNDARY)
+	    offset_align = STACK_BOUNDARY;
+	  align = MAX (align, offset_align);
+	}
+    }
   else if (poly_int_rtx_p (offset_rtx, &offset))
     {
       align = least_bit_hwi (boundary);
Index: gcc/testsuite/gcc.target/arm/unaligned-argument-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/unaligned-argument-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/unaligned-argument-1.c	(working copy)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft -mabi=aapcs -O3" } */
+
+struct s {
+  int a, b;
+} __attribute__((aligned(8)));
+
+struct s f0;
+
+void f(int a, int b, int c, int d, struct s f)
+{
+  /* f is on a 64 bit aligned stack slot, thus ldrd OK.  */
+  f0 = f;
+}
+
+/* { dg-final { scan-assembler-times "ldrd" 1 } } */
+/* { dg-final { scan-assembler-times "strd" 1 } } */
Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(working copy)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft -mabi=aapcs -O3" } */
+
+struct s {
+  int a, b;
+} __attribute__((aligned(8)));
+
+struct s f0;
+
+void f(int a, int b, int c, int d, int e, struct s f)
+{
+  /* f is on a 32 bit aligned stack slot, thus no ldrd.  */
+  f0 = f;
+}
+
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
+/* { dg-final { scan-assembler-times "strd" 1 } } */

Reply via email to