Hi, due to the AAPCS parameter passing of 8-byte aligned structures, whose stack slots happen to be either 8-byte aligned or only 4-byte aligned in the test cases, ldrd instructions are generated that may access 4-byte aligned stack slots. Such accesses will trap on ARMv5 and ARMv6 according to the following document:
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473m/dom1361290002364.html says: "In ARMv5TE, or in ARMv6 when SCTLR.U is 0, LDRD and STRD doubleword data transfers must be eight-byte aligned. Use ALIGN 8 before memory allocation directives such as DCQ if the data is to be accessed using LDRD or STRD. This is not required in ARMv6 when SCTLR.U is 1, or in ARMv7, because in these versions, doubleword data transfers can be word-aligned." The reason why the ldrd instruction is generated seems to be a missing alignment check in the function output_move_double. But once that is fixed, it turns out that even if the parameter happens to be 8-byte aligned by chance, it still has MEM_ALIGN = 4 (i.e. only 4-byte alignment is recorded), which prevents the use of ldrd completely. The reason for that is in function.c (assign_parm_find_stack_rtl), where values that happen to be aligned to STACK_BOUNDARY are only marked as aligned to PARM_BOUNDARY. Bootstrapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf with all languages. Is it OK for trunk? Thanks, Bernd.
2019-02-05 Bernd Edlinger <bernd.edlin...@hotmail.de> * config/arm/arm.c (output_move_double): Check required memory alignment for ldrd/strd instructions. * function.c (assign_parm_find_stack_rtl): Use larger alignment when possible. testsuite: 2019-02-05 Bernd Edlinger <bernd.edlin...@hotmail.de> * gcc.target/arm/unaligned-argument-1.c: New test. * gcc.target/arm/unaligned-argument-2.c: New test. Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 268337) +++ gcc/config/arm/arm.c (working copy) @@ -18303,6 +18303,8 @@ output_move_double (rtx *operands, bool emit, int otherops[0] = gen_rtx_REG (SImode, 1 + reg0); gcc_assert (code1 == MEM); /* Constraints should ensure this. */ + bool allow_ldrd = TARGET_LDRD + && align_ok_ldrd_strd (MEM_ALIGN (operands[1]), 0); switch (GET_CODE (XEXP (operands[1], 0))) { @@ -18310,8 +18312,8 @@ output_move_double (rtx *operands, bool emit, int if (emit) { - if (TARGET_LDRD - && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) + if (allow_ldrd + && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0)))) output_asm_insn ("ldrd%?\t%0, [%m1]", operands); else output_asm_insn ("ldmia%?\t%m1, %M0", operands); @@ -18319,7 +18321,7 @@ output_move_double (rtx *operands, bool emit, int break; case PRE_INC: - gcc_assert (TARGET_LDRD); + gcc_assert (allow_ldrd); if (emit) output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands); break; @@ -18327,7 +18329,7 @@ output_move_double (rtx *operands, bool emit, int case PRE_DEC: if (emit) { - if (TARGET_LDRD) + if (allow_ldrd) output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands); else output_asm_insn ("ldmdb%?\t%m1!, %M0", operands); @@ -18337,7 +18339,7 @@ output_move_double (rtx *operands, bool emit, int case POST_INC: if (emit) { - if (TARGET_LDRD) + if (allow_ldrd) output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands); else output_asm_insn ("ldmia%?\t%m1!, %M0", operands); @@ -18345,7 +18347,7 @@ 
output_move_double (rtx *operands, bool emit, int break; case POST_DEC: - gcc_assert (TARGET_LDRD); + gcc_assert (allow_ldrd); if (emit) output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands); break; @@ -18483,7 +18485,7 @@ output_move_double (rtx *operands, bool emit, int } otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); operands[1] = otherops[0]; - if (TARGET_LDRD + if (allow_ldrd && (REG_P (otherops[2]) || TARGET_THUMB2 || (CONST_INT_P (otherops[2]) @@ -18544,7 +18546,7 @@ output_move_double (rtx *operands, bool emit, int if (count) *count = 2; - if (TARGET_LDRD) + if (allow_ldrd) return "ldrd%?\t%0, [%1]"; return "ldmia%?\t%1, %M0"; @@ -18589,7 +18591,8 @@ output_move_double (rtx *operands, bool emit, int values but user assembly constraints can force an odd starting register. */ bool allow_strd = TARGET_LDRD - && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1); + && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1) + && align_ok_ldrd_strd (MEM_ALIGN (operands[0]), 0); switch (GET_CODE (XEXP (operands[0], 0))) { case REG: Index: gcc/function.c =================================================================== --- gcc/function.c (revision 268337) +++ gcc/function.c (working copy) @@ -2698,8 +2698,20 @@ assign_parm_find_stack_rtl (tree parm, struct assi intentionally forcing upward padding. Otherwise we have to come up with a guess at the alignment based on OFFSET_RTX. 
*/ poly_int64 offset; - if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm) + if (data->locate.where_pad == PAD_NONE || data->entry_parm) align = boundary; + else if (data->locate.where_pad == PAD_UPWARD) + { + align = boundary; + if (poly_int_rtx_p (offset_rtx, &offset) + && STACK_POINTER_OFFSET == 0) + { + unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT; + if (offset_align == 0 || offset_align > STACK_BOUNDARY) + offset_align = STACK_BOUNDARY; + align = MAX (align, offset_align); + } + } else if (poly_int_rtx_p (offset_rtx, &offset)) { align = least_bit_hwi (boundary); Index: gcc/testsuite/gcc.target/arm/unaligned-argument-1.c =================================================================== --- gcc/testsuite/gcc.target/arm/unaligned-argument-1.c (revision 0) +++ gcc/testsuite/gcc.target/arm/unaligned-argument-1.c (working copy) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft -mabi=aapcs -O3" } */ + +struct s { + int a, b; +} __attribute__((aligned(8))); + +struct s f0; + +void f(int a, int b, int c, int d, struct s f) +{ + /* f is on a 64 bit aligned stack slot, thus ldrd OK. */ + f0 = f; +} + +/* { dg-final { scan-assembler-times "ldrd" 1 } } */ +/* { dg-final { scan-assembler-times "strd" 1 } } */ Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c =================================================================== --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c (revision 0) +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c (working copy) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft -mabi=aapcs -O3" } */ + +struct s { + int a, b; +} __attribute__((aligned(8))); + +struct s f0; + +void f(int a, int b, int c, int d, int e, struct s f) +{ + /* f is on a 32 bit aligned stack slot, thus no ldrd. 
*/ + f0 = f; +} + +/* { dg-final { scan-assembler-times "ldrd" 0 } } */ +/* { dg-final { scan-assembler-times "strd" 1 } } */