On 05/02/2019 15:07, Bernd Edlinger wrote: > Hi, > > due to the AAPCS parameter passing of 8-byte aligned structures, which happen > to > be 8-byte aligned or only 4-byte aligned in the test case, ldrd instructions > are generated that may access 4-byte aligned stack slots, which will trap on > ARMv5 and > ARMv6 according to the following document: > > > http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473m/dom1361290002364.html > says: > > "In ARMv5TE, or in ARMv6 when SCTLR.U is 0, LDRD and STRD doubleword data > transfers must be > eight-byte aligned. Use ALIGN 8 before memory allocation directives such as > DCQ if the data > is to be accessed using LDRD or STRD. This is not required in ARMv6 when > SCTLR.U is 1, or in > ARMv7, because in these versions, doubleword data transfers can be > word-aligned." > > > The reason why the ldrd instruction is generated seems to be a missing > alignment check in the > function output_move_double. But when that is fixed, it turns out that if > the parameter happens > to be 8-byte aligned by chance, they still have MEM_ALIGN = 4, which prevents > the ldrd completely. > > The reason for that is in function.c (assign_parm_find_stack_rtl), where > values that happen to be > aligned to STACK_BOUNDARY, are only aligned to PARM_BOUNDARY. > > > Bootstrapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf > with all languages. > Is it OK for trunk? > > > Thanks > Bernd. > > > patch-arm-align-abi.diff > > 2019-02-05 Bernd Edlinger <bernd.edlin...@hotmail.de> > > * config/arm/arm.c (output_move_double): Check required memory > alignment for ldrd/strd instructions. > * function.c (assign_parm_find_stack_rtl): Use larger alignment > when possible. > > testsuite: > 2019-02-05 Bernd Edlinger <bernd.edlin...@hotmail.de> > > * gcc.target/arm/unaligned-argument-1.c: New test. > * gcc.target/arm/unaligned-argument-2.c: New test. > > Index: gcc/config/arm/arm.c > =================================================================== > --- gcc/config/arm/arm.c (revision 268337) > +++ gcc/config/arm/arm.c (working copy) > @@ -18303,6 +18303,8 @@ output_move_double (rtx *operands, bool emit, int > otherops[0] = gen_rtx_REG (SImode, 1 + reg0); > > gcc_assert (code1 == MEM); /* Constraints should ensure this. */ > + bool allow_ldrd = TARGET_LDRD > + && align_ok_ldrd_strd (MEM_ALIGN (operands[1]), 0); > > switch (GET_CODE (XEXP (operands[1], 0))) > { > @@ -18310,8 +18312,8 @@ output_move_double (rtx *operands, bool emit, int > > if (emit) > { > - if (TARGET_LDRD > - && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) > + if (allow_ldrd > + && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0)))) > output_asm_insn ("ldrd%?\t%0, [%m1]", operands); > else > output_asm_insn ("ldmia%?\t%m1, %M0", operands); > @@ -18319,7 +18321,7 @@ output_move_double (rtx *operands, bool emit, int > break; > > case PRE_INC: > - gcc_assert (TARGET_LDRD); > + gcc_assert (allow_ldrd); > if (emit) > output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands); > break; > @@ -18327,7 +18329,7 @@ output_move_double (rtx *operands, bool emit, int > case PRE_DEC: > if (emit) > { > - if (TARGET_LDRD) > + if (allow_ldrd) > output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands); > else > output_asm_insn ("ldmdb%?\t%m1!, %M0", operands); > @@ -18337,7 +18339,7 @@ output_move_double (rtx *operands, bool emit, int > case POST_INC: > if (emit) > { > - if (TARGET_LDRD) > + if (allow_ldrd) > output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands); > else > output_asm_insn ("ldmia%?\t%m1!, %M0", operands); > @@ -18345,7 +18347,7 @@ output_move_double (rtx *operands, bool emit, int > break; > > case POST_DEC: > - gcc_assert (TARGET_LDRD); > + gcc_assert (allow_ldrd); > if (emit) > output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands); > break; > @@ -18483,7 +18485,7 @@ output_move_double (rtx *operands, bool emit, int > } > otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); > operands[1] = otherops[0]; > - if (TARGET_LDRD > + if (allow_ldrd > && (REG_P (otherops[2]) > || TARGET_THUMB2 > || (CONST_INT_P (otherops[2]) > @@ -18544,7 +18546,7 @@ output_move_double (rtx *operands, bool emit, int > if (count) > *count = 2; > > - if (TARGET_LDRD) > + if (allow_ldrd) > return "ldrd%?\t%0, [%1]"; > > return "ldmia%?\t%1, %M0"; > @@ -18589,7 +18591,8 @@ output_move_double (rtx *operands, bool emit, int > values but user assembly constraints can force an odd > starting register. */ > bool allow_strd = TARGET_LDRD > - && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1); > + && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1) > + && align_ok_ldrd_strd (MEM_ALIGN (operands[0]), 0); > switch (GET_CODE (XEXP (operands[0], 0))) > { > case REG: > Index: gcc/function.c > =================================================================== > --- gcc/function.c (revision 268337) > +++ gcc/function.c (working copy) > @@ -2698,8 +2698,20 @@ assign_parm_find_stack_rtl (tree parm, struct assi > intentionally forcing upward padding. Otherwise we have to come > up with a guess at the alignment based on OFFSET_RTX. */ > poly_int64 offset; > - if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm) > + if (data->locate.where_pad == PAD_NONE || data->entry_parm) > align = boundary; > + else if (data->locate.where_pad == PAD_UPWARD) > + { > + align = boundary; > + if (poly_int_rtx_p (offset_rtx, &offset) > + && STACK_POINTER_OFFSET == 0) > + { > + unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT; > + if (offset_align == 0 || offset_align > STACK_BOUNDARY) > + offset_align = STACK_BOUNDARY; > + align = MAX (align, offset_align); > + } > + } > else if (poly_int_rtx_p (offset_rtx, &offset)) > { > align = least_bit_hwi (boundary); > Index: gcc/testsuite/gcc.target/arm/unaligned-argument-1.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/unaligned-argument-1.c (revision 0) > +++ gcc/testsuite/gcc.target/arm/unaligned-argument-1.c (working copy) > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft > -mabi=aapcs -O3" } */ > + > +struct s { > + int a, b; > +} __attribute__((aligned(8))); > + > +struct s f0; > + > +void f(int a, int b, int c, int d, struct s f) > +{ > + /* f is on a 64 bit aligned stack slot, thus ldrd OK. */ > + f0 = f; > +} > + > +/* { dg-final { scan-assembler-times "ldrd" 1 } } */ > +/* { dg-final { scan-assembler-times "strd" 1 } } */ > Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c (revision 0) > +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c (working copy) > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft > -mabi=aapcs -O3" } */ > + > +struct s { > + int a, b; > +} __attribute__((aligned(8))); > + > +struct s f0; > + > +void f(int a, int b, int c, int d, int e, struct s f) > +{ > + /* f is on a 32 bit aligned stack slot, thus no ldrd. */ > + f0 = f; > +} > + > +/* { dg-final { scan-assembler-times "ldrd" 0 } } */ > +/* { dg-final { scan-assembler-times "strd" 1 } } */ >
As explained on our other thread, I think this is papering over a deeper problem. Put simply, (mem/c:DI (plus:SI (reg/f:SI 104 virtual-incoming-args) (const_int 4 [0x4])) [1 f+0 S8 A32]) is not valid RTL on a strict alignment target (DImode with A32) and should never be generated. I've created PR89544. R.