On 05/02/2019 15:07, Bernd Edlinger wrote:
> Hi,
> 
> due to the AAPCS parameter passing of 8-byte aligned structures, which happen 
> to
> be 8-byte aligned or only 4-byte aligned in the test case, ldrd instructions
> are generated that may access 4-byte aligned stack slots, which will trap on 
> ARMv5 and
> ARMv6 according to the following document:
> 
> 
> http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473m/dom1361290002364.html
> says:
> 
> "In ARMv5TE, or in ARMv6 when SCTLR.U is 0, LDRD and STRD doubleword data 
> transfers must be
> eight-byte aligned.  Use ALIGN 8 before memory allocation directives such as 
> DCQ if the data
> is to be accessed using LDRD or STRD.  This is not required in ARMv6 when 
> SCTLR.U is 1, or in
> ARMv7, because in these versions, doubleword data transfers can be 
> word-aligned."
> 
> 
> The reason why the ldrd instruction is generated seems to be a missing 
> alignment check in the
> function output_move_double.  But when that is fixed, it turns out that if 
> the parameter happens
> to be 8-byte aligned by chance, they still have MEM_ALIGN = 4, which prevents 
> the ldrd completely.
> 
> The reason for that is in function.c (assign_parm_find_stack_rtl), where 
> values that happen to be
> aligned to STACK_BOUNDARY, are only  aligned to PARM_BOUNDARY.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf 
> with all languages.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 
> 
> patch-arm-align-abi.diff
> 
> 2019-02-05  Bernd Edlinger  <bernd.edlin...@hotmail.de>
> 
>       * config/arm/arm.c (output_move_double): Check required memory
>       alignment for ldrd/strd instructions.
>       * function.c (assign_parm_find_stack_rtl): Use larger alignment
>       when possible.
> 
> testsuite:
> 2019-02-05  Bernd Edlinger  <bernd.edlin...@hotmail.de>
> 
>       * gcc.target/arm/unaligned-argument-1.c: New test.
>       * gcc.target/arm/unaligned-argument-2.c: New test.
> 
> Index: gcc/config/arm/arm.c
> ===================================================================
> --- gcc/config/arm/arm.c      (revision 268337)
> +++ gcc/config/arm/arm.c      (working copy)
> @@ -18303,6 +18303,8 @@ output_move_double (rtx *operands, bool emit, int
>        otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
>  
>        gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
> +      bool allow_ldrd = TARGET_LDRD
> +                     && align_ok_ldrd_strd (MEM_ALIGN (operands[1]), 0);
>  
>        switch (GET_CODE (XEXP (operands[1], 0)))
>       {
> @@ -18310,8 +18312,8 @@ output_move_double (rtx *operands, bool emit, int
>  
>         if (emit)
>           {
> -           if (TARGET_LDRD
> -               && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
> +           if (allow_ldrd
> +               && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
>               output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
>             else
>               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
> @@ -18319,7 +18321,7 @@ output_move_double (rtx *operands, bool emit, int
>         break;
>  
>       case PRE_INC:
> -       gcc_assert (TARGET_LDRD);
> +       gcc_assert (allow_ldrd);
>         if (emit)
>           output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
>         break;
> @@ -18327,7 +18329,7 @@ output_move_double (rtx *operands, bool emit, int
>       case PRE_DEC:
>         if (emit)
>           {
> -           if (TARGET_LDRD)
> +           if (allow_ldrd)
>               output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
>             else
>               output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
> @@ -18337,7 +18339,7 @@ output_move_double (rtx *operands, bool emit, int
>       case POST_INC:
>         if (emit)
>           {
> -           if (TARGET_LDRD)
> +           if (allow_ldrd)
>               output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
>             else
>               output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
> @@ -18345,7 +18347,7 @@ output_move_double (rtx *operands, bool emit, int
>         break;
>  
>       case POST_DEC:
> -       gcc_assert (TARGET_LDRD);
> +       gcc_assert (allow_ldrd);
>         if (emit)
>           output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
>         break;
> @@ -18483,7 +18485,7 @@ output_move_double (rtx *operands, bool emit, int
>                   }
>                 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
>                 operands[1] = otherops[0];
> -               if (TARGET_LDRD
> +               if (allow_ldrd
>                     && (REG_P (otherops[2])
>                         || TARGET_THUMB2
>                         || (CONST_INT_P (otherops[2])
> @@ -18544,7 +18546,7 @@ output_move_double (rtx *operands, bool emit, int
>             if (count)
>               *count = 2;
>  
> -           if (TARGET_LDRD)
> +           if (allow_ldrd)
>               return "ldrd%?\t%0, [%1]";
>  
>             return "ldmia%?\t%1, %M0";
> @@ -18589,7 +18591,8 @@ output_move_double (rtx *operands, bool emit, int
>        values but user assembly constraints can force an odd
>        starting register.  */
>        bool allow_strd = TARGET_LDRD
> -                      && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
> +                      && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1)
> +                      && align_ok_ldrd_strd (MEM_ALIGN (operands[0]), 0);
>        switch (GET_CODE (XEXP (operands[0], 0)))
>          {
>       case REG:
> Index: gcc/function.c
> ===================================================================
> --- gcc/function.c    (revision 268337)
> +++ gcc/function.c    (working copy)
> @@ -2698,8 +2698,20 @@ assign_parm_find_stack_rtl (tree parm, struct assi
>       intentionally forcing upward padding.  Otherwise we have to come
>       up with a guess at the alignment based on OFFSET_RTX.  */
>    poly_int64 offset;
> -  if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm)
> +  if (data->locate.where_pad == PAD_NONE || data->entry_parm)
>      align = boundary;
> +  else if (data->locate.where_pad == PAD_UPWARD)
> +    {
> +      align = boundary;
> +      if (poly_int_rtx_p (offset_rtx, &offset)
> +       && STACK_POINTER_OFFSET == 0)
> +     {
> +       unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT;
> +       if (offset_align == 0 || offset_align > STACK_BOUNDARY)
> +         offset_align = STACK_BOUNDARY;
> +       align = MAX (align, offset_align);
> +     }
> +    }
>    else if (poly_int_rtx_p (offset_rtx, &offset))
>      {
>        align = least_bit_hwi (boundary);
> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-1.c
> ===================================================================
> --- gcc/testsuite/gcc.target/arm/unaligned-argument-1.c       (revision 0)
> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-1.c       (working copy)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft 
> -mabi=aapcs -O3" } */
> +
> +struct s {
> +  int a, b;
> +} __attribute__((aligned(8)));
> +
> +struct s f0;
> +
> +void f(int a, int b, int c, int d, struct s f)
> +{
> +  /* f is on a 64 bit aligned stack slot, thus ldrd OK.  */
> +  f0 = f;
> +}
> +
> +/* { dg-final { scan-assembler-times "ldrd" 1 } } */
> +/* { dg-final { scan-assembler-times "strd" 1 } } */
> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
> ===================================================================
> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c       (revision 0)
> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c       (working copy)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-marm -march=armv6 -mno-unaligned-access -mfloat-abi=soft 
> -mabi=aapcs -O3" } */
> +
> +struct s {
> +  int a, b;
> +} __attribute__((aligned(8)));
> +
> +struct s f0;
> +
> +void f(int a, int b, int c, int d, int e, struct s f)
> +{
> +  /* f is on a 32 bit aligned stack slot, thus no ldrd.  */
> +  f0 = f;
> +}
> +
> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
> +/* { dg-final { scan-assembler-times "strd" 1 } } */
> 

As explained on our other thread, I think this is papering over a deeper
problem.  Put simply,

(mem/c:DI (plus:SI (reg/f:SI 104 virtual-incoming-args)
        (const_int 4 [0x4])) [1 f+0 S8 A32])

is not valid RTL on a strict alignment target (DImode with A32) and
should never be generated.

I've created PR89544.

R.

Reply via email to