Hi,

This fixes PR103500 i.e. ensuring that stack slots for
passed-by-reference overaligned types are appropriately aligned. For the
testcase:

typedef struct __attribute__((aligned(32))) {
  long x,y;
} S;
S x;
void f(S);
void g(void) { f(x); }

on AArch64, we currently generate (at -O2):

g:
        adrp    x1, .LANCHOR0
        add     x1, x1, :lo12:.LANCHOR0
        stp     x29, x30, [sp, -48]!
        mov     x29, sp
        ldp     q0, q1, [x1]
        add     x0, sp, 16
        stp     q0, q1, [sp, 16]
        bl      f
        ldp     x29, x30, [sp], 48
        ret

so the stack slot for the passed-by-reference copy of the structure is
at sp + 16, and the sp is only guaranteed to be 16-byte aligned, so the
structure is only 16-byte aligned. The PCS requires the structure to be
32-byte aligned. After this patch, we generate:

g:
        adrp    x1, .LANCHOR0
        add     x1, x1, :lo12:.LANCHOR0
        stp     x29, x30, [sp, -64]!
        mov     x29, sp
        add     x0, sp, 47
        ldp     q0, q1, [x1]
        and     x0, x0, -32
        stp     q0, q1, [x0]
        bl      f
        ldp     x29, x30, [sp], 64
        ret

i.e. we ensure 32-byte alignment for the struct.

The approach taken here is similar to that in
function.c:assign_parm_setup_block where it handles the case for
DECL_ALIGN (parm) > MAX_SUPPORTED_STACK_ALIGNMENT. This in turn is
similar to the approach taken in cfgexpand.c:expand_stack_vars (where
the function calls get_dynamic_stack_size) which is the code that
handles the alignment for overaligned structures as addressable local
variables (see the related case discussed in the PR).

This patch also updates the aapcs64 test mentioned in the PR to avoid
the frontend folding away the alignment check. I've confirmed that the
execution test actually fails on aarch64-linux-gnu prior to the patch
being applied and passes afterwards.

Bootstrapped and regtested on aarch64-linux-gnu, x86_64-linux-gnu, and
arm-linux-gnueabihf: no regressions.

I'd appreciate any feedback. Is it OK for trunk?

Thanks,
Alex

gcc/ChangeLog:

        PR middle-end/103500
        * function.c (get_stack_local_alignment): Align BLKmode overaligned
        types to the alignment required by the type.
        (assign_stack_temp_for_type): Handle BLKmode overaligned stack
        slots by allocating a larger-than-necessary buffer and aligning
        the address within appropriately.

gcc/testsuite/ChangeLog:

        PR middle-end/103500
        * gcc.target/aarch64/aapcs64/rec_align-8.c (test_pass_by_ref):
        Prevent the frontend from folding our alignment check away by
        using snprintf to store the pointer into a string and recovering
        it with sscanf.
diff --git a/gcc/function.c b/gcc/function.c
index 61b3bd036b8..5ed722ab959 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -278,7 +278,9 @@ get_stack_local_alignment (tree type, machine_mode mode)
   unsigned int alignment;
 
   if (mode == BLKmode)
-    alignment = BIGGEST_ALIGNMENT;
+    alignment = (type && TYPE_ALIGN (type) > MAX_SUPPORTED_STACK_ALIGNMENT)
+      ? TYPE_ALIGN (type)
+      : BIGGEST_ALIGNMENT;
   else
     alignment = GET_MODE_ALIGNMENT (mode);
 
@@ -872,21 +874,35 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 
size, tree type)
 
       p = ggc_alloc<temp_slot> ();
 
-      /* We are passing an explicit alignment request to assign_stack_local.
-        One side effect of that is assign_stack_local will not round SIZE
-        to ensure the frame offset remains suitably aligned.
-
-        So for requests which depended on the rounding of SIZE, we go ahead
-        and round it now.  We also make sure ALIGNMENT is at least
-        BIGGEST_ALIGNMENT.  */
-      gcc_assert (mode != BLKmode || align == BIGGEST_ALIGNMENT);
-      p->slot = assign_stack_local_1 (mode,
-                                     (mode == BLKmode
-                                      ? aligned_upper_bound (size,
-                                                             (int) align
-                                                             / BITS_PER_UNIT)
-                                      : size),
-                                     align, 0);
+      if (mode == BLKmode && align > MAX_SUPPORTED_STACK_ALIGNMENT)
+       {
+         rtx allocsize = gen_int_mode (size, Pmode);
+         get_dynamic_stack_size (&allocsize, 0, align, NULL);
+         gcc_assert (CONST_INT_P (allocsize));
+         size = UINTVAL (allocsize);
+         p->slot = assign_stack_local_1 (mode,
+                                         size,
+                                         BIGGEST_ALIGNMENT, 0);
+         rtx addr = align_dynamic_address (XEXP (p->slot, 0), align);
+         mark_reg_pointer (addr, align);
+         p->slot = gen_rtx_MEM (GET_MODE (p->slot), addr);
+         MEM_NOTRAP_P (p->slot) = 1;
+       }
+      else
+       /* We are passing an explicit alignment request to assign_stack_local.
+          One side effect of that is assign_stack_local will not round SIZE
+          to ensure the frame offset remains suitably aligned.
+
+          So for requests which depended on the rounding of SIZE, we go ahead
+          and round it now.  We also make sure ALIGNMENT is at least
+          BIGGEST_ALIGNMENT.  */
+       p->slot = assign_stack_local_1 (mode,
+                                       (mode == BLKmode
+                                        ? aligned_upper_bound (size,
+                                                               (int) align
+                                                               / BITS_PER_UNIT)
+                                        : size),
+                                       align, 0);
 
       p->align = align;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/rec_align-8.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/rec_align-8.c
index c9351802191..0b0ac0b9b97 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/rec_align-8.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/rec_align-8.c
@@ -21,10 +21,18 @@ test_pass_by_ref (int x0, overaligned x1, int x2)
     abort ();
   if (memcmp ((void *) &x1, (void *)&a, sizeof (overaligned)))
     abort ();
-  long addr = ((long) &x1) & 31;
+
+  char buf[64];
+  void *ptr;
+
+  __builtin_snprintf (buf, sizeof(buf), "%p", &x1);
+  if (__builtin_sscanf(buf, "%p", &ptr) != 1)
+    abort ();
+
+  long addr = ((long) ptr) & 31;
   if (addr != 0)
     {
-      __builtin_printf ("Alignment was %d\n", addr);
+      __builtin_printf ("Alignment was %ld\n", addr);
       abort ();
     }
 }

Reply via email to