This patch improves alloca alignment.  Currently alloca reserves
too much space as it aligns twice, and generates unnecessary stack
alignment code.  For example alloca (16) generates:

        sub     sp, sp, #32   ???
        mov     x1, sp

Similarly alloca (x) generates:

        add     x0, x0, 30    ???
        and     x0, x0, -16
        sub     sp, sp, x0
        mov     x0, sp

__builtin_alloca_with_align (x, 256):
        add     x0, x0, 78    ???
        and     x0, x0, -16
        sub     sp, sp, x0
        add     x0, sp, 63
        and     x0, x0, -64

As can be seen the alignment adjustment value is incorrect.
When the requested alignment is lower than the stack alignment, no
extra alignment is needed.  If the requested alignment is higher,
we need to increase the size by the difference of the requested 
alignment and the stack alignment.  As a result, the alloca alignment
is exactly as expected:

alloca (16):
        sub     sp, sp, #16
        mov     x1, sp

alloca (x):
        add     x0, x0, 15
        and     x0, x0, -16
        sub     sp, sp, x0
        mov     x0, sp

__builtin_alloca_with_align (x, 256):
        add     x0, x0, 63
        and     x0, x0, -16
        sub     sp, sp, x0
        add     x0, sp, 63
        and     x0, x0, -64

ChangeLog:
2017-07-26  Wilco Dijkstra  <wdijk...@arm.com>

        * explow.c (get_dynamic_stack_size): Improve dynamic alignment.

--
diff --git a/gcc/explow.c b/gcc/explow.c
index 
50074e281edd5270c76d29feac6b7a92f598d11d..fbdda5fa1e303664e346f975270415b40aed252d
 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1234,15 +1234,22 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align,
      example), so we must preventively align the value.  We leave space
      in SIZE for the hole that might result from the alignment operation.  */
 
-  extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT;
-  size = plus_constant (Pmode, size, extra);
-  size = force_operand (size, NULL_RTX);
-
-  if (flag_stack_usage_info && pstack_usage_size)
-    *pstack_usage_size += extra;
+  /* Since the stack is presumed to be aligned before this allocation,
+     we only need to increase the size of the allocation if the required
+     alignment is more than the stack alignment.
+     Note size_align doesn't need to be updated - if it is larger than the
+     stack alignment, size remains a multiple of the stack alignment, so
+     we can skip rounding up to the stack alignment.  */
+  if (required_align > MAX_SUPPORTED_STACK_ALIGNMENT)
+    {
+      extra = (required_align - MAX_SUPPORTED_STACK_ALIGNMENT)
+       / BITS_PER_UNIT;
+      size = plus_constant (Pmode, size, extra);
+      size = force_operand (size, NULL_RTX);
 
-  if (extra && size_align > BITS_PER_UNIT)
-    size_align = BITS_PER_UNIT;
+      if (flag_stack_usage_info && pstack_usage_size)
+       *pstack_usage_size += extra;
+    }
 
   /* Round the size to a multiple of the required stack alignment.
      Since the stack is presumed to be rounded before this allocation,

Reply via email to