On Sun, Aug 5, 2018 at 12:15 AM, Uros Bizjak <ubiz...@gmail.com> wrote:
>>> OK, but please add a comment, so in the future we will still know the
>>> purpose of the magic number.
>>>
>>
>> Like this?
>>
>> H.J.
>> ---
>> cfun->machine->max_used_stack_alignment is used to decide how stack frame
>> should be aligned.  This is independent of any psABIs nor 32-bit vs 64-bit.
>> It is always safe to compute max_used_stack_alignment.  We compute it only
>> if 128-bit aligned load/store may be generated on misaligned stack slot
>> which will lead to segfault.
>>
>> gcc/
>>
>>         PR target/86386
>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Set
>>         cfun->machine->max_used_stack_alignment if needed.
>>
>> gcc/testsuite/
>>
>>         PR target/86386
>>         * gcc.target/i386/pr86386.c: New file.
>
> OK, but please write the condition as ">= 128". The number 64 confused
> me; I was thinking that it has something to do with minimum alignment
> on 64bit target, while 128 clearly shows that alignment is related to
> SSE moves. With ">= 128", I think that code is clear enough that a
> long comment is not needed.
>
> Thanks, and sorry for the confusion,
> Uros.
>

This is what I checked in.  I kept the comment change.  I will backport it
to GCC 8 after a few days.

Thanks.

-- 
H.J.
From 51af74d8d141aeceaacf2c9eb8e0b126e8f6c13b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Thu, 2 Aug 2018 10:43:03 -0700
Subject: [PATCH] i386: Set cfun->machine->max_used_stack_alignment if needed

cfun->machine->max_used_stack_alignment is used to decide how the stack
frame should be aligned.  This is independent of any psABI and of 32-bit
vs. 64-bit.  It is always safe to compute max_used_stack_alignment.  We
compute it only if a 128-bit aligned load/store may be generated on a
misaligned stack slot, which would lead to a segfault.

gcc/

	PR target/86386
	* config/i386/i386.c (ix86_finalize_stack_frame_flags): Set
	cfun->machine->max_used_stack_alignment if needed.

gcc/testsuite/

	PR target/86386
	* gcc.target/i386/pr86386.c: New file.
---
 gcc/config/i386/i386.c                  | 14 +++++++------
 gcc/testsuite/gcc.target/i386/pr86386.c | 26 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr86386.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ee409cfe7e4..7554fd1f659 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13281,12 +13281,14 @@ ix86_finalize_stack_frame_flags (void)
 	  recompute_frame_layout_p = true;
 	}
     }
-  else if (crtl->max_used_stack_slot_alignment
-	   > crtl->preferred_stack_boundary)
-    {
-      /* We don't need to realign stack.  But we still need to keep
-	 stack frame properly aligned to satisfy the largest alignment
-	 of stack slots.  */
+  else if (crtl->max_used_stack_slot_alignment >= 128)
+    {
+      /* We don't need to realign stack.  max_used_stack_alignment is
+	 used to decide how stack frame should be aligned.  This is
+	 independent of any psABI and of 32-bit vs. 64-bit.  It is always
+	 safe to compute max_used_stack_alignment.  We compute it only
+	 if a 128-bit aligned load/store may be generated on a misaligned
+	 stack slot, which would lead to a segfault.  */
       if (ix86_find_max_used_stack_alignment (stack_alignment, true))
 	cfun->machine->max_used_stack_alignment
 	  = stack_alignment / BITS_PER_UNIT;
diff --git a/gcc/testsuite/gcc.target/i386/pr86386.c b/gcc/testsuite/gcc.target/i386/pr86386.c
new file mode 100644
index 00000000000..a67cf45444e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr86386.c
@@ -0,0 +1,26 @@
+/* PR target/86386 */
+/* { dg-do run { target { avx_runtime && int128 } } } */
+/* { dg-options "-Os -fno-tree-dce -mstringop-strategy=vector_loop -mavx" } */
+
+unsigned c, d, e, f;
+
+unsigned __attribute__((noipa))
+foo (unsigned char g, unsigned short h, unsigned i, unsigned long long j,
+     unsigned char k, unsigned short l, unsigned m, unsigned __int128 n)
+{
+  __builtin_memset (&e, 0, 3);
+  n <<= m;
+  __builtin_memcpy (&m, 2 + (char *) &n, 1);
+  m >>= 0;
+  d ^= __builtin_mul_overflow (l, n, &m);
+  return m;
+}
+
+int
+main ()
+{
+  unsigned __int128 x = foo (0, 0, 0, 0, 0, 4, 1, 3);
+  if (x != 24)
+    __builtin_abort ();
+  return 0;
+}
-- 
2.17.1

Reply via email to