vpbroadcastd/vpbroadcastq is available under TARGET_AVX2, but the
vec_dup{v4di,v8si} pattern is available under AVX with a memory operand.
It will cause LRA/Reload to generate a spill and reload if we put the
constant in a register.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/112992
        * config/i386/i386-expand.cc
        (ix86_convert_const_wide_int_to_broadcast): Don't convert to
        broadcast for vec_dup{v4di,v8si} when TARGET_AVX2 is not
        available.
        (ix86_broadcast_from_constant): Allow broadcast for V4DI/V8SI
        when !TARGET_AVX2 since it will be forced to memory later.
        (ix86_expand_vector_move): Force constant to mem for
        vec_dup{v8si,v4di} when TARGET_AVX2 is not available.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr100865-7a.c: Adjust testcase.
        * gcc.target/i386/pr100865-7c.c: Ditto.
        * gcc.target/i386/pr112992.c: New test.
---
 gcc/config/i386/i386-expand.cc              | 48 +++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr100865-7a.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr100865-7c.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr112992.c    | 30 +++++++++++++
 4 files changed, 62 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112992.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a53d69d5400..fad4f34f905 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -297,6 +297,12 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode 
mode, rtx op)
   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     return nullptr;
 
+  unsigned int msize = GET_MODE_SIZE (mode);
+
+  /* Only optimized for vpbroadcast[bwsd]/vbroadcastss with xmm/ymm/zmm.  */
+  if (msize != 16 && msize != 32 && msize != 64)
+    return nullptr;
+
   /* Convert CONST_WIDE_INT to a non-standard SSE constant integer
      broadcast only if vector broadcast is available.  */
   if (!TARGET_AVX
@@ -309,18 +315,23 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode 
mode, rtx op)
   HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0);
   HOST_WIDE_INT val_broadcast;
   scalar_int_mode broadcast_mode;
-  if (TARGET_AVX2
+  /* vpbroadcastb zmm requires TARGET_AVX512BW.  */
+  if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
       && ix86_broadcast (val, GET_MODE_BITSIZE (QImode),
                         val_broadcast))
     broadcast_mode = QImode;
-  else if (TARGET_AVX2
+  else if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
           && ix86_broadcast (val, GET_MODE_BITSIZE (HImode),
                              val_broadcast))
     broadcast_mode = HImode;
-  else if (ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
+  /* vbroadcasts[sd] only support memory operand w/o AVX2.
+     When msize == 16, pshufs is used for vec_duplicate.
+     when msize == 64, vpbroadcastd is used, and TARGET_AVX512F must be 
existed.  */
+  else if ((msize != 32 || TARGET_AVX2)
+          && ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
                           val_broadcast))
     broadcast_mode = SImode;
-  else if (TARGET_64BIT
+  else if (TARGET_64BIT && (msize != 32 || TARGET_AVX2)
           && ix86_broadcast (val, GET_MODE_BITSIZE (DImode),
                              val_broadcast))
     broadcast_mode = DImode;
@@ -596,23 +607,17 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
       && INTEGRAL_MODE_P (mode))
     return nullptr;
 
+  unsigned int msize = GET_MODE_SIZE (mode);
+  unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode)));
+
   /* Convert CONST_VECTOR to a non-standard SSE constant integer
      broadcast only if vector broadcast is available.  */
-  if (!(TARGET_AVX2
-       || (TARGET_AVX
-           && (GET_MODE_INNER (mode) == SImode
-               || GET_MODE_INNER (mode) == DImode))
-       || FLOAT_MODE_P (mode))
-      || standard_sse_constant_p (op, mode))
+  if (standard_sse_constant_p (op, mode))
     return nullptr;
 
-  /* Don't broadcast from a 64-bit integer constant in 32-bit mode.
-     We can still put 64-bit integer constant in memory when
-     avx512 embed broadcast is available.  */
-  if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT
-      && (!TARGET_AVX512F
-         || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
-         || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL)))
+  /* vpbroadcast[b,w] is available under TARGET_AVX2.
+     or TARGET_AVX512BW for zmm.  */
+  if (inner_size < 4 && !(msize == 64 ? TARGET_AVX512BW : TARGET_AVX2))
     return nullptr;
 
   if (GET_MODE_INNER (mode) == TImode)
@@ -710,7 +715,14 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
             constant or scalar mem.  */
          op1 = gen_reg_rtx (mode);
          if (FLOAT_MODE_P (mode)
-             || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
+             || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)
+             /* vbroadcastss/vbroadcastsd only supports memory operand
+                w/o AVX2, force them into memory to avoid spill to
+                memory.  */
+             || (GET_MODE_SIZE (mode) == 32
+                 && (GET_MODE_INNER (mode) == DImode
+                     || GET_MODE_INNER (mode) == SImode)
+                 && !TARGET_AVX2))
            first = force_const_mem (GET_MODE_INNER (mode), first);
          bool ok = ix86_expand_vector_init_duplicate (false, mode,
                                                       op1, first);
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c 
b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
index f6f2be91120..7de7d4a3ce3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
@@ -11,7 +11,6 @@ foo (void)
     array[i] = -45;
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, 
%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, 
%ymm\[0-9\]+" 1  } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastq" { target ia32 } } } */
 /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c 
b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
index 4d50bb7e2f6..edbfd5b09ed 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
@@ -11,7 +11,6 @@ foo (void)
     array[i] = -45;
 }
 
-/* { dg-final { scan-assembler-times "vbroadcastsd" 1 { target { ! ia32 } } } 
} */
+/* { dg-final { scan-assembler-times "vbroadcastsd" 1  } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vbroadcastsd" { target ia32 } } } */
 /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112992.c 
b/gcc/testsuite/gcc.target/i386/pr112992.c
new file mode 100644
index 00000000000..743e64dccba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112992.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx -mno-avx2 -O2 " } */
+/* { dg-final { scan-assembler-not {(?n)(%rsp)} } } */
+
+typedef unsigned long long v4di __attribute((vector_size(32)));
+typedef unsigned int v8si __attribute((vector_size(32)));
+typedef unsigned short v16hi __attribute((vector_size(32)));
+typedef unsigned char v32qi __attribute((vector_size(32)));
+
+#define MASK  0x01010101
+#define MASKL 0x0101010101010101ULL
+#define MASKS 0x0101
+
+v4di fooq() {
+  return (v4di){MASKL,MASKL,MASKL,MASKL};
+}
+
+v8si food() {
+  return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK};
+}
+
+v16hi foow() {
+  return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,
+    MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS};
+}
+
+v32qi foob() {
+  return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+}
-- 
2.31.1

Reply via email to