[PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3

H.J. Lu via Gcc-patches Sun, 27 Mar 2022 11:14:36 -0700

Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
"Yv" register constraint with the "Yw" register constraint.


gcc/

        PR target/105068
        * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
        "Yw".

gcc/testsuite/

        PR target/105068
        * gcc.target/i386/pr105068.c: New test.
---
 gcc/config/i386/sse.md                   |  6 +--
 gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 33bd2c4768a..58d2bd972ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
 })
 
 (define_insn_and_split "*ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
-       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
-                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
                      (match_operand:V4SI 4 "reg_or_const_vector_operand"
                                          "i,3,3")]
                     UNSPEC_PSHUFB))
diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
new file mode 100644
index 00000000000..e5fb0338e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105068.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
+
+typedef char __attribute__((__vector_size__(8))) C;
+typedef int __attribute__((__vector_size__(8))) U;
+typedef int __attribute__((__vector_size__(16))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+typedef long long __attribute__((__vector_size__(64))) L;
+typedef _Float64 __attribute__((__vector_size__(16))) F;
+typedef _Float64 __attribute__((__vector_size__(64))) G;
+C c;
+int i;
+
+U foo0( W v256u32_0,
+           W v256s32_0,
+           V v128u64_0,
+           V v128s64_0,
+           W v256u64_0,
+           W v256s64_0,
+           L v512s64_0,
+           W v256u128_0,
+           W v256s128_0,
+           V v128f32_0,
+           W v256f32_0,
+           F F_0,
+           W v256f64_0,
+           G G_0) {
+  C U_1 = __builtin_ia32_pshufb(c, c);
+  G_0 += __builtin_convertvector(v512s64_0, G);
+  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
+  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
+                    v256s128_0 + v256f32_0 + v256f64_0;
+  V V_r = ((union {
+                      W a;
+                      V b;
+                    })W_r)
+                        .b +
+                    i + v128u64_0 + v128s64_0 + v128f32_0 +
+                    (V)F_1;
+  U U_r = ((union {
+                    V a;
+                    U b;
+                  })V_r)
+                      .b +
+                  (U)U_1;
+  return U_r;
+}
-- 
2.35.1

[PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3

Reply via email to