Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
"Yv" register constraint with the "Yw" register constraint.
gcc/
PR target/105068
* config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
"Yw".
gcc/testsuite/
PR target/105068
* gcc.target/i386/pr105068.c: New test.
---
gcc/config/i386/sse.md | 6 +--
gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
2 files changed, 50 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 33bd2c4768a..58d2bd972ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
})
(define_insn_and_split "*ssse3_pshufbv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
- (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
(match_operand:V4SI 4 "reg_or_const_vector_operand"
"i,3,3")]
UNSPEC_PSHUFB))
diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
new file mode 100644
index 00000000000..e5fb0338e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105068.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
+
+typedef char __attribute__((__vector_size__(8))) C;
+typedef int __attribute__((__vector_size__(8))) U;
+typedef int __attribute__((__vector_size__(16))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+typedef long long __attribute__((__vector_size__(64))) L;
+typedef _Float64 __attribute__((__vector_size__(16))) F;
+typedef _Float64 __attribute__((__vector_size__(64))) G;
+C c;
+int i;
+
+U foo0( W v256u32_0, /* PR target/105068 reproducer: compile-only; every parameter is folded into the return value below */
+ W v256s32_0,
+ V v128u64_0,
+ V v128s64_0,
+ W v256u64_0,
+ W v256s64_0,
+ L v512s64_0,
+ W v256u128_0,
+ W v256s128_0,
+ V v128f32_0,
+ W v256f32_0,
+ F F_0,
+ W v256f64_0,
+ G G_0) {
+ C U_1 = __builtin_ia32_pshufb(c, c); /* 8-byte pshufb on the global c; presumably expands to the ssse3_pshufbv8qi3 pattern this patch touches -- confirm */
+ G_0 += __builtin_convertvector(v512s64_0, G); /* element-wise long long -> _Float64 conversion, accumulated into G_0 */
+ F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
+ W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
+ v256s128_0 + v256f32_0 + v256f64_0;
+ V V_r = ((union { /* cast-to-union (GNU extension): read W_r's storage through the 16-byte member b */
+ W a;
+ V b;
+ })W_r)
+ .b +
+ i + v128u64_0 + v128s64_0 + v128f32_0 +
+ (V)F_1;
+ U U_r = ((union { /* same union trick, narrowing the 16-byte V_r to the 8-byte type U */
+ V a;
+ U b;
+ })V_r)
+ .b +
+ (U)U_1; /* the pshufb result is reinterpreted as U and added in, so it is live in the return value */
+ return U_r;
+}
--
2.35.1