1) Fix predicate of operands[3] in cond_<insn><mode> since only
const_vec_dup_operand is expected for masked operations, and pass the real
count to ix86_vgf2p8affine_shift_matrix.

2) Pass operands[2] instead of operands[1] to
gen_vgf2p8affineqb_<mode>_mask, which expects the operand to be shifted;
operands[1] is the mask operand in cond_<insn><mode>.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/121699
        * config/i386/predicates.md (const_vec_dup_operand): New
        predicate.
        * config/i386/sse.md (cond_<insn><mode>): Fix predicate of
        operands[3], and fix wrong operands passed to
        ix86_vgf2p8affine_shift_matrix and
        gen_vgf2p8affineqb_<mode>_mask.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr121699.c: New test.
---
 gcc/config/i386/predicates.md            |  3 +++
 gcc/config/i386/sse.md                   |  8 ++++----
 gcc/testsuite/gcc.target/i386/pr121699.c | 23 +++++++++++++++++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121699.c

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 175798cff69..5dbe444847f 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1319,6 +1319,9 @@ (define_predicate "nonimmediate_or_const_vec_dup_operand"
   (ior (match_operand 0 "nonimmediate_operand")
        (match_test "const_vec_duplicate_p (op)")))
 
+(define_predicate "const_vec_dup_operand"
+       (match_test "const_vec_duplicate_p (op)"))
+
 ;; Return true when OP is either register operand, or any
 ;; CONST_VECTOR.
 (define_predicate "reg_or_const_vector_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 505095040f7..73906b85d89 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -27001,19 +27001,19 @@ (define_expand "<insn><mode>3"
   DONE;
 })
 
-; not generated by vectorizer?
 (define_expand "cond_<insn><mode>"
   [(set (match_operand:VI1_AVX512VL 0 "register_operand")
        (vec_merge:VI1_AVX512VL
          (any_shift:VI1_AVX512VL
            (match_operand:VI1_AVX512VL 2 "register_operand")
-           (match_operand:VI1_AVX512VL 3 "nonimmediate_or_const_vec_dup_operand"))
+           (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand"))
          (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand")
        (match_operand:<avx512fmaskmode> 1 "register_operand")))]
   "TARGET_GFNI && TARGET_AVX512F"
 {
-  rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
-  emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix,
+  rtx count = XVECEXP (operands[3], 0, 0);
+  rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>);
+  emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix,
                                             const0_rtx, operands[4],
                                             operands[1]));
   DONE;
diff --git a/gcc/testsuite/gcc.target/i386/pr121699.c b/gcc/testsuite/gcc.target/i386/pr121699.c
new file mode 100644
index 00000000000..80c1404bebe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121699.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=znver4 -O3" } */
+
+typedef struct
+{
+  int u32;
+} nir_const_value;
+
+nir_const_value *evaluate_prmt_nv__dst_val;
+
+int evaluate_prmt_nv__src_0, evaluate_prmt_nv_src;
+
+void
+evaluate_prmt_nv (unsigned num_components)
+{
+  for (unsigned _i = 0; _i < num_components; _i++)
+    {
+      char x = evaluate_prmt_nv_src;
+      if (evaluate_prmt_nv__src_0)
+        x = x >> 7;
+      evaluate_prmt_nv__dst_val[_i].u32 = x;
+    }
+}
-- 
2.34.1

Reply via email to