1) Fix the predicate of operands[3] in cond_<insn><mode>, since only
const_vec_dup_operand is accepted for masked operations, and pass the
real count to ix86_vgf2p8affine_shift_matrix.
2) Pass operands[2] instead of operands[1] to
gen_vgf2p8affineqb_<mode>_mask, which expects the operand to be shifted;
operands[1] is the mask operand in cond_<insn><mode>.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.
gcc/ChangeLog:
PR target/121699
* config/i386/predicates.md (const_vec_dup_operand): New
predicate.
* config/i386/sse.md (cond_<insn><mode>): Fix predicate of
operands[3], and fix wrong operands passed to
ix86_vgf2p8affine_shift_matrix and
gen_vgf2p8affineqb_<mode>_mask.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr121699.c: New test.
---
gcc/config/i386/predicates.md | 3 +++
gcc/config/i386/sse.md | 8 ++++----
gcc/testsuite/gcc.target/i386/pr121699.c | 23 +++++++++++++++++++++++
3 files changed, 30 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr121699.c
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 175798cff69..5dbe444847f 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1319,6 +1319,9 @@ (define_predicate "nonimmediate_or_const_vec_dup_operand"
(ior (match_operand 0 "nonimmediate_operand")
(match_test "const_vec_duplicate_p (op)")))
+(define_predicate "const_vec_dup_operand"
+ (match_test "const_vec_duplicate_p (op)"))
+
;; Return true when OP is either register operand, or any
;; CONST_VECTOR.
(define_predicate "reg_or_const_vector_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 505095040f7..73906b85d89 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -27001,19 +27001,19 @@ (define_expand "<insn><mode>3"
DONE;
})
-; not generated by vectorizer?
(define_expand "cond_<insn><mode>"
[(set (match_operand:VI1_AVX512VL 0 "register_operand")
(vec_merge:VI1_AVX512VL
(any_shift:VI1_AVX512VL
(match_operand:VI1_AVX512VL 2 "register_operand")
- (match_operand:VI1_AVX512VL 3
"nonimmediate_or_const_vec_dup_operand"))
+ (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand"))
(match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand")
(match_operand:<avx512fmaskmode> 1 "register_operand")))]
"TARGET_GFNI && TARGET_AVX512F"
{
- rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
<CODE>);
- emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix,
+ rtx count = XVECEXP (operands[3], 0, 0);
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix,
const0_rtx, operands[4],
operands[1]));
DONE;
diff --git a/gcc/testsuite/gcc.target/i386/pr121699.c b/gcc/testsuite/gcc.target/i386/pr121699.c
new file mode 100644
index 00000000000..80c1404bebe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121699.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=znver4 -O3" } */
+
+typedef struct
+{
+ int u32;
+} nir_const_value;
+
+nir_const_value *evaluate_prmt_nv__dst_val;
+
+int evaluate_prmt_nv__src_0, evaluate_prmt_nv_src;
+
+void
+evaluate_prmt_nv (unsigned num_components)
+{
+ for (unsigned _i = 0; _i < num_components; _i++)
+ {
+ char x = evaluate_prmt_nv_src;
+ if (evaluate_prmt_nv__src_0)
+ x = x >> 7;
+ evaluate_prmt_nv__dst_val[_i].u32 = x;
+ }
+}
--
2.34.1