Hi,
This implements mask reductions by first counting the bits in the mask
(vcpop.m) and then comparing the resulting scalar against 0 or len.
Changes from v1:
- Add tests (almost verbatim from aarch64).
Regtested on rv64gcv_zvl512b.
Regards
Robin
gcc/ChangeLog:
* config/riscv/autovec.md (reduc_sbool_and_scal_<mode>): New
expander.
(reduc_sbool_ior_scal_<mode>): Ditto.
(reduc_sbool_xor_scal_<mode>): Ditto.
* config/riscv/riscv-protos.h (expand_mask_reduction): Declare.
* config/riscv/riscv-v.cc (expand_mask_reduction): New function.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c: New test.
---
gcc/config/riscv/autovec.md | 31 +++++++++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 48 +++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-1.c | 52 +++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-2.c | 52 +++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-3.c | 52 +++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-4.c | 52 +++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-5.c | 50 ++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-6.c | 50 ++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-7.c | 50 ++++++++++++++++++
.../riscv/rvv/autovec/reduc/reduc-bool-8.c | 50 ++++++++++++++++++
11 files changed, 488 insertions(+)
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index cec0113fca3..c694684328f 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2301,6 +2301,37 @@ (define_expand "reduc_xor_scal_<mode>"
DONE;
})
+;; -------------------------------------------------------------------------
+;; ---- [INT] Mask reductions
+;; -------------------------------------------------------------------------
+
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, AND);
+ DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, IOR);
+ DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, XOR);
+ DONE;
+})
+
;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a372779cf9f..d97773256c9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -664,6 +664,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
+void expand_mask_reduction (rtx *, rtx_code);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5e30b77b4eb..f4a35fdf6ad 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4885,6 +4885,54 @@ expand_reduction (unsigned unspec, unsigned
unspec_for_vl0_safe,
emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2));
}
+/* Expand mask reductions. OPS are {dest, src} where DEST's mode
+ is QImode and SRC's mode is a mask mode.
+ CODE is one of AND, IOR, XOR. */
+
+void
+expand_mask_reduction (rtx *ops, rtx_code code)
+{
+ machine_mode mode = GET_MODE (ops[1]);
+ rtx dest = ops[0];
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ rtx tmp = gen_reg_rtx (Xmode);
+ rtx cpop_ops[] = {tmp, ops[1]};
+ emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops);
+
+ bool eq_zero = false;
+
+ /* AND reduction is popcount (mask) == len,
+ IOR reduction is popcount (mask) != 0,
+ XOR reduction is popcount (mask) & 1 != 0. */
+ if (code == AND)
+ {
+ rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode);
+ tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true,
+ OPTAB_DIRECT);
+ eq_zero = true;
+ }
+ else if (code == IOR)
+ ;
+ else if (code == XOR)
+ tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true,
+ OPTAB_DIRECT);
+ else
+ gcc_unreachable ();
+
+ rtx els = gen_label_rtx ();
+ rtx end = gen_label_rtx ();
+
+ riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx);
+ emit_move_insn (dest, const0_rtx);
+ emit_jump_insn (gen_jump (end));
+ emit_barrier ();
+
+ emit_label (els);
+ emit_move_insn (dest, const1_rtx);
+ emit_label (end);
+}
+
/* Prepare ops for ternary operations.
It can be called before or after RA. */
void
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
new file mode 100644
index 00000000000..d4d540126ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
new file mode 100644
index 00000000000..3256206b5fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
new file mode 100644
index 00000000000..4303cf3846c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
new file mode 100644
index 00000000000..bf85ca7eb45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
new file mode 100644
index 00000000000..3ed5a6c6799
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
new file mode 100644
index 00000000000..6f0de436413
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
new file mode 100644
index 00000000000..233d5b67c3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
new file mode 100644
index 00000000000..80a27e0811c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
--
2.51.1