Hi,
The PR involves large mask vectors (e.g. V128BI) from which we take
the popcount. Currently a (popcount:DI (V128BI)) is assumed to have
at most 8 set bits as we assume the popcount operand also has DImode.
This patch uses the operand mode for unary operations and thus
calculates a proper nonzero-bits mask.
We could do the same estimate for ctz and clz but they use nonzero in a
non-poly way and I didn't want to change more than necessary. Therefore the
patch just returns -1 when we have a different operand mode for ctz/clz.
Bootstrapped and regtested on x86, power10, aarch64 still running.
Regtested on riscv64.
Regards
Robin
PR rtl-optimization/123501
gcc/ChangeLog:
* rtlanal.cc (nonzero_bits1): Use operand mode instead of
operation mode.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/reduc/pr123501.c: New test.
---
gcc/rtlanal.cc | 35 +++++++++++++++----
.../riscv/rvv/autovec/reduc/pr123501.c | 21 +++++++++++
2 files changed, 50 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index cac82e49111..da47f9ba22e 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -4795,6 +4795,24 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode,
const_rtx known_x,
unsigned int mode_width = GET_MODE_PRECISION (mode);
+ /* For unary ops like ffs or popcount we want to determine the number of
+ nonzero bits from the operand. This only matters with very large
+ vector modes. A
+ (popcount:DI (V128BImode)
+ should not get a nonzero-bit mask of (1 << 7) - 1 as that could
+ lead to incorrect optimizations based on it, see PR123501. */
+ unsigned int op_mode_width = mode_width;
+ machine_mode op_mode = mode;
+ if (UNARY_P (x))
+ {
+ const_rtx op = XEXP (x, 0);
+ if (GET_MODE_PRECISION (GET_MODE (op)).is_constant ())
+ {
+ op_mode = GET_MODE (op);
+ op_mode_width = GET_MODE_PRECISION (op_mode).to_constant ();
+ }
+ }
+
if (CONST_INT_P (x))
{
if (SHORT_IMMEDIATES_SIGN_EXTEND
@@ -5198,13 +5216,16 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode,
const_rtx known_x,
case FFS:
case POPCOUNT:
/* This is at most the number of bits in the mode. */
- nonzero = (HOST_WIDE_INT_UC (2) << (floor_log2 (mode_width))) - 1;
+ nonzero = (HOST_WIDE_INT_UC (2) << (floor_log2 (op_mode_width))) - 1;
break;
case CLZ:
/* If CLZ has a known value at zero, then the nonzero bits are
- that value, plus the number of bits in the mode minus one. */
- if (CLZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
+ that value, plus the number of bits in the mode minus one.
+ If we have a different operand mode, don't try to get nonzero
+ bits as currently nonzero is not a poly_int. */
+ if (op_mode == mode
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
nonzero
|= (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
else
@@ -5213,8 +5234,10 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode,
const_rtx known_x,
case CTZ:
/* If CTZ has a known value at zero, then the nonzero bits are
- that value, plus the number of bits in the mode minus one. */
- if (CTZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
+ that value, plus the number of bits in the mode minus one.
+ See above for op_mode != mode. */
+ if (op_mode == mode
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
nonzero
|= (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
else
@@ -5223,7 +5246,7 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode,
const_rtx known_x,
case CLRSB:
/* This is at most the number of bits in the mode minus 1. */
- nonzero = (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
+ nonzero = (HOST_WIDE_INT_1U << (floor_log2 (op_mode_width))) - 1;
break;
case PARITY:
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
new file mode 100644
index 00000000000..b43fe1e8c91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mrvv-vector-bits=zvl
-mrvv-max-lmul=m4" } */
+
+long long b;
+_Bool a=1;
+char e[13];
+
+int main() {
+ for (long h=0; h<13; ++h)
+ e[h] = 110;
+ for (int i=5; i<9; i++)
+ for (int k=0; k<1031; k++) {
+ int l = e[0] ? e[i] : 0;
+ a = l ? a:l;
+ }
+ b = (int)a;
+ if (b != 1)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "vcpop.m" 2 } } */
--
2.52.0