Hi,
RVV's vectors can get very large with LMUL8. In the PR we have
256-element char vectors which get permuted.  For permuting them
we use a mask vector type that is deduced from the element type
without checking whether the permute indices fit into that type.
This leads to an invalid permute mask which gets optimized away.
This patch punts if the permute index might overflow the mask type.
Bootstrapped and regtested on x86, power10, and aarch64.
Regtested on riscv64.
Regards
Robin
PR tree-optimization/123414
gcc/ChangeLog:
* tree-ssa-forwprop.cc (simplify_vector_constructor):
Give up if the permute index might overflow the mask type.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr123414.c: New test.
---
.../gcc.target/riscv/rvv/autovec/pr123414.c | 31 +++++++++++++++++++
gcc/tree-ssa-forwprop.cc | 13 ++++++--
2 files changed, 41 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123414.c
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123414.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123414.c
new file mode 100644
index 00000000000..a28ce23f058
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123414.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mrvv-max-lmul=m8 -O3 -fsigned-char -fno-strict-aliasing -fwrapv -fdump-tree-optimized -std=gnu99" } */
+
+signed char a=2;
+long long b;
+long c = 93;
+int e[1][9];
+
+void
+g (long cc, int ee[][9])
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 5; j++)
+ for (unsigned k = 0; k < 9; k++)
+ {
+ a *= cc;
+ for (int l = 0; l < 6; l += (ee[k] <= 0) + 2)
+ ;
+ }
+}
+
+int main() {
+ g( c, e);
+ b = (int)a;
+ if (b != 34)
+ __builtin_abort ();
+}
+
+/* We should have four vector constructors that must not get optimized away.
+ */
+/* { dg-final { scan-tree-dump-times
"\[a-zA-Z_\]\[a-zA-Z0-9_\]+.=.\\\{\[a-zA-Z0-9._\]+, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1," 4
"optimized" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index d00f7b9004e..9435e4dc409 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -4177,9 +4177,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
machine_mode vmode = TYPE_MODE (perm_type);
if (!can_vec_perm_const_p (vmode, vmode, indices))
return false;
- mask_type
- = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
- refnelts);
+ /* With an ELEM_SIZEd integer we can shuffle at most
+ (1 << ELEM_SIZE) / 2 - 1 elements. For a more fine-grained
+ check we could take the maximum of the element values
+ but for now this is sufficient. */
+ if (refnelts < (HOST_WIDE_INT_1U << (elem_size - 1)))
+ mask_type
+ = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
+ refnelts);
+ else
+ return false;
if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
|| maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
GET_MODE_SIZE (TYPE_MODE (perm_type))))
--
2.52.0