From: Pan Li <pan2...@intel.com>

This patch fixes a bug exposed by the RV32 test case
multiple_rgroup_run-2.c. The merge mask must be limited to the ELEN of
the vector extension, and the condition that chooses between vmv.s.x and
vmv.v.x must use inner_bits_size rather than the constant BITS_PER_WORD.

After this patch, the following failures on RV32 are fixed.

FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c -std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax execution test

Signed-off-by: Pan Li <pan2...@intel.com>

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (rvv_builder::get_merge_scalar_mask):
        Take elen instead of scalar BITS_PER_WORD.
        (expand_vector_init_merge_repeating_sequence): Use inner_bits_size
        instead of scalar BITS_PER_WORD.
---
 gcc/config/riscv/riscv-v.cc | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e07d5c2901a..db1a5529419 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -399,10 +399,19 @@ rvv_builder::get_merge_scalar_mask (unsigned int index_in_pattern) const
 {
   unsigned HOST_WIDE_INT mask = 0;
   unsigned HOST_WIDE_INT base_mask = (1ULL << index_in_pattern);
+  /* We restrict the limit to the elen of RVV. For example:
+     -march=zve32*, the ELEN is 32.
+     -march=zve64*, the ELEN is 64.
+     The related vmv.v.x/vmv.s.x is restricted to ELEN as above, we cannot
+     take care of case like below when ELEN=32
+     vsetvil e64,m1
+     vmv.v.x/vmv.s.x
+   */
+  unsigned int elen = TARGET_VECTOR_ELEN_64 ? 64 : 32;
 
-  gcc_assert (BITS_PER_WORD % npatterns () == 0);
+  gcc_assert (elen % npatterns () == 0);
 
-  int limit = BITS_PER_WORD / npatterns ();
+  int limit = elen / npatterns ();
 
   for (int i = 0; i < limit; i++)
     mask |= base_mask << (i * npatterns ());
@@ -1928,7 +1937,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
       rtx mask = gen_reg_rtx (mask_mode);
       rtx dup = gen_reg_rtx (dup_mode);
 
-      if (full_nelts <= BITS_PER_WORD) /* vmv.s.x.  */
+      if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x.  */
        {
          rtx ops[] = {dup, gen_scalar_move_mask (dup_mask_mode),
            RVV_VUNDEF (dup_mode), merge_mask};
@@ -1938,7 +1947,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
       else /* vmv.v.x.  */
        {
          rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
-         rtx vl = gen_int_mode (CEIL (full_nelts, BITS_PER_WORD), Pmode);
+         rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
+                                Pmode);
          emit_nonvlmax_integer_move_insn (code_for_pred_broadcast (dup_mode),
                                           ops, vl);
        }
-- 
2.34.1

Reply via email to