From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>

Hi, Richi.

Sorry for the mistake I made in LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE
with SELECT_VL loop control.

Consider the following case:
#define TEST_LOOP(DATA_TYPE, BITS)                                             \
  void __attribute__ ((noinline, noclone))                                     \
  f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,   \
                          INDEX##BITS stride, INDEX##BITS n)                   \
  {                                                                            \
    for (INDEX##BITS i = 0; i < n; ++i)                                        \
      dest[i] += src[i * stride];                                              \
  }

When "stride" is a constant, the current flow works fine.
However, when "stride" is a variable, it causes an ICE:
# vectp_src.67_85 = PHI <vectp_src.67_86(6), src_21(D)(12)>
...
_96 = .SELECT_VL (ivtmp_94, 4);
...
ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
vect__11.69_87 = .LEN_MASK_GATHER_LOAD (vectp_src.67_85, _84, 4, { 0, 0, 0, 0 }, { -1, -1, -1, -1 }, _96, 0);
...
vectp_src.67_86 = vectp_src.67_85 + ivtmp_78;

This is because of the IR: ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;

Instead, I split the IR into:

step_stride = _39
step = step_stride * 4
ivtmp_78 = step * _96

I don't think the code in this patch is elegant enough; could you help me
refine it?

Thanks.

gcc/ChangeLog:

        * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Fix ICE.

---
 gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c10a4be60eb..12d1b0f1ac0 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3172,12 +3172,40 @@ vect_get_strided_load_store_ops (stmt_vec_info 
stmt_info,
         vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
       tree loop_len
        = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
-      tree tmp
-       = fold_build2 (MULT_EXPR, sizetype,
-                      fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
-                      loop_len);
+      tree tmp;
+      gassign *assign;
+
+      if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
+       tmp = fold_build2 (MULT_EXPR, sizetype,
+                          fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
+                          loop_len);
+      else
+       {
+         /* If DR_STEP = (unsigned int) _37 * 4;
+            Extract _37 and 4, explicit MULT_EXPR.  */
+
+         /* 1. step_stride = (unsigned int) _37.  */
+         tree step_stride = make_ssa_name (create_tmp_var (sizetype));
+         assign = gimple_build_assign (
+           step_stride, TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 0));
+         gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+
+         /* 2. step = step_stride * 4.  */
+         tree step_align = TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 1);
+         tree step = make_ssa_name (create_tmp_var (sizetype));
+         assign
+           = gimple_build_assign (step, fold_build2 (MULT_EXPR, sizetype,
+                                                     step_stride, step_align));
+         gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+
+         /* 3. tmp = step * loop_len.  */
+         tmp = make_ssa_name (create_tmp_var (sizetype));
+         assign = gimple_build_assign (tmp, fold_build2 (MULT_EXPR, sizetype,
+                                                         step, loop_len));
+         gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+       }
       tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
-      gassign *assign = gimple_build_assign (bump, tmp);
+      assign = gimple_build_assign (bump, tmp);
       gsi_insert_before (gsi, assign, GSI_SAME_STMT);
       *dataref_bump = bump;
     }
-- 
2.36.3

Reply via email to