[PATCH V13] VECT: Add decrement IV iteration loop control by variable amount support

2023-05-24 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds support for a decrementing IV, following the flow designed by Richard:

(1) In vect_set_loop_condition_partial_vectors, for the first iteration of
the loop over the rgroup controls, call vect_set_loop_controls_directly.

(2) vect_set_loop_controls_directly calculates "step" as in your patch.
If rgc has 1 control, this step is the SSA name created for that control.
Otherwise the step is a fresh SSA name, as in your patch.

(3) vect_set_loop_controls_directly stores this step somewhere for later
use, probably in LOOP_VINFO.  Let's use "S" to refer to this stored step.

(4) After the vect_set_loop_controls_directly call above, and outside
the "if" statement that now contains vect_set_loop_controls_directly,
check whether rgc->controls.length () > 1.  If so, use
vect_adjust_loop_lens_control to set the controls based on S.

Then the only caller of vect_adjust_loop_lens_control is
vect_set_loop_condition_partial_vectors.  And the starting
step for vect_adjust_loop_lens_control is always S.

This patch has been well tested for single-rgroup and multiple-rgroup (SLP)
cases and passes all testcases in the RISC-V port.

It also passes tests for multiple-rgroup (non-SLP) cases, tested on vec_pack_trunc.


gcc/ChangeLog:

* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Add 
decrement IV support.
(vect_adjust_loop_lens_control): New function.
(vect_set_loop_condition_partial_vectors): Ditto.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): New variable.
* tree-vectorizer.h (LOOP_VINFO_USING_DECREMENTING_IV_P): New macro.
(LOOP_VINFO_DECREMENTING_IV_STEP): New macro.

---
 gcc/tree-vect-loop-manip.cc | 179 +---
 gcc/tree-vect-loop.cc   |  13 +++
 gcc/tree-vectorizer.h   |  12 +++
 3 files changed, 193 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ff6159e08d5..3a872668f89 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -468,6 +468,38 @@ vect_set_loop_controls_directly (class loop *loop, 
loop_vec_info loop_vinfo,
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+{
+  /* single rgroup:
+...
+_10 = (unsigned long) count_12(D);
+...
+# ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
+_36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
+...
+vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
+...
+ivtmp_35 = ivtmp_9 - _36;
+...
+if (ivtmp_35 != 0)
+  goto <bb 4>; [83.33%]
+else
+  goto <bb 5>; [16.67%]
+  */
+  nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
+  tree step = rgc->controls.length () == 1 ? rgc->controls[0]
+  : make_ssa_name (iv_type);
+  /* Create decrement IV.  */
+  create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+insert_after, &index_before_incr, &index_after_incr);
+  gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
+   index_before_incr,
+   nitems_step));
+  LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo) = step;
+  return index_after_incr;
+}
+
+  /* Create increment IV.  */
   create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
 loop, &incr_gsi, insert_after, &index_before_incr,
 &index_after_incr);
@@ -683,6 +715,63 @@ vect_set_loop_controls_directly (class loop *loop, 
loop_vec_info loop_vinfo,
   return next_ctrl;
 }
 
+/* Try to adjust the loop lens controls for multiple rgroups.
+
+ _36 = MIN_EXPR <ivtmp_34, VF>;
+
+ First length (MIN (X, VF/N)):
+   loop_len_15 = MIN_EXPR <_36, VF/N>;
+
+ Second length:
+   tmp = _36 - loop_len_15;
+   loop_len_16 = MIN (tmp, VF/N);
+
+ Third length:
+   tmp2 = tmp - loop_len_16;
+   loop_len_17 = MIN (tmp2, VF/N);
+
+ Last length:
+   loop_len_18 = tmp2 - loop_len_17;
+*/
+
+static void
+vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
+  rgroup_controls *dest_rgm, tree step)
+{
+  tree ctrl_type = dest_rgm->type;
+  poly_uint64 nitems_per_ctrl
+= TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
+  tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
+
+  for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
+{
+  tree ctrl = dest_rgm->controls[i];
+  if (i == 0)
+   {
+ /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
+ gassign *assign
+   = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+ gimple_seq_add_stmt (seq, assign);
+   }
+  else if (i == dest_rgm->controls.length () - 1)
+   {
+ /* Last iteration: Remain capped to the range [0, VF/

Re: [PATCH V13] VECT: Add decrement IV iteration loop control by variable amount support

2023-05-24 Thread 钟居哲
Please disregard V13 and review V14 directly.
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619478.html 

Thanks.



juzhe.zh...@rivai.ai
 
From: juzhe.zhong
Date: 2023-05-24 22:29
To: gcc-patches
CC: richard.sandiford; rguenther; Ju-Zhe Zhong
Subject: [PATCH V13] VECT: Add decrement IV iteration loop control by variable 
amount support
From: Ju-Zhe Zhong 
 
This patch adds support for a decrementing IV, following the flow designed by Richard:
 
(1) In vect_set_loop_condition_partial_vectors, for the first iteration of
the loop over the rgroup controls, call vect_set_loop_controls_directly.
 
(2) vect_set_loop_controls_directly calculates "step" as in your patch.
If rgc has 1 control, this step is the SSA name created for that control.
Otherwise the step is a fresh SSA name, as in your patch.
 
(3) vect_set_loop_controls_directly stores this step somewhere for later
use, probably in LOOP_VINFO.  Let's use "S" to refer to this stored step.
 
(4) After the vect_set_loop_controls_directly call above, and outside
the "if" statement that now contains vect_set_loop_controls_directly,
check whether rgc->controls.length () > 1.  If so, use
vect_adjust_loop_lens_control to set the controls based on S.
 
Then the only caller of vect_adjust_loop_lens_control is
vect_set_loop_condition_partial_vectors.  And the starting
step for vect_adjust_loop_lens_control is always S.
 
This patch has been well tested for single-rgroup and multiple-rgroup (SLP)
cases and passes all testcases in the RISC-V port.
 
It also passes tests for multiple-rgroup (non-SLP) cases, tested on vec_pack_trunc.
 
 
gcc/ChangeLog:
 
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Add 
decrement IV support.
(vect_adjust_loop_lens_control): New function.
(vect_set_loop_condition_partial_vectors): Ditto.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): New variable.
* tree-vectorizer.h (LOOP_VINFO_USING_DECREMENTING_IV_P): New macro.
(LOOP_VINFO_DECREMENTING_IV_STEP): New macro.
 
---
gcc/tree-vect-loop-manip.cc | 179 +---
gcc/tree-vect-loop.cc   |  13 +++
gcc/tree-vectorizer.h   |  12 +++
3 files changed, 193 insertions(+), 11 deletions(-)
 
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ff6159e08d5..3a872668f89 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -468,6 +468,38 @@ vect_set_loop_controls_directly (class loop *loop, 
loop_vec_info loop_vinfo,
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+{
+  /* single rgroup:
+ ...
+ _10 = (unsigned long) count_12(D);
+ ...
+ # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
+ _36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
+ ...
+ vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
+ ...
+ ivtmp_35 = ivtmp_9 - _36;
+ ...
+ if (ivtmp_35 != 0)
+goto <bb 4>; [83.33%]
+ else
+goto <bb 5>; [16.67%]
+  */
+  nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
+  tree step = rgc->controls.length () == 1 ? rgc->controls[0]
+: make_ssa_name (iv_type);
+  /* Create decrement IV.  */
+  create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+ insert_after, &index_before_incr, &index_after_incr);
+  gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
+ index_before_incr,
+ nitems_step));
+  LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo) = step;
+  return index_after_incr;
+}
+
+  /* Create increment IV.  */
   create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
 loop, &incr_gsi, insert_after, &index_before_incr,
 &index_after_incr);
@@ -683,6 +715,63 @@ vect_set_loop_controls_directly (class loop *loop, 
loop_vec_info loop_vinfo,
   return next_ctrl;
}
+/* Try to adjust the loop lens controls for multiple rgroups.
+
+ _36 = MIN_EXPR <ivtmp_34, VF>;
+
+ First length (MIN (X, VF/N)):
+   loop_len_15 = MIN_EXPR <_36, VF/N>;
+
+ Second length:
+   tmp = _36 - loop_len_15;
+   loop_len_16 = MIN (tmp, VF/N);
+
+ Third length:
+   tmp2 = tmp - loop_len_16;
+   loop_len_17 = MIN (tmp2, VF/N);
+
+ Last length:
+   loop_len_18 = tmp2 - loop_len_17;
+*/
+
+static void
+vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
+rgroup_controls *dest_rgm, tree step)
+{
+  tree ctrl_type = dest_rgm->type;
+  poly_uint64 nitems_per_ctrl
+= TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
+  tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
+
+  for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
+{
+  tree ctrl = dest_rgm->controls[i];
+  if (i == 0)
+ {
+   /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
+   gassign *assign
+ = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+   gimple_seq_add_stmt (seq, assign);
+ }
+  e