The following adds SLP support for vectorizing single-lane inductions
with variable length vectors.

This is a WIP patch; local testing for SVE and RISC-V is fine, but the
CI might discover issues.

        PR tree-optimization/116566
        * tree-vect-loop.cc (vectorizable_induction): Handle single-lane
        SLP for VLA vectors.
---
 gcc/tree-vect-loop.cc | 192 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 156 insertions(+), 36 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0ce1bf8ebba..206c44226bd 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10282,7 +10282,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
   gimple *new_stmt;
   gphi *induction_phi;
   tree induc_def, vec_dest;
-  tree init_expr, step_expr;
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   unsigned i;
   tree expr;
@@ -10368,7 +10367,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
     iv_loop = loop;
   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
 
-  if (slp_node && !nunits.is_constant ())
+  if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
     {
       /* The current SLP code creates the step value element-by-element.  */
       if (dump_enabled_p ())
@@ -10386,7 +10385,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       return false;
     }
 
-  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+  tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
   gcc_assert (step_expr != NULL_TREE);
   if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
       && !type_has_mode_precision_p (TREE_TYPE (step_expr)))
@@ -10474,9 +10473,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
        [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2].  */
   if (slp_node)
     {
-      /* Enforced above.  */
-      unsigned int const_nunits = nunits.to_constant ();
-
       /* The initial values are vectorized, but any lanes > group_size
         need adjustment.  */
       slp_tree init_node
@@ -10498,11 +10494,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 
       /* Now generate the IVs.  */
       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-      gcc_assert ((const_nunits * nvects) % group_size == 0);
+      gcc_assert (multiple_p (nunits * nvects, group_size));
       unsigned nivs;
+      unsigned HOST_WIDE_INT const_nunits;
       if (nested_in_vect_loop)
        nivs = nvects;
-      else
+      else if (nunits.is_constant (&const_nunits))
        {
          /* Compute the number of distinct IVs we need.  First reduce
             group_size if it is a multiple of const_nunits so we get
@@ -10513,21 +10510,42 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          nivs = least_common_multiple (group_sizep,
                                        const_nunits) / const_nunits;
        }
+      else
+       {
+         gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+         nivs = 1;
+       }
+      gimple_seq init_stmts = NULL;
       tree stept = TREE_TYPE (step_vectype);
       tree lupdate_mul = NULL_TREE;
       if (!nested_in_vect_loop)
        {
-         /* The number of iterations covered in one vector iteration.  */
-         unsigned lup_mul = (nvects * const_nunits) / group_size;
-         lupdate_mul
-           = build_vector_from_val (step_vectype,
-                                    SCALAR_FLOAT_TYPE_P (stept)
-                                    ? build_real_from_wide (stept, lup_mul,
-                                                            UNSIGNED)
-                                    : build_int_cstu (stept, lup_mul));
+         if (nunits.is_constant ())
+           {
+             /* The number of iterations covered in one vector iteration.  */
+             unsigned lup_mul = (nvects * const_nunits) / group_size;
+             lupdate_mul
+               = build_vector_from_val (step_vectype,
+                                        SCALAR_FLOAT_TYPE_P (stept)
+                                        ? build_real_from_wide (stept, lup_mul,
+                                                                UNSIGNED)
+                                        : build_int_cstu (stept, lup_mul));
+           }
+         else
+           {
+             if (SCALAR_FLOAT_TYPE_P (stept))
+               {
+                 tree tem = build_int_cst (integer_type_node, vf);
+                 lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
+               }
+             else
+               lupdate_mul = build_int_cst (stept, vf);
+             lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+                                                         step_vectype,
+                                                         lupdate_mul);
+           }
        }
       tree peel_mul = NULL_TREE;
-      gimple_seq init_stmts = NULL;
       if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
        {
          if (SCALAR_FLOAT_TYPE_P (stept))
@@ -10539,10 +10557,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          peel_mul = gimple_build_vector_from_val (&init_stmts,
                                                   step_vectype, peel_mul);
        }
+      tree step_mul = NULL_TREE;
       unsigned ivn;
       auto_vec<tree> vec_steps;
       for (ivn = 0; ivn < nivs; ++ivn)
        {
+         gimple_seq stmts = NULL;
+         bool invariant = true;
+         if (nunits.is_constant ())
+           {
          tree_vector_builder step_elts (step_vectype, const_nunits, 1);
          tree_vector_builder init_elts (vectype, const_nunits, 1);
          tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
@@ -10570,13 +10593,65 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                                   : build_int_cstu (stept, mul_elt));
            }
          vec_step = gimple_build_vector (&init_stmts, &step_elts);
-         vec_steps.safe_push (vec_step);
-         tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
-         if (peel_mul)
-           step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
-                                    step_mul, peel_mul);
+         step_mul = gimple_build_vector (&init_stmts, &mul_elts);
          if (!init_node)
            vec_init = gimple_build_vector (&init_stmts, &init_elts);
+           }
+         else
+           {
+             if (init_node)
+               ;
+             else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
+               {
+                 new_name = gimple_convert (&init_stmts, stept, inits[0]);
+                 /* Build the initial value directly from a VEC_SERIES_EXPR.  */
+                 vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
+                                          step_vectype, new_name, steps[0]);
+                 if (!useless_type_conversion_p (vectype, step_vectype))
+                   vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+                                            vectype, vec_init);
+               }
+             else
+               {
+                 /* Build:
+                    [base, base, base, ...]
+                    + (vectype) [0, 1, 2, ...] * [step, step, step, ...].  */
+                 gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
+                 gcc_assert (flag_associative_math);
+                 tree index = build_index_vector (step_vectype, 0, 1);
+                 new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
+                                            inits[0]);
+                 tree base_vec = gimple_build_vector_from_val (&init_stmts, step_vectype,
+                                                               new_name);
+                 tree step_vec = gimple_build_vector_from_val (&init_stmts, step_vectype,
+                                                               steps[0]);
+                 vec_init = gimple_build (&init_stmts, FLOAT_EXPR, step_vectype, index);
+                 vec_init = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+                                          vec_init, step_vec);
+                 vec_init = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+                                          vec_init, base_vec);
+                 if (!useless_type_conversion_p (vectype, step_vectype))
+                   vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+                                            vectype, vec_init);
+               }
+             /* iv_loop is nested in the loop to be vectorized. Generate:
+                vec_step = [S, S, S, S]  */
+             t = unshare_expr (steps[0]);
+             gcc_assert (CONSTANT_CLASS_P (t)
+                         || TREE_CODE (t) == SSA_NAME);
+             vec_step = gimple_build_vector_from_val (&init_stmts,
+                                                      step_vectype, t);
+           }
+         vec_steps.safe_push (vec_step);
+         if (peel_mul)
+           {
+             if (!step_mul)
+               step_mul = peel_mul;
+             else
+               step_mul = gimple_build (&init_stmts,
+                                        MINUS_EXPR, step_vectype,
+                                        step_mul, peel_mul);
+           }
 
          /* Create the induction-phi that defines the induction-operand.  */
          vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
@@ -10587,9 +10662,35 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          /* Create the iv update inside the loop  */
          tree up = vec_step;
          if (lupdate_mul)
-           up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
-                              vec_step, lupdate_mul);
-         gimple_seq stmts = NULL;
+           {
+             if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+               {
+                 /* When we're using loop_len produced by SELECT_VL, the non-final
+                    iterations are not always processing VF elements.  So vectorize
+                    induction variable instead of
+
+                    _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+                    We should generate:
+
+                    _35 = .SELECT_VL (ivtmp_33, VF);
+                    vect_cst__22 = [vec_duplicate_expr] _35;
+                    _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
+                 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+                 tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+                                               vectype, 0, 0);
+                 expr = gimple_convert (&stmts, stept, unshare_expr (len));
+                 lupdate_mul = gimple_build_vector_from_val (&stmts,
+                                                             step_vectype,
+                                                             expr);
+                 up = gimple_build (&stmts, MULT_EXPR,
+                                    step_vectype, vec_step, lupdate_mul);
+               }
+             else
+               up = gimple_build (&init_stmts,
+                                  MULT_EXPR, step_vectype,
+                                  vec_step, lupdate_mul);
+           }
          vec_def = gimple_convert (&stmts, step_vectype, induc_def);
          vec_def = gimple_build (&stmts,
                                  PLUS_EXPR, step_vectype, vec_def, up);
@@ -10601,8 +10702,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          if (init_node)
            vec_init = vect_get_slp_vect_def (init_node, ivn);
          if (!nested_in_vect_loop
+             && step_mul
              && !integer_zerop (step_mul))
            {
+             gcc_assert (invariant);
              vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
              up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
                                 vec_step, step_mul);
@@ -10619,8 +10722,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       if (!nested_in_vect_loop)
        {
          /* Fill up to the number of vectors we need for the whole group.  */
-         nivs = least_common_multiple (group_size,
-                                       const_nunits) / const_nunits;
+         if (nunits.is_constant ())
+           nivs = least_common_multiple (group_size,
+                                         const_nunits) / const_nunits;
+         else
+           nivs = 1;
          vec_steps.reserve (nivs-ivn);
          for (; ivn < nivs; ++ivn)
            {
@@ -10633,14 +10739,28 @@ vectorizable_induction (loop_vec_info loop_vinfo,
         stmts by adding VF' * stride to the IVs generated above.  */
       if (ivn < nvects)
        {
-         unsigned vfp
-           = least_common_multiple (group_size, const_nunits) / group_size;
-         tree lupdate_mul
-           = build_vector_from_val (step_vectype,
-                                    SCALAR_FLOAT_TYPE_P (stept)
-                                    ? build_real_from_wide (stept,
-                                                            vfp, UNSIGNED)
-                                    : build_int_cstu (stept, vfp));
+         if (nunits.is_constant ())
+           {
+             unsigned vfp = least_common_multiple (group_size, const_nunits) / group_size;
+             lupdate_mul
+                 = build_vector_from_val (step_vectype,
+                                          SCALAR_FLOAT_TYPE_P (stept)
+                                          ? build_real_from_wide (stept,
+                                                                  vfp, UNSIGNED)
+                                          : build_int_cstu (stept, vfp));
+           }
+         else
+           {
+             if (SCALAR_FLOAT_TYPE_P (stept))
+               {
+                 tree tem = build_int_cst (integer_type_node, nunits);
+                 lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
+               }
+             else
+               lupdate_mul = build_int_cst (stept, nunits);
+             lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+                                                         step_vectype, lupdate_mul);
+           }
          for (; ivn < nvects; ++ivn)
            {
              gimple *iv
@@ -10672,7 +10792,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       return true;
     }
 
-  init_expr = vect_phi_initial_value (phi);
+  tree init_expr = vect_phi_initial_value (phi);
 
   gimple_seq stmts = NULL;
   if (!nested_in_vect_loop)
-- 
2.43.0

Reply via email to