https://gcc.gnu.org/g:2291c5c4471040acd3da83b5e2cbe56d7b4720e9

commit r16-7435-g2291c5c4471040acd3da83b5e2cbe56d7b4720e9
Author: Juergen Christ <[email protected]>
Date:   Thu Feb 5 11:42:45 2026 +0100

    tree-optimization/122297 - fix load/store bias handling
    
    When load/store with length is used and only QImode versions are
    available, vectorizable_live_operation produces wrong results for
    VEC_EXTRACT.  Provide a flag to vect_get_loop_len to specify if
    bias-adjusted length should be used or not.
    
    gcc/ChangeLog:
    
            PR tree-optimization/122297
            * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
            (vectorizable_induction): Adjust.
            (vectorizable_live_operation_1): Adjust.
            (vect_get_loop_len): Provide parameter to select bias-adjusted
            length.
            (vect_gen_loop_len_mask): Adjust.
            (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
            * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
            (vectorizable_call): Adjust.
            (vectorizable_operation): Adjust.
            (vectorizable_store): Adjust.
            (vectorizable_load): Adjust.
            (vectorizable_condition): Adjust.
            * tree-vectorizer.h (vect_get_loop_len): Add parameter.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/122297
            * gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
            * gcc.dg/vect/nodump-extractlast-2.c: New test.
    
    Signed-off-by: Juergen Christ <[email protected]>

Diff:
---
 gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c |  2 +-
 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c | 23 +++++++++
 gcc/tree-vect-loop.cc                            | 59 ++++++++++++++++--------
 gcc/tree-vect-stmts.cc                           | 24 +++++-----
 gcc/tree-vectorizer.h                            |  2 +-
 5 files changed, 79 insertions(+), 31 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
index 980ac3e42188..83d8a38f13e3 100644
--- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
@@ -1,4 +1,4 @@
-/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
    using VEC_EXTRACT.  */
 /* { dg-require-effective-target vect_int } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
new file mode 100644
index 000000000000..9697687c1084
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
@@ -0,0 +1,23 @@
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+   using VEC_EXTRACT.  This variant uses .LEN_LOAD which might use QImode
+   vectors during load, but SImode vectors for the extraction.  */
+int __attribute__ ((noinline, noclone))
+test_int (int *x, int n, int value)
+{
+  int last;
+  for (int j = 0; j < n; ++j)
+    {
+      last = x[j];
+      x[j] = last * value;
+    }
+  return last;
+}
+
+int
+main ()
+{
+  int arr[] = {1,2,3,4,5,1};
+  if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0947962fcf2b..a7daeb72a5c7 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6544,7 +6544,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
        {
          len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
-                                  i, 1);
+                                  i, 1, false);
          signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
          bias = build_int_cst (intQI_type_node, biasval);
          if (!is_cond_op)
@@ -9938,7 +9938,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                   _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
              vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
              tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                           vectype, 0, 0);
+                                           vectype, 0, 0, false);
              if (SCALAR_FLOAT_TYPE_P (stept))
                expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
              else
@@ -10085,7 +10085,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
     {
       /* Emit:
 
-        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
+        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
 
         where VEC_LHS is the vectorized live-out result, LEN is the length of
         the vector, BIAS is the load-store bias.  The bias should not be used
@@ -10096,21 +10096,14 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
                                    &LOOP_VINFO_LENS (loop_vinfo),
-                                   1, vectype, 0, 1);
+                                   1, vectype, 0, 1, false);
       gimple_seq_add_seq (&stmts, tem);
 
-      /* BIAS + 1.  */
-      signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-      tree bias_plus_one
-       = int_const_binop (PLUS_EXPR,
-                          build_int_cst (TREE_TYPE (len), biasval),
-                          build_one_cst (TREE_TYPE (len)));
-
-      /* LAST_INDEX = LEN - (BIAS + 1).  */
+      /* LAST_INDEX = LEN - 1.  */
       tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
-                                    len, bias_plus_one);
+                                    len, build_one_cst (TREE_TYPE (len)));
 
-      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>.  */
+      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>.  */
       tree scalar_res
        = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
                        vec_lhs_phi, last_index);
@@ -10731,7 +10724,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
 tree
 vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
                   vec_loop_lens *lens, unsigned int nvectors, tree vectype,
-                  unsigned int index, unsigned int factor)
+                  unsigned int index, unsigned int factor, bool adjusted)
 {
   rgroup_controls *rgl = &(*lens)[nvectors - 1];
   bool use_bias_adjusted_len =
@@ -10764,7 +10757,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
        }
     }
 
-  if (use_bias_adjusted_len)
+  if (use_bias_adjusted_len && adjusted)
     return rgl->bias_adjusted_ctrl;
 
   tree loop_len = rgl->controls[index];
@@ -10787,6 +10780,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
            gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
        }
     }
+  else if (factor && rgl->factor != factor)
+    {
+      /* The number of scalars per iteration, scalar occupied bytes and
+        the number of vectors are both compile-time constants.  */
+      unsigned int nscalars_per_iter
+       = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
+                    LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+      unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
+      unsigned int vecsize = nscalars_per_iter * factor;
+      if (rglvecsize > vecsize)
+       {
+         unsigned int fac = rglvecsize / vecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+      else if (rglvecsize < vecsize)
+       {
+         unsigned int fac = vecsize / rglvecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+    }
   return loop_len;
 }
 
@@ -10804,7 +10827,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
   tree all_one_mask = build_all_ones_cst (vectype);
   tree all_zero_mask = build_zero_cst (vectype);
   tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
-                               factor);
+                               factor, true);
   tree bias = build_int_cst (intQI_type_node,
                             LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
   tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
@@ -11075,7 +11098,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
     {
       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
       tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                  NULL_TREE, 0, 0);
+                                  NULL_TREE, 0, 0, true);
     }
 
   tree iter_var;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 77f6d7a639ae..22285250aa8d 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3181,7 +3181,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
         .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
         vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
       tree loop_len
-       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
+       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
       tree tmp
        = fold_build2 (MULT_EXPR, sizetype,
                       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
@@ -3252,7 +3252,7 @@ vect_get_loop_variant_data_ptr_increment (
      addr = addr + .SELECT_VL (ARG..) * step;
   */
   tree loop_len
-    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
+    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
   tree len_type = TREE_TYPE (loop_len);
   /* Since the outcome of .SELECT_VL is element size, we should adjust
      it into bytesize so that it can be used in address pointer variable
@@ -3888,7 +3888,7 @@ vectorizable_call (vec_info *vinfo,
                    {
                      unsigned int vec_num = vec_oprnds0.length ();
                      tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                                   vec_num, vectype_out, i, 1);
+                                                   vec_num, vectype_out, i, 1, true);
                      signed char biasval
                        = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                      tree bias = build_int_cst (intQI_type_node, biasval);
@@ -7098,7 +7098,7 @@ vectorizable_operation (vec_info *vinfo,
          if (len_loop_p)
            {
              tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                           vec_num, vectype, i, 1);
+                                           vec_num, vectype, i, 1, true);
              signed char biasval
                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
              tree bias = build_int_cst (intQI_type_node, biasval);
@@ -8814,7 +8814,7 @@ vectorizable_store (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -9012,7 +9012,7 @@ vectorizable_store (vec_info *vinfo,
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi,
                                                   loop_lens, num_stmts,
-                                                  vectype, j, 1);
+                                                  vectype, j, 1, true);
                  else
                    final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
@@ -9396,7 +9396,7 @@ vectorizable_store (vec_info *vinfo,
          unsigned factor
            = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
          final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                        vec_num, vectype, i, factor);
+                                        vec_num, vectype, i, factor, true);
        }
       else if (final_mask)
        {
@@ -10759,7 +10759,7 @@ vectorizable_load (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -10967,7 +10967,7 @@ vectorizable_load (vec_info *vinfo,
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num, vectype, i, 1);
+                                                  vec_num, vectype, i, 1, true);
                  else
                    final_len = build_int_cst (sizetype,
                                               TYPE_VECTOR_SUBPARTS (vectype));
@@ -11419,7 +11419,7 @@ vectorizable_load (vec_info *vinfo,
                unsigned factor
                  = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num, vectype, i, factor);
+                                              vec_num, vectype, i, factor, true);
              }
            else if (final_mask)
              {
@@ -12498,8 +12498,10 @@ vectorizable_condition (vec_info *vinfo,
            {
              if (lens)
                {
+                 /* ??? Do we really want the adjusted LEN here?  Isn't this
+                    based on number of elements?  */
                  len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                          vec_num, vectype, i, 1);
+                                          vec_num, vectype, i, 1, true);
                  signed char biasval
                    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                  bias = build_int_cst (intQI_type_node, biasval);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4849830204d7..5c700535ed26 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2678,7 +2678,7 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
                                  tree, unsigned int);
 extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
                               vec_loop_lens *, unsigned int, tree,
-                              unsigned int, unsigned int);
+                              unsigned int, unsigned int, bool);
 extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
                                    gimple_stmt_iterator *, vec_loop_lens *,
                                    unsigned int, tree, tree, unsigned int,

Reply via email to