When the costing refactoring happened we ended up with some strange
inter-mixing of code unrelated across the VMAT cases.  The following
moves that code closer to where it is actually used, at the expense of
duplicating some lines.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

        * tree-vect-stmts.cc (vectorizable_load): Un-factor
        VMAT-specific code into its respective handling blocks.
---
 gcc/tree-vect-stmts.cc | 62 ++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c2d5f18721f..c617d80e4e7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10216,29 +10216,6 @@ vectorizable_load (vec_info *vinfo,
 
   tree bump;
   tree vec_offset = NULL_TREE;
-  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      aggr_type = NULL_TREE;
-      bump = NULL_TREE;
-    }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
-    {
-      aggr_type = elem_type;
-      if (!costing_p)
-       vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
-                                        gsi, &gs_info,
-                                        &bump, &vec_offset, loop_lens);
-    }
-  else
-    {
-      if (memory_access_type == VMAT_LOAD_STORE_LANES)
-       aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
-      else
-       aggr_type = vectype;
-      if (!costing_p)
-       bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
-                                           memory_access_type, loop_lens);
-    }
 
   auto_vec<tree> vec_offsets;
   auto_vec<tree> vec_masks;
@@ -10253,6 +10230,11 @@ vectorizable_load (vec_info *vinfo,
       gcc_assert (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported);
 
+      aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
+      if (!costing_p)
+       bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+                                           memory_access_type, loop_lens);
+
       unsigned int inside_cost = 0, prologue_cost = 0;
       /* For costing some adjacent vector loads, we'd like to cost with
         the total number of them once instead of cost each one by one. */
@@ -10414,20 +10396,31 @@ vectorizable_load (vec_info *vinfo,
     {
       gcc_assert (!grouped_load && !slp_perm);
 
-      unsigned int inside_cost = 0, prologue_cost = 0;
-
       /* 1. Create the vector or array pointer update chain.  */
-      if (!costing_p)
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
        {
-         if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+         aggr_type = NULL_TREE;
+         bump = NULL_TREE;
+         if (!costing_p)
            vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
                                         &vec_offsets);
-         else
-           dataref_ptr
-             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                         at_loop, offset, &dummy, gsi,
-                                         &ptr_incr, false, bump);
        }
+      else
+       {
+         aggr_type = elem_type;
+         if (!costing_p)
+           {
+             vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
+                                              gsi, &gs_info,
+                                              &bump, &vec_offset, loop_lens);
+             dataref_ptr
+                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                             at_loop, offset, &dummy, gsi,
+                                             &ptr_incr, false, bump);
+           }
+       }
+
+      unsigned int inside_cost = 0, prologue_cost = 0;
 
       gimple *new_stmt = NULL;
       for (i = 0; i < vec_num; i++)
@@ -10749,6 +10742,11 @@ vectorizable_load (vec_info *vinfo,
       return true;
     }
 
+  aggr_type = vectype;
+  if (!costing_p)
+    bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+                                       memory_access_type, loop_lens);
+
   poly_uint64 group_elt = 0;
   unsigned int inside_cost = 0, prologue_cost = 0;
   /* For costing some adjacent vector loads, we'd like to cost with
-- 
2.43.0

Reply via email to