When the costing refactoring happened we ended up with some strange intermixing of code that is specific to different, unrelated VMAT_* memory access types. The following moves that VMAT-specific setup closer to where it is actually used, at the expense of duplicating some lines.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. * tree-vect-stmts.cc (vectorizable_load): Un-factor VMAT specific code to their handling blocks. --- gcc/tree-vect-stmts.cc | 62 ++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c2d5f18721f..c617d80e4e7 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -10216,29 +10216,6 @@ vectorizable_load (vec_info *vinfo, tree bump; tree vec_offset = NULL_TREE; - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - aggr_type = NULL_TREE; - bump = NULL_TREE; - } - else if (memory_access_type == VMAT_GATHER_SCATTER) - { - aggr_type = elem_type; - if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, - &bump, &vec_offset, loop_lens); - } - else - { - if (memory_access_type == VMAT_LOAD_STORE_LANES) - aggr_type = build_array_type_nelts (elem_type, group_size * nunits); - else - aggr_type = vectype; - if (!costing_p) - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); - } auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; @@ -10253,6 +10230,11 @@ vectorizable_load (vec_info *vinfo, gcc_assert (alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); + aggr_type = build_array_type_nelts (elem_type, group_size * nunits); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ @@ -10414,20 +10396,31 @@ vectorizable_load (vec_info *vinfo, { gcc_assert (!grouped_load && !slp_perm); - unsigned int inside_cost = 0, prologue_cost = 0; - /* 1. Create the vector or array pointer update chain. 
*/ - if (!costing_p) + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + aggr_type = NULL_TREE; + bump = NULL_TREE; + if (!costing_p) vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr, &vec_offsets); - else - dataref_ptr - = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, - at_loop, offset, &dummy, gsi, - &ptr_incr, false, bump); } + else + { + aggr_type = elem_type; + if (!costing_p) + { + vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, + gsi, &gs_info, + &bump, &vec_offset, loop_lens); + dataref_ptr + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, + at_loop, offset, &dummy, gsi, + &ptr_incr, false, bump); + } + } + + unsigned int inside_cost = 0, prologue_cost = 0; gimple *new_stmt = NULL; for (i = 0; i < vec_num; i++) @@ -10749,6 +10742,11 @@ vectorizable_load (vec_info *vinfo, return true; } + aggr_type = vectype; + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + poly_uint64 group_elt = 0; unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with -- 2.43.0