> -----Original Message----- > From: Richard Biener <rguent...@suse.de> > Sent: Tuesday, August 26, 2025 8:59 AM > To: gcc-patches@gcc.gnu.org > Cc: Tamar Christina <tamar.christ...@arm.com> > Subject: [PATCH][aarch64] Compute vect_reduc_type off SLP node instead of > stmt-info > > The following changes the vect_reduc_type API to work on the SLP node. > The API is only used from the aarch64 backend, so all changes are there. > In particular I noticed aarch64_force_single_cycle is invoked even > for scalar costing (where the flag tested isn't computed yet), I > figured in scalar costing all reductions are a single cycle.
Yeah, this was a fix for PR110625 which changed scalar reductions to correctly be 1. > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > OK for the aarch64 parts? OK. Thanks, Tamar > > Thanks, > Richard. > > * tree-vectorizer.h (vect_reduc_type): Get SLP node as argument. > * config/aarch64/aarch64.cc (aarch64_sve_in_loop_reduction_latency): > Take SLP node as argument and adjust. > (aarch64_in_loop_reduction_latency): Likewise. > (aarch64_detect_vector_stmt_subtype): Adjust. > (aarch64_vector_costs::count_ops): Likewise. Treat reductions > during scalar costing as single-cycle. > --- > gcc/config/aarch64/aarch64.cc | 21 ++++++++++++++------- > gcc/tree-vectorizer.h | 16 ++++++++++------ > 2 files changed, 24 insertions(+), 13 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index fb8311b655d..eb9e2cfaab0 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -17420,10 +17420,11 @@ aarch64_bool_compound_p (vec_info *vinfo, > stmt_vec_info stmt_info, > instructions. */ > static unsigned int > aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, > + slp_tree node, > stmt_vec_info stmt_info, > const sve_vec_cost *sve_costs) > { > - switch (vect_reduc_type (vinfo, stmt_info)) > + switch (vect_reduc_type (vinfo, node)) > { > case EXTRACT_LAST_REDUCTION: > return sve_costs->clast_cost; > @@ -17463,7 +17464,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info > *vinfo, > - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the > SVE implementation. 
*/ > static unsigned int > -aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, > +aarch64_in_loop_reduction_latency (vec_info *vinfo, > + slp_tree node, > + stmt_vec_info stmt_info, > unsigned int vec_flags) > { > const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs; > @@ -17476,7 +17479,8 @@ aarch64_in_loop_reduction_latency (vec_info > *vinfo, stmt_vec_info stmt_info, > if (sve_costs) > { > unsigned int latency > - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); > + = aarch64_sve_in_loop_reduction_latency (vinfo, node, > + stmt_info, sve_costs); > if (latency) > return latency; > } > @@ -17575,7 +17579,8 @@ aarch64_detect_vector_stmt_subtype (vec_info > *vinfo, vect_cost_for_stmt kind, > && sve_costs) > { > unsigned int latency > - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); > + = aarch64_sve_in_loop_reduction_latency (vinfo, node, > + stmt_info, sve_costs); > if (latency) > return latency; > } > @@ -17787,8 +17792,10 @@ aarch64_vector_costs::count_ops (unsigned int > count, vect_cost_for_stmt kind, > && vect_is_reduction (stmt_info)) > { > unsigned int base > - = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags); > - if (aarch64_force_single_cycle (m_vinfo, stmt_info)) > + = aarch64_in_loop_reduction_latency (m_vinfo, node, > + stmt_info, m_vec_flags); > + if (m_costing_for_scalar > + || aarch64_force_single_cycle (m_vinfo, stmt_info)) > /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector, > and then accumulate that, but at the moment the loop-carried > dependency includes all copies. */ > @@ -17901,7 +17908,7 @@ aarch64_vector_costs::count_ops (unsigned int > count, vect_cost_for_stmt kind, > have only accounted for one. 
*/ > if (stmt_info > && (kind == vector_stmt || kind == vec_to_scalar) > - && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION) > + && vect_reduc_type (m_vinfo, node) == COND_REDUCTION) > ops->general_ops += count; > > /* Count the predicate operations needed by an SVE comparison. */ > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 2948deffdd6..58311318706 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2877,14 +2877,18 @@ vect_is_reduction (stmt_vec_info stmt_info) > /* If STMT_INFO describes a reduction, return the vect_reduction_type > of the reduction it describes, otherwise return -1. */ > inline int > -vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) > +vect_reduc_type (vec_info *vinfo, slp_tree node) > { > if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) > - if (STMT_VINFO_REDUC_DEF (stmt_info)) > - { > - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); > - return int (STMT_VINFO_REDUC_TYPE (reduc_info)); > - } > + { > + stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); > + if (STMT_VINFO_REDUC_DEF (stmt_info)) > + { > + stmt_vec_info reduc_info > + = info_for_reduction (loop_vinfo, stmt_info); > + return int (STMT_VINFO_REDUC_TYPE (reduc_info)); > + } > + } > return -1; > } > > -- > 2.43.0