> -----Original Message-----
> From: Richard Biener <rguent...@suse.de>
> Sent: Tuesday, August 26, 2025 8:59 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Tamar Christina <tamar.christ...@arm.com>
> Subject: [PATCH][aarch64] Compute vect_reduc_type off SLP node instead of stmt-info
> 
> The following changes the vect_reduc_type API to work on the SLP node.
> The API is only used from the aarch64 backend, so all changes are there.
> In particular I noticed aarch64_force_single_cycle is invoked even
> for scalar costing (where the flag it tests isn't computed yet); I
> figured that in scalar costing all reductions are single-cycle.

Yeah, this was a fix for PR110625, which changed scalar reductions
to correctly have a latency of 1.
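
For anyone following along, the resulting logic in count_ops is
roughly the following (a condensed sketch of the hunk further down,
not a verbatim excerpt; `base' and `count' are as in that hunk):

  /* Scalar reductions are loop-carried with latency BASE, so treat
     scalar costing like the forced single-cycle case.  */
  if (m_costing_for_scalar
      || aarch64_force_single_cycle (m_vinfo, stmt_info))
    ops->reduction_latency = MAX (ops->reduction_latency, base * count);
  else
    ops->reduction_latency = MAX (ops->reduction_latency, base);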

> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> 
> OK for the aarch64 parts?

OK.

Thanks,
Tamar

> 
> Thanks,
> Richard.
> 
>       * tree-vectorizer.h (vect_reduc_type): Get SLP node as argument.
>       * config/aarch64/aarch64.cc (aarch64_sve_in_loop_reduction_latency):
>       Take SLP node as argument and adjust.
>       (aarch64_in_loop_reduction_latency): Likewise.
>       (aarch64_detect_vector_stmt_subtype): Adjust.
>       (aarch64_vector_costs::count_ops): Likewise.  Treat reductions
>       during scalar costing as single-cycle.
> ---
>  gcc/config/aarch64/aarch64.cc | 21 ++++++++++++++-------
>  gcc/tree-vectorizer.h         | 16 ++++++++++------
>  2 files changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index fb8311b655d..eb9e2cfaab0 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -17420,10 +17420,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
>     instructions.  */
>  static unsigned int
>  aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
> +                                    slp_tree node,
>                                      stmt_vec_info stmt_info,
>                                      const sve_vec_cost *sve_costs)
>  {
> -  switch (vect_reduc_type (vinfo, stmt_info))
> +  switch (vect_reduc_type (vinfo, node))
>      {
>      case EXTRACT_LAST_REDUCTION:
>        return sve_costs->clast_cost;
> @@ -17463,7 +17464,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
>     - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the
>       SVE implementation.  */
>  static unsigned int
> -aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
> +aarch64_in_loop_reduction_latency (vec_info *vinfo,
> +                                slp_tree node,
> +                                stmt_vec_info stmt_info,
>                                  unsigned int vec_flags)
>  {
>    const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs;
> @@ -17476,7 +17479,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
>    if (sve_costs)
>      {
>        unsigned int latency
> -     = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
> +     = aarch64_sve_in_loop_reduction_latency (vinfo, node,
> +                                              stmt_info, sve_costs);
>        if (latency)
>       return latency;
>      }
> @@ -17575,7 +17579,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
>        && sve_costs)
>      {
>        unsigned int latency
> -     = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
> +     = aarch64_sve_in_loop_reduction_latency (vinfo, node,
> +                                              stmt_info, sve_costs);
>        if (latency)
>       return latency;
>      }
> @@ -17787,8 +17792,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
>        && vect_is_reduction (stmt_info))
>      {
>        unsigned int base
> -     = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
> -      if (aarch64_force_single_cycle (m_vinfo, stmt_info))
> +     = aarch64_in_loop_reduction_latency (m_vinfo, node,
> +                                          stmt_info, m_vec_flags);
> +      if (m_costing_for_scalar
> +       || aarch64_force_single_cycle (m_vinfo, stmt_info))
>       /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector,
>          and then accumulate that, but at the moment the loop-carried
>          dependency includes all copies.  */
> @@ -17901,7 +17908,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
>       have only accounted for one.  */
>    if (stmt_info
>        && (kind == vector_stmt || kind == vec_to_scalar)
> -      && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
> +      && vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
>      ops->general_ops += count;
> 
>    /* Count the predicate operations needed by an SVE comparison.  */
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 2948deffdd6..58311318706 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2877,14 +2877,18 @@ vect_is_reduction (stmt_vec_info stmt_info)
>  /* If STMT_INFO describes a reduction, return the vect_reduction_type
>     of the reduction it describes, otherwise return -1.  */
>  inline int
> -vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info)
> +vect_reduc_type (vec_info *vinfo, slp_tree node)
>  {
>    if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
> -    if (STMT_VINFO_REDUC_DEF (stmt_info))
> -      {
> -     stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
> -     return int (STMT_VINFO_REDUC_TYPE (reduc_info));
> -      }
> +    {
> +      stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
> +      if (STMT_VINFO_REDUC_DEF (stmt_info))
> +     {
> +       stmt_vec_info reduc_info
> +         = info_for_reduction (loop_vinfo, stmt_info);
> +       return int (STMT_VINFO_REDUC_TYPE (reduc_info));
> +     }
> +    }
>    return -1;
>  }
> 
> --
> 2.43.0
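
As a usage note for readers: with this change a backend costing hook
queries the reduction type off the SLP node directly, roughly like the
helper below (`backend_is_cond_reduction' is illustrative, not part of
the patch):

  /* Return true if NODE is part of a conditional reduction.
     vect_reduc_type now looks up SLP_TREE_REPRESENTATIVE (node)
     itself, so callers no longer pass a stmt_vec_info.  */
  static bool
  backend_is_cond_reduction (vec_info *vinfo, slp_tree node)
  {
    return vect_reduc_type (vinfo, node) == COND_REDUCTION;
  }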
