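-mtune=neoverse-512tvb sets the likely SVE vector length to 128 bits,
but it also takes into account Neoverse V1, which is a 256-bit target.
When costing for Neoverse V1, one iteration of an SVE loop is therefore
assumed to handle twice as much data as the vectorizer anticipates.
This patch records that VF (VL) factor in aarch64_vec_op_count, so that
adjust_body_cost reads it from there instead of hard-coding 2.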

Tested on aarch64-linux-gnu & applied.

Richard


gcc/
        * config/aarch64/aarch64.c (aarch64_vec_op_count::m_vf_factor):
        New member variable.
        (aarch64_vec_op_count::aarch64_vec_op_count): Add a parameter for it.
        (aarch64_vec_op_count::vf_factor): New function.
        (aarch64_vector_costs::aarch64_vector_costs): When costing for
        neoverse-512tvb, pass a vf_factor of 2 for the Neoverse V1 version
        of an SVE loop.
        (aarch64_vector_costs::adjust_body_cost): Read the vf factor
        instead of hard-coding 2.
---
 gcc/config/aarch64/aarch64.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 241cef8c5d9..5fa64fe5350 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14711,9 +14711,12 @@ class aarch64_vec_op_count
 {
 public:
   aarch64_vec_op_count () = default;
-  aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int);
+  aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int,
+                       unsigned int = 1);
 
   unsigned int vec_flags () const { return m_vec_flags; }
+  unsigned int vf_factor () const { return m_vf_factor; }
+
   const aarch64_base_vec_issue_info *base_issue_info () const;
   const aarch64_simd_vec_issue_info *simd_issue_info () const;
   const aarch64_sve_vec_issue_info *sve_issue_info () const;
@@ -14753,13 +14756,23 @@ private:
      - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then this structure describes
        SVE code.  */
   unsigned int m_vec_flags = 0;
+
+  /* Assume that, when the code is executing on the core described
+     by M_ISSUE_INFO, one iteration of the loop will handle M_VF_FACTOR
+     times more data than the vectorizer anticipates.
+
+     This is only ever different from 1 for SVE.  It allows us to consider
+     what would happen on a 256-bit SVE target even when the -mtune
+     parameters say that the “likely” SVE length is 128 bits.  */
+  unsigned int m_vf_factor = 1;
 };
 
 aarch64_vec_op_count::
 aarch64_vec_op_count (const aarch64_vec_issue_info *issue_info,
-                     unsigned int vec_flags)
+                     unsigned int vec_flags, unsigned int vf_factor)
   : m_issue_info (issue_info),
-    m_vec_flags (vec_flags)
+    m_vec_flags (vec_flags),
+    m_vf_factor (vf_factor)
 {
 }
 
@@ -14973,7 +14986,11 @@ aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
       if (m_vec_flags & VEC_ANY_SVE)
        m_advsimd_ops.quick_push ({ issue_info, VEC_ADVSIMD });
       if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost)
-       m_ops.quick_push ({ &neoversev1_vec_issue_info, m_vec_flags });
+       {
+         unsigned int vf_factor = (m_vec_flags & VEC_ANY_SVE) ? 2 : 1;
+         m_ops.quick_push ({ &neoversev1_vec_issue_info, m_vec_flags,
+                             vf_factor });
+       }
     }
 }
 
@@ -16111,8 +16128,9 @@ adjust_body_cost (loop_vec_info loop_vinfo,
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Neoverse V1 estimate:\n");
-         adjust_body_cost_sve (&m_ops[1], scalar_cycles_per_iter * 2,
-                               advsimd_cycles_per_iter * 2,
+         auto vf_factor = m_ops[1].vf_factor ();
+         adjust_body_cost_sve (&m_ops[1], scalar_cycles_per_iter * vf_factor,
+                               advsimd_cycles_per_iter * vf_factor,
                                could_use_advsimd, orig_body_cost,
                                &body_cost, &should_disparage);
        }
-- 
2.25.1

Reply via email to