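-mtune=neoverse-512tvb sets the likely SVE vector length to 128 bits, but it also takes into account Neoverse V1, which is a 256-bit target. When costing an SVE loop for Neoverse V1, one iteration therefore handles twice as much data as the vectorizer anticipates. This patch records that VF (VL) factor in aarch64_vec_op_count, so that adjust_body_cost can read it instead of hard-coding 2.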
Tested on aarch64-linux-gnu & applied. Richard gcc/ * config/aarch64/aarch64.c (aarch64_vec_op_count::m_vf_factor): New member variable. (aarch64_vec_op_count::aarch64_vec_op_count): Add a parameter for it. (aarch64_vec_op_count::vf_factor): New function. (aarch64_vector_costs::aarch64_vector_costs): When costing for neoverse-512tvb, pass a vf_factor of 2 for the Neoverse V1 version of an SVE loop. (aarch64_vector_costs::adjust_body_cost): Read the vf factor instead of hard-coding 2. --- gcc/config/aarch64/aarch64.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 241cef8c5d9..5fa64fe5350 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14711,9 +14711,12 @@ class aarch64_vec_op_count { public: aarch64_vec_op_count () = default; - aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int); + aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int, + unsigned int = 1); unsigned int vec_flags () const { return m_vec_flags; } + unsigned int vf_factor () const { return m_vf_factor; } + const aarch64_base_vec_issue_info *base_issue_info () const; const aarch64_simd_vec_issue_info *simd_issue_info () const; const aarch64_sve_vec_issue_info *sve_issue_info () const; @@ -14753,13 +14756,23 @@ private: - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then this structure describes SVE code. */ unsigned int m_vec_flags = 0; + + /* Assume that, when the code is executing on the core described + by M_ISSUE_INFO, one iteration of the loop will handle M_VF_FACTOR + times more data than the vectorizer anticipates. + + This is only ever different from 1 for SVE. It allows us to consider + what would happen on a 256-bit SVE target even when the -mtune + parameters say that the “likely” SVE length is 128 bits. 
*/ + unsigned int m_vf_factor = 1; }; aarch64_vec_op_count:: aarch64_vec_op_count (const aarch64_vec_issue_info *issue_info, - unsigned int vec_flags) + unsigned int vec_flags, unsigned int vf_factor) : m_issue_info (issue_info), - m_vec_flags (vec_flags) + m_vec_flags (vec_flags), + m_vf_factor (vf_factor) { } @@ -14973,7 +14986,11 @@ aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo, if (m_vec_flags & VEC_ANY_SVE) m_advsimd_ops.quick_push ({ issue_info, VEC_ADVSIMD }); if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost) - m_ops.quick_push ({ &neoversev1_vec_issue_info, m_vec_flags }); + { + unsigned int vf_factor = (m_vec_flags & VEC_ANY_SVE) ? 2 : 1; + m_ops.quick_push ({ &neoversev1_vec_issue_info, m_vec_flags, + vf_factor }); + } } } @@ -16111,8 +16128,9 @@ adjust_body_cost (loop_vec_info loop_vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "Neoverse V1 estimate:\n"); - adjust_body_cost_sve (&m_ops[1], scalar_cycles_per_iter * 2, - advsimd_cycles_per_iter * 2, + auto vf_factor = m_ops[1].vf_factor (); + adjust_body_cost_sve (&m_ops[1], scalar_cycles_per_iter * vf_factor, + advsimd_cycles_per_iter * vf_factor, could_use_advsimd, orig_body_cost, &body_cost, &should_disparage); } -- 2.25.1