> >> Do you see vect_constant_defs in practice, or is this just for
> >> completeness?  I would expect any constants to appear as direct
> >> operands.  I don't mind keeping it if it's just a belt-and-braces
> >> thing though.
> >
> > In the latency case where I had allow_constants the early rejection
> > based on the operand itself wouldn't trigger, so in that case I still
> > needed to reject them, but do so after the multiply check.  While they
> > do appear as direct operands as well, they also have their own nodes,
> > in particular for SLP, so the constants are handled as a group.
>
> Ah, OK, thanks.
>
> > But can also check CONSTANT_CLASS_P (rhs) if that's preferable.
>
> No, what you did is more correct.  I just wasn't sure at first which
> case it was handling.
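For reference, a minimal sketch of the case discussed above (hypothetical, not taken from the patch or its testsuite; the function name and loop are illustrative only): the addition has one operand defined by a multiply and a constant accumulator, and under SLP that constant is represented by its own vect_constant_def node, which is what the def-type check after the multiply check is meant to catch rather than CONSTANT_CLASS_P on the direct operand.

/* Hypothetical illustration only: the multiply feeds one operand of the
   addition and the literal 5 is the other.  Under SLP the constant also
   gets its own node with def type vect_constant_def, so it is handled by
   the new check on the definition's type rather than by the removed
   early CONSTANT_CLASS_P test on the operand itself.  */
void
f (int *restrict out, int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] * b[i] + 5;
}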
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_multiply_add_p): Update handling
of constants.
(aarch64_adjust_stmt_cost): Use it.
(aarch64_vector_costs::count_ops): Likewise.
(aarch64_vector_costs::add_stmt_cost): Pass vinfo to
aarch64_adjust_stmt_cost.
--- inline copy of patch ---
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d4d7602554592b9042b8eaf389eff1ec80c2090e..7cc5916ce06b2635346c807da9306738b939ebc6 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16410,10 +16410,6 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
if (code != PLUS_EXPR && code != MINUS_EXPR)
return false;
- if (CONSTANT_CLASS_P (gimple_assign_rhs1 (assign))
- || CONSTANT_CLASS_P (gimple_assign_rhs2 (assign)))
- return false;
-
for (int i = 1; i < 3; ++i)
{
tree rhs = gimple_op (assign, i);
@@ -16441,7 +16437,8 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
def_stmt_info = vinfo->lookup_def (rhs);
if (!def_stmt_info
- || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def)
+ || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
+ || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_constant_def)
return false;
}
@@ -16721,8 +16718,9 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
and which when vectorized would operate on vector type VECTYPE. Add the
cost of any embedded operations. */
static fractional_cost
-aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
- tree vectype, fractional_cost stmt_cost)
+aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info, tree vectype,
+ unsigned vec_flags, fractional_cost stmt_cost)
{
if (vectype)
{
@@ -16745,6 +16743,14 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
break;
}
+ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
+ if (assign && !vect_is_reduction (stmt_info))
+ {
+ /* For MLA we need to reduce the cost since MLA is 1 instruction. */
+ if (aarch64_multiply_add_p (vinfo, stmt_info, vec_flags))
+ return 0;
+ }
+
if (kind == vector_stmt || kind == vec_to_scalar)
if (tree cmp_type = vect_embedded_comparison_type (stmt_info))
{
@@ -16814,7 +16820,8 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
}
/* Assume that multiply-adds will become a single operation. */
- if (stmt_info && aarch64_multiply_add_p (m_vinfo, stmt_info, m_vec_flags))
+ if (stmt_info
+ && aarch64_multiply_add_p (m_vinfo, stmt_info, m_vec_flags))
return;
/* Count the basic operation cost associated with KIND. */
@@ -17060,8 +17067,8 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
{
/* Account for any extra "embedded" costs that apply additively
to the base cost calculated above. */
- stmt_cost = aarch64_adjust_stmt_cost (kind, stmt_info, vectype,
- stmt_cost);
+ stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info,
+ vectype, m_vec_flags, stmt_cost);
/* If we're recording a nonzero vector loop body cost for the
innermost loop, also estimate the operations that would need