在 2024/1/24 下午5:36, Li Wei 写道:
We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r
failed to vectorize effectively. For this reason, we adjust the cost of
128-bit vector_stmt statements that match the multiply-add pattern to
facilitate 128-bit vectorization.
The experimental results show that after the modification, 549.fotonik3d_r
performance can be improved by 9.77% under the 128-bit vectorization option.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_multiply_add_p): New.
(loongarch_vector_costs::add_stmt_cost): Adjust.
gcc/testsuite/ChangeLog:
* gfortran.dg/vect/vect-10.f90: New test.
---
gcc/config/loongarch/loongarch.cc | 42 +++++++++++++
gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++
2 files changed, 113 insertions(+)
create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 072c68d97e3..32a0b6f43e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4096,6 +4096,36 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi
return 1 << ceil_log2 (uf);
}
+static bool
+loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
+{ /* Return true if STMT_INFO is a PLUS_EXPR or MINUS_EXPR assignment with an operand defined by a MULT_EXPR.  */
+ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
+ if (!assign) /* Statements that are not GIMPLE assignments never match.  */
+ return false;
+ tree_code code = gimple_assign_rhs_code (assign);
+ if (code != PLUS_EXPR && code != MINUS_EXPR) /* Only additions and subtractions qualify.  */
+ return false;
+
+ auto is_mul_result = [&](int i) /* True if operand I of ASSIGN is defined by a MULT_EXPR assignment.  */
+ {
+ tree rhs = gimple_op (assign, i);
+ if (TREE_CODE (rhs) != SSA_NAME) /* Operands that are not SSA names are rejected.  */
+ return false;
+
+ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); /* May be null; checked below.  */
+ if (!def_stmt_info /* Reject operands whose definition VINFO does not report...  */
+ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) /* ...or does not classify as vect_internal_def.  */
+ return false;
+ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt);
+ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) /* The definition must itself be a multiplication.  */
+ return false;
+
+ return true;
+ };
+
+ return is_mul_result (1) || is_mul_result (2); /* Operands 1 and 2 are presumably the two RHS operands (TODO confirm); either being a product is enough.  */
+}
+
unsigned
loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree,
@@ -4108,6 +4138,18 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
{
int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
misalign);
+ if (vectype && stmt_info)
+ {
+ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
+ machine_mode mode = TYPE_MODE (vectype);
Hi, Liwei:
I think the code here needs to be commented.
Thanks.
+ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign)
+ {
+ if (!vect_is_reduction (stmt_info)
+ && loongarch_multiply_add_p (m_vinfo, stmt_info))
+ stmt_cost = 0;
+ }
+ }
+
retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
m_costs[where] += retval;