When BB reduction vectorization picks up a chain that contains a def coming from an ASM, and that ASM is inside the vectorized region, we fail to get its LHS. Instead of trying to get the correct def, the following avoids vectorizing such a def and keeps it as a def to be added in the epilog.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/113018
	* tree-vect-slp.cc (vect_slp_check_for_roots): Only start
	SLP discovery from stmts with a LHS.
---
 gcc/tree-vect-slp.cc | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f8a168caa60..a82fca45161 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7419,7 +7419,12 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
                           invalid = true;
                           break;
                         }
-                      if (chain[i].dt != vect_internal_def)
+                      if (chain[i].dt != vect_internal_def
+                          /* Avoid stmts where the def is not the LHS, like
+                             ASMs.  */
+                          || (gimple_get_lhs (bb_vinfo->lookup_def
+                                                      (chain[i].op)->stmt)
+                              != chain[i].op))
                         remain_cnt++;
                     }
                   if (!invalid && chain.length () - remain_cnt > 1)
@@ -7431,8 +7436,11 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
                         remain.create (remain_cnt);
                       for (unsigned i = 0; i < chain.length (); ++i)
                         {
-                          if (chain[i].dt == vect_internal_def)
-                            stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+                          stmt_vec_info stmt_info;
+                          if (chain[i].dt == vect_internal_def
+                              && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
+                                  gimple_get_lhs (stmt_info->stmt) == chain[i].op))
+                            stmts.quick_push (stmt_info);
                           else
                             remain.quick_push (chain[i].op);
                         }
-- 
2.35.3