On Fri, 20 Oct 2023, Tamar Christina wrote: > Hi All, > > The previous patch tried to remove PHI nodes that dominated the first loop, > however the correct fix is to only remove .MEM nodes. > > This patch thus makes the condition a bit stricter and only tries to remove > MEM phi nodes. > > I couldn't figure out a way to easily determine if a particular PHI is vUSE > related, so the patch does: > > 1. check if the definition is a vDEF and not defined in main loop. > 2. check if the definition is a PHI and not defined in main loop. > 3. check if the definition is a default definition. > > For no 2 and 3 we may misidentify the PHI, in both cases the value is defined > outside of the loop version block which also makes it ok to remove. > > Bootstrapped Regtested on aarch64-none-linux-gnu, > powerpc64le-unknown-linux-gnu, > x86_64-none-linux-gnu and no issues. > > Tested all default testsuites. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/111860 > * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): > Drop .MEM nodes only. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/111860 > * gcc.dg/vect/pr111860-2.c: New test. > * gcc.dg/vect/pr111860-3.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/testsuite/gcc.dg/vect/pr111860-2.c > b/gcc/testsuite/gcc.dg/vect/pr111860-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..07f64ffb5318c9d7817d46802d123cc9a2d65ec9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr111860-2.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fno-tree-sink -ftree-vectorize" } */ > +int buffer_ctrl_ctx_0, buffer_ctrl_p1, buffer_ctrl_cmd; > + > +int > +buffer_ctrl (long ret, int i) > +{ > + switch (buffer_ctrl_cmd) > + { > + case 1: > + buffer_ctrl_ctx_0 = 0; > + for (; i; i++) > + if (buffer_ctrl_p1) > + ret++; > + } > + return ret; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr111860-3.c > b/gcc/testsuite/gcc.dg/vect/pr111860-3.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..07f64ffb5318c9d7817d46802d123cc9a2d65ec9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr111860-3.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fno-tree-sink -ftree-vectorize" } */ > +int buffer_ctrl_ctx_0, buffer_ctrl_p1, buffer_ctrl_cmd; > + > +int > +buffer_ctrl (long ret, int i) > +{ > + switch (buffer_ctrl_cmd) > + { > + case 1: > + buffer_ctrl_ctx_0 = 0; > + for (; i; i++) > + if (buffer_ctrl_p1) > + ret++; > + } > + return ret; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr111860.c > b/gcc/testsuite/gcc.dg/vect/pr111860.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..36f0774601040418bc6b7f27c9425b2bf93b18cb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr111860.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > + > +int optimize_path_n, optimize_path_d; > +int *optimize_path_d_0; > +extern void path_threeOpt( long); > +void optimize_path() { > + int i; > + long length; > + i = 0; > + for (; i <= optimize_path_n; i++) > + optimize_path_d = 0; > + i = 0; > + for (; i < optimize_path_n; i++) > + length += optimize_path_d_0[i]; > + path_threeOpt(length); > +} > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index > 1f7779b9834c3aef3c6a993fab916224fab03147..fc55278e63f7a48943fdc32c5e207110cf14507e > 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -1626,13 +1626,33 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop > *loop, edge loop_exit, > edge temp_e = redirect_edge_and_branch (exit, new_preheader); > flush_pending_stmts (temp_e); > } > - > /* Record the new SSA names in the cache so that we can skip > materializing > them again when we fill in the rest of the LCSSA variables. */ > for (auto phi : new_phis) > { > tree new_arg = gimple_phi_arg (phi, 0)->def; > new_phi_args.put (new_arg, gimple_phi_result (phi));
don't you want to skip this as well? > + > + if (!SSA_VAR_P (new_arg)) > + continue; > + /* If the PHI MEM node dominates the loop then we shouldn't create > + a new LC-SSSA PHI for it in the intermediate block. */ > + gimple *def_stmt = SSA_NAME_DEF_STMT (new_arg); > + basic_block def_bb = gimple_bb (def_stmt); > + /* A MEM phi that consitutes a new DEF for the vUSE chain can either > + be a .VDEF or a PHI that operates on MEM. */ > + if (((gimple_vdef (def_stmt) || is_a <gphi *> (def_stmt)) > + /* And said definition must not be inside the main loop. */ > + && (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))) > + /* Or we must be a parameter. In the last two cases we may remove > + a non-MEM PHI node, but since they dominate both loops the > + removal is unlikely to cause trouble as the exits must already > + be using them. */ > + || SSA_NAME_IS_DEFAULT_DEF (new_arg)) > + { > + auto gsi = gsi_for_stmt (phi); > + remove_phi_node (&gsi, true); > + } May I suggest the simpler gimple *def; if (virtual_operand_p (new_arg) && (SSA_NAME_IS_DEFAULT_DEF (new_arg) || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (new_arg)))) { > + auto gsi = gsi_for_stmt (phi); > + remove_phi_node (&gsi, true); } ? OK with that change. Richard.