On Fri, May 5, 2023 at 5:18 PM Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > While looking into a different issue, I noticed that it > would take until the second forwprop pass to do some > forward proping and it was because the ssa name was > used more than once but the second statement was > "dead" and we don't remove that until much later. > > So this uses simple_dce_from_worklist instead of manually > removing of the known unused statements instead. > Propagate engine does not do a cleanupcfg afterwards either but manually > cleans up possible EH edges so simple_dce_from_worklist > needs to communicate that back to the propagate engine. > > Some testcases needed to be updated/changed even because of better > optimization. > gcc.dg/pr81192.c even had to be changed to be using the gimple FE so it would > be less fragile in the future too. > gcc.dg/tree-ssa/pr98737-1.c was failing because __atomic_fetch_ was being > matched > but in those cases, the result was not being used so both __atomic_fetch_ and > __atomic_x_and_fetch_ are valid choices and would not make a code generation > difference. > evrp7.c, evrp8.c, vrp35.c, vrp36.c: just needed a slightly change as the > removal message > is different slightly. > kernels-alias-8.c: ccp1 is able to remove an unused load which causes ealias > to have > one less load to analysis so update the expected scan #. > > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
OK. Thanks, Richard. > gcc/ChangeLog: > > PR tree-optimization/109691 > * tree-ssa-dce.cc (simple_dce_from_worklist): Add need_eh_cleanup > argument. > If the removed statement can throw, have need_eh_cleanup > include the bb of that statement. > * tree-ssa-dce.h (simple_dce_from_worklist): Update declaration. > * tree-ssa-propagate.cc (struct prop_stats_d): Remove > num_dce. > (substitute_and_fold_dom_walker::substitute_and_fold_dom_walker): > Initialize dceworklist instead of stmts_to_remove. > (substitute_and_fold_dom_walker::~substitute_and_fold_dom_walker): > Destore dceworklist instead of stmts_to_remove. > (substitute_and_fold_dom_walker::before_dom_children): > Set dceworklist instead of adding to stmts_to_remove. > (substitute_and_fold_engine::substitute_and_fold): > Call simple_dce_from_worklist instead of poping > from the list. > Don't update the stat on removal statements. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/evrp7.c: Update for output change. > * gcc.dg/tree-ssa/evrp8.c: Likewise. > * gcc.dg/tree-ssa/vrp35.c: Likewise. > * gcc.dg/tree-ssa/vrp36.c: Likewise. > * gcc.dg/tree-ssa/pr98737-1.c: Update scan-tree-dump-not > to check for assignment too instead of just a call. > * c-c++-common/goacc/kernels-alias-8.c: Update test > for removal of load. > * gcc.dg/pr81192.c: Rewrite testcase in gimple based test. > --- > .../c-c++-common/goacc/kernels-alias-8.c | 6 +- > gcc/testsuite/gcc.dg/pr81192.c | 64 ++++++++++++++++--- > gcc/testsuite/gcc.dg/tree-ssa/evrp7.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/evrp8.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c | 7 +- > gcc/testsuite/gcc.dg/tree-ssa/vrp35.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/vrp36.c | 2 +- > gcc/tree-ssa-dce.cc | 7 +- > gcc/tree-ssa-dce.h | 2 +- > gcc/tree-ssa-propagate.cc | 39 ++--------- > 10 files changed, 82 insertions(+), 51 deletions(-) > > diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c > b/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c > index 69200ccf192..c3922e33241 100644 > --- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c > +++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c > @@ -16,7 +16,9 @@ foo (int *a, size_t n) > } > } > > -/* Only the omp_data_i related loads should be annotated with cliques. */ > -/* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */ > +/* Only the omp_data_i related loads should be annotated with cliques. > + Note ccp can remove one of the omp_data_i loads which is why there > + is there only one clique base still there. */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 1" 1 "ealias" } } */ > /* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */ > > diff --git a/gcc/testsuite/gcc.dg/pr81192.c b/gcc/testsuite/gcc.dg/pr81192.c > index 6cab6056558..f6d201ee71a 100644 > --- a/gcc/testsuite/gcc.dg/pr81192.c > +++ b/gcc/testsuite/gcc.dg/pr81192.c > @@ -1,5 +1,58 @@ > -/* { dg-options "-Os -fdump-tree-pre-details -fdisable-tree-evrp > -fno-tree-dse" } */ > +/* { dg-options "-Os -fgimple -fdump-tree-pre-details -fdisable-tree-evrp > -fno-tree-dse" } */ > > +#if __SIZEOF_INT__ == 2 > +#define unsigned __UINT32_TYPE__ > +#define int __INT32_TYPE__ > +#endif > + > +unsigned a; > +int b, c; > + > +void __GIMPLE(ssa, startwith("pre")) fn2 () > +{ > + int b_lsm6; > + int j; > + int c0_1; > + int iftmp2_8; > + > + __BB(2): > + a = _Literal (unsigned)30; > + c0_1 = c; > + b_lsm6_9 = b; > + goto __BB7; > + > + __BB(3): > + if (j_6(D) != 2147483647) > + goto __BB4; > + else > + goto __BB5; > + > + __BB(4): > + iftmp2_8 = j_6(D) + 1; > + goto __BB5; > + > + __BB(5): > + b_lsm6_10 = 2147483647; > + goto __BB6; > + > + __BB(6): > + if (c0_1 != 0) > + goto __BB3; > + else > + goto __BB8; > + > + __BB(8): > + goto __BB7; > + > + __BB(7): > + goto __BB6; > + > +} > + > +#if 0 > +/* This used to be a C based testcase but ccp3 would now would remove > + the setting of iftmp2_8 (in the above gimple) which would cause PRE > + not to test what PRE was doing incorrectly. The original code is below. */ > /* Disable tree-evrp because the new version of evrp sees > <bb 3> : > if (j_8(D) != 2147483647) > @@ -18,14 +71,6 @@ which causes the situation being tested to dissapear > before we get to PRE. */ > > /* Likewise disable DSE which also elides the tail merging "opportunity". */ > > -#if __SIZEOF_INT__ == 2 > -#define unsigned __UINT32_TYPE__ > -#define int __INT32_TYPE__ > -#endif > - > -unsigned a; > -int b, c; > - > static int > fn1 (int p1, int p2) > { > @@ -41,5 +86,6 @@ fn2 (void) > for (; c; b = fn1 (j, 1)) > ; > } > +#endif > > /* { dg-final { scan-tree-dump-times "(?n)find_duplicates: <bb .*> duplicate > of <bb .*>" 1 "pre" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp7.c > b/gcc/testsuite/gcc.dg/tree-ssa/evrp7.c > index 16fbe65e4d9..07314304bcf 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/evrp7.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp7.c > @@ -11,4 +11,4 @@ int test1(int i, int k) > return 1; > } > > -/* { dg-final { scan-tree-dump "Removing dead stmt \[^\r\n\]* = j_.* == 10" > "evrp" } } */ > +/* { dg-final { scan-tree-dump "Removing dead stmt:\[^\r\n\]* = j_.* == 10" > "evrp" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp8.c > b/gcc/testsuite/gcc.dg/tree-ssa/evrp8.c > index b7e5c7aa2de..a968346fea6 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/evrp8.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp8.c > @@ -8,4 +8,4 @@ int foo(int i) > return 1; > } > > -/* { dg-final { scan-tree-dump "Removing dead stmt \[^\r\n\]* = i_.* == 1" > "evrp" } } */ > +/* { dg-final { scan-tree-dump "Removing dead stmt:\[^\r\n\]* = i_.* == 1" > "evrp" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c > index e313a7fa79d..8e105b46a8b 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c > @@ -2,8 +2,11 @@ > /* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } > } */ > /* { dg-options "-O2 -fdump-tree-optimized -fcompare-debug" } */ > /* { dg-additional-options "-march=i686" { target ia32 } } */ > -/* { dg-final { scan-tree-dump-not "__atomic_fetch_" "optimized" } } */ > -/* { dg-final { scan-tree-dump-not "__sync_fetch_and_" "optimized" } } */ > +/* Note the choice between __atomic_fetch_and_* and __atomic_and_*_fetch > + if the result is not used does not matter. so check if make sure we don't > + have __atomic_fetch_ with an assignment */ > +/* { dg-final { scan-tree-dump-not "= __atomic_fetch_" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "= __sync_fetch_and_" "optimized" } } */ > > typedef signed char schar; > typedef unsigned long ulong; > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp35.c > b/gcc/testsuite/gcc.dg/tree-ssa/vrp35.c > index a372a18cc43..1d76677bd3d 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp35.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp35.c > @@ -11,4 +11,4 @@ int test1(int i, int k) > return 1; > } > > -/* { dg-final { scan-tree-dump "Removing dead stmt \[^\r\n\]* = j_.* == 10" > "vrp1" } } */ > +/* { dg-final { scan-tree-dump "Removing dead stmt:\[^\r\n\]* = j_.* == 10" > "vrp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp36.c > b/gcc/testsuite/gcc.dg/tree-ssa/vrp36.c > index 1f77b539d70..1e9c7f424bb 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp36.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp36.c > @@ -8,4 +8,4 @@ int foo(int i) > return 1; > } > > -/* { dg-final { scan-tree-dump "Removing dead stmt \[^\r\n\]* = i_.* == 1" > "vrp1" } } */ > +/* { dg-final { scan-tree-dump "Removing dead stmt:\[^\r\n\]* = i_.* == 1" > "vrp1" } } */ > diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc > index e4c32e630df..6554b5db03e 100644 > --- a/gcc/tree-ssa-dce.cc > +++ b/gcc/tree-ssa-dce.cc > @@ -2097,7 +2097,7 @@ make_pass_cd_dce (gcc::context *ctxt) > use operands number. */ > > void > -simple_dce_from_worklist (bitmap worklist) > +simple_dce_from_worklist (bitmap worklist, bitmap need_eh_cleanup) > { > int phiremoved = 0; > int stmtremoved = 0; > @@ -2127,6 +2127,11 @@ simple_dce_from_worklist (bitmap worklist) > if (stmt_unremovable_because_of_non_call_eh_p (cfun, t)) > continue; > > + /* Tell the caller that we removed a statement that might > + throw so it could cleanup the cfg for that block. */ > + if (need_eh_cleanup && stmt_could_throw_p (cfun, t)) > + bitmap_set_bit (need_eh_cleanup, gimple_bb (t)->index); > + > /* Add uses to the worklist. */ > ssa_op_iter iter; > use_operand_p use_p; > diff --git a/gcc/tree-ssa-dce.h b/gcc/tree-ssa-dce.h > index f2542157608..5522359f5b2 100644 > --- a/gcc/tree-ssa-dce.h > +++ b/gcc/tree-ssa-dce.h > @@ -18,5 +18,5 @@ along with GCC; see the file COPYING3. If not see > > #ifndef TREE_SSA_DCE_H > #define TREE_SSA_DCE_H > -extern void simple_dce_from_worklist (bitmap); > +extern void simple_dce_from_worklist (bitmap, bitmap = nullptr); > #endif > diff --git a/gcc/tree-ssa-propagate.cc b/gcc/tree-ssa-propagate.cc > index 76708ca185f..5573d360699 100644 > --- a/gcc/tree-ssa-propagate.cc > +++ b/gcc/tree-ssa-propagate.cc > @@ -38,6 +38,7 @@ > #include "cfgloop.h" > #include "tree-cfgcleanup.h" > #include "cfganal.h" > +#include "tree-ssa-dce.h" > > /* This file implements a generic value propagation engine based on > the same propagation used by the SSA-CCP algorithm [1]. > @@ -527,7 +528,6 @@ struct prop_stats_d > long num_const_prop; > long num_copy_prop; > long num_stmts_folded; > - long num_dce; > }; > > static struct prop_stats_d prop_stats; > @@ -641,14 +641,14 @@ public: > something_changed (false), > substitute_and_fold_engine (engine) > { > - stmts_to_remove.create (0); > + dceworklist = BITMAP_ALLOC (NULL); > stmts_to_fixup.create (0); > need_eh_cleanup = BITMAP_ALLOC (NULL); > need_ab_cleanup = BITMAP_ALLOC (NULL); > } > ~substitute_and_fold_dom_walker () > { > - stmts_to_remove.release (); > + BITMAP_FREE (dceworklist); > stmts_to_fixup.release (); > BITMAP_FREE (need_eh_cleanup); > BITMAP_FREE (need_ab_cleanup); > @@ -661,7 +661,7 @@ public: > } > > bool something_changed; > - vec<gimple *> stmts_to_remove; > + bitmap dceworklist; > vec<gimple *> stmts_to_fixup; > bitmap need_eh_cleanup; > bitmap need_ab_cleanup; > @@ -760,7 +760,7 @@ substitute_and_fold_dom_walker::before_dom_children > (basic_block bb) > print_generic_expr (dump_file, sprime); > fprintf (dump_file, "\n"); > } > - stmts_to_remove.safe_push (phi); > + bitmap_set_bit (dceworklist, SSA_NAME_VERSION (res)); > continue; > } > } > @@ -802,7 +802,7 @@ substitute_and_fold_dom_walker::before_dom_children > (basic_block bb) > print_generic_expr (dump_file, sprime); > fprintf (dump_file, "\n"); > } > - stmts_to_remove.safe_push (stmt); > + bitmap_set_bit (dceworklist, SSA_NAME_VERSION (lhs)); > continue; > } > } > @@ -970,30 +970,7 @@ substitute_and_fold_engine::substitute_and_fold > (basic_block block) > substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this); > walker.walk (block ? block : ENTRY_BLOCK_PTR_FOR_FN (cfun)); > > - /* We cannot remove stmts during the BB walk, especially not release > - SSA names there as that destroys the lattice of our callers. > - Remove stmts in reverse order to make debug stmt creation possible. */ > - while (!walker.stmts_to_remove.is_empty ()) > - { > - gimple *stmt = walker.stmts_to_remove.pop (); > - if (dump_file && dump_flags & TDF_DETAILS) > - { > - fprintf (dump_file, "Removing dead stmt "); > - print_gimple_stmt (dump_file, stmt, 0); > - fprintf (dump_file, "\n"); > - } > - prop_stats.num_dce++; > - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > - if (gimple_code (stmt) == GIMPLE_PHI) > - remove_phi_node (&gsi, true); > - else > - { > - unlink_stmt_vdef (stmt); > - gsi_remove (&gsi, true); > - release_defs (stmt); > - } > - } > - > + simple_dce_from_worklist (walker.dceworklist, walker.need_eh_cleanup); > if (!bitmap_empty_p (walker.need_eh_cleanup)) > gimple_purge_all_dead_eh_edges (walker.need_eh_cleanup); > if (!bitmap_empty_p (walker.need_ab_cleanup)) > @@ -1021,8 +998,6 @@ substitute_and_fold_engine::substitute_and_fold > (basic_block block) > prop_stats.num_copy_prop); > statistics_counter_event (cfun, "Statements folded", > prop_stats.num_stmts_folded); > - statistics_counter_event (cfun, "Statements deleted", > - prop_stats.num_dce); > > return walker.something_changed; > } > -- > 2.39.1 >