> I think we should turn this option on by default, document that and note > that some languages (C++) say loops terminate.
Enabling this option at -O2 does not seem suitable, as it is rather aggressive. It is better to turn it on at -O3. >> + /* Avoid doing so for OpenACC abstraction calls >> + (IFN_GOACC_LOOP), because later pass that lowers those >> + calls need to access lhs of calls. */ >> + && (!gimple_call_internal_p (stmt) >> + || gimple_call_internal_fn (stmt) != IFN_GOACC_LOOP)) > You can use gimple_call_internal_p (stmt, IFN_GOACC_LOOP) > Thomas? This part looks OK to me but it seems lowering could deal with this > as well? I have removed the change here and fixed the problem in the OpenACC lowering instead. Feng ---- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 37aab79..1ad2a6d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2019-06-04 Feng Xue <f...@os.amperecomputing.com> + + PR tree-optimization/89713 + * doc/invoke.texi (-ffinite-loop): Document new option. + * common.opt (-ffinite-loop): New option. + * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark + IFN_GOACC_LOOP calls as necessary. + * tree-ssa-loop-niter.c (finite_loop_p): Assume loop with an exit is + finite. + * omp-offload.c (oacc_xform_loop): Skip lowering if return value of + IFN_GOACC_LOOP call is not used. + * toplev.c (process_options): Enable -ffinite-loop by default at -O3. + 2019-06-04 Alan Modra <amo...@gmail.com> PR target/90689 diff --git a/gcc/common.opt b/gcc/common.opt index 0e72fd0..66a1ff2 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1437,6 +1437,10 @@ ffinite-math-only Common Report Var(flag_finite_math_only) Optimization SetByCombined Assume no NaNs or infinities are generated. +ffinite-loop +Common Report Var(flag_finite_loop) Optimization Init(-1) +Assume that loops with an exit will terminate and not loop indefinitely. + ffixed- Common Joined RejectNegative Var(common_deferred_options) Defer -ffixed-<register> Mark <register> as being unavailable to the compiler. 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 91c9bb8..0a36b6c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -412,6 +412,7 @@ Objective-C and Objective-C++ Dialects}. -fdevirtualize-at-ltrans -fdse @gol -fearly-inlining -fipa-sra -fexpensive-optimizations -ffat-lto-objects @gol -ffast-math -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol +-ffinite-loop @gol -fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol -fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol @@ -8327,6 +8328,7 @@ by @option{-O2} and also turns on the following optimization flags: -ftree-loop-distribute-patterns @gol -ftree-loop-distribution @gol -ftree-loop-vectorize @gol +-ffinite-loop @gol -ftree-partial-pre @gol -ftree-slp-vectorize @gol -funswitch-loops @gol @@ -9503,6 +9505,15 @@ that may set @code{errno} but are otherwise free of side effects. This flag is enabled by default at @option{-O2} and higher if @option{-Os} is not also specified. +@item -ffinite-loop +@opindex ffinite-loop +@opindex fno-finite-loop +Assume that a loop with an exit will eventually take the exit and not loop +indefinitely. This allows the compiler to remove loops that otherwise have +no side effects, without treating possible endless looping as a side effect. + +This option is enabled by default at @option{-O3}. 
+ @item -ftree-dominator-opts @opindex ftree-dominator-opts Perform a variety of simple scalar cleanups (constant/copy diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 97ae47b..369122f 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call) tree chunk_size = NULL_TREE; unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5)); tree lhs = gimple_call_lhs (call); - tree type = TREE_TYPE (lhs); + tree type = NULL_TREE; tree diff_type = TREE_TYPE (range); tree r = NULL_TREE; gimple_seq seq = NULL; @@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call) unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any) + /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */ + if (!lhs) + { + gsi_replace_with_seq (&gsi, seq, true); + return; + } + + type = TREE_TYPE (lhs); + #ifdef ACCEL_COMPILER chunk_size = gimple_call_arg (call, 4); if (integer_minus_onep (chunk_size) /* Force static allocation. 
*/ diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C new file mode 100644 index 0000000..e374155 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loop" } */ + +#include <string> +#include <vector> +#include <list> +#include <set> +#include <map> + +using namespace std; + +int foo (vector<string> &v, list<string> &l, set<string> &s, map<int, string> &m) +{ + for (vector<string>::iterator it = v.begin (); it != v.end (); ++it) + it->length(); + + for (list<string>::iterator it = l.begin (); it != l.end (); ++it) + it->length(); + + for (map<int, string>::iterator it = m.begin (); it != m.end (); ++it) + it->first + it->second.length(); + + for (set<string>::iterator it0 = s.begin (); it0 != s.end(); ++it0) + for (vector<string>::reverse_iterator it1 = v.rbegin(); it1 != v.rend(); ++it1) + { + it0->length(); + it1->length(); + } + + return 0; +} +/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c new file mode 100644 index 0000000..ffca49c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cddce1 -ffinite-loop" } */ + +typedef struct list { + char pad[15]; + struct list *next; +} list; + +int data; + +list *head, *tail; + +int __attribute__((pure)) pfn (int); + +int foo (unsigned u, int s) +{ + unsigned i; + list *p; + int j; + + for (i = 0; i < u; i += 2) + ; + + for (p = head; p; p = p->next) + ; + + for (j = data; j & s; j = pfn (j + 3)) + ; + + for (p = head; p != tail; p = p->next) + for (j = data + 1; j > s; j = pfn (j + 2)) + ; + + return 0; +} +/* { dg-final { scan-tree-dump-not "if" "cddce1"} } */ + diff --git a/gcc/toplev.c b/gcc/toplev.c index d300ac2..1c82ac4 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1708,6 +1708,10 @@ 
process_options (void) flag_prefetch_loop_arrays = 0; } + /* Enable -ffinite-loop with -O3 optimization level. */ + if (flag_finite_loop == -1) + flag_finite_loop = optimize >= 3; + /* The presence of IEEE signaling NaNs, implies all math can trap. */ if (flag_signaling_nans) flag_trapping_math = 1; diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index 2478219..179605e 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -245,6 +245,17 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive) mark_stmt_necessary (stmt, true); return; } + /* IFN_GOACC_LOOP calls are necessary in that they are used to + represent parameters (i.e. step, bound) of a lowered OpenACC + partitioned loop. But this kind of partitioned loop might not + survive aggressive loop removal, because it has a loop exit and + is assumed to be finite. Therefore, we need to explicitly mark + these calls. (An example is libgomp.oacc-c-c++-common/pr84955.c) */ + if (gimple_call_internal_p (stmt, IFN_GOACC_LOOP)) + { + mark_stmt_necessary (stmt, true); + return; + } if (!gimple_call_lhs (stmt)) return; break; diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 470b6a2..c25cb1d 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -2798,6 +2798,27 @@ finite_loop_p (struct loop *loop) loop->num); return true; } + + if (flag_finite_loop) + { + unsigned i; + vec<edge> exits = get_loop_exit_edges (loop); + edge ex; + + /* If the loop has any non-EH exit, we can assume it will terminate. 
*/ + FOR_EACH_VEC_ELT (exits, i, ex) + if (!(ex->flags & EDGE_EH)) + { + exits.release (); + if (dump_file) + fprintf (dump_file, "Assume loop %i to be finite: it has an exit " + "and -ffinite-loop is on.\n", loop->num); + return true; + } + + exits.release (); + } + return false; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c new file mode 100644 index 0000000..845268b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loop" } */ + +int +f1 (void) +{ + int i, j; + +#pragma acc parallel loop tile(2,3) + for (i = 1; i < 10; i++) + for (j = 1; j < 10; j++) + for (;;) + ; + + return i + j; +} + +int +f2 (void) +{ + int i, j, k; + +#pragma acc parallel loop tile(2,3) + for (i = 1; i < 10; i++) + for (j = 1; j < 10; j++) + for (k = 1; k < 10; k++) + ; + + return i + j; +} +/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */