On Tue, 30 Oct 2012, Jan Hubicka wrote: > Hi, > this patch implements the second part of planned change - to determine loop > bounds > based by shortest path discovery. This allows to bound number of iterations > on loops with bounds in statements that do not dominate the latch. > > I originally planned to implement this as part of maybe_lower_iteration_bound, > but I found doing it separately is more readable. While both performs walk of > the loop body, both do that for different reasons. > > discover_iteration_bound_by_body_walk walks from header to latch, while > maybe_lower_iteration_bound walks from header to first statement with side > effects looking if there is exit on the way. > > Both walks can be skipped for many loops, but each with different reasons. > > Bootstrapped/regtested x86_64-linux, OK? > > * tree-ssa-loop-niter.c (double_int_cmp, bound_index, > discover_iteration_bound_by_body_walk): New functions. > (discover_iteration_bound_by_body_walk): Use it. > > * gcc.dg/tree-ssa/loop-38.c: New testcase. > > Index: tree-ssa-loop-niter.c > =================================================================== > --- tree-ssa-loop-niter.c (revision 192989) > +++ tree-ssa-loop-niter.c (working copy) > @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val) > return ret; > } > > +/* Compare double ints, callback for qsort. */ > + > +int > +double_int_cmp (const void *p1, const void *p2) > +{ > + const double_int *d1 = (const double_int *)p1; > + const double_int *d2 = (const double_int *)p2; > + if (*d1 == *d2) > + return 0; > + if (d1->ult (*d2)) > + return -1; > + return 1; > +} > + > +/* Return index of BOUND in BOUNDS array sorted in increasing order. > + Lookup by binary search. */ > + > +int > +bound_index (VEC (double_int, heap) *bounds, double_int bound) > +{ > + unsigned int end = VEC_length (double_int, bounds); > + unsigned int begin = 0; > + > + /* Find a matching index by means of a binary search. */ > + while (begin != end) > + { > + unsigned int middle = (begin + end) / 2; > + double_int index = VEC_index (double_int, bounds, middle); > + > + if (index == bound) > + return middle; > + else if (index.ult (bound)) > + begin = middle + 1; > + else > + end = middle; > + } > + gcc_unreachable (); > +} > + > +/* Used to hold vector of queues of basic blocks bellow. */ > +typedef VEC (basic_block, heap) *bb_queue; > +DEF_VEC_P(bb_queue); > +DEF_VEC_ALLOC_P(bb_queue,heap); > + > +/* We recorded loop bounds only for statements dominating loop latch (and > thus > + executed each loop iteration). If there are any bounds on statements not > + dominating the loop latch we can improve the estimate by walking the loop > + body and seeing if every path from loop header to loop latch contains > + some bounded statement. */ > + > +static void > +discover_iteration_bound_by_body_walk (struct loop *loop) > +{ > + pointer_map_t *bb_bounds; > + struct nb_iter_bound *elt; > + VEC (double_int, heap) *bounds = NULL; > + VEC (bb_queue, heap) *queues = NULL; > + bb_queue queue = NULL; > + ptrdiff_t queue_index; > + ptrdiff_t latch_index = 0; > + pointer_map_t *visited; > + > + /* Discover what bounds may interest us. */ > + for (elt = loop->bounds; elt; elt = elt->next) > + { > + double_int bound = elt->bound; > + > + /* Exit terminates loop at given iteration, while non-exits produce > undefined > + effect on the next iteration. */ > + if (!elt->is_exit) > + { > + bound += double_int_one; > + /* Overflow, give up on this bound. */ > + if (bound == double_int_zero) > + continue; > + } > + > + if (!loop->any_upper_bound > + || bound.ult (loop->nb_iterations_upper_bound)) > + VEC_safe_push (double_int, heap, bounds, bound); > + } > + > + /* Exit early if there is nothing to do. */ > + if (!bounds) > + return; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n"); > + > + /* Sort the bounds in decreasing order. */ > + qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds), > + sizeof (double_int), double_int_cmp); > + > + /* For every basic block record the lowest bound that is guaranteed to > + terminate the loop. */ > + > + bb_bounds = pointer_map_create (); > + for (elt = loop->bounds; elt; elt = elt->next) > + { > + double_int bound = elt->bound; > + if (!elt->is_exit) > + { > + bound += double_int_one; > + /* Overflow, give up on this bound. */ > + if (bound == double_int_zero) > + continue; > + } > + > + if (!loop->any_upper_bound > + || bound.ult (loop->nb_iterations_upper_bound)) > + { > + ptrdiff_t index = bound_index (bounds, bound); > + void **entry = pointer_map_contains (bb_bounds, > + gimple_bb (elt->stmt)); > + if (!entry) > + *pointer_map_insert (bb_bounds, > + gimple_bb (elt->stmt)) = (void *)index; > + else if ((ptrdiff_t)*entry > index) > + *entry = (void *)index; > + } > + } > + > + visited = pointer_map_create ();
visited is a poor name for a map ... Otherwise looks ok. Thanks, Richard. > + > + /* Perform shortest path discovery loop->header ... loop->latch. > + > + The "distance" is given by the smallest loop bound of basic block > + present in the path and we look for path with largest smallest bound > + on it. > + > + To avoid the need for fibonaci heap on double ints we simply compress > + double ints into indexes to BOUNDS array and then represent the queue > + as arrays of queues for every index. > + Index of VEC_length (BOUNDS) means that the execution of given BB has > + no bounds determined. > + > + VISITED is a pointer map translating basic block into smallest index > + it was inserted into the priority queue with. */ > + latch_index = -1; > + > + /* Start walk in loop header with index set to infinite bound. */ > + queue_index = VEC_length (double_int, bounds); > + VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1); > + VEC_safe_push (basic_block, heap, queue, loop->header); > + VEC_replace (bb_queue, queues, queue_index, queue); > + *pointer_map_insert (visited, loop->header) = (void *)queue_index; > + > + for (; queue_index >= 0; queue_index--) > + { > + if (latch_index < queue_index) > + { > + while (VEC_length (basic_block, > + VEC_index (bb_queue, queues, queue_index))) > + { > + basic_block bb; > + ptrdiff_t bound_index = queue_index; > + void **entry; > + edge e; > + edge_iterator ei; > + > + queue = VEC_index (bb_queue, queues, queue_index); > + bb = VEC_pop (basic_block, queue); > + > + /* OK, we later increased BB priority, skip it. */ > + if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index) > + continue; > + > + /* See if we can improve the bound. */ > + entry = pointer_map_contains (bb_bounds, bb); > + if (entry && (ptrdiff_t)*entry < bound_index) > + bound_index = (ptrdiff_t)*entry; > + > + /* Insert succesors into the queue, watch for latch edge > + and record greatest index we saw. */ > + FOR_EACH_EDGE (e, ei, bb->succs) > + { > + bool insert = false; > + void **entry; > + > + if (loop_exit_edge_p (loop, e)) > + continue; > + > + if (e == loop_latch_edge (loop) > + && latch_index < bound_index) > + latch_index = bound_index; > + else if (!(entry = pointer_map_contains (visited, e->dest))) > + { > + insert = true; > + *pointer_map_insert (visited, e->dest) = (void > *)bound_index; > + } > + else if ((ptrdiff_t)*entry < bound_index) > + { > + insert = true; > + *entry = (void *)bound_index; > + } > + > + if (insert) > + { > + bb_queue queue2 = VEC_index (bb_queue, queues, > bound_index); > + VEC_safe_push (basic_block, heap, queue2, e->dest); > + VEC_replace (bb_queue, queues, bound_index, queue2); > + } > + } > + } > + } > + else > + VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index)); > + } > + > + gcc_assert (latch_index >= 0); > + if (latch_index < VEC_length (double_int, bounds)) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Found better loop bound "); > + dump_double_int (dump_file, > + VEC_index (double_int, bounds, latch_index), true); > + fprintf (dump_file, "\n"); > + } > + record_niter_bound (loop, VEC_index (double_int, bounds, latch_index), > + false, true); > + } > + > + VEC_free (bb_queue, heap, queues); > + pointer_map_destroy (bb_bounds); > + pointer_map_destroy (visited); > +} > + > /* See if every path cross the loop goes through a statement that is known > to not execute at the last iteration. In that case we can decrese > iteration > count by 1. */ > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str > > infer_loop_bounds_from_undefined (loop); > > + discover_iteration_bound_by_body_walk (loop); > + > maybe_lower_iteration_bound (loop); > > /* If we have a measured profile, use it to estimate the number of > Index: testsuite/gcc.dg/tree-ssa/loop-38.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ > +int a[10]; > +int b[11]; > +t(int n) > +{ > + int i; > + int sum = 0; > + for (i=0;i<n;i++) > + if (q()) > + sum+=a[i]; > + else > + sum+=b[i]; > + return sum; > +} > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */ > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" > } } */ > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend