Hi,
this patch commonizes the maximal iteration estimate logic in between SCEV and
loop-iv. Both are now using loop->nb_iterations_upper_bound. I decided to
keep same API for SCEV code as for RTL code, so I made
estimated_loop_iterations and max_loop_iterations to not try to recompute
bounds and ICE when invoked without SCEV fired on.
The patch updates RTL optimizers to use the estimated_loop_iterations and
max_loop_iterations. This has few advantages:
1) loop unroller can now take into account estimates stored into
loop structure by earlier pass (I think none exist though)
It is however better then using expected_loop_iterations since
profile might get out of date with expansion.
2) loop peeling code now use max iterations bounds. This makes it i.e.
to peel vectorizer prologues/epilogues/scalar loops so -fpeel-loops
now improves my low iteration count testcase by about 10%
3) Same for loop unswithcing.
I am not really friend with the new double_int API. I copied some existing
examples but find it ugly. Why do we miss operators for comparsions and
division? Why from_*/to_* can't be a cast at least for basic integer types?
Regtested/bootstrapped x86_64-linux, seems sane?
I also wonder if loop vectorizer should not update the estimates after
loop iteration count is reduced by vectorizing.
Honza
* loop-unswitch.c (unswitch_single_loop): Use
estimated_loop_iterations_int to prevent unswitching when loop
is known to not roll.
* tree-ssa-loop-niter.c (estimated_loop_iterations): Do not segfault
when SCEV is not initialized.
(max_loop_iterations): Likewise.
* tree-ssa-loop-unswitch.c (tree_ssa_unswitch_loops): Use
estimated_loop_iterations_int to prevent unswithcing when
loop is known to not roll.
* tree-scalar-evolution.c (scev_initialized_p): New function.
* tree-scalar-evolution.h (scev_initialized_p): Likewise.
* loop-unroll.c (decide_peel_once_rolling): Use
max_loop_iterations_int.
(unroll_loop_constant_iterations): Update
nb_iterations_upper_bound and nb_iterations_estimate.
(decide_unroll_runtime_iterations): Use
estimated_loop_iterations or max_loop_iterations;
(unroll_loop_runtime_iterations): fix profile updating.
(decide_peel_simple): Use estimated_loop_iterations
and max_loop_iterations.
(decide_unroll_stupid): Use estimated_loop_iterations
ad max_loop_iterations.
* loop-doloop.c (doloop_modify): Use max_loop_iterations_int.
(doloop_optimize): Likewise.
* loop-iv.c (iv_number_of_iterations): Use record_niter_bound.
(find_simple_exit): Likewise.
* cfgloop.h (struct niter_desc): Remove niter_max.
Index: loop-unswitch.c
===================================================================
*** loop-unswitch.c (revision 191867)
--- loop-unswitch.c (working copy)
*************** unswitch_single_loop (struct loop *loop,
*** 257,262 ****
--- 257,263 ----
rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
int repeat;
edge e;
+ HOST_WIDE_INT iterations;
/* Do not unswitch too much. */
if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
*************** unswitch_single_loop (struct loop *loop,
*** 299,305 ****
}
/* Nor if the loop usually does not roll. */
! if (expected_loop_iterations (loop) < 1)
{
if (dump_file)
fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
--- 300,307 ----
}
/* Nor if the loop usually does not roll. */
! iterations = estimated_loop_iterations_int (loop);
! if (iterations >= 0 && iterations <= 1)
{
if (dump_file)
fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
Index: tree-ssa-loop-niter.c
===================================================================
*** tree-ssa-loop-niter.c (revision 191867)
--- tree-ssa-loop-niter.c (working copy)
*************** estimate_numbers_of_iterations_loop (str
*** 3012,3020 ****
bool
estimated_loop_iterations (struct loop *loop, double_int *nit)
{
! estimate_numbers_of_iterations_loop (loop);
if (!loop->any_estimate)
! return false;
*nit = loop->nb_iterations_estimate;
return true;
--- 3012,3034 ----
bool
estimated_loop_iterations (struct loop *loop, double_int *nit)
{
! /* When SCEV information is available, try to update loop iterations
! estimate. Otherwise just return whatever we recorded earlier. */
! if (scev_initialized_p ())
! estimate_numbers_of_iterations_loop (loop);
!
! /* Even if the bound is not recorded, possibly we can derrive one from
! profile. */
if (!loop->any_estimate)
! {
! if (loop->header->count)
! {
! *nit = gcov_type_to_double_int
! (expected_loop_iterations_unbounded (loop) + 1);
! return true;
! }
! return false;
! }
*nit = loop->nb_iterations_estimate;
return true;
*************** estimated_loop_iterations (struct loop *
*** 3027,3033 ****
bool
max_loop_iterations (struct loop *loop, double_int *nit)
{
! estimate_numbers_of_iterations_loop (loop);
if (!loop->any_upper_bound)
return false;
--- 3041,3050 ----
bool
max_loop_iterations (struct loop *loop, double_int *nit)
{
! /* When SCEV information is available, try to update loop iterations
! estimate. Otherwise just return whatever we recorded earlier. */
! if (scev_initialized_p ())
! estimate_numbers_of_iterations_loop (loop);
if (!loop->any_upper_bound)
return false;
Index: tree-ssa-loop-unswitch.c
===================================================================
*** tree-ssa-loop-unswitch.c (revision 191867)
--- tree-ssa-loop-unswitch.c (working copy)
*************** tree_ssa_unswitch_loops (void)
*** 78,83 ****
--- 78,84 ----
loop_iterator li;
struct loop *loop;
bool changed = false;
+ HOST_WIDE_INT iterations;
/* Go through inner loops (only original ones). */
FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
*************** tree_ssa_unswitch_loops (void)
*** 102,107 ****
--- 103,118 ----
continue;
}
+ /* If the loop is not expected to iterate, there is no need
+ for unswitching. */
+ iterations = estimated_loop_iterations_int (loop);
+ if (iterations >= 0 && iterations <= 1)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, ";; Not unswitching, loop is not expected to
iterate\n");
+ continue;
+ }
+
changed |= tree_unswitch_single_loop (loop, 0);
}
Index: tree-scalar-evolution.c
===================================================================
*** tree-scalar-evolution.c (revision 191867)
--- tree-scalar-evolution.c (working copy)
*************** scev_initialize (void)
*** 3124,3129 ****
--- 3124,3137 ----
}
}
+ /* Return true if SCEV is initialized. */
+
+ bool
+ scev_initialized_p (void)
+ {
+ return scalar_evolution_info != NULL;
+ }
+
/* Cleans up the information cached by the scalar evolutions analysis
in the hash table. */
Index: tree-scalar-evolution.h
===================================================================
*** tree-scalar-evolution.h (revision 191867)
--- tree-scalar-evolution.h (working copy)
*************** extern tree number_of_exit_cond_executio
*** 27,32 ****
--- 27,33 ----
extern gimple get_loop_exit_condition (const struct loop *);
extern void scev_initialize (void);
+ extern bool scev_initialized_p (void);
extern void scev_reset (void);
extern void scev_reset_htab (void);
extern void scev_finalize (void);
Index: loop-unroll.c
===================================================================
*** loop-unroll.c (revision 191867)
--- loop-unroll.c (working copy)
*************** decide_peel_once_rolling (struct loop *l
*** 341,347 ****
|| desc->assumptions
|| desc->infinite
|| !desc->const_iter
! || desc->niter != 0)
{
if (dump_file)
fprintf (dump_file,
--- 341,348 ----
|| desc->assumptions
|| desc->infinite
|| !desc->const_iter
! || (desc->niter != 0
! && max_loop_iterations_int (loop) != 0))
{
if (dump_file)
fprintf (dump_file,
*************** unroll_loop_constant_iterations (struct
*** 695,701 ****
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
! desc->niter_max -= exit_mod;
}
SET_BIT (wont_exit, 1);
--- 696,708 ----
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
! loop->nb_iterations_upper_bound -= double_int::from_uhwi (exit_mod);
! if (loop->any_estimate
! && double_int::from_uhwi (exit_mod).ule
! (loop->nb_iterations_estimate))
! loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod);
! else
! loop->any_estimate = false;
}
SET_BIT (wont_exit, 1);
*************** unroll_loop_constant_iterations (struct
*** 733,739 ****
apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
desc->niter -= exit_mod + 1;
! desc->niter_max -= exit_mod + 1;
desc->noloop_assumptions = NULL_RTX;
SET_BIT (wont_exit, 0);
--- 740,751 ----
apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
desc->niter -= exit_mod + 1;
! if (loop->any_estimate
! && double_int::from_uhwi (exit_mod + 1).ule
! (loop->nb_iterations_estimate))
! loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod +
1);
! else
! loop->any_estimate = false;
desc->noloop_assumptions = NULL_RTX;
SET_BIT (wont_exit, 0);
*************** unroll_loop_constant_iterations (struct
*** 782,788 ****
}
desc->niter /= max_unroll + 1;
! desc->niter_max /= max_unroll + 1;
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
--- 794,808 ----
}
desc->niter /= max_unroll + 1;
! loop->nb_iterations_upper_bound
! = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (exit_mod
! + 1),
! FLOOR_DIV_EXPR);
! if (loop->any_estimate)
! loop->nb_iterations_estimate
! = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (exit_mod
! + 1),
! FLOOR_DIV_EXPR);
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
*************** decide_unroll_runtime_iterations (struct
*** 803,808 ****
--- 823,829 ----
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL))
{
*************** decide_unroll_runtime_iterations (struct
*** 856,864 ****
}
/* If we have profile feedback, check whether the loop rolls. */
! if ((loop->header->count
! && expected_loop_iterations (loop) < 2 * nunroll)
! || desc->niter_max < 2 * nunroll)
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
--- 877,886 ----
}
/* If we have profile feedback, check whether the loop rolls. */
! if ((estimated_loop_iterations (loop, &iterations)
! || max_loop_iterations (loop, &iterations))
! && iterations.fits_shwi ()
! && iterations.to_shwi () <= 2 * nunroll)
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
*************** unroll_loop_runtime_iterations (struct l
*** 1092,1097 ****
--- 1114,1120 ----
single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
+ e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
}
*************** unroll_loop_runtime_iterations (struct l
*** 1111,1116 ****
--- 1134,1140 ----
single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
+ e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
}
*************** unroll_loop_runtime_iterations (struct l
*** 1172,1184 ****
desc->niter_expr =
simplify_gen_binary (UDIV, desc->mode, old_niter,
GEN_INT (max_unroll + 1));
! desc->niter_max /= max_unroll + 1;
if (exit_at_end)
{
desc->niter_expr =
simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
desc->noloop_assumptions = NULL_RTX;
! desc->niter_max--;
}
if (dump_file)
--- 1196,1221 ----
desc->niter_expr =
simplify_gen_binary (UDIV, desc->mode, old_niter,
GEN_INT (max_unroll + 1));
! loop->nb_iterations_upper_bound
! = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (max_unroll
! + 1),
! FLOOR_DIV_EXPR);
! if (loop->any_estimate)
! loop->nb_iterations_estimate
! = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (max_unroll
! + 1),
! FLOOR_DIV_EXPR);
if (exit_at_end)
{
desc->niter_expr =
simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
desc->noloop_assumptions = NULL_RTX;
! --loop->nb_iterations_upper_bound;
! if (loop->any_estimate
! && loop->nb_iterations_estimate != double_int_zero)
! --loop->nb_iterations_estimate;
! else
! loop->any_estimate = false;
}
if (dump_file)
*************** decide_peel_simple (struct loop *loop, i
*** 1196,1201 ****
--- 1233,1239 ----
{
unsigned npeel;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_PEEL))
{
*************** decide_peel_simple (struct loop *loop, i
*** 1239,1261 ****
return;
}
! if (loop->header->count)
{
! unsigned niter = expected_loop_iterations (loop);
! if (niter + 1 > npeel)
{
if (dump_file)
{
fprintf (dump_file, ";; Not peeling loop, rolls too much (");
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
! (HOST_WIDEST_INT) (niter + 1));
fprintf (dump_file, " iterations > %d [maximum peelings])\n",
npeel);
}
return;
}
! npeel = niter + 1;
}
else
{
/* For now we have no good heuristics to decide whether loop peeling
--- 1277,1306 ----
return;
}
! /* If we have realistic estimate on number of iterations, use it. */
! if (estimated_loop_iterations (loop, &iterations))
{
! if (!iterations.fits_shwi ()
! || iterations.to_shwi () + 1 > npeel)
{
if (dump_file)
{
fprintf (dump_file, ";; Not peeling loop, rolls too much (");
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
! (HOST_WIDEST_INT) (iterations.to_shwi () + 1));
fprintf (dump_file, " iterations > %d [maximum peelings])\n",
npeel);
}
return;
}
! npeel = iterations.to_shwi () + 1;
}
+ /* If we have small enough bound on iterations, we can still peel
(completely
+ unroll). */
+ else if (max_loop_iterations (loop, &iterations)
+ && iterations.fits_shwi ()
+ && iterations.to_shwi () + 1 <= npeel)
+ npeel = iterations.to_shwi () + 1;
else
{
/* For now we have no good heuristics to decide whether loop peeling
*************** decide_unroll_stupid (struct loop *loop,
*** 1349,1354 ****
--- 1394,1400 ----
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL_ALL))
{
*************** decide_unroll_stupid (struct loop *loop,
*** 1401,1409 ****
}
/* If we have profile feedback, check whether the loop rolls. */
! if ((loop->header->count
! && expected_loop_iterations (loop) < 2 * nunroll)
! || desc->niter_max < 2 * nunroll)
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
--- 1447,1456 ----
}
/* If we have profile feedback, check whether the loop rolls. */
! if ((estimated_loop_iterations (loop, &iterations)
! || max_loop_iterations (loop, &iterations))
! && iterations.fits_shwi ()
! && iterations.to_shwi () <= 2 * nunroll)
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
Index: loop-doloop.c
===================================================================
*** loop-doloop.c (revision 191867)
--- loop-doloop.c (working copy)
*************** doloop_modify (struct loop *loop, struct
*** 410,415 ****
--- 410,416 ----
basic_block loop_end = desc->out_edge->src;
enum machine_mode mode;
rtx true_prob_val;
+ HOST_WIDE_INT iterations;
jump_insn = BB_END (loop_end);
*************** doloop_modify (struct loop *loop, struct
*** 460,468 ****
/* Determine if the iteration counter will be non-negative.
Note that the maximum value loaded is iterations_max - 1. */
! if (desc->niter_max
! <= ((unsigned HOST_WIDEST_INT) 1
! << (GET_MODE_PRECISION (mode) - 1)))
nonneg = 1;
break;
--- 461,471 ----
/* Determine if the iteration counter will be non-negative.
Note that the maximum value loaded is iterations_max - 1. */
! iterations = max_loop_iterations_int (loop);
! if (iterations >= 0
! && (iterations
! <= ((unsigned HOST_WIDEST_INT) 1
! << (GET_MODE_PRECISION (mode) - 1))))
nonneg = 1;
break;
*************** doloop_modify (struct loop *loop, struct
*** 548,556 ****
{
rtx init;
unsigned level = get_loop_level (loop) + 1;
init = gen_doloop_begin (counter_reg,
desc->const_iter ? desc->niter_expr : const0_rtx,
! GEN_INT (desc->niter_max),
GEN_INT (level));
if (init)
{
--- 551,567 ----
{
rtx init;
unsigned level = get_loop_level (loop) + 1;
+ double_int iter;
+ rtx iter_rtx;
+
+ if (!max_loop_iterations (loop, &iter)
+ || !iter.fits_shwi ())
+ iter_rtx = const0_rtx;
+ else
+ iter_rtx = GEN_INT (iter.to_shwi());
init = gen_doloop_begin (counter_reg,
desc->const_iter ? desc->niter_expr : const0_rtx,
! iter_rtx,
GEN_INT (level));
if (init)
{
*************** doloop_optimize (struct loop *loop)
*** 608,613 ****
--- 619,625 ----
struct niter_desc *desc;
unsigned word_mode_size;
unsigned HOST_WIDE_INT word_mode_max;
+ double_int iter;
if (dump_file)
fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
*************** doloop_optimize (struct loop *loop)
*** 658,664 ****
count = copy_rtx (desc->niter_expr);
iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
! iterations_max = GEN_INT (desc->niter_max);
level = get_loop_level (loop) + 1;
/* Generate looping insn. If the pattern FAILs then give up trying
--- 670,680 ----
count = copy_rtx (desc->niter_expr);
iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
! if (!max_loop_iterations (loop, &iter)
! || !iter.fits_shwi ())
! iterations_max = const0_rtx;
! else
! iterations_max = GEN_INT (iter.to_shwi());
level = get_loop_level (loop) + 1;
/* Generate looping insn. If the pattern FAILs then give up trying
*************** doloop_optimize (struct loop *loop)
*** 678,684 ****
computed, we must be sure that the number of iterations fits into
the new mode. */
&& (word_mode_size >= GET_MODE_PRECISION (mode)
! || desc->niter_max <= word_mode_max))
{
if (word_mode_size > GET_MODE_PRECISION (mode))
{
--- 694,700 ----
computed, we must be sure that the number of iterations fits into
the new mode. */
&& (word_mode_size >= GET_MODE_PRECISION (mode)
! || iter <= double_int::from_shwi (word_mode_max)))
{
if (word_mode_size > GET_MODE_PRECISION (mode))
{
Index: loop-iv.c
===================================================================
*** loop-iv.c (revision 191867)
--- loop-iv.c (working copy)
*************** iv_number_of_iterations (struct loop *lo
*** 2293,2302 ****
desc->const_iter = false;
desc->niter_expr = NULL_RTX;
- desc->niter_max = 0;
- if (loop->any_upper_bound
- && loop->nb_iterations_upper_bound.fits_uhwi ())
- desc->niter_max = loop->nb_iterations_upper_bound.low;
cond = GET_CODE (condition);
gcc_assert (COMPARISON_P (condition));
--- 2293,2298 ----
*************** iv_number_of_iterations (struct loop *lo
*** 2566,2574 ****
? iv0.base
: mode_mmin);
max = (up - down) / inc + 1;
! if (!desc->niter_max
! || max < desc->niter_max)
! desc->niter_max = max;
if (iv0.step == const0_rtx)
{
--- 2562,2569 ----
? iv0.base
: mode_mmin);
max = (up - down) / inc + 1;
! record_niter_bound (loop, double_int::from_shwi (max),
! false, true);
if (iv0.step == const0_rtx)
{
*************** iv_number_of_iterations (struct loop *lo
*** 2779,2792 ****
unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);
desc->const_iter = true;
! desc->niter_max = desc->niter = val & GET_MODE_MASK (desc->mode);
}
else
{
max = determine_max_iter (loop, desc, old_niter);
! if (!desc->niter_max
! || max < desc->niter_max)
! desc->niter_max = max;
/* simplify_using_initial_values does a copy propagation on the
registers
in the expression for the number of iterations. This prolongs life
--- 2774,2789 ----
unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);
desc->const_iter = true;
! desc->niter = val & GET_MODE_MASK (desc->mode);
! record_niter_bound (loop, double_int::from_shwi (desc->niter),
! false, true);
}
else
{
max = determine_max_iter (loop, desc, old_niter);
! gcc_assert (max);
! record_niter_bound (loop, double_int::from_shwi (max),
! false, true);
/* simplify_using_initial_values does a copy propagation on the
registers
in the expression for the number of iterations. This prolongs life
*************** zero_iter_simplify:
*** 2811,2817 ****
zero_iter:
desc->const_iter = true;
desc->niter = 0;
! desc->niter_max = 0;
desc->noloop_assumptions = NULL_RTX;
desc->niter_expr = const0_rtx;
return;
--- 2808,2815 ----
zero_iter:
desc->const_iter = true;
desc->niter = 0;
! record_niter_bound (loop, double_int_zero,
! true, true);
desc->noloop_assumptions = NULL_RTX;
desc->niter_expr = const0_rtx;
return;
*************** find_simple_exit (struct loop *loop, str
*** 2945,2953 ****
print_rtl (dump_file, desc->niter_expr);
fprintf (dump_file, "\n");
! fprintf (dump_file, " upper bound: ");
! fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max);
! fprintf (dump_file, "\n");
}
else
fprintf (dump_file, "Loop %d is not simple.\n", loop->num);
--- 2943,2952 ----
print_rtl (dump_file, desc->niter_expr);
fprintf (dump_file, "\n");
! fprintf (dump_file, " upper bound: %li\n",
! (long)max_loop_iterations_int (loop));
! fprintf (dump_file, " realistic bound: %li\n",
! (long)estimated_loop_iterations_int (loop));
}
else
fprintf (dump_file, "Loop %d is not simple.\n", loop->num);
Index: cfgloop.h
===================================================================
*** cfgloop.h (revision 191867)
--- cfgloop.h (working copy)
*************** struct niter_desc
*** 386,394 ****
/* Number of iterations if constant. */
unsigned HOST_WIDEST_INT niter;
- /* Upper bound on the number of iterations. */
- unsigned HOST_WIDEST_INT niter_max;
-
/* Assumptions under that the rest of the information is valid. */
rtx assumptions;
--- 386,391 ----