On Mon, 22 Oct 2012, Jan Hubicka wrote: > Hi, > this patch updates tree_unroll_loops_completely to update loop closed SSA. > WHen unlooping the loop some basic blocks may move out of the other loops > and that makes the need to check their use and add PHIs. > Fortunately update_loop_close_ssa already support local updates and thus > this can be done quite cheaply by recoridng the blocks in fix_bb_placements > and passing it along. > > I tried the patch with TODO_update_ssa_no_phi but that causes weird bug > in 3 fortran testcases because VOPS seems to not be in the loop closed > form. We can track this incrementally I suppose.
Yeah, we need to update the checking code to verify loop-closedness of VOPs and see why we don't properly rewrite into it. > Bootstrapped/regtested x86_64-linux, OK? Ok. Thanks, Richard. > Honza > > PR middle-end/54967 > * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Take > loop_closed_ssa_invalidated parameter; pass it along. > (canonicalize_loop_induction_variables): Update loop closed SSA. > (tree_unroll_loops_completely): Likewise. > * cfgloop.h (unloop): UPdate prototype. > * cfgloopmanip.c (fix_bb_placements): Record BBs updated into > optional bitmap. > (unloop): Update to pass along loop_closed_ssa_invalidated. > > * gfortran.dg/pr54967.f90: New testcase. > Index: tree-ssa-loop-ivcanon.c > =================================================================== > --- tree-ssa-loop-ivcanon.c (revision 192632) > +++ tree-ssa-loop-ivcanon.c (working copy) > @@ -390,13 +390,16 @@ loop_edge_to_cancel (struct loop *loop) > EXIT is the exit of the loop that should be eliminated. > IRRED_INVALIDATED is used to bookkeep if information about > irreducible regions may become invalid as a result > - of the transformation. */ > + of the transformation. > + LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case > + when we need to go into loop closed SSA form. */ > > static bool > try_unroll_loop_completely (struct loop *loop, > edge exit, tree niter, > enum unroll_level ul, > - bool *irred_invalidated) > + bool *irred_invalidated, > + bitmap loop_closed_ssa_invalidated) > { > unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; > gimple cond; > @@ -562,7 +565,7 @@ try_unroll_loop_completely (struct loop > locus = latch_edge->goto_locus; > > /* Unloop destroys the latch edge. */ > - unloop (loop, irred_invalidated); > + unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); > > /* Create new basic block for the latch edge destination and wire > it in. */ > @@ -615,7 +618,8 @@ static bool > canonicalize_loop_induction_variables (struct loop *loop, > bool create_iv, enum unroll_level ul, > bool try_eval, > - bool *irred_invalidated) > + bool *irred_invalidated, > + bitmap loop_closed_ssa_invalidated) > { > edge exit = NULL; > tree niter; > @@ -663,7 +667,8 @@ canonicalize_loop_induction_variables (s > (int)max_loop_iterations_int (loop)); > } > > - if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated)) > + if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated, > + loop_closed_ssa_invalidated)) > return true; > > if (create_iv > @@ -683,13 +688,15 @@ canonicalize_induction_variables (void) > struct loop *loop; > bool changed = false; > bool irred_invalidated = false; > + bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); > > FOR_EACH_LOOP (li, loop, 0) > { > changed |= canonicalize_loop_induction_variables (loop, > true, UL_SINGLE_ITER, > true, > - &irred_invalidated); > + &irred_invalidated, > + > loop_closed_ssa_invalidated); > } > gcc_assert (!need_ssa_update_p (cfun)); > > @@ -701,6 +708,13 @@ canonicalize_induction_variables (void) > evaluation could reveal new information. */ > scev_reset (); > > + if (!bitmap_empty_p (loop_closed_ssa_invalidated)) > + { > + gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); > + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); > + } > + BITMAP_FREE (loop_closed_ssa_invalidated); > + > if (changed) > return TODO_cleanup_cfg; > return 0; > @@ -794,11 +808,15 @@ tree_unroll_loops_completely (bool may_i > bool changed; > enum unroll_level ul; > int iteration = 0; > + bool irred_invalidated = false; > > do > { > - bool irred_invalidated = false; > changed = false; > + bitmap loop_closed_ssa_invalidated = NULL; > + > + if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) > + loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); > > FOR_EACH_LOOP (li, loop, 0) > { > @@ -812,9 +830,9 @@ tree_unroll_loops_completely (bool may_i > else > ul = UL_NO_GROWTH; > > - if (canonicalize_loop_induction_variables (loop, false, ul, > - !flag_tree_loop_ivcanon, > - &irred_invalidated)) > + if (canonicalize_loop_induction_variables > + (loop, false, ul, !flag_tree_loop_ivcanon, > + &irred_invalidated, loop_closed_ssa_invalidated)) > { > changed = true; > /* If we'll continue unrolling, we need to propagate constants > @@ -834,11 +852,14 @@ tree_unroll_loops_completely (bool may_i > struct loop **iter; > unsigned i; > > - if (irred_invalidated > - && loops_state_satisfies_p > (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) > - mark_irreducible_loops (); > + /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. > */ > > - update_ssa (TODO_update_ssa); > + if (loop_closed_ssa_invalidated > + && !bitmap_empty_p (loop_closed_ssa_invalidated)) > + rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, > + TODO_update_ssa); > + else > + update_ssa (TODO_update_ssa); > > /* Propagate the constants within the new basic blocks. */ > FOR_EACH_VEC_ELT (loop_p, father_stack, i, iter) > @@ -861,12 +882,22 @@ tree_unroll_loops_completely (bool may_i > /* Clean up the information about numbers of iterations, since > complete unrolling might have invalidated it. */ > scev_reset (); > +#ifdef ENABLE_CHECKING > + if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) > + verify_loop_closed_ssa (true); > +#endif > } > + if (loop_closed_ssa_invalidated) > + BITMAP_FREE (loop_closed_ssa_invalidated); > } > while (changed > && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS)); > > VEC_free (loop_p, stack, father_stack); > > + if (irred_invalidated > + && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) > + mark_irreducible_loops (); > + > return 0; > } > Index: cfgloop.h > =================================================================== > --- cfgloop.h (revision 192632) > +++ cfgloop.h (working copy) > @@ -321,7 +321,7 @@ extern struct loop *loopify (edge, edge, > struct loop * loop_version (struct loop *, void *, > basic_block *, unsigned, unsigned, unsigned, bool); > extern bool remove_path (edge); > -extern void unloop (struct loop *, bool *); > +extern void unloop (struct loop *, bool *, bitmap); > extern void scale_loop_frequencies (struct loop *, int, int); > > /* Induction variable analysis. */ > Index: cfgloopmanip.c > =================================================================== > --- cfgloopmanip.c (revision 192632) > +++ cfgloopmanip.c (working copy) > @@ -36,7 +36,7 @@ static bool rpe_enum_p (const_basic_bloc > static int find_path (edge, basic_block **); > static void fix_loop_placements (struct loop *, bool *); > static bool fix_bb_placement (basic_block); > -static void fix_bb_placements (basic_block, bool *); > +static void fix_bb_placements (basic_block, bool *, bitmap); > > /* Checks whether basic block BB is dominated by DATA. */ > static bool > @@ -159,11 +159,15 @@ fix_loop_placement (struct loop *loop) > successors we consider edges coming out of the loops. > > If the changes may invalidate the information about irreducible regions, > - IRRED_INVALIDATED is set to true. */ > + IRRED_INVALIDATED is set to true. > + > + If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with > + changed loop_father are collected there. */ > > static void > fix_bb_placements (basic_block from, > - bool *irred_invalidated) > + bool *irred_invalidated, > + bitmap loop_closed_ssa_invalidated) > { > sbitmap in_queue; > basic_block *queue, *qtop, *qbeg, *qend; > @@ -218,6 +222,8 @@ fix_bb_placements (basic_block from, > /* Ordinary basic block. */ > if (!fix_bb_placement (from)) > continue; > + if (loop_closed_ssa_invalidated) > + bitmap_set_bit (loop_closed_ssa_invalidated, from->index); > target_loop = from->loop_father; > } > > @@ -312,7 +318,7 @@ remove_path (edge e) > { > f = loop_outer (l); > if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest)) > - unloop (l, &irred_invalidated); > + unloop (l, &irred_invalidated, NULL); > } > > /* Identify the path. */ > @@ -385,7 +391,7 @@ remove_path (edge e) > > /* Fix placements of basic blocks inside loops and the placement of > loops in the loop tree. */ > - fix_bb_placements (from, &irred_invalidated); > + fix_bb_placements (from, &irred_invalidated, NULL); > fix_loop_placements (from->loop_father, &irred_invalidated); > > if (irred_invalidated > @@ -892,10 +898,14 @@ loopify (edge latch_edge, edge header_ed > have no successor, which caller is expected to fix somehow. > > If this may cause the information about irreducible regions to become > - invalid, IRRED_INVALIDATED is set to true. */ > + invalid, IRRED_INVALIDATED is set to true. > + > + LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store > + basic blocks that had non-trivial update on their loop_father.*/ > > void > -unloop (struct loop *loop, bool *irred_invalidated) > +unloop (struct loop *loop, bool *irred_invalidated, > + bitmap loop_closed_ssa_invalidated) > { > basic_block *body; > struct loop *ploop; > @@ -937,7 +947,7 @@ unloop (struct loop *loop, bool *irred_i > /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if > there is an irreducible region inside the cancelled loop, the flags will > be still correct. */ > - fix_bb_placements (latch, &dummy); > + fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated); > } > > /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that > @@ -965,7 +975,7 @@ fix_loop_placements (struct loop *loop, > to the loop. So call fix_bb_placements to fix up the placement > of the preheader and (possibly) of its predecessors. */ > fix_bb_placements (loop_preheader_edge (loop)->src, > - irred_invalidated); > + irred_invalidated, NULL); > loop = outer; > } > } > Index: testsuite/gfortran.dg/pr54967.f90 > =================================================================== > --- testsuite/gfortran.dg/pr54967.f90 (revision 0) > +++ testsuite/gfortran.dg/pr54967.f90 (revision 0) > @@ -0,0 +1,36 @@ > + SUBROUTINE calc_S_derivs() > + INTEGER, DIMENSION(6, 2) :: c_map_mat > + INTEGER, DIMENSION(:), POINTER:: C_mat > + DO j=1,3 > + DO m=j,3 > + n=n+1 > + c_map_mat(n,1)=j > + IF(m==j)CYCLE > + c_map_mat(n,2)=m > + END DO > + END DO > + DO m=1,6 > + DO j=1,2 > + IF(c_map_mat(m,j)==0)CYCLE > + CALL foo(C_mat(c_map_mat(m,j))) > + END DO > + END DO > + END SUBROUTINE calc_S_derivs > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend