Re: [PATCH] Add helper to sort sibling loops, do so in GRAPHITE

2017-09-26 Thread Sebastian Pop
On Mon, Sep 25, 2017 at 8:18 AM, Richard Biener  wrote:

>
> The following adds a helper to sort the sibling loop list in RPO order
> as it can get messed up (we only ever add loops at the start of the list).
> GRAPHITE SCOP detection assumes this list is sorted naturally in RPO
> order (as a flow_loops_find would generate).
>
> Turns out it helps a few more loops in SPEC CPU 2006 to get optimized
> by GRAPHITE.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, SPEC 2k6 is happy
> with GRAPHITE.
>
> I've tested the variant below with the extra call in pass_tree_loop_init
> but as no pass cares about the sibling list order but graphite I'll not
> commit that hunk.
>
> Applied to trunk (w/o that hunk)
>
> Richard.
>
> 2017-09-25  Richard Biener  
>
> * cfgloop.h (sort_sibling_loops): Declare.
> * cfgloop.c (sort_sibling_loops_cmp): New helper.
> (sort_sibling_loops): New function sorting the sibling loop list
> in RPO order.
> * graphite.c (graphite_transform_loops): Sort sibling loops.
>
>
Looks good.  Thanks!


[PATCH] Add helper to sort sibling loops, do so in GRAPHITE

2017-09-25 Thread Richard Biener

The following adds a helper to sort the sibling loop list in RPO order
as it can get messed up (we only ever add loops at the start of the list).
GRAPHITE SCOP detection assumes this list is sorted naturally in RPO
order (as a flow_loops_find would generate).

Turns out it helps a few more loops in SPEC CPU 2006 to get optimized
by GRAPHITE.

Bootstrapped and tested on x86_64-unknown-linux-gnu, SPEC 2k6 is happy
with GRAPHITE.

I've tested the variant below with the extra call in pass_tree_loop_init
but as no pass cares about the sibling list order but graphite I'll not
commit that hunk.

Applied to trunk (w/o that hunk)

Richard.

2017-09-25  Richard Biener  

* cfgloop.h (sort_sibling_loops): Declare.
* cfgloop.c (sort_sibling_loops_cmp): New helper.
(sort_sibling_loops): New function sorting the sibling loop list
in RPO order.
* graphite.c (graphite_transform_loops): Sort sibling loops.

Index: gcc/cfgloop.c
===
--- gcc/cfgloop.c   (revision 253144)
+++ gcc/cfgloop.c   (working copy)
@@ -521,6 +521,58 @@ flow_loops_find (struct loops *loops)
   return loops;
 }
 
+/* qsort helper for sort_sibling_loops.  */
+
+static int *sort_sibling_loops_cmp_rpo;
+static int
+sort_sibling_loops_cmp (const void *la_, const void *lb_)
+{
+  const struct loop *la = *(const struct loop * const *)la_;
+  const struct loop *lb = *(const struct loop * const *)lb_;
+  return (sort_sibling_loops_cmp_rpo[la->header->index]
+ - sort_sibling_loops_cmp_rpo[lb->header->index]);
+}
+
+/* Sort sibling loops in RPO order.  */
+
+void
+sort_sibling_loops (function *fn)
+{
+  /* Match flow_loops_find in the order we sort sibling loops.  */
+  sort_sibling_loops_cmp_rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
+  int *rc_order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
+  pre_and_rev_post_order_compute_fn (fn, NULL, rc_order, false);
+  for (int i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; ++i)
+sort_sibling_loops_cmp_rpo[rc_order[i]] = i;
+  free (rc_order);
+
+  auto_vec siblings;
+  loop_p loop;
+  FOR_EACH_LOOP_FN (fn, loop, LI_INCLUDE_ROOT)
+if (loop->inner && loop->inner->next)
+  {
+   loop_p sibling = loop->inner;
+   do
+ {
+   siblings.safe_push (sibling);
+   sibling = sibling->next;
+ }
+   while (sibling);
+   siblings.qsort (sort_sibling_loops_cmp);
+   loop_p *siblingp = >inner;
+   for (unsigned i = 0; i < siblings.length (); ++i)
+ {
+   *siblingp = siblings[i];
+   siblingp = &(*siblingp)->next;
+ }
+   *siblingp = NULL;
+   siblings.truncate (0);
+  }
+
+  free (sort_sibling_loops_cmp_rpo);
+  sort_sibling_loops_cmp_rpo = NULL;
+}
+
 /* Ratio of frequencies of edges so that one of more latch edges is
considered to belong to inner loop with same header.  */
 #define HEAVY_EDGE_RATIO 8
Index: gcc/cfgloop.h
===
--- gcc/cfgloop.h   (revision 253144)
+++ gcc/cfgloop.h   (working copy)
@@ -333,6 +333,7 @@ bool mark_irreducible_loops (void);
 void release_recorded_exits (function *);
 void record_loop_exits (void);
 void rescan_loop_exit (edge, bool, bool);
+void sort_sibling_loops (function *);
 
 /* Loop data structure manipulation/querying.  */
 extern void flow_loop_tree_node_add (struct loop *, struct loop *);
Index: gcc/graphite.c
===
--- gcc/graphite.c  (revision 253144)
+++ gcc/graphite.c  (working copy)
@@ -419,6 +419,7 @@ graphite_transform_loops (void)
   isl_options_set_on_error (ctx, ISL_ON_ERROR_ABORT);
   the_isl_ctx = ctx;
 
+  sort_sibling_loops (cfun);
   canonicalize_loop_closed_ssa_form ();
 
   calculate_dominance_info (CDI_POST_DOMINATORS);
Index: gcc/tree-ssa-loop.c
===
--- gcc/tree-ssa-loop.c (revision 253144)
+++ gcc/tree-ssa-loop.c (working copy)
@@ -359,6 +359,7 @@ pass_tree_loop_init::execute (function *
   | LOOPS_HAVE_RECORDED_EXITS);
   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
   scev_initialize ();
+  sort_sibling_loops (fun);
 
   return 0;
 }