It always felt odd that the expression candidate set refinement were in
two more-or-less identical phases. It turns out that this is not
necessary and the neighborhood, ancestor filtering, update candidate set
steps can all be done inside a single loop, which terminates when
|NG| == |Outcomes| == 2. This also provides a nice opportunity for some
error sensitivity - if the candidate set cannot be reduced any more, and
the neighborhood |NG| != 2 we error out.

It is unclear if this graph can possibly be constructed, so for now it
uses an assert for sensitivity to it. What this should really do is drop
the instrumentation for the function altogether, and give a warning.
---
 gcc/tree-profile.cc | 139 ++++++++++++++++----------------------------
 1 file changed, 49 insertions(+), 90 deletions(-)

diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index 3fc78ff8ace..b75736a22a0 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -114,9 +114,10 @@ struct conds_ctx
     auto_sbitmap G1;
     auto_sbitmap G2;
     auto_sbitmap G3;
+    auto_sbitmap G4;
 
     explicit conds_ctx (unsigned size) noexcept (true) : marks (size),
-    G1 (size), G2 (size), G3 (size)
+    G1 (size), G2 (size), G3 (size), G4 (size)
     {
        bitmap_clear (marks);
     }
@@ -728,13 +729,14 @@ isolate_expression (conds_ctx &ctx, basic_block p, 
vec<basic_block>& out)
     sbitmap expr = ctx.G1;
     sbitmap reachable = ctx.G2;
     sbitmap ancestors = ctx.G3;
+    sbitmap prev = ctx.G3;
     bitmap_clear (expr);
     bitmap_clear (reachable);
+    bitmap_clear (prev);
 
     vec<basic_block>& G = ctx.B1;
     vec<basic_block>& NG = ctx.B2;
     G.truncate (0);
-    NG.truncate (0);
 
     basic_block post = get_immediate_dominator (CDI_POST_DOMINATORS, p);
     cond_reachable_from (p, post, reachable, G);
@@ -744,105 +746,62 @@ isolate_expression (conds_ctx &ctx, basic_block p, 
vec<basic_block>& out)
        return;
     }
 
-    neighborhood (G, reachable, NG);
-    bitmap_copy (expr, reachable);
-
-    for (const basic_block neighbor : NG)
-    {
-       bitmap_clear (ancestors);
-       for (edge e : neighbor->preds)
-           ancestors_of (e->src, p, reachable, ancestors);
-       bitmap_and (expr, expr, ancestors);
-    }
-
-    for (const basic_block b : G)
-       if (bitmap_bit_p (expr, b->index))
-           out.safe_push (b);
-    out.sort (cmp_index_map, &ctx.index_map);
-
-
-    /* In a lot of programs we would be done now, but in complex CFGs with
-       gotos or returns from nested then-blocks we need some post processing.
-
-       Essentially, perform another step of the algorithm - find the
-       neighborhood NG' of the candidate expression B.  If |B| == 2 we have the
-       two outcome nodes and are done.  If not, we know the previous step must
-       have captured something in a nested expression (probably because of a
-       fallthrough from the then-block being merged into the next block.  If
-       two nodes are dominated by some node lower (according to topsort) in the
-       CFG then the dominating node is the first term in some conditional
-       expression, which means it cannot be a part of the expression we're
-       searching for.  We redo the ancestor filtering, but now use a more
-       precise neighborhood that is unaffected.  */
-
-    NG.truncate (0);
-    neighborhood (out, expr, NG);
-    if (NG.length () == 2)
-       return;
-
-    /* Names are reused in this section and generally mean the same thing.  */
-    bitmap_clear (reachable);
-    for (const basic_block b : NG)
-       bitmap_set_bit (reachable, b->index);
-
-    /* If a pair of nodes has a shared dominator *that is not the root*
-       then that dominator must be the first term of another boolean
-       expression or some other structure outside the expression.  */
-    gcc_assert (!NG.is_empty ());
-    for (unsigned i = 0; i != NG.length () - 1; i++)
+    while (true)
     {
-       for (unsigned j = i+1; j != NG.length (); j++)
+       NG.truncate (0);
+       neighborhood (G, reachable, NG);
+       gcc_assert (!NG.is_empty ());
+
+       bitmap_clear (expr);
+       for (basic_block b : NG)
+           bitmap_set_bit (expr, b->index);
+
+       if (bitmap_count_bits (expr) == 2)
+           break;
+
+       /* If the neighborhood does not change between iterations (a fixed
+          point) we cannot understand the graph properly, and this would loop
+          infinitely.  If this should happen, we should bail out and give up
+          instrumentation for the function altogether.  It is possible no such
+          CFGs exist, so for now this is an assert.  */
+       if (bitmap_equal_p (prev, expr) || bitmap_count_bits (expr) < 2)
+           gcc_assert (false);
+       bitmap_copy (prev, expr);
+
+       for (unsigned i = 0; i != NG.length () - 1; i++)
        {
-           auto b = nearest_common_dominator (CDI_DOMINATORS, NG[i], NG[j]);
-           if (ctx.index_map[b->index] > ctx.index_map[p->index])
+           for (unsigned j = i+1; j != NG.length (); j++)
            {
-               bitmap_clear_bit (reachable, NG[i]->index);
-               bitmap_clear_bit (reachable, NG[j]->index);
-               bitmap_set_bit (reachable, b->index);
+               auto b = nearest_common_dominator (CDI_DOMINATORS, NG[i], 
NG[j]);
+               if (ctx.index_map[b->index] > ctx.index_map[p->index])
+               {
+                   bitmap_clear_bit (reachable, NG[i]->index);
+                   bitmap_clear_bit (reachable, NG[j]->index);
+                   bitmap_set_bit (reachable, b->index);
+               }
            }
        }
-    }
-
-    NG.truncate (0);
-    for (basic_block b : G)
-       if (bitmap_bit_p (reachable, b->index))
-           NG.safe_push (b);
+       bitmap_copy (expr, reachable);
 
-    bitmap_copy (reachable, expr);
-    for (const basic_block neighbor : NG)
-    {
-       bitmap_clear (ancestors);
-       for (edge e : neighbor->preds)
+       for (const basic_block neighbor : NG)
        {
-           /* The e edge might have been contracted past when doing the
-              first scan, so we must make sure its bit is set to not think it
-              is outside of our candidate set.  */
-           bitmap_set_bit (reachable, e->src->index);
-           ancestors_of (e->src, p, reachable, ancestors);
+           bitmap_clear (ancestors);
+           for (edge e : neighbor->preds)
+               ancestors_of (e->src, p, reachable, ancestors);
+           bitmap_and (expr, expr, ancestors);
        }
-       bitmap_and (expr, expr, ancestors);
-    }
 
-    out.truncate (0);
-    for (const basic_block b : G)
-       if (bitmap_bit_p (expr, b->index))
-           out.safe_push (b);
-    out.sort (cmp_index_map, &ctx.index_map);
+       for (unsigned i = 0; i != G.length (); i++)
+           if (!bitmap_bit_p (expr, G[i]->index))
+               G.unordered_remove (i--);
 
-    bitmap_clear (expr);
-    for (const basic_block b : out)
-       bitmap_set_bit (expr, b->index);
+       bitmap_clear (reachable);
+       for (basic_block b : G)
+           bitmap_set_bit (reachable, b->index);
+    }
 
-    /* Perform a final step as a sanity check - if the neighborhood is still
-       larger than the outcome set (|NG| == 2) the graph is too complex and we
-       give up instrumentation.  Any graph that exhibit this behaviour should
-       be studied as it might reveal an implementation error.  */
-    NG.truncate (0);
-    neighborhood (out, expr, NG);
-    bitmap_clear (expr);
-    for (const basic_block b : NG)
-       bitmap_set_bit (expr, b->index);
-    gcc_assert (bitmap_count_bits (expr) == 2);
+    out.safe_splice (G);
+    out.sort (cmp_index_map, &ctx.index_map);
 }
 
 /* Emit lhs = op1 <op> op2 on edges.  This emits non-atomic instructions and
-- 
2.30.2

Reply via email to