In complex control flow graphs where gotos and returns escape the inner
then-blocks and the right else-blocks are omitted, inner conditions
would erroneously be considered a part of the outer. By using the
observation that the neighborhood of a proper boolean expression should
always be the two outcome nodes we can repeat the search and ancestor
filtering to pinpoint the proper expression. This is helped by replacing
every pair of nodes that share some dominator in the candidate set that
is not the leading term, as that dominator must be the leading term in
some other boolean expression than the one we are looking for.
---
 gcc/testsuite/gcc.misc-tests/gcov-19.c | 125 +++++++++++++++++++++++++
 gcc/testsuite/gcc.misc-tests/gcov-22.c |  31 ++++++
 gcc/tree-profile.cc                    |  96 ++++++++++++++++++-
 3 files changed, 251 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.misc-tests/gcov-19.c 
b/gcc/testsuite/gcc.misc-tests/gcov-19.c
index 5b5c1c275b0..8d6eb610af2 100644
--- a/gcc/testsuite/gcc.misc-tests/gcov-19.c
+++ b/gcc/testsuite/gcc.misc-tests/gcov-19.c
@@ -1036,6 +1036,10 @@ mcdc023g (int a, int b, int c, int d, int e, int f, int 
g, int h, int i, int k,
        x = b + c;
 }
 
+/* Gotos, return, labels can make odd graphs.  It is important that conditions
+   are assigned to the right expression, and that there are no miscounts.  In
+   these tests values may be re-used, as checking things like masking an
+   independence is done in other test cases and not so useful here.  */
 void
 mcdc024a (int a, int b)
 {
@@ -1117,6 +1121,123 @@ label4:
     }
 }
 
+int
+mcdc024d (int a, int b, int c)
+{
+    /* Graphs can get complicated with the innermost returns and else-less if,
+       so we must make sure these conditions are counted correctly.  */
+    if (a)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+    {
+       if (b)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+       {
+           if (c)  /* conditions(0/2) true(0) false(0) */
+                   /* conditions(end) */
+               return 1;
+           else
+               return 2;
+       }
+
+       if (a)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+           return 3;
+    }
+
+    return 5;
+}
+
+/* Nested else-less ifs with inner returns needs to be counted right, which
+   puts some pressure on the expression isolation.  The fallthrough from inner
+   expressions into the next if cause the cfg to have edges from deeper in
+   subexpression to the next block sequence, which can confuse the expression
+   isolation. */
+int
+mcdc024e (int a, int b, int c)
+{
+    if (a)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+    {
+       if (b)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+       {
+           if (c)  /* conditions(0/2) true(0) false(0) */
+                   /* conditions(end) */
+           {
+               if (a)  /* conditions(0/2) true(0) false(0) */
+                       /* conditions(end) */
+                   return 1;
+               else
+                   return 2;
+           }
+
+           if (a)  /* conditions(0/2) true(0) false(0) */
+                   /* conditions(end) */
+               return 3;
+       }
+
+       if (b)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+           return 4;
+    }
+    return 5;
+}
+
+int
+mcdc024f (int a, int b, int c)
+{
+    if (b)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+       return 0;
+
+    if (a)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+    {
+       if (b)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+       {
+           b += 2;
+           if (b & 0xFF)   /* conditions(0/2) true(0) false(0) */
+                           /* conditions(end) */
+               c++;
+
+           return c;
+       }
+       c += 10;
+    }
+}
+
+
+int
+mcdc024g (int a, int b, int c)
+{
+    if (b)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+       goto inner;
+
+    if (a)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+       ++a;
+
+
+    if (a)  /* conditions(1/2) true(0) */
+           /* conditions(end) */
+    {
+       if (b)  /* conditions(0/2) true(0) false(0) */
+               /* conditions(end) */
+       {
+inner:
+           b += 2;
+           if (b & 0xFF)   /* conditions(0/2) true(0) false(0) */
+                           /* conditions(end) */
+               c++;
+
+           return c;
+       }
+       c += 10;
+    }
+}
+
 int main ()
 {
     mcdc001a (0, 1);
@@ -1313,6 +1434,10 @@ int main ()
     mcdc024a (0, 0);
     mcdc024b (0, 0);
     mcdc024c (0, 0);
+    mcdc024d (0, 0, 0);
+    mcdc024e (0, 0, 0);
+    mcdc024f (0, 0, 0);
+    mcdc024g (0, 0, 0);
 }
 
 /* { dg-final { run-gcov conditions { --conditions gcov-19.c } } } */
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-22.c 
b/gcc/testsuite/gcc.misc-tests/gcov-22.c
index 8aa8867dbf1..28b7de66022 100644
--- a/gcc/testsuite/gcc.misc-tests/gcov-22.c
+++ b/gcc/testsuite/gcc.misc-tests/gcov-22.c
@@ -5,6 +5,7 @@
 jmp_buf buf;
 
 void noop() {}
+int identity(int x) { return x; }
 
 /* This function is a test to verify that the expression isolation does not
    break on a CFG with the right set of complex edges.  The (_ && setjmp)
@@ -30,10 +31,40 @@ pathological001 (int a, int b, int c)
        noop ();
 }
 
+///* Adapted from freetype-2.13.0 gxvalid/gxvmod.c classic_kern_validate */
+int
+pathological002 (int a)
+{
+    int error = identity(a);
+
+    if (error) /* conditions(1/2) true(0) */
+               /* conditions(end) */
+       goto Exit;
+
+   if (a+1) /* conditions(1/2) false(0) */
+           /* conditions(end) */
+   {
+       noop ();
+       if (setjmp (buf))    /* conditions(1/2) true(0) */
+                           /* conditions(end) */
+          noop ();
+
+       if (error)  /* conditions(1/2) true(0) */
+                   /* conditions(end) */
+           noop ();
+   }
+
+   error--;
+
+Exit:
+   return error;
+}
+
 int
 main ()
 {
     pathological001 (0, 1, 0);
+    pathological002 (0);
 }
 
 
diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index f98bb1f76c8..ab96b872db3 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -292,6 +292,8 @@ contract_edge (edge e)
            return source;
        if (block_conditional_p (dest))
            return e;
+       if (dest->index == EXIT_BLOCK)
+           return source;
 
        e = single_edge (dest->succs);
        if (!e)
@@ -360,6 +362,8 @@ merge_split_outcome (basic_block b)
     edge e = single_edge (b->succs);
     for (edge pred : e->dest->preds)
     {
+       if (!(pred->flags & EDGE_FALLTHRU))
+           return b;
        if (!single (pred->src->preds))
            return b;
        if (!(single_edge (pred->src->preds)->flags & flag))
@@ -727,7 +731,13 @@ neighborhood (const vec<basic_block>& blocks, sbitmap G, 
vec<basic_block>& out)
    the expression B, but the walk may include expressions from the then/else
    blocks if they start with conditions.  Only the subgraph B is the ancestor
    of *both* the then/else outcome, which means B is the intersection of the
-   ancestors of the nodes in the neighborhood N(G).  */
+   ancestors of the nodes in the neighborhood N(G).
+
+   In complex graphs this may capture more than the expression proper.  In that
+   case, perform the algorithm again but on the neighborhood N(B) rather than
+   N(G).  Unless there is complex control flow with deep early returns, gotos
+   and else-less ifs N(B) will be the two outcome nodes, otherwise search again
+   after replacing nodes with their common dominator.  */
 void
 isolate_expression (conds_ctx &ctx, basic_block p, vec<basic_block>& out)
 {
@@ -765,6 +775,90 @@ isolate_expression (conds_ctx &ctx, basic_block p, 
vec<basic_block>& out)
        if (bitmap_bit_p (expr, b->index))
            out.safe_push (b);
     out.sort (cmp_index_map, &ctx.index_map);
+
+
+    /* In a lot of programs we would be done now, but in complex CFGs with
+       gotos or returns from nested then-blocks we need some post processing.
+
+       Essentially, perform another step of the algorithm - find the
+       neighborhood NG' of the candidate expression B.  If |B| == 2 we have the
+       two outcome nodes and are done.  If not, we know the previous step must
+       have captured something in a nested expression (probably because of a
+       fallthrough from the then-block being merged into the next block.  If
+       two nodes are dominated by some node lower (according to topsort) in the
+       CFG then the dominating node is the first term in some conditional
+       expression, which means it cannot be a part of the expression we're
+       searching for.  We redo the ancestor filtering, but now use a more
+       precise neighborhood that is unaffected.  */
+
+    NG.truncate (0);
+    neighborhood (out, expr, NG);
+    if (NG.length () == 2)
+       return;
+
+    /* Names are reused in this section and generally mean the same thing.  */
+    bitmap_clear (reachable);
+    for (const basic_block b : NG)
+       bitmap_set_bit (reachable, b->index);
+
+    /* If a pair of nodes has a shared dominator *that is not the root*
+       then that dominator must be the first term of another boolean
+       expression or some other structure outside the expression.  */
+    gcc_assert (!NG.is_empty ());
+    for (unsigned i = 0; i != NG.length () - 1; i++)
+    {
+       for (unsigned j = i+1; j != NG.length (); j++)
+       {
+           auto b = nearest_common_dominator (CDI_DOMINATORS, NG[i], NG[j]);
+           if (ctx.index_map[b->index] > ctx.index_map[p->index])
+           {
+               bitmap_clear_bit (reachable, NG[i]->index);
+               bitmap_clear_bit (reachable, NG[j]->index);
+               bitmap_set_bit (reachable, b->index);
+           }
+       }
+    }
+
+    NG.truncate (0);
+    for (basic_block b : G)
+       if (bitmap_bit_p (reachable, b->index))
+           NG.safe_push (b);
+
+    bitmap_copy (reachable, expr);
+    for (const basic_block neighbor : NG)
+    {
+       bitmap_clear (ancestors);
+       for (edge e : neighbor->preds)
+       {
+           /* The e edge might have been contracted past when doing the
+              first scan, so we must make sure its bit is set to not think it
+              is outside of our candidate set.  */
+           bitmap_set_bit (reachable, e->src->index);
+           ancestors_of (e->src, p, reachable, ancestors);
+       }
+       bitmap_and (expr, expr, ancestors);
+    }
+
+    out.truncate (0);
+    for (const basic_block b : G)
+       if (bitmap_bit_p (expr, b->index))
+           out.safe_push (b);
+    out.sort (cmp_index_map, &ctx.index_map);
+
+    bitmap_clear (expr);
+    for (const basic_block b : out)
+       bitmap_set_bit (expr, b->index);
+
+    /* Perform a final step as a sanity check - if the neighborhood is still
+       larger than the outcome set (|NG| == 2) the graph is too complex and we
+       give up instrumentation.  Any graph that exhibit this behaviour should
+       be studied as it might reveal an implementation error.  */
+    NG.truncate (0);
+    neighborhood (out, expr, NG);
+    bitmap_clear (expr);
+    for (const basic_block b : NG)
+       bitmap_set_bit (expr, b->index);
+    gcc_assert (bitmap_count_bits (expr) == 2);
 }
 
 /* Emit lhs = op1 <op> op2 on edges.  This emits non-atomic instructions and
-- 
2.30.2

Reply via email to