The attached Ada testcase triggers an ICE at -O3 when compiled for x86:

eric@polaris:~/build/gcc/native> gcc/xgcc -Bprev-gcc -S opt68.adb -O3 -m32
opt68.adb: In function 'Opt68.Copy':
opt68.adb:51:6: error: multiple hot/cold transitions found (bb 34)
opt68.adb:51:6: error: multiple hot/cold transitions found (bb 64)
+===========================GNAT BUG DETECTED==============================+
| 8.0.0 20171026 (experimental) [trunk revision 254096] (x86_64-suse-linux) 
GCC error:|
| verify_flow_info failed                                                  |

It's the RTL basic block reordering pass confusing itself, more precisely:

          /* If the best destination has multiple predecessors, and can be
             duplicated cheaper than a jump, don't allow it to be added
             to a trace.  We'll duplicate it when connecting traces.  */
          if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
              && copy_bb_p (best_edge->dest, 0))
            best_edge = NULL;

Now, if you're in the first round (reordering of the hot paritition) and the 
basic block has 2 predecessors, a regular one (corresponding to best_edge) and 
another coming from the cold partition (i.e. EDGE_CROSSING), the duplication 
will redirect best_edge and leave the block with only one predecessor coming 
from the cold partition, which means that the block will eventually become 
cold at the end of the pass if fixup_partitions is invoked.  But that's too 
late since

  /* Signal that rtl_verify_flow_info_1 can now verify that there
     is at most one switch between hot/cold sections.  */
  crtl->bb_reorder_complete = true;

So we would need to re-run the pass in order to move the block from the hot 
partition to the cold partition (re-running only a subpass wouldn't work since 
the above decision is made by find_traces but the duplication is triggered by 
connect_traces), which seems overkill.

Therefore the attached patch detects this problematic case and doesn't reset 
best_edge upon finding it.  This means that the destination will be added to 
the current trace in the hot partition, which looks OK since all its other 
predecessors are in the cold partition.  It also contains a fix for an off-by-
one bug in the dump file and removes a redundant check from better_edge_p (the 
same check on the BB_PARTITION of the blocks is done in find_traces_1_round).

Bootstrapped/regtested on x86_64-suse-linux.  Any objection?


2017-10-26  Eric Botcazou  <ebotca...@adacore.com>

        * bb-reorder.c (find_traces_1_round): Fix off-by-one index.
        Move around comment.  Do not reset best_edge for a copiable
        destination if the copy would cause a partition change.
        (better_edge_p): Remove redundant check.


2017-10-26  Eric Botcazou  <ebotca...@adacore.com>

        * gnat.dg/opt68.ad[sb]: New test.

-- 
Eric Botcazou
Index: bb-reorder.c
===================================================================
--- bb-reorder.c	(revision 254096)
+++ bb-reorder.c	(working copy)
@@ -529,7 +529,7 @@ find_traces_1_round (int branch_th, int
 
 	  if (dump_file)
 	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
-		     bb->index, *n_traces - 1);
+		     bb->index, *n_traces);
 
 	  ends_in_call = block_ends_with_call_p (bb);
 
@@ -545,6 +545,8 @@ find_traces_1_round (int branch_th, int
 		  && bb_visited_trace (e->dest) != *n_traces)
 		continue;
 
+	      /* If partitioning hot/cold basic blocks, don't consider edges
+		 that cross section boundaries.  */
 	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 		continue;
 
@@ -574,9 +576,6 @@ find_traces_1_round (int branch_th, int
 		      || e->count () < count_th) && (!for_size)))
 		continue;
 
-	      /* If partitioning hot/cold basic blocks, don't consider edges
-		 that cross section boundaries.  */
-
 	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 				 best_edge))
 		{
@@ -586,12 +585,28 @@ find_traces_1_round (int branch_th, int
 		}
 	    }
 
-	  /* If the best destination has multiple predecessors, and can be
-	     duplicated cheaper than a jump, don't allow it to be added
-	     to a trace.  We'll duplicate it when connecting traces.  */
-	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
+	  /* If the best destination has multiple predecessors and can be
+	     duplicated cheaper than a jump, don't allow it to be added to
+	     a trace; we'll duplicate it when connecting the traces later.
+	     However, we need to check that this duplication wouldn't leave
+	     the best destination with only crossing predecessors, because
+	     this would change its effective partition from hot to cold.  */
+	  if (best_edge
+	      && EDGE_COUNT (best_edge->dest->preds) >= 2
 	      && copy_bb_p (best_edge->dest, 0))
-	    best_edge = NULL;
+	    {
+	      bool only_crossing_preds = true;
+	      edge e;
+	      edge_iterator ei;
+	      FOR_EACH_EDGE (e, ei, best_edge->dest->preds)
+		if (e != best_edge && !(e->flags & EDGE_CROSSING))
+		  {
+		    only_crossing_preds = false;
+		    break;
+		  }
+	      if (!only_crossing_preds)
+		best_edge = NULL;
+	    }
 
 	  /* If the best destination has multiple successors or predecessors,
 	     don't allow it to be added when optimizing for size.  This makes
@@ -988,16 +1003,6 @@ better_edge_p (const_basic_block bb, con
   else
     is_better_edge = false;
 
-  /* If we are doing hot/cold partitioning, make sure that we always favor
-     non-crossing edges over crossing edges.  */
-
-  if (!is_better_edge
-      && flag_reorder_blocks_and_partition
-      && cur_best_edge
-      && (cur_best_edge->flags & EDGE_CROSSING)
-      && !(e->flags & EDGE_CROSSING))
-    is_better_edge = true;
-
   return is_better_edge;
 }
 
-- { dg-do compile }
-- { dg-options "-O3" }

with Ada.Unchecked_Deallocation;

package body Opt68 is

  procedure Free
    is new Ada.Unchecked_Deallocation (Queue_Element, A_Queue_Element);

  procedure Copy (dest : in out Queue; src : Queue) is
    d, s, pd, ps, t : A_Queue_Element;
  begin
    if src.sz /= 0 then
      d := dest.front;
      s := src.front;
      while d /= null and s /= null loop
        d.value := s.value;
        pd := d;
        ps := s;
        d  := d.next;
        s  := s.next;
      end loop;
      if src.sz = dest.sz then
        return;
      elsif s = null then
        while d /= null loop
          t := d.next;
          Free (d);
          d := t;
        end loop;
        dest.back      := pd;
        dest.back.next := null;
      else
        if pd = null then
          dest.front       := new Queue_Element;
          dest.front.value := s.value;
          s                := s.next;
          pd               := dest.front;
        end if;
        while s /= null loop
          pd.next       := new Queue_Element;
          pd.next.value := s.value;
          pd            := pd.next;
          s             := s.next;
        end loop;
        dest.back := pd;
      end if;
      dest.sz := src.sz;
    end if;
  end;

end Opt68;
with Ada.Finalization;

package Opt68 is

  type Cont is new Ada.Finalization.Controlled with null record;

  type Element is record
    C : Cont;
  end record;

  type Queue_Element;
  type A_Queue_Element is access Queue_Element;
  type Queue_Element is record
    Value : Element;
    Next  : A_Queue_Element;
  end record;

  type Queue is limited record
    Sz    : Natural;
    Front : A_Queue_Element;
    Back  : A_Queue_Element;
  end record;

  procedure Copy (dest : in out Queue; src : Queue);

end Opt68;

Reply via email to