I've committed this to the gomp4 branch. The LTO device compiler was ICEing when reading in the offload inline statistics. The root cause was that it did not have the bodies of some functions, and therefore didn't try to read their callee statistics. As a result it started reading the next function's data too early, and in this particular case ICEd on a failed assert.

The statistics write-out should only include callee data when the function body is also being dumped. For regular LTO we always do that when available, but for offload LTO we only dump bodies of functions marked for offload.

Such a case isn't necessarily an error, as we may want to link with a function implementation provided by a library. In this example, it's a user error and we'll eventually produce a final link error.

The testcase is set up so that another TU provides the function implementations and we get an executable program.

nathan
2016-08-26  Nathan Sidwell  <nat...@codesourcery.com>

	gcc/
	* ipa-inline-analysis.c (inline_write_summary): Only dump callee
	counts when dumping the function's body.

	libgomp/
	* testsuite/libgomp.oacc-c++/pr71959.C: New.
	* testsuite/libgomp.oacc-c++/pr71959-a.C: New.

Index: gcc/ipa-inline-analysis.c
===================================================================
--- gcc/ipa-inline-analysis.c	(revision 239787)
+++ gcc/ipa-inline-analysis.c	(working copy)
@@ -4383,8 +4383,10 @@ inline_write_summary (void)
 	  int i;
 	  size_time_entry *e;
 	  struct condition *c;
+	  int index = lto_symtab_encoder_encode (encoder, cnode);
+	  bool body = encoder->nodes[index].body;
 
-	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
+	  streamer_write_uhwi (ob, index);
 	  streamer_write_hwi (ob, info->estimated_self_stack_size);
 	  streamer_write_hwi (ob, info->self_size);
 	  streamer_write_hwi (ob, info->self_time);
@@ -4415,10 +4417,17 @@ inline_write_summary (void)
 	  write_predicate (ob, info->loop_iterations);
 	  write_predicate (ob, info->loop_stride);
 	  write_predicate (ob, info->array_index);
-	  for (edge = cnode->callees; edge; edge = edge->next_callee)
-	    write_inline_edge_summary (ob, edge);
-	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
-	    write_inline_edge_summary (ob, edge);
+	  if (body)
+	    {
+	      /* Only write callee counts when we're emitting the
+		 body, as the reader only knows about the callees when
+		 the body's emitted.  */
+	      for (edge = cnode->callees; edge; edge = edge->next_callee)
+		write_inline_edge_summary (ob, edge);
+	      for (edge = cnode->indirect_calls; edge;
+		   edge = edge->next_callee)
+		write_inline_edge_summary (ob, edge);
+	    }
 	}
     }
   streamer_write_char_stream (ob->main_stream, 0);
Index: libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C
===================================================================
--- libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C	(working copy)
@@ -0,0 +1,31 @@
+// { dg-do compile }
+
+struct Iter 
+{
+  int *cursor;
+
+  void ctor (int *cursor_) asm("_ZN4IterC1EPi");
+  int *point () const asm("_ZNK4Iter5pointEv");
+};
+
+#pragma acc routine
+void  Iter::ctor (int *cursor_)
+{
+  cursor = cursor_;
+}
+
+#pragma acc routine
+int *Iter::point () const
+{
+  return cursor;
+}
+
+void apply (int (*fn)(), Iter out) asm ("_ZN5Apply5applyEPFivE4Iter");
+
+#pragma acc routine
+void apply (int (*fn)(), struct Iter out)
+{ *out.point() = fn (); }
+
+extern "C" void __gxx_personality_v0 ()
+{
+}
Index: libgomp/testsuite/libgomp.oacc-c++/pr71959.C
===================================================================
--- libgomp/testsuite/libgomp.oacc-c++/pr71959.C	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c++/pr71959.C	(working copy)
@@ -0,0 +1,31 @@
+// { dg-additional-sources "pr71959-a.C" }
+
+// pr lto/71959 ICEd LTO due to mismatch between writing & reading behaviour
+
+struct Iter
+{
+  int *cursor;
+  
+  Iter(int *cursor_) : cursor(cursor_) {}
+
+  int *point() const { return cursor; }
+};
+
+#pragma acc routine seq
+int one () { return 1; }
+
+struct Apply
+{
+  static void apply (int (*fn)(), Iter out)
+  { *out.point() = fn (); }
+};
+
+int main ()
+{
+  int x;
+  
+#pragma acc parallel copyout(x)
+  Apply::apply (one, Iter (&x));
+
+  return x != 1;
+}

Reply via email to