I believe that a write barrier is missing in MVM_sc_get_sc(). Patch attached.

This was really really messy to figure out. The bug manifests as a SEGV in
process_worklist because a pointer to an object is 0x6. Clearly not a pointer.
The value 0x6 has come from the scs array of a MVMCompUnit. In turn, the
value 0x6 gets copied to that array because the GC thinks that it's the value
of a forwarding pointer in an object in fromspace, during a GC sweep.

In turn, that address in memory is 6 because it's actually now the count for
an array - ie ...->scs[4] is a stale pointer to memory that has been freed
and re-used, and this is only noticed after a pair (or any even number) of
GC runs.


1) Off topic question - is there a point at runtime at which derealisation
   contexts are no longer needed, and so can be discarded to reduce memory
   usage?

At this point I got somewhat stuck trying to work out what on earth was
going on, and tried to brute force trace all the MVMCompUnit objects, and
at the end of each GC run assert that all of their scs arrays still pointed
to valid locations. I got the following output:


0x19438d0 moves to 0x19438d0
0x1943990 moves to 0x1943990
counted 13, expected 13
counted 13, expected 13
counted 13, expected 13
counted 13, expected 13
counted 13, expected 13
counted 13, expected 13
0x7ffff7feb230 moves to 0x7ffff7f35d88
0x7ffff7fea690 moves to 0x7ffff7f35288
0x7ffff7fe9af0 moves to 0x7ffff7f34ec0
counted 16, expected 16
0x7ffff7f4d1b8 moves to 0x7ffff7fb56e0
0x7ffff7f4c618 moves to 0x7ffff7fb57a0
0x7ffff7f4aeb8 moves to 0x7ffff7f90100
0x7ffff7f3fc90 moves to 0x7ffff7fba4e8
0x7ffff7f35d88 moves to 0x2525b20
0x7ffff7f35288 moves to 0x2525be0
0x7ffff7f34ec0 moves to 0x2525a60
counted 20, expected 20
0x7ffff7fb56e0 moves to 0x2525ca0
0x7ffff7fb57a0 moves to 0x2525d60
0x7ffff7f90100 moves to 0x2525e20
0x7ffff7fba4e8 moves to 0x2525ee0
scs 2 in fromspace (0x7ffff7fbb8d0)

Program received signal SIGABRT, Aborted.
0x00007ffff75698a5 in raise () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.107.el6.x86_64
(gdb) where
#0  0x00007ffff75698a5 in raise () from /lib64/libc.so.6
#1  0x00007ffff756b085 in abort () from /lib64/libc.so.6
#2  0x00007ffff79d652f in run_gc (tc=0x6023f0, what_to_do=0 '\000')
    at src/gc/orchestrate.c:322
#3  0x00007ffff79d6942 in MVM_gc_enter_from_allocator (tc=0x6023f0)
    at src/gc/orchestrate.c:416
#4  0x00007ffff79d6b16 in MVM_gc_allocate_nursery (tc=0x6023f0, size=40)
    at src/gc/allocation.c:32
#5  0x00007ffff79d6b9b in MVM_gc_allocate_zeroed (tc=0x6023f0, size=40)
    at src/gc/allocation.c:49
#6  0x00007ffff79d6ee3 in MVM_gc_allocate_object (tc=0x6023f0, st=0x6034b0)
    at src/gc/allocation.c:85
#7  0x00007ffff79f015d in allocate (tc=0x6023f0, st=0x6034b0)
    at src/6model/reprs/P6str.c:22
#8  0x00007ffff798b186 in MVM_args_get_pos_obj (tc=0x6023f0, ctx=0x177b7f8, 
    pos=2, required=1 '\001') at src/core/args.c:228
#9  0x00007ffff7992d87 in MVM_interp_run (tc=0x6023f0, 
    initial_invoke=0x7ffff7a2eb80 <toplevel_initial_invoke>, 
    invoke_data=0x74f9a0) at src/core/interp.c:580
#10 0x00007ffff7a2ecad in MVM_vm_run_file (instance=0x602010, 
    filename=0x7fffffffe76d "src/vm/moar/stage0/nqp.moarvm") at src/moar.c:173
#11 0x0000000000400d2e in main (argc=11, argv=0x7fffffffe498) at src/main.c:137


With this hack added:

diff --git a/src/6model/reprs/MVMCompUnit.c b/src/6model/reprs/MVMCompUnit.c
index 4e688f9..f546d4e 100644
--- a/src/6model/reprs/MVMCompUnit.c
+++ b/src/6model/reprs/MVMCompUnit.c
@@ -1,5 +1,8 @@
 #include "moar.h"
 
+struct debug_ll *cu_ll = NULL;
+int cu_count = 0;
+
 /* This representation's function pointer table. */
 static const MVMREPROps this_repr;
 
@@ -25,7 +28,13 @@ static MVMObject * type_object_for(MVMThreadContext *tc, 
MVMObject *HOW) {
 
 /* Creates a new instance based on the type object. */
 static MVMObject * allocate(MVMThreadContext *tc, MVMSTable *st) {
-    return MVM_gc_allocate_object(tc, st);
+    MVMObject *result = MVM_gc_allocate_object(tc, st);
+    struct debug_ll *l = malloc(sizeof(struct debug_ll));
+    ++cu_count;
+    l->obj = (MVMCollectable *) result;
+    l->next = cu_ll;
+    cu_ll = l;
+    return result;
 }
 
 /* Copies the body of one object to another. */
@@ -70,6 +79,7 @@ static void gc_mark(MVMThreadContext *tc, MVMSTable *st, void 
*data, MVMGCWorkli
 /* Called by the VM in order to free memory associated with this object. */
 static void gc_free(MVMThreadContext *tc, MVMObject *obj) {
     MVMCompUnitBody *body = &((MVMCompUnit *)obj)->body;
+    --cu_count;
     MVM_checked_free_null(body->frames);
     MVM_checked_free_null(body->coderefs);
     body->main_frame = NULL;
@@ -131,4 +141,4 @@ static const MVMREPROps this_repr = {
     "MVMCompUnit", /* name */
     MVM_REPR_ID_MVMCompUnit,
     0, /* refs_frames */
-};
\ No newline at end of file
+};
diff --git a/src/6model/reprs/MVMCompUnit.h b/src/6model/reprs/MVMCompUnit.h
index e1570d9..d20d33d 100644
--- a/src/6model/reprs/MVMCompUnit.h
+++ b/src/6model/reprs/MVMCompUnit.h
@@ -78,3 +78,13 @@ struct MVMLoadedCompUnitName {
 
 /* Function for REPR setup. */
 const MVMREPROps * MVMCompUnit_initialize(MVMThreadContext *tc);
+
+struct debug_ll;
+
+struct debug_ll {
+    struct debug_ll *next;
+    struct MVMCollectable *obj;
+};
+
+extern struct debug_ll *cu_ll;
+extern int cu_count;
diff --git a/src/gc/orchestrate.c b/src/gc/orchestrate.c
index 804f0f6..4b77f8f 100644
--- a/src/gc/orchestrate.c
+++ b/src/gc/orchestrate.c
@@ -290,12 +290,57 @@ static void run_gc(MVMThreadContext *tc, MVMuint8 
what_to_do) {
 
         MVM_gc_collect_free_nursery_uncopied(other, tc->gc_work[i].limit);
 
+       {
+           struct debug_ll *cup = cu_ll;
+           unsigned int count = 0;
+
+           while (cup) {
+               if (cup->obj) {
+                   if (cup->obj->forwarder
+                       || (gen == MVMGCGenerations_Nursery &&
+                           cup->obj->flags & MVM_CF_SECOND_GEN)) {
+                       struct MVMCompUnit const *const cu
+                           = (struct MVMCompUnit *)cup->obj;
+
+                       ++count;
+
+                       if (cup->obj->forwarder) {
+                           fprintf(stderr, "%p moves to %p\n", cu,
+                                   cu->common.header.forwarder);
+                           cup->obj = cup->obj->forwarder;
+                       }
+
+                       if (cu && cu->body.num_scs) {
+                           unsigned int i;
+
+                           for (i = 0; i < cu->body.num_scs; i++) {
+                               const char *const c = (char *) cu->body.scs[i];
+                               if (c >= (char *)tc->nursery_fromspace &&
+                                   c < (char *)tc->nursery_fromspace + 
MVM_NURSERY_SIZE) {
+                                   fprintf(stderr, "scs %u of cu %p in 
fromspace(%p)\n",
+                                           i, cu, c);
+                                   abort();
+                               }
+                           }
+                       }
+                   } else {
+                       /* Really we ought to delete them from the list here */
+                       fprintf(stderr, "%p is freed\n", cup->obj);
+                       cup->obj = NULL;
+                   }
+               }
+               cup = cup->next;
+           }
+           fprintf(stderr, "counted %u, expected %d\n", count, cu_count);
+       }
+
         if (gen == MVMGCGenerations_Both) {
             GCDEBUG_LOG(tc, MVM_GC_DEBUG_ORCHESTRATE, "Thread %d run %d : 
freeing gen2 of thread %d\n", other->thread_id);
             MVM_gc_collect_cleanup_gen2roots(other);
             MVM_gc_collect_free_gen2_unmarked(other);
         }
     }
+
     if (what_to_do == MVMGCWhatToDo_All) {
         MVM_gc_collect_free_stables(tc);
     }


I mail it here in case it's useful to anyone else trying to figure out missing
write barriers.


2) Do most missing write barrier bugs look as crazy to figure out as this one?
   Or was this one particularly special by corrupting something outside of
   the nursery by way of a "corrupt" forwarding pointer?

Nicholas Clark
>From 629e6548fd587862f07ed9cdb8bf10ebbad7e80f Mon Sep 17 00:00:00 2001
From: Nicholas Clark <n...@ccl4.org>
Date: Sat, 28 Dec 2013 12:27:02 +0100
Subject: [PATCH 2/2] MVM_sc_get_sc() was missing a write barrier.

---
 src/6model/sc.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/6model/sc.c b/src/6model/sc.c
index 3e2e3ad..91e7814 100644
--- a/src/6model/sc.c
+++ b/src/6model/sc.c
@@ -112,7 +112,7 @@ MVMSerializationContext * MVM_sc_get_sc(MVMThreadContext *tc, MVMCompUnit *cu, M
         sc = scb->sc;
         if (sc == NULL)
             return NULL;
-        cu->body.scs[dep] = sc;
+        MVM_ASSIGN_REF(tc, cu, cu->body.scs[dep], sc);
     }
     return sc;
 }
-- 
1.7.1

Reply via email to