This libgo patch by Cherry Zhang changes the runtime package to call dropg before changing the g status to _Grunnable/_Gwaiting. Currently, we call dropg (which clears gp.m) after we CAS the g status to _Grunnable or _Gwaiting. Immediately after the g status is CASed, another thread may CAS it to a _Gscan status and scan its stack. With precise stack scan, the scan accesses gp.m in order to switch to g and back (in doscanstackswitch). This races with dropg. If doscanstackswitch reads gp.m and dropg then runs, then when we restore the m at the end of the scan, gp.m will be set to a stale value. Worse, if dropg runs after doscanstackswitch sets the new m, gp will be running with a nil m.
To fix this, we call dropg before CASing the g status to _Grunnable or _Gwaiting. We can do this safely if we are CASing from _Grunning, as we own the g when it is in _Grunning. There is one case where we CAS from _Gsyscall to _Grunnable. It is not safe to call dropg when the g is in _Gsyscall, as precise stack scan needs to read gp.m in order to signal the m. So we need to introduce a transient state, _Gexitingsyscall, between _Gsyscall and _Grunnable, during which the GC should not scan the g's stack. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian
Index: gcc/go/gofrontend/MERGE =================================================================== --- gcc/go/gofrontend/MERGE (revision 267989) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -ee94431c133a90ca5c3c5ebbebcb019c60258dac +d6576c83016d856217758c06d945bfc363ffb817 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/go/runtime/proc.go =================================================================== --- libgo/go/runtime/proc.go (revision 267941) +++ libgo/go/runtime/proc.go (working copy) @@ -956,6 +956,10 @@ loop: break loop } + case _Gexitingsyscall: + // This is a transient state during which we should not scan its stack. + // Try again. + case _Gscanwaiting: // newstack is doing a scan for us right now. Wait. @@ -2635,8 +2639,8 @@ func park_m(gp *g) { traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip) } - casgstatus(gp, _Grunning, _Gwaiting) dropg() + casgstatus(gp, _Grunning, _Gwaiting) if _g_.m.waitunlockf != nil { fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf)) @@ -2660,8 +2664,8 @@ func goschedImpl(gp *g) { dumpgstatus(gp) throw("bad g status") } - casgstatus(gp, _Grunning, _Grunnable) dropg() + casgstatus(gp, _Grunning, _Grunnable) lock(&sched.lock) globrunqput(gp) unlock(&sched.lock) @@ -3054,8 +3058,9 @@ func exitsyscallfast_pidle() bool { func exitsyscall0(gp *g) { _g_ := getg() - casgstatus(gp, _Gsyscall, _Grunnable) + casgstatus(gp, _Gsyscall, _Gexitingsyscall) dropg() + casgstatus(gp, _Gexitingsyscall, _Grunnable) lock(&sched.lock) _p_ := pidleget() if _p_ == nil { Index: libgo/go/runtime/runtime2.go =================================================================== --- libgo/go/runtime/runtime2.go (revision 267941) +++ libgo/go/runtime/runtime2.go (working copy) @@ -70,6 +70,12 @@ const ( // stack is owned by the goroutine that put it in _Gcopystack. 
_Gcopystack // 8 + // _Gexitingsyscall means this goroutine is exiting from a + // system call. This is like _Gsyscall, but the GC should not + // scan its stack. Currently this is only used in exitsyscall0 + // as a transient state when it drops the G. + _Gexitingsyscall // 9 + // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. The // goroutine is not executing user code and the stack is owned Index: libgo/go/runtime/traceback_gccgo.go =================================================================== --- libgo/go/runtime/traceback_gccgo.go (revision 267941) +++ libgo/go/runtime/traceback_gccgo.go (working copy) @@ -122,13 +122,14 @@ func isExportedRuntime(name string) bool } var gStatusStrings = [...]string{ - _Gidle: "idle", - _Grunnable: "runnable", - _Grunning: "running", - _Gsyscall: "syscall", - _Gwaiting: "waiting", - _Gdead: "dead", - _Gcopystack: "copystack", + _Gidle: "idle", + _Grunnable: "runnable", + _Grunning: "running", + _Gsyscall: "syscall", + _Gwaiting: "waiting", + _Gdead: "dead", + _Gcopystack: "copystack", + _Gexitingsyscall: "exiting syscall", } func goroutineheader(gp *g) {