This libgo patch by Cherry Zhang implements cheaper goroutine context
switches on x86_64 GNU/Linux.

Currently, goroutine switches are implemented with the libc
getcontext/setcontext functions, which save/restore the machine
register state and also the signal context.  This does more than
we need, and performs an expensive syscall.

This patch implements a simplified version of getcontext/setcontext,
in assembly, that only saves/restores the necessary part, i.e. the
callee-save registers plus the PC and SP.  A simplified version of
makecontext, also written in assembly, is added as well.  Currently
this is only implemented on x86_64 GNU/Linux.

Bootstrapped and tested on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===================================================================
--- gcc/go/gofrontend/MERGE     (revision 271784)
+++ gcc/go/gofrontend/MERGE     (working copy)
@@ -1,4 +1,4 @@
-4dc60d989293d070702024e7dea52b9849f74775
+8402f6ac021ba20163ab4fcdb10ab7bb642de6dc
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/Makefile.am
===================================================================
--- libgo/Makefile.am   (revision 271669)
+++ libgo/Makefile.am   (working copy)
@@ -481,6 +481,7 @@ runtime_files = \
        runtime/runtime_c.c \
        runtime/stack.c \
        runtime/yield.c \
+       runtime/go-context.S \
        $(rtems_task_variable_add_file) \
        $(runtime_getncpu_file)
 
Index: libgo/configure.ac
===================================================================
--- libgo/configure.ac  (revision 271669)
+++ libgo/configure.ac  (working copy)
@@ -26,6 +26,7 @@ m4_rename([_AC_ARG_VAR_PRECIOUS],[glibgo
 m4_define([_AC_ARG_VAR_PRECIOUS],[])
 AC_PROG_CC
 AC_PROG_GO
+AM_PROG_AS
 m4_rename_force([glibgo_PRECIOUS],[_AC_ARG_VAR_PRECIOUS])
 
 AC_SUBST(CFLAGS)
Index: libgo/runtime/go-context.S
===================================================================
--- libgo/runtime/go-context.S  (nonexistent)
+++ libgo/runtime/go-context.S  (working copy)
@@ -0,0 +1,69 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This provides a simplified version of getcontext and
+// setcontext. They are like the corresponding functions
+// in libc, but we only save/restore the callee-save
+// registers and PC, SP. Unlike the libc functions, we
+// don't save/restore the signal masks and floating point
+// environment.
+
+#if defined(__x86_64__) && defined(__linux__) && !defined(__CET__)
+
+#define RBP_OFF        (0*8)
+#define RBX_OFF        (1*8)
+#define R12_OFF        (2*8)
+#define R13_OFF        (3*8)
+#define R14_OFF        (4*8)
+#define R15_OFF        (5*8)
+#define SP_OFF (6*8)
+#define PC_OFF (7*8)
+
+.globl __go_getcontext
+.text
+__go_getcontext:       // saves the current context into the __go_context_t addressed by %rdi
+       movq    %rbx, RBX_OFF(%rdi)     // store the six callee-save registers in their slots
+       movq    %rbp, RBP_OFF(%rdi)
+       movq    %r12, R12_OFF(%rdi)
+       movq    %r13, R13_OFF(%rdi)
+       movq    %r14, R14_OFF(%rdi)
+       movq    %r15, R15_OFF(%rdi)
+       // Record where the caller resumes: its return PC and its SP as of just before the call.
+       movq    (%rsp), %rax    // return PC
+       movq    %rax, PC_OFF(%rdi)
+       leaq    8(%rsp), %rax   // the SP before pushing return PC
+       movq    %rax, SP_OFF(%rdi)
+       // No status is placed in %rax before returning; it still holds the SP value computed above.
+       ret
+
+.globl __go_setcontext
+.text
+__go_setcontext:       // resumes the context stored in the __go_context_t addressed by %rdi
+       movq    RBX_OFF(%rdi), %rbx     // reload the six callee-save registers from their slots
+       movq    RBP_OFF(%rdi), %rbp
+       movq    R12_OFF(%rdi), %r12
+       movq    R13_OFF(%rdi), %r13
+       movq    R14_OFF(%rdi), %r14
+       movq    R15_OFF(%rdi), %r15
+       movq    SP_OFF(%rdi), %rsp      // switch to the saved stack pointer
+       movq    PC_OFF(%rdi), %rdx      // fetch the saved PC into a scratch register
+       // Does not return to its caller: execution continues at the saved PC.
+       jmp     *%rdx
+
+.globl __go_makecontext
+.text
+__go_makecontext:      // args: %rdi = context, %rsi = entry function, %rdx = stack base, %rcx = stack size
+       addq    %rcx, %rdx      // %rdx = base + size, the high end of the stack area
+
+       // Align the SP, and push a dummy return address.
+       andq    $~0xfULL, %rdx  // round down to a 16-byte boundary
+       subq    $8, %rdx        // reserve one 8-byte slot below that boundary
+       movq    $0, (%rdx)      // the dummy return address is 0
+
+       movq    %rdx, SP_OFF(%rdi)      // the context will start on this stack pointer...
+       movq    %rsi, PC_OFF(%rdi)      // ...executing at the entry function
+       // The callee-save register slots of the context are not written here.
+       ret
+
+#endif
Index: libgo/runtime/proc.c
===================================================================
--- libgo/runtime/proc.c        (revision 271669)
+++ libgo/runtime/proc.c        (working copy)
@@ -75,7 +75,7 @@ initcontext(void)
 }
 
 static inline void
-fixcontext(ucontext_t *c __attribute__ ((unused)))
+fixcontext(__go_context_t *c __attribute__ ((unused)))
 {
 }
 
@@ -182,18 +182,18 @@ fixcontext(ucontext_t* c)
 // Go, and Go has no simple way to align a field to such a boundary.
 // So we make the field larger in runtime2.go and pick an appropriate
 // offset within the field here.
-static ucontext_t*
+static __go_context_t*
 ucontext_arg(uintptr_t* go_ucontext)
 {
        uintptr_t p = (uintptr_t)go_ucontext;
-       size_t align = __alignof__(ucontext_t);
+       size_t align = __alignof__(__go_context_t);
        if(align > 16) {
                // We only ensured space for up to a 16 byte alignment
                // in libgo/go/runtime/runtime2.go.
-               runtime_throw("required alignment of ucontext_t too large");
+               runtime_throw("required alignment of __go_context_t too large");
        }
        p = (p + align - 1) &~ (uintptr_t)(align - 1);
-       return (ucontext_t*)p;
+       return (__go_context_t*)p;
 }
 
 // We can not always refer to the TLS variables directly.  The
@@ -289,7 +289,7 @@ runtime_gogo(G* newg)
        g = newg;
        newg->fromgogo = true;
        fixcontext(ucontext_arg(&newg->context[0]));
-       setcontext(ucontext_arg(&newg->context[0]));
+       __go_setcontext(ucontext_arg(&newg->context[0]));
        runtime_throw("gogo setcontext returned");
 }
 
@@ -328,7 +328,7 @@ runtime_mcall(FuncVal *fv)
                gp->gcnextsp2 = (uintptr)(secondary_stack_pointer());
 #endif
                gp->fromgogo = false;
-               getcontext(ucontext_arg(&gp->context[0]));
+               __go_getcontext(ucontext_arg(&gp->context[0]));
 
                // When we return from getcontext, we may be running
                // in a new thread.  That means that g may have
@@ -358,7 +358,7 @@ runtime_mcall(FuncVal *fv)
                g = mp->g0;
 
                fixcontext(ucontext_arg(&mp->g0->context[0]));
-               setcontext(ucontext_arg(&mp->g0->context[0]));
+               __go_setcontext(ucontext_arg(&mp->g0->context[0]));
                runtime_throw("runtime: mcall function returned");
        }
 }
@@ -450,7 +450,7 @@ void getTraceback(G* me, G* gp)
 #ifdef USING_SPLIT_STACK
        __splitstack_getcontext((void*)(&me->stackcontext[0]));
 #endif
-       getcontext(ucontext_arg(&me->context[0]));
+       __go_getcontext(ucontext_arg(&me->context[0]));
 
        if (gp->traceback != 0) {
                runtime_gogo(gp);
@@ -493,7 +493,7 @@ doscanstackswitch(G* me, G* gp)
 #ifdef USING_SPLIT_STACK
        __splitstack_getcontext((void*)(&me->stackcontext[0]));
 #endif
-       getcontext(ucontext_arg(&me->context[0]));
+       __go_getcontext(ucontext_arg(&me->context[0]));
 
        if(me->entry != nil) {
                // Got here from mcall.
@@ -574,7 +574,7 @@ runtime_mstart(void *arg)
 
        // Save the currently active context.  This will return
        // multiple times via the setcontext call in mcall.
-       getcontext(ucontext_arg(&gp->context[0]));
+       __go_getcontext(ucontext_arg(&gp->context[0]));
 
        if(gp->traceback != 0) {
                // Got here from getTraceback.
@@ -652,7 +652,7 @@ setGContext(void)
        gp->gcinitialsp2 = secondary_stack_pointer();
        gp->gcnextsp2 = (uintptr)(gp->gcinitialsp2);
 #endif
-       getcontext(ucontext_arg(&gp->context[0]));
+       __go_getcontext(ucontext_arg(&gp->context[0]));
 
        if(gp->entry != nil) {
                // Got here from mcall.
@@ -672,13 +672,11 @@ void makeGContext(G*, byte*, uintptr)
 // makeGContext makes a new context for a g.
 void
 makeGContext(G* gp, byte* sp, uintptr spsize) {
-       ucontext_t *uc;
+       __go_context_t *uc;
 
        uc = ucontext_arg(&gp->context[0]);
-       getcontext(uc);
-       uc->uc_stack.ss_sp = sp;
-       uc->uc_stack.ss_size = (size_t)spsize;
-       makecontext(uc, kickoff, 0);
+       __go_getcontext(uc);
+       __go_makecontext(uc, kickoff, sp, (size_t)spsize);
 }
 
 // The goroutine g is about to enter a system call.
@@ -700,7 +698,7 @@ runtime_entersyscall()
        // Save the registers in the g structure so that any pointers
        // held in registers will be seen by the garbage collector.
        if (!runtime_usestackmaps)
-               getcontext(ucontext_arg(&g->gcregs[0]));
+               __go_getcontext(ucontext_arg(&g->gcregs[0]));
 
        // Note that if this function does save any registers itself,
        // we might store the wrong value in the call to getcontext.
@@ -747,7 +745,7 @@ runtime_entersyscallblock()
        // Save the registers in the g structure so that any pointers
        // held in registers will be seen by the garbage collector.
        if (!runtime_usestackmaps)
-               getcontext(ucontext_arg(&g->gcregs[0]));
+               __go_getcontext(ucontext_arg(&g->gcregs[0]));
 
        // See comment in runtime_entersyscall.
        doentersyscallblock((uintptr)runtime_getcallerpc(),
Index: libgo/runtime/runtime.h
===================================================================
--- libgo/runtime/runtime.h     (revision 271669)
+++ libgo/runtime/runtime.h     (working copy)
@@ -510,3 +510,20 @@ bool probestackmaps(void)
 // older versions of glibc when a SIGPROF signal arrives while
 // collecting a backtrace.
 extern uint32 __go_runtime_in_callers;
+
+// Cheaper context switch functions.  Currently only defined on
+// Linux/AMD64.
+#if defined(__x86_64__) && defined(__linux__) && !defined(__CET__)
+typedef struct {
+       uint64 regs[8]; // slots 0..7: rbp, rbx, r12, r13, r14, r15, sp, pc (offsets in go-context.S)
+} __go_context_t;
+int __go_getcontext(__go_context_t*);  // saves registers, SP and return PC; callers ignore the result
+int __go_setcontext(__go_context_t*);  // restores the saved context and jumps to its PC; does not return
+void __go_makecontext(__go_context_t*, void (*)(), void*, size_t);     // sets the entry function and the stack (base, size)
+#else
+#define __go_context_t ucontext_t
+#define __go_getcontext(c)     getcontext(c)
+#define __go_setcontext(c)     setcontext(c)
+#define __go_makecontext(c, fn, sp, size) \
+       ((c)->uc_stack.ss_sp = sp, (c)->uc_stack.ss_size = size, makecontext(c, fn, 0))
+#endif

Reply via email to