Module Name:    src
Committed By:   riastradh
Date:           Wed Mar  1 08:38:50 UTC 2023

Modified Files:
        src/sys/arch/amd64/amd64: locore.S spl.S
        src/sys/arch/i386/i386: locore.S spl.S

Log Message:
x86: Expand on comments on ordering around stores to ci_curlwp.

No functional change intended.

PR kern/57240


To generate a diff of this commit:
cvs rdiff -u -r1.216 -r1.217 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.48 -r1.49 src/sys/arch/amd64/amd64/spl.S
cvs rdiff -u -r1.192 -r1.193 src/sys/arch/i386/i386/locore.S
cvs rdiff -u -r1.57 -r1.58 src/sys/arch/i386/i386/spl.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.216 src/sys/arch/amd64/amd64/locore.S:1.217
--- src/sys/arch/amd64/amd64/locore.S:1.216	Sat Feb 25 18:04:42 2023
+++ src/sys/arch/amd64/amd64/locore.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $	*/
+/*	$NetBSD: locore.S,v 1.217 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1177,8 +1177,32 @@ ENTRY(cpu_switchto)
 	movq	PCB_RBP(%r14),%rbp
 
 	/*
-	 * Set curlwp.  This must be globally visible in order to permit
-	 * non-interlocked mutex release.
+	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+	 * order to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
 	 */
 	movq	%r12,%rcx
 	xchgq	%rcx,CPUVAR(CURLWP)

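The ordering described in the new comment is the classic store-buffering
(Dekker) pattern.  The following is a minimal C sketch of it, not the
kern_mutex.c code: the names cpu_a, cpu_b, curlwp_is_owner and waiters are
invented stand-ins.  It shows why the curlwp store and the later waiters
check must not be reordered: with plain stores and loads, even a TSO machine
allows the switching CPU to see no waiters while the contending CPU sees no
running owner and blocks forever, whereas a sequentially consistent exchange
(the role XCHG plays here) rules that outcome out.

/*
 * sb_sketch.c -- illustrative only, not NetBSD code.  Models the
 * ci_curlwp / waiters interaction as a store-buffering litmus test.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int curlwp_is_owner;	/* stands in for ci_curlwp == owner */
static _Atomic int waiters;		/* stands in for the waiters state */
static int a_saw_waiters, b_saw_owner_running;

/* Switching CPU: publish curlwp, then a later mutex_exit tests for waiters. */
static void *
cpu_a(void *arg)
{
	(void)arg;
	/* seq_cst exchange models XCHG: the store cannot pass the load below */
	atomic_exchange(&curlwp_is_owner, 1);
	a_saw_waiters = atomic_load(&waiters);
	return NULL;
}

/* Contending CPU: record itself as a waiter, then check whether the owner runs. */
static void *
cpu_b(void *arg)
{
	(void)arg;
	atomic_store(&waiters, 1);
	b_saw_owner_running = atomic_load(&curlwp_is_owner);
	return NULL;
}

int
main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, cpu_a, NULL);
	pthread_create(&b, NULL, cpu_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	if (!a_saw_waiters && !b_saw_owner_running)
		printf("lost wakeup: B would block, A would not wake it\n");
	else
		printf("ok: a_saw_waiters=%d b_saw_owner_running=%d\n",
		    a_saw_waiters, b_saw_owner_running);
	return 0;
}

Built with e.g. "cc -std=c11 -pthread sb_sketch.c", the lost-wakeup branch is
unreachable as written; weaken the operations to memory_order_relaxed and
that outcome becomes permitted, which is exactly the reordering the XCHG
(and the matching barrier in mutex_vector_enter) exists to prevent.
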
Index: src/sys/arch/amd64/amd64/spl.S
diff -u src/sys/arch/amd64/amd64/spl.S:1.48 src/sys/arch/amd64/amd64/spl.S:1.49
--- src/sys/arch/amd64/amd64/spl.S:1.48	Wed Sep  7 00:40:18 2022
+++ src/sys/arch/amd64/amd64/spl.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: spl.S,v 1.48 2022/09/07 00:40:18 knakahara Exp $	*/
+/*	$NetBSD: spl.S,v 1.49 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -116,6 +116,20 @@ IDTVEC(softintr)
 	movq	IS_LWP(%rax),%rdi	/* switch to handler LWP */
 	movq	L_PCB(%rdi),%rdx
 	movq	L_PCB(%r15),%rcx
+	/*
+	 * Simple MOV to set curlwp to softlwp.  See below for the
+	 * ordering required when curlwp is restored, as in cpu_switchto.
+	 *
+	 * 1. Don't need store-before-store barrier because x86 is TSO.
+	 *
+	 * 2. Don't need store-before-load barrier because when we
+	 *    enter a softint lwp, it can't be holding any mutexes, so
+	 *    it can't release any until after it has acquired them, so
+	 *    we need not participate in the barrier protocol with
+	 *    mutex_vector_enter here.
+	 *
+	 * Hence no need for XCHG or barriers around MOV.
+	 */
 	movq	%rdi,CPUVAR(CURLWP)
 
 #ifdef KASAN
@@ -158,9 +172,31 @@ IDTVEC(softintr)
 	movq	PCB_RSP(%rcx),%rsp
 
 	/*
-	 * for non-interlocked mutex release to work safely the change
-	 * to ci_curlwp must not languish in the store buffer. therefore
-	 * we use XCHG and not MOV here.  see kern_mutex.c.
+	 * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by the softint must be visible to
+	 *    other CPUs before we restore curlwp on this one,
+	 *    requiring store-before-store ordering.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. Restoring curlwp must be visible on all other CPUs before
+	 *    any subsequent mutex_exit on this one can even test
+	 *    whether there might be waiters, requiring
+	 *    store-before-load ordering.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.  See also cpu_switchto.
 	 */
 	xchgq	%r15,CPUVAR(CURLWP)	/* restore curlwp */
 	popq	%r15			/* unwind switchframe */

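The parenthetical notes about non-TSO architectures can be made concrete.
The sketch below expresses the two orderings in C11 atomics; it is only an
illustration, not how any NetBSD port spells this (ports use machine-dependent
code and the membar_*() primitives), and ci_set_curlwp() is an invented
helper.

#include <stdatomic.h>

struct lwp;
struct cpu_info {
	struct lwp *_Atomic ci_curlwp;
};

/* Invented helper: publish newlwp as curlwp with the ordering the
 * comments in this commit require on a weakly ordered machine. */
static inline void
ci_set_curlwp(struct cpu_info *ci, struct lwp *newlwp)
{
	/*
	 * 1. Store-before-store: everything the outgoing lwp stored
	 *    (e.g. its mutex_exit) must be visible before ci_curlwp
	 *    changes; a release store provides this.
	 */
	atomic_store_explicit(&ci->ci_curlwp, newlwp, memory_order_release);
	/*
	 * 2. Store-before-load: the new ci_curlwp must be visible
	 *    before the incoming lwp's next mutex_exit loads the
	 *    waiters state; a seq_cst fence after the store provides
	 *    this (the other side needs its matching fence, as
	 *    mutex_vector_enter has).
	 */
	atomic_thread_fence(memory_order_seq_cst);

	/*
	 * Alternatively, a single seq_cst read/modify/write -- the C11
	 * analogue of the x86 XCHG used here -- gives both orderings
	 * at once:
	 *
	 *	atomic_exchange(&ci->ci_curlwp, newlwp);
	 */
}

On x86 the first ordering comes for free from TSO and the second is bought
with XCHG, which is the point of the comments in this commit.
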
Index: src/sys/arch/i386/i386/locore.S
diff -u src/sys/arch/i386/i386/locore.S:1.192 src/sys/arch/i386/i386/locore.S:1.193
--- src/sys/arch/i386/i386/locore.S:1.192	Sat Feb 25 18:35:54 2023
+++ src/sys/arch/i386/i386/locore.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $	*/
+/*	$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -128,7 +128,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_copy_symtab.h"
 #include "opt_ddb.h"
@@ -1401,8 +1401,32 @@ ENTRY(cpu_switchto)
 	movl	PCB_ESP(%ebx),%esp
 
 	/*
-	 * Set curlwp.  This must be globally visible in order to permit
-	 * non-interlocked mutex release.
+	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+	 * order to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
 	 */
 	movl	%edi,%ecx
 	xchgl	%ecx,CPUVAR(CURLWP)

Index: src/sys/arch/i386/i386/spl.S
diff -u src/sys/arch/i386/i386/spl.S:1.57 src/sys/arch/i386/i386/spl.S:1.58
--- src/sys/arch/i386/i386/spl.S:1.57	Thu Sep  8 06:57:44 2022
+++ src/sys/arch/i386/i386/spl.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $	*/
+/*	$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_spldebug.h"
@@ -384,6 +384,20 @@ IDTVEC(softintr)
 	STI(%esi)
 	movl	CPUVAR(CURLWP),%esi
 	movl	IS_LWP(%eax),%edi	/* switch to handler LWP */
+	/*
+	 * Simple MOV to set curlwp to softlwp.  See below for the
+	 * ordering required when curlwp is restored, as in cpu_switchto.
+	 *
+	 * 1. Don't need store-before-store barrier because x86 is TSO.
+	 *
+	 * 2. Don't need store-before-load barrier because when we
+	 *    enter a softint lwp, it can't be holding any mutexes, so
+	 *    it can't release any until after it has acquired them, so
+	 *    we need not participate in the barrier protocol with
+	 *    mutex_vector_enter here.
+	 *
+	 * Hence no need for XCHG or barriers around MOV.
+	 */
 	movl	%edi,CPUVAR(CURLWP)
 	movl	L_PCB(%edi),%edx
 	movl	L_PCB(%esi),%ecx
@@ -399,9 +413,31 @@ IDTVEC(softintr)
 	movl	PCB_ESP(%ecx),%esp
 
 	/*
-	 * for non-interlocked mutex release to work safely the change
-	 * to ci_curlwp must not languish in the store buffer. therefore
-	 * we use XCHG and not MOV here.  see kern_mutex.c.
+	 * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by the softint must be visible to
+	 *    other CPUs before we restore curlwp on this one,
+	 *    requiring store-before-store ordering.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. Restoring curlwp must be visible on all other CPUs before
+	 *    any subsequent mutex_exit on this one can even test
+	 *    whether there might be waiters, requiring
+	 *    store-before-load ordering.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.  See also cpu_switchto.
 	 */
 	xchgl	%esi,CPUVAR(CURLWP)	/* restore ci_curlwp */
 	popl	%edi			/* unwind switchframe */
