Module Name: src
Committed By: riastradh
Date: Wed Mar 1 08:38:50 UTC 2023
Modified Files:
src/sys/arch/amd64/amd64: locore.S spl.S
src/sys/arch/i386/i386: locore.S spl.S
Log Message:
x86: Expand on comments on ordering around stores to ci_curlwp.
No functional change intended.
PR kern/57240
To generate a diff of this commit:
cvs rdiff -u -r1.216 -r1.217 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.48 -r1.49 src/sys/arch/amd64/amd64/spl.S
cvs rdiff -u -r1.192 -r1.193 src/sys/arch/i386/i386/locore.S
cvs rdiff -u -r1.57 -r1.58 src/sys/arch/i386/i386/spl.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.216 src/sys/arch/amd64/amd64/locore.S:1.217
--- src/sys/arch/amd64/amd64/locore.S:1.216 Sat Feb 25 18:04:42 2023
+++ src/sys/arch/amd64/amd64/locore.S Wed Mar 1 08:38:50 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $ */
+/* $NetBSD: locore.S,v 1.217 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright-o-rama!
@@ -1177,8 +1177,32 @@ ENTRY(cpu_switchto)
movq PCB_RBP(%r14),%rbp
/*
- * Set curlwp. This must be globally visible in order to permit
- * non-interlocked mutex release.
+ * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+ * order to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by oldlwp must be visible to other
+ * CPUs before we set ci_curlwp := newlwp on this one,
+ * requiring a store-before-store barrier.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. ci_curlwp := newlwp must be visible on all other CPUs
+ * before any subsequent mutex_exit by newlwp can even test
+ * whether there might be waiters, requiring a
+ * store-before-load barrier.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit.
*/
movq %r12,%rcx
xchgq %rcx,CPUVAR(CURLWP)
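
The protocol the new comment describes can be restated in portable C.
The following is only an illustrative sketch with made-up names
(curlwp_slot, mtx_waiters, owner_still_running), not code from
kern_mutex.c; the seq_cst exchange plays the role of the XCHG above,
supplying both the store-before-store and the store-before-load
ordering.

    #include <stdatomic.h>
    #include <stdbool.h>

    struct lwp;

    /* Per-CPU "who is running here" slot, standing in for ci_curlwp. */
    static _Atomic(struct lwp *) curlwp_slot;

    /* Has anyone queued itself as a waiter on the mutex? */
    static atomic_bool mtx_waiters;

    /*
     * Switching CPU: publish "newlwp now runs here".  The seq_cst
     * exchange orders oldlwp's earlier stores (its mutex_exit) before
     * the update, and the update before newlwp's later loads (its
     * mutex_exit testing mtx_waiters).
     */
    void
    switch_curlwp(struct lwp *newlwp)
    {
        (void)atomic_exchange(&curlwp_slot, newlwp);
    }

    /*
     * Releasing CPU, fast path: skip waking waiters only if the
     * waiters flag is clear when read after curlwp_slot has been
     * published -- the store-before-load half.
     */
    bool
    release_fast_path(void)
    {
        return !atomic_load(&mtx_waiters);
    }

    /*
     * Contending CPU: announce ourselves as a waiter, then check
     * whether the owner is still on its CPU before blocking.
     */
    bool
    owner_still_running(const struct lwp *owner)
    {
        atomic_store(&mtx_waiters, true);
        return atomic_load(&curlwp_slot) == owner;
    }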
Index: src/sys/arch/amd64/amd64/spl.S
diff -u src/sys/arch/amd64/amd64/spl.S:1.48 src/sys/arch/amd64/amd64/spl.S:1.49
--- src/sys/arch/amd64/amd64/spl.S:1.48 Wed Sep 7 00:40:18 2022
+++ src/sys/arch/amd64/amd64/spl.S Wed Mar 1 08:38:50 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: spl.S,v 1.48 2022/09/07 00:40:18 knakahara Exp $ */
+/* $NetBSD: spl.S,v 1.49 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright (c) 2003 Wasabi Systems, Inc.
@@ -116,6 +116,20 @@ IDTVEC(softintr)
movq IS_LWP(%rax),%rdi /* switch to handler LWP */
movq L_PCB(%rdi),%rdx
movq L_PCB(%r15),%rcx
+ /*
+ * Simple MOV to set curlwp to softlwp. See below on ordering
+ * required to restore softlwp like cpu_switchto.
+ *
+ * 1. Don't need store-before-store barrier because x86 is TSO.
+ *
+ * 2. Don't need store-before-load barrier because when we
+ * enter a softint lwp, it can't be holding any mutexes, so
+ * it can't release any until after it has acquired them, so
+ * we need not participate in the protocol with
+ * mutex_vector_enter barriers here.
+ *
+ * Hence no need for XCHG or barriers around MOV.
+ */
movq %rdi,CPUVAR(CURLWP)
#ifdef KASAN
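
In the same portable-C terms the MOV above needs much less: a minimal
sketch, again with curlwp_slot as a stand-in for ci_curlwp rather than
a real kernel symbol.

    #include <stdatomic.h>

    struct lwp;

    /* Stand-in for ci_curlwp. */
    static _Atomic(struct lwp *) curlwp_slot;

    /*
     * Item 1 (store-before-store) is what a release store expresses;
     * on x86 a plain MOV already has that property under TSO.  Item 2
     * needs no fence at all: the softint lwp holds no mutexes yet, so
     * it has no mutex_exit load that must be ordered after this store.
     */
    void
    softint_set_curlwp(struct lwp *softlwp)
    {
        atomic_store_explicit(&curlwp_slot, softlwp,
            memory_order_release);
    }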
@@ -158,9 +172,31 @@ IDTVEC(softintr)
movq PCB_RSP(%rcx),%rsp
/*
- * for non-interlocked mutex release to work safely the change
- * to ci_curlwp must not languish in the store buffer. therefore
- * we use XCHG and not MOV here. see kern_mutex.c.
+ * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by the softint must be visible to
+ * other CPUs before we restore curlwp on this one,
+ * requiring store-before-store ordering.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. Restoring curlwp must be visible on all other CPUs before
+ * any subsequent mutex_exit on this one can even test
+ * whether there might be waiters, requiring
+ * store-before-load ordering.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit. See also cpu_switchto.
*/
xchgq %r15,CPUVAR(CURLWP) /* restore curlwp */
popq %r15 /* unwind switchframe */
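
For contrast with the TSO parentheticals above, this is roughly what
the restore would look like on a weakly ordered machine, written as a
sketch against the portable C11 fence interface; curlwp_slot is again
an illustrative stand-in, and real MD code would use its machine's own
barrier primitives instead.

    #include <stdatomic.h>

    struct lwp;

    /* Stand-in for ci_curlwp. */
    static _Atomic(struct lwp *) curlwp_slot;

    /*
     * A release fence before the store gives (1) store-before-store;
     * a seq_cst fence after it gives (2) store-before-load.  On x86
     * the single XCHG provides both.
     */
    void
    restore_curlwp_weak(struct lwp *prevlwp)
    {
        atomic_thread_fence(memory_order_release);      /* (1) */
        atomic_store_explicit(&curlwp_slot, prevlwp,
            memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);      /* (2) */
    }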
Index: src/sys/arch/i386/i386/locore.S
diff -u src/sys/arch/i386/i386/locore.S:1.192 src/sys/arch/i386/i386/locore.S:1.193
--- src/sys/arch/i386/i386/locore.S:1.192 Sat Feb 25 18:35:54 2023
+++ src/sys/arch/i386/i386/locore.S Wed Mar 1 08:38:50 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $ */
+/* $NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright-o-rama!
@@ -128,7 +128,7 @@
*/
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $");
#include "opt_copy_symtab.h"
#include "opt_ddb.h"
@@ -1401,8 +1401,32 @@ ENTRY(cpu_switchto)
movl PCB_ESP(%ebx),%esp
/*
- * Set curlwp. This must be globally visible in order to permit
- * non-interlocked mutex release.
+ * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+ * order to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by oldlwp must be visible to other
+ * CPUs before we set ci_curlwp := newlwp on this one,
+ * requiring a store-before-store barrier.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. ci_curlwp := newlwp must be visible on all other CPUs
+ * before any subsequent mutex_exit by newlwp can even test
+ * whether there might be waiters, requiring a
+ * store-before-load barrier.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit.
*/
movl %edi,%ecx
xchgl %ecx,CPUVAR(CURLWP)
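
The reordering that item 2 above rules out is the classic store
buffering pattern.  A small user-space litmus test (hypothetical,
pthreads plus C11 relaxed atomics, nothing NetBSD-specific) shows it:
with plain stores and loads, both threads can miss each other's store,
which is exactly the case where a releasing CPU would skip waking a
waiter that has already concluded the owner is off-CPU.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* x stands for the ci_curlwp update, y for the waiters flag. */
    static atomic_int x, y;
    static int r0, r1;

    static void *
    cpu_a(void *arg)
    {
        atomic_store_explicit(&x, 1, memory_order_relaxed);
        r0 = atomic_load_explicit(&y, memory_order_relaxed);
        return arg;
    }

    static void *
    cpu_b(void *arg)
    {
        atomic_store_explicit(&y, 1, memory_order_relaxed);
        r1 = atomic_load_explicit(&x, memory_order_relaxed);
        return arg;
    }

    int
    main(void)
    {
        for (int i = 0; i < 100000; i++) {
            pthread_t a, b;

            atomic_store(&x, 0);
            atomic_store(&y, 0);
            pthread_create(&a, NULL, cpu_a, NULL);
            pthread_create(&b, NULL, cpu_b, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            if (r0 == 0 && r1 == 0)
                printf("store buffering observed at iteration %d\n", i);
        }
        return 0;
    }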
Index: src/sys/arch/i386/i386/spl.S
diff -u src/sys/arch/i386/i386/spl.S:1.57 src/sys/arch/i386/i386/spl.S:1.58
--- src/sys/arch/i386/i386/spl.S:1.57 Thu Sep 8 06:57:44 2022
+++ src/sys/arch/i386/i386/spl.S Wed Mar 1 08:38:50 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $ */
+/* $NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $");
#include "opt_ddb.h"
#include "opt_spldebug.h"
@@ -384,6 +384,20 @@ IDTVEC(softintr)
STI(%esi)
movl CPUVAR(CURLWP),%esi
movl IS_LWP(%eax),%edi /* switch to handler LWP */
+ /*
+ * Simple MOV to set curlwp to softlwp. See below on ordering
+ * required to restore softlwp like cpu_switchto.
+ *
+ * 1. Don't need store-before-store barrier because x86 is TSO.
+ *
+ * 2. Don't need store-before-load barrier because when we
+ * enter a softint lwp, it can't be holding any mutexes, so
+ * it can't release any until after it has acquired them, so
+ * we need not participate in the protocol with
+ * mutex_vector_enter barriers here.
+ *
+ * Hence no need for XCHG or barriers around MOV.
+ */
movl %edi,CPUVAR(CURLWP)
movl L_PCB(%edi),%edx
movl L_PCB(%esi),%ecx
@@ -399,9 +413,31 @@ IDTVEC(softintr)
movl PCB_ESP(%ecx),%esp
/*
- * for non-interlocked mutex release to work safely the change
- * to ci_curlwp must not languish in the store buffer. therefore
- * we use XCHG and not MOV here. see kern_mutex.c.
+ * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by the softint must be visible to
+ * other CPUs before we restore curlwp on this one,
+ * requiring store-before-store ordering.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. Restoring curlwp must be visible on all other CPUs before
+ * any subsequent mutex_exit on this one can even test
+ * whether there might be waiters, requiring
+ * store-before-load ordering.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit. See also cpu_switchto.
*/
xchgl %esi,CPUVAR(CURLWP) /* restore ci_curlwp */
popl %edi /* unwind switchframe */