Module Name:	src
Committed By:	riastradh
Date:		Wed Mar  1 08:38:50 UTC 2023
Modified Files:
	src/sys/arch/amd64/amd64: locore.S spl.S
	src/sys/arch/i386/i386: locore.S spl.S

Log Message:
x86: Expand on comments on ordering around stores to ci_curlwp.

No functional change intended.

PR kern/57240

To generate a diff of this commit:
cvs rdiff -u -r1.216 -r1.217 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.48 -r1.49 src/sys/arch/amd64/amd64/spl.S
cvs rdiff -u -r1.192 -r1.193 src/sys/arch/i386/i386/locore.S
cvs rdiff -u -r1.57 -r1.58 src/sys/arch/i386/i386/spl.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
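As an illustration of the ordering the log message refers to, here is a
minimal sketch in C11 atomics.  It is not the kernel's actual code (the
real logic lives in the assembly below and in kern_mutex.c); the types
and function names here are simplified stand-ins.

#include <stdatomic.h>
#include <stdbool.h>

struct lwp;

struct cpu_info {
	_Atomic(struct lwp *) ci_curlwp;	/* lwp running on this CPU */
};

/*
 * Sketch of cpu_switchto's publication of ci_curlwp.  A seq_cst
 * exchange stands in for the x86 XCHG: it gives both the
 * store-before-store ordering (oldlwp's prior mutex_exit stores
 * become visible first) and the store-before-load ordering (the
 * new ci_curlwp value is visible before newlwp's next mutex_exit
 * can load a waiters flag).
 */
static void
cpu_switchto_sketch(struct cpu_info *ci, struct lwp *newlwp)
{
	(void)atomic_exchange(&ci->ci_curlwp, newlwp);
}

/*
 * Sketch of the test in mutex_vector_enter that consumes the
 * published value: an adaptive mutex spins only while the owner
 * is still the lwp running on its CPU, and may block otherwise.
 */
static bool
mutex_owner_running_sketch(struct cpu_info *ci, struct lwp *owner)
{
	return atomic_load(&ci->ci_curlwp) == owner;
}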
Modified files:

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.216 src/sys/arch/amd64/amd64/locore.S:1.217
--- src/sys/arch/amd64/amd64/locore.S:1.216	Sat Feb 25 18:04:42 2023
+++ src/sys/arch/amd64/amd64/locore.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $	*/
+/*	$NetBSD: locore.S,v 1.217 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1177,8 +1177,32 @@ ENTRY(cpu_switchto)
 	movq	PCB_RBP(%r14),%rbp
 
 	/*
-	 * Set curlwp.  This must be globally visible in order to permit
-	 * non-interlocked mutex release.
+	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+	 * order to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
 	 */
 	movq	%r12,%rcx
 	xchgq	%rcx,CPUVAR(CURLWP)

Index: src/sys/arch/amd64/amd64/spl.S
diff -u src/sys/arch/amd64/amd64/spl.S:1.48 src/sys/arch/amd64/amd64/spl.S:1.49
--- src/sys/arch/amd64/amd64/spl.S:1.48	Wed Sep  7 00:40:18 2022
+++ src/sys/arch/amd64/amd64/spl.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: spl.S,v 1.48 2022/09/07 00:40:18 knakahara Exp $	*/
+/*	$NetBSD: spl.S,v 1.49 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -116,6 +116,20 @@ IDTVEC(softintr)
 	movq	IS_LWP(%rax),%rdi	/* switch to handler LWP */
 	movq	L_PCB(%rdi),%rdx
 	movq	L_PCB(%r15),%rcx
+	/*
+	 * Simple MOV to set curlwp to softlwp.  See below on ordering
+	 * required to restore softlwp like cpu_switchto.
+	 *
+	 * 1. Don't need store-before-store barrier because x86 is TSO.
+	 *
+	 * 2. Don't need store-before-load barrier because when we
+	 *    enter a softint lwp, it can't be holding any mutexes, so
+	 *    it can't release any until after it has acquired them, so
+	 *    we need not participate in the protocol with
+	 *    mutex_vector_enter barriers here.
+	 *
+	 * Hence no need for XCHG or barriers around MOV.
+	 */
 	movq	%rdi,CPUVAR(CURLWP)
 
 #ifdef KASAN
@@ -158,9 +172,31 @@ IDTVEC(softintr)
 	movq	PCB_RSP(%rcx),%rsp
 
 	/*
-	 * for non-interlocked mutex release to work safely the change
-	 * to ci_curlwp must not languish in the store buffer. therefore
-	 * we use XCHG and not MOV here. see kern_mutex.c.
+	 * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by the softint must be visible to
+	 *    other CPUs before we restore curlwp on this one,
+	 *    requiring store-before-store ordering.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. Restoring curlwp must be visible on all other CPUs before
+	 *    any subsequent mutex_exit on this one can even test
+	 *    whether there might be waiters, requiring
+	 *    store-before-load ordering.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.  See also cpu_switchto.
 	 */
 	xchgq	%r15,CPUVAR(CURLWP)	/* restore curlwp */
 	popq	%r15			/* unwind switchframe */

Index: src/sys/arch/i386/i386/locore.S
diff -u src/sys/arch/i386/i386/locore.S:1.192 src/sys/arch/i386/i386/locore.S:1.193
--- src/sys/arch/i386/i386/locore.S:1.192	Sat Feb 25 18:35:54 2023
+++ src/sys/arch/i386/i386/locore.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $	*/
+/*	$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -128,7 +128,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_copy_symtab.h"
 #include "opt_ddb.h"
@@ -1401,8 +1401,32 @@ ENTRY(cpu_switchto)
 	movl	PCB_ESP(%ebx),%esp
 
 	/*
-	 * Set curlwp.  This must be globally visible in order to permit
-	 * non-interlocked mutex release.
+	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+	 * order to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
 	 */
 	movl	%edi,%ecx
 	xchgl	%ecx,CPUVAR(CURLWP)

Index: src/sys/arch/i386/i386/spl.S
diff -u src/sys/arch/i386/i386/spl.S:1.57 src/sys/arch/i386/i386/spl.S:1.58
--- src/sys/arch/i386/i386/spl.S:1.57	Thu Sep  8 06:57:44 2022
+++ src/sys/arch/i386/i386/spl.S	Wed Mar  1 08:38:50 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $	*/
+/*	$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_spldebug.h"
@@ -384,6 +384,20 @@ IDTVEC(softintr)
 	STI(%esi)
 	movl	CPUVAR(CURLWP),%esi
 	movl	IS_LWP(%eax),%edi	/* switch to handler LWP */
+	/*
+	 * Simple MOV to set curlwp to softlwp.  See below on ordering
+	 * required to restore softlwp like cpu_switchto.
+	 *
+	 * 1. Don't need store-before-store barrier because x86 is TSO.
+	 *
+	 * 2. Don't need store-before-load barrier because when we
+	 *    enter a softint lwp, it can't be holding any mutexes, so
+	 *    it can't release any until after it has acquired them, so
+	 *    we need not participate in the protocol with
+	 *    mutex_vector_enter barriers here.
+	 *
+	 * Hence no need for XCHG or barriers around MOV.
+	 */
 	movl	%edi,CPUVAR(CURLWP)
 	movl	L_PCB(%edi),%edx
 	movl	L_PCB(%esi),%ecx
@@ -399,9 +413,31 @@ IDTVEC(softintr)
 	movl	PCB_ESP(%ecx),%esp
 
 	/*
-	 * for non-interlocked mutex release to work safely the change
-	 * to ci_curlwp must not languish in the store buffer. therefore
-	 * we use XCHG and not MOV here. see kern_mutex.c.
+	 * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by the softint must be visible to
+	 *    other CPUs before we restore curlwp on this one,
+	 *    requiring store-before-store ordering.
+	 *
+	 *    (This is always guaranteed by the x86 memory model, TSO,
+	 *    but other architectures require an explicit barrier before
+	 *    the store to ci->ci_curlwp.)
+	 *
+	 * 2. Restoring curlwp must be visible on all other CPUs before
+	 *    any subsequent mutex_exit on this one can even test
+	 *    whether there might be waiters, requiring
+	 *    store-before-load ordering.
+	 *
+	 *    (This is the only ordering x86 TSO ever requires any kind
+	 *    of barrier for -- in this case, we take advantage of the
+	 *    sequential consistency implied by XCHG to obviate the
+	 *    need for MFENCE or something.)
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.  See also cpu_switchto.
 	 */
 	xchgl	%esi,CPUVAR(CURLWP)	/* restore ci_curlwp */
 	popl	%edi			/* unwind switchframe */
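The two softint stores annotated above can be rendered in the same
sketchy C11 style (again illustrative only; the kernel does this in
assembly, and the function names here are invented):

#include <stdatomic.h>

struct lwp;

struct cpu_info {
	_Atomic(struct lwp *) ci_curlwp;
};

/*
 * Entering the softint lwp: it cannot hold any mutexes yet, so
 * there is no prior mutex_exit to order and no subsequent one
 * until after it has acquired a mutex.  A relaxed store suffices,
 * matching the bare MOV in the diffs above.
 */
static void
softint_enter_sketch(struct cpu_info *ci, struct lwp *softlwp)
{
	atomic_store_explicit(&ci->ci_curlwp, softlwp,
	    memory_order_relaxed);
}

/*
 * Returning to the interrupted lwp: the softint may have released
 * mutexes, and the resumed lwp may call mutex_exit next, so both
 * orderings from cpu_switchto apply.  A seq_cst exchange matches
 * the XCHG.
 */
static void
softint_return_sketch(struct cpu_info *ci, struct lwp *pinned_lwp)
{
	(void)atomic_exchange(&ci->ci_curlwp, pinned_lwp);
}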