Module Name:	src
Committed By:	martin
Date:		Mon Jul 31 13:56:15 UTC 2023

Modified Files:
	src/sys/arch/aarch64/aarch64 [netbsd-8]: locore.S
	src/sys/arch/arm/arm32 [netbsd-8]: cpuswitch.S
	src/sys/arch/evbmips/ingenic [netbsd-8]: cpu_startup.S
	src/sys/arch/hppa/include [netbsd-8]: cpu.h
	src/sys/arch/ia64/ia64 [netbsd-8]: machdep.c vm_machdep.c
	src/sys/arch/mips/include [netbsd-8]: asm.h
	src/sys/arch/mips/mips [netbsd-8]: locore.S locore_mips3.S
	src/sys/arch/powerpc/powerpc [netbsd-8]: locore_subr.S
	src/sys/arch/sparc/sparc [netbsd-8]: locore.s
	src/sys/arch/sparc64/sparc64 [netbsd-8]: locore.s
	src/sys/arch/vax/vax [netbsd-8]: subr.S

Log Message:
Pull up following revision(s) (requested by riastradh in ticket #1859):

	sys/arch/ia64/ia64/vm_machdep.c: revision 1.18
	sys/arch/powerpc/powerpc/locore_subr.S: revision 1.67
	sys/arch/aarch64/aarch64/locore.S: revision 1.91
	sys/arch/mips/include/asm.h: revision 1.74
	sys/arch/hppa/include/cpu.h: revision 1.13
	sys/arch/arm/arm/armv6_start.S: revision 1.38
	  (applied also to sys/arch/arm/cortex/a9_mpsubr.S,
	   sys/arch/arm/cortex/cortex_init.S)
	sys/arch/evbmips/ingenic/cpu_startup.S: revision 1.2
	sys/arch/mips/mips/locore.S: revision 1.229
	sys/arch/alpha/include/asm.h: revision 1.45
	  (applied to sys/arch/alpha/alpha/multiproc.s)
	sys/arch/sparc64/sparc64/locore.s: revision 1.432
	sys/arch/vax/vax/subr.S: revision 1.42
	sys/arch/mips/mips/locore_mips3.S: revision 1.116
	sys/arch/ia64/ia64/machdep.c: revision 1.44
	sys/arch/arm/arm32/cpuswitch.S: revision 1.106
	sys/arch/sparc/sparc/locore.s: revision 1.284
	(all via patch)

aarch64: Add missing barriers in cpu_switchto.  Details in comments.

Note: This is a conservative change that inserts a barrier where there
was a comment saying none is needed, which is probably correct.  The
goal of this change is to systematically add barriers to be confident
in correctness; subsequent changes may remove some barriers, as an
optimization, with an explanation of why each barrier is not needed.

PR kern/57240

alpha: Add missing barriers in cpu_switchto.  Details in comments.

arm32: Add missing barriers in cpu_switchto.  Details in comments.

hppa: Add missing barriers in cpu_switchto.
Not sure hppa has ever had working MULTIPROCESSOR, so maybe no
pullups needed?

ia64: Add missing barriers in cpu_switchto.
(ia64 has never really worked, so no pullups needed, right?)

mips: Add missing barriers in cpu_switchto.  Details in comments.

powerpc: Add missing barriers in cpu_switchto.  Details in comments.

sparc: Add missing barriers in cpu_switchto.

sparc64: Add missing barriers in cpu_switchto.  Details in comments.

vax: Note where cpu_switchto needs barriers.  Not sure vax has ever
had working MULTIPROCESSOR, though, and I'm not even sure how to
spell store-before-load barriers on VAX, so no functional change for
now.
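As a rough illustration of the two orderings the log message and the
in-tree comments describe, here is a minimal user-space sketch using
C11 atomics.  It is not the kernel's code: model_cpu_switchto,
model_owner_running, mtx_owner and struct cpu_info's layout are
made-up stand-ins, and the C fences merely correspond to the
store-before-store and store-before-load barriers the patch inserts
around the ci_curlwp store.

/* model_switch.c -- illustrative only; build with: cc -std=c11 model_switch.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct lwp { int l_id; };			/* stand-in for the kernel's lwp */

struct cpu_info {
	struct lwp *_Atomic ci_curlwp;		/* lwp currently on this CPU */
};

/* Assumed per-mutex owner word, cleared by an earlier mutex_exit. */
static struct lwp *_Atomic mtx_owner;

static void
model_cpu_switchto(struct cpu_info *ci, struct lwp *newlwp)
{
	/*
	 * 1. store-before-store: oldlwp's earlier mutex_exit store
	 *    (clearing mtx_owner) must be visible to other CPUs before
	 *    they can observe ci_curlwp == newlwp.
	 */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&ci->ci_curlwp, newlwp, memory_order_relaxed);
	/*
	 * 2. store-before-load: ci_curlwp == newlwp must be globally
	 *    visible before newlwp's next mutex_exit loads the waiter
	 *    state; only a full (Dekker-style) barrier provides this.
	 */
	atomic_thread_fence(memory_order_seq_cst);
}

/*
 * Sketch of the other side: mutex_vector_enter() keeps spinning
 * rather than blocking while the owner is still running on a CPU.
 */
static bool
model_owner_running(struct cpu_info *ci)
{
	struct lwp *owner = atomic_load_explicit(&mtx_owner,
	    memory_order_acquire);

	return owner != NULL &&
	    atomic_load_explicit(&ci->ci_curlwp,
		memory_order_acquire) == owner;
}

int
main(void)
{
	static struct cpu_info ci;
	static struct lwp l1 = { .l_id = 1 }, l2 = { .l_id = 2 };

	atomic_store(&mtx_owner, &l1);
	atomic_store(&ci.ci_curlwp, &l1);
	printf("owner running: %d\n", model_owner_running(&ci));

	atomic_store(&mtx_owner, NULL);		/* oldlwp's mutex_exit */
	model_cpu_switchto(&ci, &l2);
	printf("owner running: %d\n", model_owner_running(&ci));
	return 0;
}

In the patch itself these two fences become each architecture's
native barrier instruction issued around the ci_curlwp store in
cpu_switchto and in the softint fast paths: dmb ishst / dmb ish on
aarch64, dmb on ARMv7, sync on mips, hppa and powerpc, membar
#StoreStore / membar #StoreLoad on sparc64, and a makeshift ldstub on
32-bit sparc, as shown in the diff below.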
To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.1.22.1 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.90 -r1.90.10.1 src/sys/arch/arm/arm32/cpuswitch.S
cvs rdiff -u -r1.1 -r1.1.12.1 src/sys/arch/evbmips/ingenic/cpu_startup.S
cvs rdiff -u -r1.3 -r1.3.10.1 src/sys/arch/hppa/include/cpu.h
cvs rdiff -u -r1.38 -r1.38.6.1 src/sys/arch/ia64/ia64/machdep.c
cvs rdiff -u -r1.13 -r1.13.6.1 src/sys/arch/ia64/ia64/vm_machdep.c
cvs rdiff -u -r1.54 -r1.54.6.1 src/sys/arch/mips/include/asm.h
cvs rdiff -u -r1.208 -r1.208.8.1 src/sys/arch/mips/mips/locore.S
cvs rdiff -u -r1.113 -r1.113.8.1 src/sys/arch/mips/mips/locore_mips3.S
cvs rdiff -u -r1.55 -r1.55.6.1 src/sys/arch/powerpc/powerpc/locore_subr.S
cvs rdiff -u -r1.268.30.1 -r1.268.30.2 src/sys/arch/sparc/sparc/locore.s
cvs rdiff -u -r1.411 -r1.411.2.1 src/sys/arch/sparc64/sparc64/locore.s
cvs rdiff -u -r1.34 -r1.34.2.1 src/sys/arch/vax/vax/subr.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.1 src/sys/arch/aarch64/aarch64/locore.S:1.1.22.1 --- src/sys/arch/aarch64/aarch64/locore.S:1.1 Sun Aug 10 05:47:37 2014 +++ src/sys/arch/aarch64/aarch64/locore.S Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.1 2014/08/10 05:47:37 matt Exp $ */ +/* $NetBSD: locore.S,v 1.1.22.1 2023/07/31 13:56:14 martin Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -34,7 +34,7 @@ #include "opt_ddb.h" -RCSID("$NetBSD: locore.S,v 1.1 2014/08/10 05:47:37 matt Exp $") +RCSID("$NetBSD: locore.S,v 1.1.22.1 2023/07/31 13:56:14 martin Exp $") /* * At IPL_SCHED: @@ -99,7 +99,27 @@ ENTRY_NP(cpu_switchto) msr tpidr_el0, x4 // restore it mrs x3, tpidr_el1 // get curcpu + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. + */ + dmb ishst /* store-before-store */ str x1, [x3, #CI_CURLWP] // show as curlwp + dmb ish /* store-before-load */ /* * Restore callee save registers Index: src/sys/arch/arm/arm32/cpuswitch.S diff -u src/sys/arch/arm/arm32/cpuswitch.S:1.90 src/sys/arch/arm/arm32/cpuswitch.S:1.90.10.1 --- src/sys/arch/arm/arm32/cpuswitch.S:1.90 Wed Apr 8 12:07:40 2015 +++ src/sys/arch/arm/arm32/cpuswitch.S Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: cpuswitch.S,v 1.90 2015/04/08 12:07:40 matt Exp $ */ +/* $NetBSD: cpuswitch.S,v 1.90.10.1 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright 2003 Wasabi Systems, Inc. @@ -87,7 +87,7 @@ #include <arm/asm.h> #include <arm/locore.h> - RCSID("$NetBSD: cpuswitch.S,v 1.90 2015/04/08 12:07:40 matt Exp $") + RCSID("$NetBSD: cpuswitch.S,v 1.90.10.1 2023/07/31 13:56:15 martin Exp $") /* LINTSTUB: include <sys/param.h> */ @@ -205,8 +205,34 @@ ENTRY(cpu_switchto) mcr p15, 0, r6, c13, c0, 4 /* set current lwp */ #endif + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. 
+ */ + /* We have a new curlwp now so make a note of it */ +#ifdef _ARM_ARCH_7 + dmb /* store-before-store */ +#endif str r6, [r5, #(CI_CURLWP)] +#ifdef _ARM_ARCH_7 + dmb /* store-before-load */ +#endif + /* Get the new pcb */ ldr r7, [r6, #(L_PCB)] @@ -403,7 +429,13 @@ ENTRY_NP(softint_switch) #if defined(TPIDRPRW_IS_CURLWP) mcr p15, 0, r5, c13, c0, 4 /* save new lwp */ #endif +#ifdef _ARM_ARCH_7 + dmb /* for mutex_enter; see cpu_switchto */ +#endif str r5, [r7, #(CI_CURLWP)] /* save new lwp */ +#ifdef _ARM_ARCH_7 + dmb /* for mutex_enter; see cpu_switchto */ +#endif /* * Normally, we'd get {r8-r13} but since this is a softint lwp @@ -431,7 +463,13 @@ ENTRY_NP(softint_switch) #if defined(TPIDRPRW_IS_CURLWP) mcr p15, 0, r4, c13, c0, 4 /* restore pinned lwp */ #endif +#ifdef _ARM_ARCH_7 + dmb /* for mutex_enter; see cpu_switchto */ +#endif str r4, [r7, #(CI_CURLWP)] /* restore pinned lwp */ +#ifdef _ARM_ARCH_7 + dmb /* for mutex_enter; see cpu_switchto */ +#endif ldr sp, [r2, #(PCB_KSP)] /* now running on the old stack. */ /* At this point we can allow IRQ's again. */ Index: src/sys/arch/evbmips/ingenic/cpu_startup.S diff -u src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1 src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1.12.1 --- src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1 Fri Jan 29 01:54:14 2016 +++ src/sys/arch/evbmips/ingenic/cpu_startup.S Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_startup.S,v 1.1 2016/01/29 01:54:14 macallan Exp $ */ +/* $NetBSD: cpu_startup.S,v 1.1.12.1 2023/07/31 13:56:15 martin Exp $ */ /*- * Copyright (c) 2015 Michael Lorenz @@ -33,7 +33,7 @@ #include <sys/endian.h> #include <mips/asm.h> -RCSID("$NetBSD: cpu_startup.S,v 1.1 2016/01/29 01:54:14 macallan Exp $"); +RCSID("$NetBSD: cpu_startup.S,v 1.1.12.1 2023/07/31 13:56:15 martin Exp $"); #ifdef MULTIPROCESSOR @@ -56,6 +56,11 @@ NESTED_NOPROFILE(ingenic_trampoline, 0, nop beqz MIPS_CURLWP, 1b nop + /* + * No membar needed because we're not switching from a + * previous lwp, and the idle lwp we're switching to can't be + * holding locks already; see cpu_switchto. + */ PTR_S MIPS_CURLWP, CPU_INFO_CURLWP(a0) li v0, 0 Index: src/sys/arch/hppa/include/cpu.h diff -u src/sys/arch/hppa/include/cpu.h:1.3 src/sys/arch/hppa/include/cpu.h:1.3.10.1 --- src/sys/arch/hppa/include/cpu.h:1.3 Mon Jan 25 18:14:40 2016 +++ src/sys/arch/hppa/include/cpu.h Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.3 2016/01/25 18:14:40 christos Exp $ */ +/* $NetBSD: cpu.h,v 1.3.10.1 2023/07/31 13:56:14 martin Exp $ */ /* $OpenBSD: cpu.h,v 1.55 2008/07/23 17:39:35 kettenis Exp $ */ @@ -199,7 +199,26 @@ extern int cpu_revision; #define GET_CURLWP(r) mfctl CR_CURCPU, r ! ldw CI_CURLWP(r), r #define GET_CURLWP_SPACE(s, r) mfctl CR_CURCPU, r ! ldw CI_CURLWP(s, r), r -#define SET_CURLWP(r,t) mfctl CR_CURCPU, t ! stw r, CI_CURLWP(t) +/* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. 
+ */ +#define SET_CURLWP(r,t) \ + sync ! mfctl CR_CURCPU, t ! stw r, CI_CURLWP(t) ! sync #else /* MULTIPROCESSOR */ Index: src/sys/arch/ia64/ia64/machdep.c diff -u src/sys/arch/ia64/ia64/machdep.c:1.38 src/sys/arch/ia64/ia64/machdep.c:1.38.6.1 --- src/sys/arch/ia64/ia64/machdep.c:1.38 Sat Apr 8 17:46:01 2017 +++ src/sys/arch/ia64/ia64/machdep.c Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.38 2017/04/08 17:46:01 scole Exp $ */ +/* $NetBSD: machdep.c,v 1.38.6.1 2023/07/31 13:56:14 martin Exp $ */ /*- * Copyright (c) 2003,2004 Marcel Moolenaar @@ -662,7 +662,11 @@ ia64_init(void) /* - * Initialise process context. XXX: This should really be in cpu_switch + * Initialise process context. XXX: This should really be in cpu_switchto + * + * No membar needed because we're not switching from a + * previous lwp, and the idle lwp we're switching to can't be + * holding locks already; see cpu_switchto. */ ci->ci_curlwp = &lwp0; Index: src/sys/arch/ia64/ia64/vm_machdep.c diff -u src/sys/arch/ia64/ia64/vm_machdep.c:1.13 src/sys/arch/ia64/ia64/vm_machdep.c:1.13.6.1 --- src/sys/arch/ia64/ia64/vm_machdep.c:1.13 Sat Apr 8 17:38:43 2017 +++ src/sys/arch/ia64/ia64/vm_machdep.c Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: vm_machdep.c,v 1.13 2017/04/08 17:38:43 scole Exp $ */ +/* $NetBSD: vm_machdep.c,v 1.13.6.1 2023/07/31 13:56:14 martin Exp $ */ /* * Copyright (c) 2006 The NetBSD Foundation, Inc. @@ -37,6 +37,7 @@ #include <sys/proc.h> #include <sys/systm.h> #include <sys/cpu.h> +#include <sys/atomic.h> #include <machine/frame.h> #include <machine/md_var.h> @@ -77,9 +78,29 @@ cpu_switchto(lwp_t *oldlwp, lwp_t *newlw register uint64_t reg9 __asm("r9"); KASSERT(newlwp != NULL); - + + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. + */ + membar_producer(); /* store-before-store */ ci->ci_curlwp = newlwp; - + membar_sync(); /* store-before-load */ + /* required for lwp_startup, copy oldlwp into r9, "mov r9=in0" */ __asm __volatile("mov %0=%1" : "=r"(reg9) : "r"(oldlwp)); Index: src/sys/arch/mips/include/asm.h diff -u src/sys/arch/mips/include/asm.h:1.54 src/sys/arch/mips/include/asm.h:1.54.6.1 --- src/sys/arch/mips/include/asm.h:1.54 Sat Feb 25 21:16:50 2017 +++ src/sys/arch/mips/include/asm.h Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: asm.h,v 1.54 2017/02/25 21:16:50 joerg Exp $ */ +/* $NetBSD: asm.h,v 1.54.6.1 2023/07/31 13:56:14 martin Exp $ */ /* * Copyright (c) 1992, 1993 @@ -510,6 +510,32 @@ _C_LABEL(x): #define NOP_L /* nothing */ #endif +/* XXX pullup more mips barrier improvements here */ +#define SYNC_ACQ sync +#define SYNC_REL sync + +/* + * Store-before-load barrier. Do not use this unless you know what + * you're doing. + */ +#ifdef MULTIPROCESSOR +#define SYNC_DEKKER sync +#else +#define SYNC_DEKKER /* nothing */ +#endif + +/* + * Store-before-store and load-before-load barriers. 
These could be + * made weaker than release (load/store-before-store) and acquire + * (load-before-load/store) barriers, and newer MIPS does have + * instruction encodings for finer-grained barriers like this, but I + * dunno how to appropriately conditionalize their use or get the + * assembler to be happy with them, so we'll use these definitions for + * now. + */ +#define SYNC_PRODUCER SYNC_REL +#define SYNC_CONSUMER SYNC_ACQ + /* CPU dependent hook for cp0 load delays */ #if defined(MIPS1) || defined(MIPS2) || defined(MIPS3) #define MFC0_HAZARD sll $0,$0,1 /* super scalar nop */ Index: src/sys/arch/mips/mips/locore.S diff -u src/sys/arch/mips/mips/locore.S:1.208 src/sys/arch/mips/mips/locore.S:1.208.8.1 --- src/sys/arch/mips/mips/locore.S:1.208 Wed Nov 9 11:50:09 2016 +++ src/sys/arch/mips/mips/locore.S Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.208 2016/11/09 11:50:09 maya Exp $ */ +/* $NetBSD: locore.S,v 1.208.8.1 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright (c) 1992, 1993 @@ -63,7 +63,7 @@ #include <mips/trap.h> #include <mips/locore.h> -RCSID("$NetBSD: locore.S,v 1.208 2016/11/09 11:50:09 maya Exp $") +RCSID("$NetBSD: locore.S,v 1.208.8.1 2023/07/31 13:56:15 martin Exp $") #include "assym.h" @@ -286,7 +286,28 @@ NESTED(cpu_switchto, CALLFRAME_SIZ, ra) PTR_L t2, L_CPU(MIPS_CURLWP) nop # patchable load delay slot + + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. + */ + SYNC_PRODUCER /* XXX fixup to nop for uniprocessor boot */ PTR_S MIPS_CURLWP, CPU_INFO_CURLWP(t2) + SYNC_DEKKER /* XXX fixup to nop for uniprocessor boot */ /* Check for restartable atomic sequences (RAS) */ PTR_L a0, L_PROC(MIPS_CURLWP) # argument to ras_lookup @@ -437,7 +458,9 @@ NESTED(softint_fast_dispatch, CALLFRAME_ move MIPS_CURLWP, a0 # switch to softint lwp PTR_L s1, L_CPU(MIPS_CURLWP) # get curcpu() nop # patchable load delay slot + SYNC_PRODUCER /* XXX fixup */ /* for mutex_enter; see cpu_switchto */ PTR_S MIPS_CURLWP, CPU_INFO_CURLWP(s1) # ... + SYNC_DEKKER /* XXX fixup */ /* for mutex_enter; see cpu_switchto */ move s2, sp # remember sp move s3, t0 # remember curpcb @@ -448,7 +471,9 @@ NESTED(softint_fast_dispatch, CALLFRAME_ move sp, s2 # restore stack move MIPS_CURLWP, s0 # restore curlwp + SYNC_PRODUCER /* XXX fixup */ /* for mutex_enter; see cpu_switchto */ PTR_S MIPS_CURLWP, CPU_INFO_CURLWP(s1) # .... 
+ SYNC_DEKKER /* XXX fixup */ /* for mutex_enter; see cpu_switchto */ REG_L ra, CALLFRAME_RA(sp) # load early since we use it Index: src/sys/arch/mips/mips/locore_mips3.S diff -u src/sys/arch/mips/mips/locore_mips3.S:1.113 src/sys/arch/mips/mips/locore_mips3.S:1.113.8.1 --- src/sys/arch/mips/mips/locore_mips3.S:1.113 Wed Jul 27 09:32:35 2016 +++ src/sys/arch/mips/mips/locore_mips3.S Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore_mips3.S,v 1.113 2016/07/27 09:32:35 skrll Exp $ */ +/* $NetBSD: locore_mips3.S,v 1.113.8.1 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright (c) 1997 Jonathan Stone (hereinafter referred to as the author) @@ -92,7 +92,7 @@ #include <mips/asm.h> #include <mips/cpuregs.h> -RCSID("$NetBSD: locore_mips3.S,v 1.113 2016/07/27 09:32:35 skrll Exp $") +RCSID("$NetBSD: locore_mips3.S,v 1.113.8.1 2023/07/31 13:56:15 martin Exp $") #include "assym.h" @@ -796,6 +796,11 @@ NESTED_NOPROFILE(cpu_trampoline, 0, ra) nop beqz MIPS_CURLWP, 1b nop + /* + * No membar needed because we're not switching from a + * previous lwp, and the idle lwp we're switching to can't be + * holding locks already; see cpu_switchto. + */ PTR_S MIPS_CURLWP, CPU_INFO_CURLWP(a0) #ifdef _LP64 Index: src/sys/arch/powerpc/powerpc/locore_subr.S diff -u src/sys/arch/powerpc/powerpc/locore_subr.S:1.55 src/sys/arch/powerpc/powerpc/locore_subr.S:1.55.6.1 --- src/sys/arch/powerpc/powerpc/locore_subr.S:1.55 Tue Feb 28 17:35:29 2017 +++ src/sys/arch/powerpc/powerpc/locore_subr.S Mon Jul 31 13:56:14 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore_subr.S,v 1.55 2017/02/28 17:35:29 chs Exp $ */ +/* $NetBSD: locore_subr.S,v 1.55.6.1 2023/07/31 13:56:14 martin Exp $ */ /* * Copyright (c) 2001 Wasabi Systems, Inc. @@ -224,7 +224,32 @@ switchto_restore: */ GET_CPUINFO(%r7) + + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. 
+ */ +#ifdef MULTIPROCESSOR + sync /* store-before-store XXX use eieio if available -- cheaper */ +#endif stptr %r31,CI_CURLWP(%r7) +#ifdef MULTIPROCESSOR + sync /* store-before-load */ +#endif mr %r13,%r31 #ifdef PPC_BOOKE mtsprg2 %r31 /* save curlwp in sprg2 */ @@ -398,7 +423,13 @@ _ENTRY(softint_fast_dispatch) * to a kernel thread */ +#ifdef MULTIPROCESSOR + sync /* XXX eieio */ /* for mutex_enter; see cpu_switchto */ +#endif stptr %r3, CI_CURLWP(%r7) +#ifdef MULTIPROCESSOR + sync /* for mutex_enter; see cpu_switchto */ +#endif mr %r13, %r3 #ifdef PPC_BOOKE mtsprg2 %r3 @@ -432,7 +463,13 @@ _ENTRY(softint_fast_dispatch) #endif GET_CPUINFO(%r7) +#ifdef MULTIPROCESSOR + sync /* XXX eieio */ /* for mutex_enter; see cpu_switchto */ +#endif stptr %r30, CI_CURLWP(%r7) +#ifdef MULTIPROCESSOR + sync /* for mutex_enter; see cpu_switchto */ +#endif mr %r13, %r30 #ifdef PPC_BOOKE mtsprg2 %r30 Index: src/sys/arch/sparc/sparc/locore.s diff -u src/sys/arch/sparc/sparc/locore.s:1.268.30.1 src/sys/arch/sparc/sparc/locore.s:1.268.30.2 --- src/sys/arch/sparc/sparc/locore.s:1.268.30.1 Mon Nov 27 10:31:33 2017 +++ src/sys/arch/sparc/sparc/locore.s Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.s,v 1.268.30.1 2017/11/27 10:31:33 martin Exp $ */ +/* $NetBSD: locore.s,v 1.268.30.2 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright (c) 1996 Paul Kranenburg @@ -4905,7 +4905,30 @@ Lnosaveoldlwp: /* set new cpcb, and curlwp */ sethi %hi(curlwp), %l7 st %g5, [%l6 + %lo(cpcb)] ! cpcb = newpcb; + + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. + */ + /* stbar -- store-before-store, not needed on TSO */ st %g3, [%l7 + %lo(curlwp)] ! curlwp = l; +#ifdef MULTIPROCESSOR + ldstub [%sp - 4], %g0 /* makeshift store-before-load barrier */ +#endif /* compute new wim */ ld [%g5 + PCB_WIM], %o0 Index: src/sys/arch/sparc64/sparc64/locore.s diff -u src/sys/arch/sparc64/sparc64/locore.s:1.411 src/sys/arch/sparc64/sparc64/locore.s:1.411.2.1 --- src/sys/arch/sparc64/sparc64/locore.s:1.411 Sat May 6 21:46:31 2017 +++ src/sys/arch/sparc64/sparc64/locore.s Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.s,v 1.411 2017/05/06 21:46:31 palle Exp $ */ +/* $NetBSD: locore.s,v 1.411.2.1 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright (c) 2006-2010 Matthew R. Green @@ -6471,9 +6471,28 @@ ENTRY(cpu_switchto) * Load the new lwp. To load, we must change stacks and * alter cpcb and the window control registers, hence we must * keep interrupts disabled. + * + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. 
ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. */ + membar #StoreStore STPTR %i1, [%l7 + %lo(CURLWP)] ! curlwp = l; + membar #StoreLoad STPTR %l1, [%l6 + %lo(CPCB)] ! cpcb = newpcb; ldx [%l1 + PCB_SP], %i6 @@ -6566,7 +6585,9 @@ ENTRY(softint_fastintr) sethi %hi(USPACE - TF_SIZE - CC64FSZ - STKB), %o3 LDPTR [%i0 + L_PCB], %l1 ! l1 = softint pcb or %o3, %lo(USPACE - TF_SIZE - CC64FSZ - STKB), %o3 + membar #StoreStore /* for mutex_enter; see cpu_switchto */ STPTR %i0, [%l7 + %lo(CURLWP)] + membar #StoreLoad /* for mutex_enter; see cpu_switchto */ add %l1, %o3, %i6 STPTR %l1, [%l6 + %lo(CPCB)] stx %i6, [%l1 + PCB_SP] @@ -6579,7 +6600,9 @@ ENTRY(softint_fastintr) /* switch back to interrupted lwp */ ldx [%l5 + PCB_SP], %i6 + membar #StoreStore /* for mutex_enter; see cpu_switchto */ STPTR %l0, [%l7 + %lo(CURLWP)] + membar #StoreLoad /* for mutex_enter; see cpu_switchto */ STPTR %l5, [%l6 + %lo(CPCB)] restore ! rewind register window Index: src/sys/arch/vax/vax/subr.S diff -u src/sys/arch/vax/vax/subr.S:1.34 src/sys/arch/vax/vax/subr.S:1.34.2.1 --- src/sys/arch/vax/vax/subr.S:1.34 Mon May 22 16:53:05 2017 +++ src/sys/arch/vax/vax/subr.S Mon Jul 31 13:56:15 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: subr.S,v 1.34 2017/05/22 16:53:05 ragge Exp $ */ +/* $NetBSD: subr.S,v 1.34.2.1 2023/07/31 13:56:15 martin Exp $ */ /* * Copyright (c) 1994 Ludd, University of Lule}, Sweden. @@ -335,7 +335,9 @@ softint_process: movab softint_exit,PCB_PC(%r3)/* do a quick exit */ #ifdef MULTIPROCESSOR movl L_CPU(%r6),%r8 + /* XXX store-before-store barrier -- see cpu_switchto */ movl %r6,CI_CURLWP(%r8) + /* XXX store-before-load barrier -- see cpu_switchto */ #endif mtpr PCB_PADDR(%r3),$PR_PCBB /* restore PA of interrupted pcb */ @@ -358,7 +360,9 @@ softint_common: movl %r6,PCB_R6(%r3) /* move old lwp into new pcb */ movl %r1,PCB_R7(%r3) /* move IPL into new pcb */ #ifdef MULTIPROCESSOR + /* XXX store-before-store barrier -- see cpu_switchto */ movl %r2,CI_CURLWP(%r8) /* update ci_curlwp */ + /* XXX store-before-load barrier -- see cpu_switchto */ #endif /* @@ -424,7 +428,31 @@ JSBENTRY(Swtchto) #ifdef MULTIPROCESSOR movl L_CPU(%r0), %r8 /* get cpu_info of old lwp */ movl %r8, L_CPU(%r1) /* update cpu_info of new lwp */ + /* + * Issue barriers to coordinate mutex_exit on this CPU with + * mutex_vector_enter on another CPU. + * + * 1. Any prior mutex_exit by oldlwp must be visible to other + * CPUs before we set ci_curlwp := newlwp on this one, + * requiring a store-before-store barrier. + * + * 2. ci_curlwp := newlwp must be visible on all other CPUs + * before any subsequent mutex_exit by newlwp can even test + * whether there might be waiters, requiring a + * store-before-load barrier. + * + * See kern_mutex.c for details -- this is necessary for + * adaptive mutexes to detect whether the lwp is on the CPU in + * order to safely block without requiring atomic r/m/w in + * mutex_exit. + * + * XXX I'm fuzzy on the memory model of VAX. I would guess + * it's TSO like x86 but I can't find a store-before-load + * barrier, which is the only one TSO requires explicitly. 
+ */ + /* XXX store-before-store barrier */ movl %r1,CI_CURLWP(%r8) /* update ci_curlwp */ + /* XXX store-before-load barrier */ #endif mtpr PCB_PADDR(%r3),$PR_PCBB # set PA of new pcb