Module Name:    src
Committed By:   martin
Date:           Mon Jul 31 13:56:15 UTC 2023

Modified Files:
        src/sys/arch/aarch64/aarch64 [netbsd-8]: locore.S
        src/sys/arch/arm/arm32 [netbsd-8]: cpuswitch.S
        src/sys/arch/evbmips/ingenic [netbsd-8]: cpu_startup.S
        src/sys/arch/hppa/include [netbsd-8]: cpu.h
        src/sys/arch/ia64/ia64 [netbsd-8]: machdep.c vm_machdep.c
        src/sys/arch/mips/include [netbsd-8]: asm.h
        src/sys/arch/mips/mips [netbsd-8]: locore.S locore_mips3.S
        src/sys/arch/powerpc/powerpc [netbsd-8]: locore_subr.S
        src/sys/arch/sparc/sparc [netbsd-8]: locore.s
        src/sys/arch/sparc64/sparc64 [netbsd-8]: locore.s
        src/sys/arch/vax/vax [netbsd-8]: subr.S

Log Message:
Pull up following revision(s) (requested by riastradh in ticket #1859):

        sys/arch/ia64/ia64/vm_machdep.c: revision 1.18
        sys/arch/powerpc/powerpc/locore_subr.S: revision 1.67
        sys/arch/aarch64/aarch64/locore.S: revision 1.91
        sys/arch/mips/include/asm.h: revision 1.74
        sys/arch/hppa/include/cpu.h: revision 1.13
        sys/arch/arm/arm/armv6_start.S: revision 1.38
         (applied also to sys/arch/arm/cortex/a9_mpsubr.S,
         sys/arch/arm/cortex/cortex_init.S)
        sys/arch/evbmips/ingenic/cpu_startup.S: revision 1.2
        sys/arch/mips/mips/locore.S: revision 1.229
        sys/arch/alpha/include/asm.h: revision 1.45
         (applied to sys/arch/alpha/alpha/multiproc.s)
        sys/arch/sparc64/sparc64/locore.s: revision 1.432
        sys/arch/vax/vax/subr.S: revision 1.42
        sys/arch/mips/mips/locore_mips3.S: revision 1.116
        sys/arch/ia64/ia64/machdep.c: revision 1.44
        sys/arch/arm/arm32/cpuswitch.S: revision 1.106
        sys/arch/sparc/sparc/locore.s: revision 1.284
        (all via patch)

aarch64: Add missing barriers in cpu_switchto.
Details in comments.

Note: This is a conservative change that inserts a barrier where
there was a comment saying none is needed, which is probably correct.
The goal of this change is to systematically add barriers to be
confident in correctness; subsequent changes may remove some barriers,
as an optimization, with an explanation of why each barrier is not
needed.

PR kern/57240
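
As a rough C-level sketch of the two requirements spelled out in the new
comments (simplified, not the literal kern_mutex.c protocol; the names
follow the ia64 hunk below):

        /* cpu_switchto(oldlwp, newlwp), middle of the switch: */

        /* ...any mutex_exit stores by oldlwp happened above... */

        membar_producer();      /* store-before-store: those stores must
                                   be visible before other CPUs can see
                                   ci_curlwp == newlwp */
        ci->ci_curlwp = newlwp;
        membar_sync();          /* store-before-load: the ci_curlwp store
                                   must be visible before newlwp's next
                                   mutex_exit can load the waiters state */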

alpha: Add missing barriers in cpu_switchto.
Details in comments.

arm32: Add missing barriers in cpu_switchto.
Details in comments.

hppa: Add missing barriers in cpu_switchto.
Not sure hppa has ever had working MULTIPROCESSOR, so maybe no
pullups needed?

ia64: Add missing barriers in cpu_switchto.
(ia64 has never really worked, so no pullups needed, right?)

mips: Add missing barriers in cpu_switchto.
Details in comments.

powerpc: Add missing barriers in cpu_switchto.
Details in comments.

sparc: Add missing barriers in cpu_switchto.

sparc64: Add missing barriers in cpu_switchto.
Details in comments.

vax: Note where cpu_switchto needs barriers.

Not sure vax has ever had working MULTIPROCESSOR, though, and I'm not
even sure how to spell store-before-load barriers on VAX, so no
functional change for now.
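
For reference, the barrier sequences placed around the ci_curlwp store,
summarized from the patches below (before / after the store):

        aarch64:  dmb ishst / dmb ish
        arm32:    dmb / dmb                  (_ARM_ARCH_7 only)
        hppa:     sync / sync                (in SET_CURLWP)
        ia64:     membar_producer() / membar_sync()
        mips:     SYNC_PRODUCER / SYNC_DEKKER  (see mips/asm.h)
        powerpc:  sync / sync                (MULTIPROCESSOR only)
        sparc:    stbar elided (TSO) / ldstub  (MULTIPROCESSOR only)
        sparc64:  membar #StoreStore / membar #StoreLoad
        vax:      XXX comments only, no functional change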


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.1.22.1 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.90 -r1.90.10.1 src/sys/arch/arm/arm32/cpuswitch.S
cvs rdiff -u -r1.1 -r1.1.12.1 src/sys/arch/evbmips/ingenic/cpu_startup.S
cvs rdiff -u -r1.3 -r1.3.10.1 src/sys/arch/hppa/include/cpu.h
cvs rdiff -u -r1.38 -r1.38.6.1 src/sys/arch/ia64/ia64/machdep.c
cvs rdiff -u -r1.13 -r1.13.6.1 src/sys/arch/ia64/ia64/vm_machdep.c
cvs rdiff -u -r1.54 -r1.54.6.1 src/sys/arch/mips/include/asm.h
cvs rdiff -u -r1.208 -r1.208.8.1 src/sys/arch/mips/mips/locore.S
cvs rdiff -u -r1.113 -r1.113.8.1 src/sys/arch/mips/mips/locore_mips3.S
cvs rdiff -u -r1.55 -r1.55.6.1 src/sys/arch/powerpc/powerpc/locore_subr.S
cvs rdiff -u -r1.268.30.1 -r1.268.30.2 src/sys/arch/sparc/sparc/locore.s
cvs rdiff -u -r1.411 -r1.411.2.1 src/sys/arch/sparc64/sparc64/locore.s
cvs rdiff -u -r1.34 -r1.34.2.1 src/sys/arch/vax/vax/subr.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.1 src/sys/arch/aarch64/aarch64/locore.S:1.1.22.1
--- src/sys/arch/aarch64/aarch64/locore.S:1.1	Sun Aug 10 05:47:37 2014
+++ src/sys/arch/aarch64/aarch64/locore.S	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.1 2014/08/10 05:47:37 matt Exp $ */
+/* $NetBSD: locore.S,v 1.1.22.1 2023/07/31 13:56:14 martin Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
 
 #include "opt_ddb.h"
 
-RCSID("$NetBSD: locore.S,v 1.1 2014/08/10 05:47:37 matt Exp $")
+RCSID("$NetBSD: locore.S,v 1.1.22.1 2023/07/31 13:56:14 martin Exp $")
 
 /*
  * At IPL_SCHED:
@@ -99,7 +99,27 @@ ENTRY_NP(cpu_switchto)
 	msr	tpidr_el0, x4		// restore it
 
 	mrs	x3, tpidr_el1		// get curcpu
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+	dmb	ishst			/* store-before-store */
 	str	x1, [x3, #CI_CURLWP]	// show as curlwp
+	dmb	ish			/* store-before-load */
 
 	/*
 	 * Restore callee save registers

Index: src/sys/arch/arm/arm32/cpuswitch.S
diff -u src/sys/arch/arm/arm32/cpuswitch.S:1.90 src/sys/arch/arm/arm32/cpuswitch.S:1.90.10.1
--- src/sys/arch/arm/arm32/cpuswitch.S:1.90	Wed Apr  8 12:07:40 2015
+++ src/sys/arch/arm/arm32/cpuswitch.S	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpuswitch.S,v 1.90 2015/04/08 12:07:40 matt Exp $	*/
+/*	$NetBSD: cpuswitch.S,v 1.90.10.1 2023/07/31 13:56:15 martin Exp $	*/
 
 /*
  * Copyright 2003 Wasabi Systems, Inc.
@@ -87,7 +87,7 @@
 #include <arm/asm.h>
 #include <arm/locore.h>
 
-	RCSID("$NetBSD: cpuswitch.S,v 1.90 2015/04/08 12:07:40 matt Exp $")
+	RCSID("$NetBSD: cpuswitch.S,v 1.90.10.1 2023/07/31 13:56:15 martin Exp $")
 
 /* LINTSTUB: include <sys/param.h> */
 
@@ -205,8 +205,34 @@ ENTRY(cpu_switchto)
 	mcr	p15, 0, r6, c13, c0, 4		/* set current lwp */
 #endif
 
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+
 	/* We have a new curlwp now so make a note of it */
+#ifdef _ARM_ARCH_7
+	dmb				/* store-before-store */
+#endif
 	str	r6, [r5, #(CI_CURLWP)]
+#ifdef _ARM_ARCH_7
+	dmb				/* store-before-load */
+#endif
+
 	/* Get the new pcb */
 	ldr	r7, [r6, #(L_PCB)]
 
@@ -403,7 +429,13 @@ ENTRY_NP(softint_switch)
 #if defined(TPIDRPRW_IS_CURLWP)
 	mcr	p15, 0, r5, c13, c0, 4		/* save new lwp */
 #endif
+#ifdef _ARM_ARCH_7
+	dmb				/* for mutex_enter; see cpu_switchto */
+#endif
 	str	r5, [r7, #(CI_CURLWP)]		/* save new lwp */
+#ifdef _ARM_ARCH_7
+	dmb				/* for mutex_enter; see cpu_switchto */
+#endif
 
 	/*
 	 * Normally, we'd get {r8-r13} but since this is a softint lwp
@@ -431,7 +463,13 @@ ENTRY_NP(softint_switch)
 #if defined(TPIDRPRW_IS_CURLWP)
 	mcr	p15, 0, r4, c13, c0, 4		/* restore pinned lwp */
 #endif
+#ifdef _ARM_ARCH_7
+	dmb				/* for mutex_enter; see cpu_switchto */
+#endif
 	str	r4, [r7, #(CI_CURLWP)]		/* restore pinned lwp */
+#ifdef _ARM_ARCH_7
+	dmb				/* for mutex_enter; see cpu_switchto */
+#endif
 	ldr	sp, [r2, #(PCB_KSP)]	/* now running on the old stack. */
 
 	/* At this point we can allow IRQ's again. */

Index: src/sys/arch/evbmips/ingenic/cpu_startup.S
diff -u src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1 src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1.12.1
--- src/sys/arch/evbmips/ingenic/cpu_startup.S:1.1	Fri Jan 29 01:54:14 2016
+++ src/sys/arch/evbmips/ingenic/cpu_startup.S	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_startup.S,v 1.1 2016/01/29 01:54:14 macallan Exp $ */
+/*	$NetBSD: cpu_startup.S,v 1.1.12.1 2023/07/31 13:56:15 martin Exp $ */
 
 /*-
  * Copyright (c) 2015 Michael Lorenz
@@ -33,7 +33,7 @@
 #include <sys/endian.h>
 
 #include <mips/asm.h>
-RCSID("$NetBSD: cpu_startup.S,v 1.1 2016/01/29 01:54:14 macallan Exp $");
+RCSID("$NetBSD: cpu_startup.S,v 1.1.12.1 2023/07/31 13:56:15 martin Exp $");
 
 #ifdef MULTIPROCESSOR
 
@@ -56,6 +56,11 @@ NESTED_NOPROFILE(ingenic_trampoline, 0, 
 	nop
 	beqz	MIPS_CURLWP, 1b
 	 nop
+	/*
+	 * No membar needed because we're not switching from a
+	 * previous lwp, and the idle lwp we're switching to can't be
+	 * holding locks already; see cpu_switchto.
+	 */
 	PTR_S	MIPS_CURLWP, CPU_INFO_CURLWP(a0)
 
 	li	v0, 0

Index: src/sys/arch/hppa/include/cpu.h
diff -u src/sys/arch/hppa/include/cpu.h:1.3 src/sys/arch/hppa/include/cpu.h:1.3.10.1
--- src/sys/arch/hppa/include/cpu.h:1.3	Mon Jan 25 18:14:40 2016
+++ src/sys/arch/hppa/include/cpu.h	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.3 2016/01/25 18:14:40 christos Exp $	*/
+/*	$NetBSD: cpu.h,v 1.3.10.1 2023/07/31 13:56:14 martin Exp $	*/
 
 /*	$OpenBSD: cpu.h,v 1.55 2008/07/23 17:39:35 kettenis Exp $	*/
 
@@ -199,7 +199,26 @@ extern int cpu_revision;
 #define	GET_CURLWP(r)		mfctl CR_CURCPU, r ! ldw CI_CURLWP(r), r
 #define	GET_CURLWP_SPACE(s, r)	mfctl CR_CURCPU, r ! ldw CI_CURLWP(s, r), r
 
-#define	SET_CURLWP(r,t)		mfctl CR_CURCPU, t ! stw r, CI_CURLWP(t)
+/*
+ * Issue barriers to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by oldlwp must be visible to other
+ *    CPUs before we set ci_curlwp := newlwp on this one,
+ *    requiring a store-before-store barrier.
+ *
+ * 2. ci_curlwp := newlwp must be visible on all other CPUs
+ *    before any subsequent mutex_exit by newlwp can even test
+ *    whether there might be waiters, requiring a
+ *    store-before-load barrier.
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit.
+ */
+#define	SET_CURLWP(r,t)		\
+	sync ! mfctl CR_CURCPU, t ! stw r, CI_CURLWP(t) ! sync
 
 #else /*  MULTIPROCESSOR */
 

Index: src/sys/arch/ia64/ia64/machdep.c
diff -u src/sys/arch/ia64/ia64/machdep.c:1.38 src/sys/arch/ia64/ia64/machdep.c:1.38.6.1
--- src/sys/arch/ia64/ia64/machdep.c:1.38	Sat Apr  8 17:46:01 2017
+++ src/sys/arch/ia64/ia64/machdep.c	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.38 2017/04/08 17:46:01 scole Exp $	*/
+/*	$NetBSD: machdep.c,v 1.38.6.1 2023/07/31 13:56:14 martin Exp $	*/
 
 /*-
  * Copyright (c) 2003,2004 Marcel Moolenaar
@@ -662,7 +662,11 @@ ia64_init(void)
 
 
 	/*
-	 * Initialise process context. XXX: This should really be in cpu_switch
+	 * Initialise process context. XXX: This should really be in cpu_switchto
+	 *
+	 * No membar needed because we're not switching from a
+	 * previous lwp, and the idle lwp we're switching to can't be
+	 * holding locks already; see cpu_switchto.
 	 */
 	ci->ci_curlwp = &lwp0;
 

Index: src/sys/arch/ia64/ia64/vm_machdep.c
diff -u src/sys/arch/ia64/ia64/vm_machdep.c:1.13 src/sys/arch/ia64/ia64/vm_machdep.c:1.13.6.1
--- src/sys/arch/ia64/ia64/vm_machdep.c:1.13	Sat Apr  8 17:38:43 2017
+++ src/sys/arch/ia64/ia64/vm_machdep.c	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.13 2017/04/08 17:38:43 scole Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.13.6.1 2023/07/31 13:56:14 martin Exp $	*/
 
 /*
  * Copyright (c) 2006 The NetBSD Foundation, Inc.
@@ -37,6 +37,7 @@
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/cpu.h>
+#include <sys/atomic.h>
 
 #include <machine/frame.h>
 #include <machine/md_var.h>
@@ -77,9 +78,29 @@ cpu_switchto(lwp_t *oldlwp, lwp_t *newlw
 	register uint64_t reg9 __asm("r9");
 
 	KASSERT(newlwp != NULL);
-	
+
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+	membar_producer();	/* store-before-store */
 	ci->ci_curlwp = newlwp;
-	
+	membar_sync();		/* store-before-load */
+
 	/* required for lwp_startup, copy oldlwp into r9, "mov r9=in0" */
 	__asm __volatile("mov %0=%1" : "=r"(reg9) : "r"(oldlwp));
 	

Index: src/sys/arch/mips/include/asm.h
diff -u src/sys/arch/mips/include/asm.h:1.54 src/sys/arch/mips/include/asm.h:1.54.6.1
--- src/sys/arch/mips/include/asm.h:1.54	Sat Feb 25 21:16:50 2017
+++ src/sys/arch/mips/include/asm.h	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: asm.h,v 1.54 2017/02/25 21:16:50 joerg Exp $	*/
+/*	$NetBSD: asm.h,v 1.54.6.1 2023/07/31 13:56:14 martin Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -510,6 +510,32 @@ _C_LABEL(x):
 #define	NOP_L		/* nothing */
 #endif
 
+/* XXX pullup more mips barrier improvements here */
+#define	SYNC_ACQ	sync
+#define	SYNC_REL	sync
+
+/*
+ * Store-before-load barrier.  Do not use this unless you know what
+ * you're doing.
+ */
+#ifdef MULTIPROCESSOR
+#define	SYNC_DEKKER	sync
+#else
+#define	SYNC_DEKKER	/* nothing */
+#endif
+
+/*
+ * Store-before-store and load-before-load barriers.  These could be
+ * made weaker than release (load/store-before-store) and acquire
+ * (load-before-load/store) barriers, and newer MIPS does have
+ * instruction encodings for finer-grained barriers like this, but I
+ * dunno how to appropriately conditionalize their use or get the
+ * assembler to be happy with them, so we'll use these definitions for
+ * now.
+ */
+#define	SYNC_PRODUCER	SYNC_REL
+#define	SYNC_CONSUMER	SYNC_ACQ
+
 /* CPU dependent hook for cp0 load delays */
 #if defined(MIPS1) || defined(MIPS2) || defined(MIPS3)
 #define MFC0_HAZARD	sll $0,$0,1	/* super scalar nop */

Index: src/sys/arch/mips/mips/locore.S
diff -u src/sys/arch/mips/mips/locore.S:1.208 src/sys/arch/mips/mips/locore.S:1.208.8.1
--- src/sys/arch/mips/mips/locore.S:1.208	Wed Nov  9 11:50:09 2016
+++ src/sys/arch/mips/mips/locore.S	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.208 2016/11/09 11:50:09 maya Exp $	*/
+/*	$NetBSD: locore.S,v 1.208.8.1 2023/07/31 13:56:15 martin Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -63,7 +63,7 @@
 #include <mips/trap.h>
 #include <mips/locore.h>
 
-RCSID("$NetBSD: locore.S,v 1.208 2016/11/09 11:50:09 maya Exp $")
+RCSID("$NetBSD: locore.S,v 1.208.8.1 2023/07/31 13:56:15 martin Exp $")
 
 #include "assym.h"
 
@@ -286,7 +286,28 @@ NESTED(cpu_switchto, CALLFRAME_SIZ, ra)
 
 	PTR_L	t2, L_CPU(MIPS_CURLWP)
 	nop					# patchable load delay slot
+
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+	SYNC_PRODUCER		/* XXX fixup to nop for uniprocessor boot */
 	PTR_S	MIPS_CURLWP, CPU_INFO_CURLWP(t2)
+	SYNC_DEKKER		/* XXX fixup to nop for uniprocessor boot */
 
 	/* Check for restartable atomic sequences (RAS) */
 	PTR_L	a0, L_PROC(MIPS_CURLWP)		# argument to ras_lookup
@@ -437,7 +458,9 @@ NESTED(softint_fast_dispatch, CALLFRAME_
 	move	MIPS_CURLWP, a0				# switch to softint lwp
 	PTR_L	s1, L_CPU(MIPS_CURLWP)			# get curcpu()
 	nop					# patchable load delay slot
+	SYNC_PRODUCER /* XXX fixup */	/* for mutex_enter; see cpu_switchto */
 	PTR_S	MIPS_CURLWP, CPU_INFO_CURLWP(s1)	#    ...
+	SYNC_DEKKER /* XXX fixup */	/* for mutex_enter; see cpu_switchto */
 	move	s2, sp					# remember sp
 	move	s3, t0					# remember curpcb
 
@@ -448,7 +471,9 @@ NESTED(softint_fast_dispatch, CALLFRAME_
 
 	move	sp, s2					# restore stack
 	move	MIPS_CURLWP, s0				# restore curlwp
+	SYNC_PRODUCER /* XXX fixup */	/* for mutex_enter; see cpu_switchto */
 	PTR_S	MIPS_CURLWP, CPU_INFO_CURLWP(s1)	#    ....
+	SYNC_DEKKER /* XXX fixup */	/* for mutex_enter; see cpu_switchto */
 
 	REG_L	ra, CALLFRAME_RA(sp)		# load early since we use it
 

Index: src/sys/arch/mips/mips/locore_mips3.S
diff -u src/sys/arch/mips/mips/locore_mips3.S:1.113 src/sys/arch/mips/mips/locore_mips3.S:1.113.8.1
--- src/sys/arch/mips/mips/locore_mips3.S:1.113	Wed Jul 27 09:32:35 2016
+++ src/sys/arch/mips/mips/locore_mips3.S	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore_mips3.S,v 1.113 2016/07/27 09:32:35 skrll Exp $	*/
+/*	$NetBSD: locore_mips3.S,v 1.113.8.1 2023/07/31 13:56:15 martin Exp $	*/
 
 /*
  * Copyright (c) 1997 Jonathan Stone (hereinafter referred to as the author)
@@ -92,7 +92,7 @@
 #include <mips/asm.h>
 #include <mips/cpuregs.h>
 
-RCSID("$NetBSD: locore_mips3.S,v 1.113 2016/07/27 09:32:35 skrll Exp $")
+RCSID("$NetBSD: locore_mips3.S,v 1.113.8.1 2023/07/31 13:56:15 martin Exp $")
 
 #include "assym.h"
 
@@ -796,6 +796,11 @@ NESTED_NOPROFILE(cpu_trampoline, 0, ra)
 	nop
 	beqz	MIPS_CURLWP, 1b
 	 nop
+	/*
+	 * No membar needed because we're not switching from a
+	 * previous lwp, and the idle lwp we're switching to can't be
+	 * holding locks already; see cpu_switchto.
+	 */
 	PTR_S	MIPS_CURLWP, CPU_INFO_CURLWP(a0)
 
 #ifdef _LP64

Index: src/sys/arch/powerpc/powerpc/locore_subr.S
diff -u src/sys/arch/powerpc/powerpc/locore_subr.S:1.55 src/sys/arch/powerpc/powerpc/locore_subr.S:1.55.6.1
--- src/sys/arch/powerpc/powerpc/locore_subr.S:1.55	Tue Feb 28 17:35:29 2017
+++ src/sys/arch/powerpc/powerpc/locore_subr.S	Mon Jul 31 13:56:14 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore_subr.S,v 1.55 2017/02/28 17:35:29 chs Exp $	*/
+/*	$NetBSD: locore_subr.S,v 1.55.6.1 2023/07/31 13:56:14 martin Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -224,7 +224,32 @@ switchto_restore:
 	 */
 
 	GET_CPUINFO(%r7)
+
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+#ifdef MULTIPROCESSOR
+	sync	/* store-before-store XXX use eieio if available -- cheaper */
+#endif
 	stptr	%r31,CI_CURLWP(%r7)
+#ifdef MULTIPROCESSOR
+	sync	/* store-before-load */
+#endif
 	mr	%r13,%r31
 #ifdef PPC_BOOKE
 	mtsprg2	%r31			/* save curlwp in sprg2 */
@@ -398,7 +423,13 @@ _ENTRY(softint_fast_dispatch)
 	 * to a kernel thread
 	 */
 
+#ifdef MULTIPROCESSOR
+	sync	/* XXX eieio */		/* for mutex_enter; see cpu_switchto */
+#endif
 	stptr	%r3, CI_CURLWP(%r7)
+#ifdef MULTIPROCESSOR
+	sync				/* for mutex_enter; see cpu_switchto */
+#endif
 	mr	%r13, %r3
 #ifdef PPC_BOOKE
 	mtsprg2	%r3
@@ -432,7 +463,13 @@ _ENTRY(softint_fast_dispatch)
 #endif
 
 	GET_CPUINFO(%r7)
+#ifdef MULTIPROCESSOR
+	sync	/* XXX eieio */		/* for mutex_enter; see cpu_switchto */
+#endif
 	stptr	%r30, CI_CURLWP(%r7)
+#ifdef MULTIPROCESSOR
+	sync				/* for mutex_enter; see cpu_switchto */
+#endif
 	mr	%r13, %r30
 #ifdef PPC_BOOKE
 	mtsprg2	%r30

Index: src/sys/arch/sparc/sparc/locore.s
diff -u src/sys/arch/sparc/sparc/locore.s:1.268.30.1 src/sys/arch/sparc/sparc/locore.s:1.268.30.2
--- src/sys/arch/sparc/sparc/locore.s:1.268.30.1	Mon Nov 27 10:31:33 2017
+++ src/sys/arch/sparc/sparc/locore.s	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.s,v 1.268.30.1 2017/11/27 10:31:33 martin Exp $	*/
+/*	$NetBSD: locore.s,v 1.268.30.2 2023/07/31 13:56:15 martin Exp $	*/
 
 /*
  * Copyright (c) 1996 Paul Kranenburg
@@ -4905,7 +4905,30 @@ Lnosaveoldlwp:
 	/* set new cpcb, and curlwp */
 	sethi	%hi(curlwp), %l7
 	st	%g5, [%l6 + %lo(cpcb)]		! cpcb = newpcb;
+
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 */
+	/* stbar -- store-before-store, not needed on TSO */
 	st      %g3, [%l7 + %lo(curlwp)]        ! curlwp = l;
+#ifdef MULTIPROCESSOR
+	ldstub	[%sp - 4], %g0	/* makeshift store-before-load barrier */
+#endif
 
 	/* compute new wim */
 	ld	[%g5 + PCB_WIM], %o0

Index: src/sys/arch/sparc64/sparc64/locore.s
diff -u src/sys/arch/sparc64/sparc64/locore.s:1.411 src/sys/arch/sparc64/sparc64/locore.s:1.411.2.1
--- src/sys/arch/sparc64/sparc64/locore.s:1.411	Sat May  6 21:46:31 2017
+++ src/sys/arch/sparc64/sparc64/locore.s	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.s,v 1.411 2017/05/06 21:46:31 palle Exp $	*/
+/*	$NetBSD: locore.s,v 1.411.2.1 2023/07/31 13:56:15 martin Exp $	*/
 
 /*
  * Copyright (c) 2006-2010 Matthew R. Green
@@ -6471,9 +6471,28 @@ ENTRY(cpu_switchto)
 	 * Load the new lwp.  To load, we must change stacks and
 	 * alter cpcb and the window control registers, hence we must
 	 * keep interrupts disabled.
+	 *
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
 	 */
 
+	membar	#StoreStore
 	STPTR	%i1, [%l7 + %lo(CURLWP)]	! curlwp = l;
+	membar	#StoreLoad
 	STPTR	%l1, [%l6 + %lo(CPCB)]		! cpcb = newpcb;
 
 	ldx	[%l1 + PCB_SP], %i6
@@ -6566,7 +6585,9 @@ ENTRY(softint_fastintr)
 	sethi	%hi(USPACE - TF_SIZE - CC64FSZ - STKB), %o3
 	LDPTR	[%i0 + L_PCB], %l1		! l1 = softint pcb
 	or	%o3, %lo(USPACE - TF_SIZE - CC64FSZ - STKB), %o3
+	membar	#StoreStore		/* for mutex_enter; see cpu_switchto */
 	STPTR	%i0, [%l7 + %lo(CURLWP)]
+	membar	#StoreLoad		/* for mutex_enter; see cpu_switchto */
 	add	%l1, %o3, %i6
 	STPTR	%l1, [%l6 + %lo(CPCB)]
 	stx	%i6, [%l1 + PCB_SP]
@@ -6579,7 +6600,9 @@ ENTRY(softint_fastintr)
 
 	/* switch back to interrupted lwp */
 	ldx	[%l5 + PCB_SP], %i6
+	membar	#StoreStore		/* for mutex_enter; see cpu_switchto */
 	STPTR	%l0, [%l7 + %lo(CURLWP)]
+	membar	#StoreLoad		/* for mutex_enter; see cpu_switchto */
 	STPTR	%l5, [%l6 + %lo(CPCB)]
 
 	restore					! rewind register window

Index: src/sys/arch/vax/vax/subr.S
diff -u src/sys/arch/vax/vax/subr.S:1.34 src/sys/arch/vax/vax/subr.S:1.34.2.1
--- src/sys/arch/vax/vax/subr.S:1.34	Mon May 22 16:53:05 2017
+++ src/sys/arch/vax/vax/subr.S	Mon Jul 31 13:56:15 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr.S,v 1.34 2017/05/22 16:53:05 ragge Exp $	   */
+/*	$NetBSD: subr.S,v 1.34.2.1 2023/07/31 13:56:15 martin Exp $	   */
 
 /*
  * Copyright (c) 1994 Ludd, University of Lule}, Sweden.
@@ -335,7 +335,9 @@ softint_process:
 	movab	softint_exit,PCB_PC(%r3)/* do a quick exit */
 #ifdef MULTIPROCESSOR
 	movl	L_CPU(%r6),%r8
+	/* XXX store-before-store barrier -- see cpu_switchto */
 	movl	%r6,CI_CURLWP(%r8)
+	/* XXX store-before-load barrier -- see cpu_switchto */
 #endif
 
 	mtpr	PCB_PADDR(%r3),$PR_PCBB	/* restore PA of interrupted pcb */
@@ -358,7 +360,9 @@ softint_common:
 	movl	%r6,PCB_R6(%r3)		/* move old lwp into new pcb */
 	movl	%r1,PCB_R7(%r3)		/* move IPL into new pcb */
 #ifdef MULTIPROCESSOR
+	/* XXX store-before-store barrier -- see cpu_switchto */
 	movl	%r2,CI_CURLWP(%r8)	/* update ci_curlwp */
+	/* XXX store-before-load barrier -- see cpu_switchto */
 #endif
 
 	/*
@@ -424,7 +428,31 @@ JSBENTRY(Swtchto)
 #ifdef MULTIPROCESSOR
 	movl	L_CPU(%r0), %r8		/* get cpu_info of old lwp */
 	movl	%r8, L_CPU(%r1)		/* update cpu_info of new lwp */
+	/*
+	 * Issue barriers to coordinate mutex_exit on this CPU with
+	 * mutex_vector_enter on another CPU.
+	 *
+	 * 1. Any prior mutex_exit by oldlwp must be visible to other
+	 *    CPUs before we set ci_curlwp := newlwp on this one,
+	 *    requiring a store-before-store barrier.
+	 *
+	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
+	 *    before any subsequent mutex_exit by newlwp can even test
+	 *    whether there might be waiters, requiring a
+	 *    store-before-load barrier.
+	 *
+	 * See kern_mutex.c for details -- this is necessary for
+	 * adaptive mutexes to detect whether the lwp is on the CPU in
+	 * order to safely block without requiring atomic r/m/w in
+	 * mutex_exit.
+	 *
+	 * XXX I'm fuzzy on the memory model of VAX.  I would guess
+	 * it's TSO like x86 but I can't find a store-before-load
+	 * barrier, which is the only one TSO requires explicitly.
+	 */
+	/* XXX store-before-store barrier */
 	movl	%r1,CI_CURLWP(%r8)	/* update ci_curlwp */
+	/* XXX store-before-load barrier */
 #endif
 
 	mtpr	PCB_PADDR(%r3),$PR_PCBB	# set PA of new pcb
