Module Name:    src
Committed By:   skrll
Date:           Wed Aug 12 13:19:35 UTC 2020

Modified Files:
        src/sys/arch/aarch64/aarch64: copyinout.S cpu_machdep.c cpuswitch.S
            db_machdep.c fusu.S genassym.cf idle_machdep.S locore.S vectors.S
        src/sys/arch/aarch64/include: cpu.h proc.h

Log Message:
Part II of ad's aarch64 performance improvements (cpuswitch.S bugs are
all mine)

- Use tpidr_el1 to hold curlwp and not curcpu, because curlwp is accessed
  much more often by MI code.  It also makes curlwp preemption-safe and
  allows aarch64_curlwp() to be a const function (curcpu must be volatile).

- Make ASTs operate per-LWP rather than per-CPU; otherwise LWPs can
  sometimes see spurious ASTs (which doesn't cause a problem; it just
  means some time may be wasted).

- Use plain stores to set/clear ASTs.  Make sure ASTs are always set on the
  same CPU as the target LWP, and delivered via IPI if posted from a remote
  CPU so that they are resolved quickly.

- Add some cache line padding to struct cpu_info, to match x86.

- Add a memory barrier in a couple of places where ci_curlwp is set.  This
  is needed whenever an LWP that is resuming on the CPU could hold an
  adaptive mutex.  The barrier needs to drain the CPU's store buffer, so
  that the update to ci_curlwp becomes globally visible before the LWP can
  resume and call mutex_exit().  By my reading of the ARM docs it looks like
  the instruction I used will do the right thing, but I'm not 100% sure.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/arch/aarch64/aarch64/copyinout.S
cvs rdiff -u -r1.10 -r1.11 src/sys/arch/aarch64/aarch64/cpu_machdep.c
cvs rdiff -u -r1.24 -r1.25 src/sys/arch/aarch64/aarch64/cpuswitch.S
cvs rdiff -u -r1.25 -r1.26 src/sys/arch/aarch64/aarch64/db_machdep.c
cvs rdiff -u -r1.9 -r1.10 src/sys/arch/aarch64/aarch64/fusu.S
cvs rdiff -u -r1.29 -r1.30 src/sys/arch/aarch64/aarch64/genassym.cf
cvs rdiff -u -r1.5 -r1.6 src/sys/arch/aarch64/aarch64/idle_machdep.S
cvs rdiff -u -r1.69 -r1.70 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.17 -r1.18 src/sys/arch/aarch64/aarch64/vectors.S
cvs rdiff -u -r1.25 -r1.26 src/sys/arch/aarch64/include/cpu.h
cvs rdiff -u -r1.7 -r1.8 src/sys/arch/aarch64/include/proc.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/copyinout.S
diff -u src/sys/arch/aarch64/aarch64/copyinout.S:1.14 src/sys/arch/aarch64/aarch64/copyinout.S:1.15
--- src/sys/arch/aarch64/aarch64/copyinout.S:1.14	Thu Aug  6 06:49:55 2020
+++ src/sys/arch/aarch64/aarch64/copyinout.S	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
 #include <aarch64/asm.h>
 #include "assym.h"
 
-RCSID("$NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef ARMV81_PAN
 #define PAN_ENABLE	\
@@ -80,8 +80,7 @@ RCSID("$NetBSD: copyinout.S,v 1.14 2020/
 
 	.macro exit_cpu_onfault
 	/* curlwp->l_md.md_onfault = NULL */
-	mrs	x0, tpidr_el1			/* curcpu */
-	ldr	x0, [x0, #CI_CURLWP]		/* x0 = curlwp */
+	mrs	x0, tpidr_el1			/* x0 = curlwp */
 	str	xzr, [x0, #L_MD_ONFAULT]	/* lwp->l_md_onfault = NULL */
 9:
 	PAN_ENABLE				/* enable PAN */

Index: src/sys/arch/aarch64/aarch64/cpu_machdep.c
diff -u src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.10 src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.11
--- src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.10	Thu May 21 05:41:40 2020
+++ src/sys/arch/aarch64/aarch64/cpu_machdep.c	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $ */
+/* $NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -261,7 +261,7 @@ cpu_need_resched(struct cpu_info *ci, st
 		intr_ipi_send(ci->ci_kcpuset, IPI_AST);
 #endif
 	} else {
-		setsoftast(ci);	/* force call to ast() */
+		l->l_md.md_astpending = 1;
 	}
 }
 
@@ -272,7 +272,22 @@ cpu_need_proftick(struct lwp *l)
 	KASSERT(l->l_cpu == curcpu());
 
 	l->l_pflag |= LP_OWEUPC;
-	setsoftast(l->l_cpu);
+	l->l_md.md_astpending = 1;
+}
+
+void
+cpu_signotify(struct lwp *l)
+{
+
+	KASSERT(kpreempt_disabled());
+
+	if (l->l_cpu != curcpu()) {
+#ifdef MULTIPROCESSOR
+		intr_ipi_send(l->l_cpu->ci_kcpuset, IPI_AST);
+#endif
+	} else {
+		l->l_md.md_astpending = 1;
+	}
 }
 
 #ifdef __HAVE_PREEMPTION

Index: src/sys/arch/aarch64/aarch64/cpuswitch.S
diff -u src/sys/arch/aarch64/aarch64/cpuswitch.S:1.24 src/sys/arch/aarch64/aarch64/cpuswitch.S:1.25
--- src/sys/arch/aarch64/aarch64/cpuswitch.S:1.24	Thu Aug  6 06:49:55 2020
+++ src/sys/arch/aarch64/aarch64/cpuswitch.S	Wed Aug 12 13:19:35 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
- * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -38,7 +38,7 @@
 #include "opt_ddb.h"
 #include "opt_kasan.h"
 
-RCSID("$NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $")
+RCSID("$NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $")
 
 	ARMV8_DEFINE_OPTIONS
 
@@ -83,11 +83,9 @@ ENTRY_NP(cpu_switchto)
 #endif
 	ldr	x5, [x1, #L_MD_CPACR]	/* get cpacr_el1 */
 
-	mrs	x3, tpidr_el1
 	DISABLE_INTERRUPT
 	mov	sp, x4			/* restore stack pointer */
 	msr	cpacr_el1, x5		/* restore cpacr_el1 */
-	str	x1, [x3, #CI_CURLWP]	/* switch curlwp to new lwp */
 
 #ifdef ARMV83_PAC
 	/* Switch the PAC key. */
@@ -118,6 +116,10 @@ ENTRY_NP(cpu_switchto)
 1:
 #endif
 
+	msr	tpidr_el1, x1		/* switch curlwp to new lwp */
+	ldr	x3, [x1, #L_CPU]
+	str	x1, [x3, #CI_CURLWP]	/* switch curlwp to new lwp */
+	dmb	st			/* see comments in kern_mutex.c */
 	ENABLE_INTERRUPT
 
 	/*
@@ -161,8 +163,7 @@ ENTRY_NP(cpu_switchto_softint)
 	stp	x27, x28, [sp, #TF_X27]
 	stp	x29, x2, [sp, #TF_X29]	/* tf->lr = softint_cleanup; */
 
-	mrs	x20, tpidr_el1		/* x20 := curcpu() */
-	ldr	x19, [x20, #CI_CURLWP]	/* x19 := curcpu()->ci_curlwp */
+	mrs	x19, tpidr_el1		/* x19 := curlwp */
 	mov	x4, sp
 
 	mrs	x5, cpacr_el1
@@ -180,9 +181,13 @@ ENTRY_NP(cpu_switchto_softint)
 	ldr	x4, [x0, #L_MD_UTF]
 
 	DISABLE_INTERRUPT
+	ldr	x20, [x19, #L_CPU]	/* x20 := curlwp->l_cpu */
+
 	/* onto new stack */
 	sub	sp, x4, #TF_SIZE	/* new sp := softlwp->l_md_utf - 1 */
+	msr	tpidr_el1, x0		/* curlwp = softlwp; */
 	str	x0, [x20, #CI_CURLWP]	/* curcpu()->ci_curlwp = softlwp; */
+					/* no need for memory barrier here */
 
 	mov	x5, #CPACR_FPEN_NONE
 	msr	cpacr_el1, x5		/* cpacr_el1 = CPACR_FPEN_NONE */
@@ -203,7 +208,6 @@ ENTRY_NP(cpu_switchto_softint)
 	mov	x0, x19			/* x0 := pinned_lwp */
 	bl	_C_LABEL(softint_dispatch)
 
-	mrs	x20, tpidr_el1
 	ldr	x6, [x19, #L_PCB]	/* x6 = lwp_getpcb(curlwp) */
 	ldr	x4, [x6, #PCB_TF]	/* x4 := pinned_lwp->l_addr->pcb_tf */
 #ifdef DDB
@@ -212,7 +216,11 @@ ENTRY_NP(cpu_switchto_softint)
 	ldr	x5, [x19, #L_MD_CPACR]	/* x5 := pinned_lwp->l_md_cpacr */
 
 	DISABLE_INTERRUPT
-	str	x19, [x20, #CI_CURLWP]	/* curcpu()->ci_curlwp := x19 */
+	msr	tpidr_el1, x19		/* curlwp = pinned_lwp */
+	ldr	x3, [x19, #L_CPU]	/* x3 = curlwp->l_cpu */
+	str	x19, [x3, #CI_CURLWP]	/* curlwp->l_cpu->ci_curlwp := x19 */
+	dmb	st			/* see comments in kern_mutex.c */
+
 	mov	sp, x4			/* restore pinned_lwp sp */
 	msr	cpacr_el1, x5		/* restore pinned_lwp cpacr */
 
@@ -249,10 +257,11 @@ END(cpu_switchto_softint)
 ENTRY_NP(softint_cleanup)
 	mov	lr, x20			/* restore original lr */
 
-	mrs	x20, tpidr_el1		/* curcpu() */
-	ldr	w2, [x20, #CI_MTX_COUNT]/* ->ci_mtx_count */
+	mrs	x20, tpidr_el1		/* curlwp */
+	ldr	x3, [x20, #L_CPU]	/* curcpu */
+	ldr	w2, [x3, #CI_MTX_COUNT]	/* ->ci_mtx_count */
 	add	w2, w2, #1
-	str	w2, [x20, #CI_MTX_COUNT]
+	str	w2, [x3, #CI_MTX_COUNT]
 
 	msr	daif, x19		/* restore interrupt mask */
 	ldp	x19, x20, [sp], #16	/* restore */
@@ -366,15 +375,13 @@ ENTRY_NP(el0_trap)
 ENTRY_NP(el0_trap_exit)
 	DISABLE_INTERRUPT		/* make sure I|F marked */
 1:
-	/* while (curcpu()->ci_astpending & __BIT(0)) { */
+	/* while (curlwp->l_md.md_astpending != 0) { */
 	mrs	x8, tpidr_el1
-	ldr	w9, [x8, #CI_ASTPENDING]
-	tbz	w9, #0, 9f
+	ldr	w9, [x8, #L_MD_ASTPENDING]
+	cbz	w9, 9f
 
-	/*  atomic_and_uint(&curcpu()->ci_astpending, ~__BIT(0)); */
-	mov	w1, #~__BIT(0)
-	add	x0, x8, #CI_ASTPENDING
-	bl	_C_LABEL(atomic_and_uint);
+	/* curlwp->l_md.md_astpending = 0; */
+	str	xzr, [x8, #L_MD_ASTPENDING]
 
 	/*  trap_doast(tf); */
 	ENABLE_INTERRUPT
@@ -384,8 +391,8 @@ ENTRY_NP(el0_trap_exit)
 	b	1b
 	/* } */
 9:
-	mrs	x8, tpidr_el1
-	ldr	x9, [x8, #CI_CURLWP]
+
+	mrs	x9, tpidr_el1
 	ldr	x23, [x9, #L_MD_CPACR]
 	msr	cpacr_el1, x23		/* FP unit EL0 handover */
 	isb				/* necessary? */
@@ -446,8 +453,7 @@ END(cpu_Debugger)
  * int cpu_set_onfault(struct faultbuf *fb)
  */
 ENTRY_NP(cpu_set_onfault)
-	mrs	x3, tpidr_el1
-	ldr	x2, [x3, #CI_CURLWP]	/* curlwp = curcpu()->ci_curlwp */
+	mrs	x2, tpidr_el1		/* x2 = curlwp */
 	str	x0, [x2, #L_MD_ONFAULT] /* l_md.md_onfault = fb */
 
 	stp	x19, x20, [x0, #(FB_X19 * 8)]

Index: src/sys/arch/aarch64/aarch64/db_machdep.c
diff -u src/sys/arch/aarch64/aarch64/db_machdep.c:1.25 src/sys/arch/aarch64/aarch64/db_machdep.c:1.26
--- src/sys/arch/aarch64/aarch64/db_machdep.c:1.25	Thu Jul  2 11:10:48 2020
+++ src/sys/arch/aarch64/aarch64/db_machdep.c	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $ */
+/* $NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd32.h"
@@ -322,8 +322,6 @@ show_cpuinfo(struct cpu_info *ci)
 	    &ci->ci_cpl, cpuid, cpuinfobuf.ci_cpl);
 	db_printf("%p cpu[%lu].ci_softints     = 0x%08x\n",
 	    &ci->ci_softints, cpuid, cpuinfobuf.ci_softints);
-	db_printf("%p cpu[%lu].ci_astpending   = 0x%08x\n",
-	    &ci->ci_astpending, cpuid, cpuinfobuf.ci_astpending);
 	db_printf("%p cpu[%lu].ci_intr_depth   = %u\n",
 	    &ci->ci_intr_depth, cpuid, cpuinfobuf.ci_intr_depth);
 	db_printf("%p cpu[%lu].ci_biglock_count = %u\n",

Index: src/sys/arch/aarch64/aarch64/fusu.S
diff -u src/sys/arch/aarch64/aarch64/fusu.S:1.9 src/sys/arch/aarch64/aarch64/fusu.S:1.10
--- src/sys/arch/aarch64/aarch64/fusu.S:1.9	Thu Aug  6 06:49:55 2020
+++ src/sys/arch/aarch64/aarch64/fusu.S	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $	*/
+/*	$NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $	*/
 
 /*-
  * Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
 #include <aarch64/asm.h>
 #include "assym.h"
 
-RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef ARMV81_PAN
 #define PAN_ENABLE	\
@@ -73,8 +73,7 @@ RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 
 
 	.macro exit_cpu_onfault
 	/* curlwp->l_md.md_onfault = NULL */
-	mrs	x1, tpidr_el1			/* curcpu */
-	ldr	x1, [x1, #CI_CURLWP]		/* x1 = curlwp */
+	mrs	x1, tpidr_el1			/* x1 = curlwp */
 	str	xzr, [x1, #L_MD_ONFAULT]	/* lwp->l_md_onfault = NULL */
 9:
 	PAN_ENABLE				/* enable PAN */

Index: src/sys/arch/aarch64/aarch64/genassym.cf
diff -u src/sys/arch/aarch64/aarch64/genassym.cf:1.29 src/sys/arch/aarch64/aarch64/genassym.cf:1.30
--- src/sys/arch/aarch64/aarch64/genassym.cf:1.29	Thu Aug  6 06:49:55 2020
+++ src/sys/arch/aarch64/aarch64/genassym.cf	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.29 2020/08/06 06:49:55 ryo Exp $
+# $NetBSD: genassym.cf,v 1.30 2020/08/12 13:19:35 skrll Exp $
 #-
 # Copyright (c) 2014 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -154,6 +154,7 @@ define	L_MD_FLAGS		offsetof(struct lwp, 
 define	L_MD_UTF		offsetof(struct lwp, l_md.md_utf)
 define	L_MD_CPACR		offsetof(struct lwp, l_md.md_cpacr)
 define	L_MD_ONFAULT		offsetof(struct lwp, l_md.md_onfault)
+define	L_MD_ASTPENDING		offsetof(struct lwp, l_md.md_astpending)
 define	L_MD_IA_KERN		offsetof(struct lwp, l_md.md_ia_kern)
 define	L_MD_IA_USER		offsetof(struct lwp, l_md.md_ia_user)
 define	L_MD_IB_USER		offsetof(struct lwp, l_md.md_ib_user)
@@ -288,7 +289,6 @@ define	CI_CPL			offsetof(struct cpu_info
 define	CI_CPUID		offsetof(struct cpu_info, ci_cpuid)
 define	CI_MIDR			offsetof(struct cpu_info, ci_id.ac_midr)
 define	CI_MPIDR		offsetof(struct cpu_info, ci_id.ac_mpidr)
-define	CI_ASTPENDING		offsetof(struct cpu_info, ci_astpending)
 define	CI_WANT_RESCHED		offsetof(struct cpu_info, ci_want_resched)
 define	CI_INTR_DEPTH		offsetof(struct cpu_info, ci_intr_depth)
 define	CI_MTX_COUNT		offsetof(struct cpu_info, ci_mtx_count)

Index: src/sys/arch/aarch64/aarch64/idle_machdep.S
diff -u src/sys/arch/aarch64/aarch64/idle_machdep.S:1.5 src/sys/arch/aarch64/aarch64/idle_machdep.S:1.6
--- src/sys/arch/aarch64/aarch64/idle_machdep.S:1.5	Sun Jan 27 04:48:09 2019
+++ src/sys/arch/aarch64/aarch64/idle_machdep.S	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: idle_machdep.S,v 1.5 2019/01/27 04:48:09 dholland Exp $ */
+/* $NetBSD: idle_machdep.S,v 1.6 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 #include "opt_arm_intr_impl.h"
 #include "opt_ddb.h"
 
-RCSID("$NetBSD: idle_machdep.S,v 1.5 2019/01/27 04:48:09 dholland Exp $");
+RCSID("$NetBSD: idle_machdep.S,v 1.6 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef ARM_INTR_IMPL
 #include ARM_INTR_IMPL
@@ -74,7 +74,8 @@ ENTRY(cpu_idle)
 	adr	x0, 1f
 	str	x0, [sp, #TF_PC]		/* CLKF_PC refer to tf_pc */
 
-	mrs	x1, tpidr_el1			/* get curcpu() */
+	mrs	x1, tpidr_el1			/* get curlwp */
+	ldr	x1, [x1, #L_CPU]		/* get curcpu */
 	ldr	w28, [x1, #CI_INTR_DEPTH]	/* w28 = ci->ci_intr_depth */
 	add	w2, w28, #1			/* w2 = intr_depth + 1 */
 
@@ -86,7 +87,8 @@ ENTRY(cpu_idle)
 	str	w2, [x1, #CI_INTR_DEPTH]	/* ci->ci_intr_depth++ */
 	bl	ARM_IRQ_HANDLER			/* irqhandler(trapframe) */
 1:
-	mrs	x1, tpidr_el1			/* get curcpu() */
+	mrs	x1, tpidr_el1			/* get curlwp */
+	ldr	x1, [x1, #L_CPU]		/* get curcpu */
 	str	w28, [x1, #CI_INTR_DEPTH]	/* ci->ci_intr_depth = old */
 
 #if defined(__HAVE_FAST_SOFTINTS) && !defined(__HAVE_PIC_FAST_SOFTINTS)

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.69 src/sys/arch/aarch64/aarch64/locore.S:1.70
--- src/sys/arch/aarch64/aarch64/locore.S:1.69	Sun Aug  2 06:58:16 2020
+++ src/sys/arch/aarch64/aarch64/locore.S	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.69 2020/08/02 06:58:16 maxv Exp $	*/
+/*	$NetBSD: locore.S,v 1.70 2020/08/12 13:19:35 skrll Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.69 2020/08/02 06:58:16 maxv Exp $")
+RCSID("$NetBSD: locore.S,v 1.70 2020/08/12 13:19:35 skrll Exp $")
 
 #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED
 #define	MAIR_DEVICE_MEM		MAIR_DEVICE_nGnRnE
@@ -176,9 +176,9 @@ vstart:
 	msr	tpidrro_el0, xzr
 
 	/* set curcpu() */
-	adrl	x0, cpu_info_store	/* cpu_info_store is cpu_info[0] */
-	msr	tpidr_el1, x0		/* curcpu is cpu_info[0] */
-	DPRINTREG("curcpu           = ", x0);
+	adrl	x0, lwp0		/* curlwp is lwp0 */
+	msr	tpidr_el1, x0
+	DPRINTREG("curlwp           = ", x0);
 
 	/* init PAN if supported */
 	mov	x0, #1
@@ -195,8 +195,8 @@ vstart:
 1:
 
 	/* set topology information */
-	mrs	x0, tpidr_el1		/* curcpu */
-	mrs     x1, mpidr_el1
+	adrl	x0, cpu_info_store	/* curcpu */
+	mrs	x1, mpidr_el1
 	mov	x2, #0
 	bl	arm_cpu_topology_set
 
@@ -459,7 +459,7 @@ mp_vstart:
 	mul	x0, x27, x0
 	adrl	x1, _C_LABEL(cpu_info_store)
 	add	x0, x0, x1		/* x0 = &cpu_info_store[cpuindex] */
-	msr	tpidr_el1, x0		/* tpidr_el1 = curcpu() = x0 */
+	msr	tpidr_el1, x0		/* tpidr_el1 = curcpu = x0 */
 
 	mrs	x1, midr_el1
 	str	x1, [x0, #CI_MIDR]	/* curcpu()->ci_cpuid = midr_el1 */
@@ -509,9 +509,9 @@ mp_vstart:
 9:
 
 	/* fill my cpu_info */
-	mrs	x0, tpidr_el1		/* curcpu() */
-
-	ldr	x1, [x0, #CI_IDLELWP]	/* x1 = curcpu()->ci_data.cpu_idlelwp */
+	mrs	x0, tpidr_el1		/* curcpu */
+	ldr	x1, [x0, #CI_IDLELWP]	/* x0 = curcpu()->ci_idlelwp */
+	msr	tpidr_el1, x1		/* tpidr_el1 = curlwp = x1 */
 	str	x1, [x0, #CI_CURLWP]	/* curlwp is idlelwp */
 
 	/* get my stack from lwp */
@@ -534,7 +534,8 @@ mp_vstart:
 1:
 
 	mov	fp, xzr			/* trace back starts here */
-	mrs	x0, tpidr_el1		/* curcpu() */
+	mrs	x0, tpidr_el1		/* curlwp */
+	ldr	x0, [x0, #L_CPU]	/* curlwp->l_cpu */
 	bl	_C_LABEL(cpu_hatch)
 	mov	x0, xzr
 	b	_C_LABEL(idle_loop)	/* never to return */

Index: src/sys/arch/aarch64/aarch64/vectors.S
diff -u src/sys/arch/aarch64/aarch64/vectors.S:1.17 src/sys/arch/aarch64/aarch64/vectors.S:1.18
--- src/sys/arch/aarch64/aarch64/vectors.S:1.17	Sat May 23 18:08:59 2020
+++ src/sys/arch/aarch64/aarch64/vectors.S	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vectors.S,v 1.17 2020/05/23 18:08:59 ryo Exp $	*/
+/*	$NetBSD: vectors.S,v 1.18 2020/08/12 13:19:35 skrll Exp $	*/
 
 #include <aarch64/asm.h>
 #include "assym.h"
@@ -81,8 +81,7 @@ ENTRY_NBTI(\func)
 
 	.if \el == 0
 	/* curlwp->l_private = tpidr{,ro}_el0 */
-	mrs	x1, tpidr_el1		/* curcpu() */
-	ldr	x1, [x1, #CI_CURLWP]	/* x1 = curcpu()->ci_curlwp */
+	mrs	x1, tpidr_el1		/* x1 = curlwp */
 	mrs	x0, tpidr\tpidr\()_el0
 	str	x0, [x1, #L_PRIVATE]	/* curlwp->l_private = tpidr{,ro}_el0 */
 

Index: src/sys/arch/aarch64/include/cpu.h
diff -u src/sys/arch/aarch64/include/cpu.h:1.25 src/sys/arch/aarch64/include/cpu.h:1.26
--- src/sys/arch/aarch64/include/cpu.h:1.25	Wed Jul  1 08:01:07 2020
+++ src/sys/arch/aarch64/include/cpu.h	Wed Aug 12 13:19:35 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: cpu.h,v 1.25 2020/07/01 08:01:07 ryo Exp $ */
+/* $NetBSD: cpu.h,v 1.26 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
- * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -75,19 +75,33 @@ struct cpu_info {
 	struct cpu_data ci_data;
 	device_t ci_dev;
 	cpuid_t ci_cpuid;
-	struct lwp *ci_curlwp;
+
+	/*
+	 * the following are in their own cache line, as they are stored to
+	 * regularly by remote CPUs; when they were mixed with other fields
+	 * we observed frequent cache misses.
+	 */
+	int ci_want_resched __aligned(COHERENCY_UNIT);
+	/* XXX pending IPIs? */
+
+	/*
+	 * this is stored frequently, and is fetched by remote CPUs.
+	 */
+	struct lwp *ci_curlwp __aligned(COHERENCY_UNIT);
 	struct lwp *ci_onproc;
-	struct lwp *ci_softlwps[SOFTINT_COUNT];
+
+	/*
+	 * largely CPU-private.
+	 */
+	struct lwp *ci_softlwps[SOFTINT_COUNT] __aligned(COHERENCY_UNIT);
 
 	uint64_t ci_lastintr;
 
 	int ci_mtx_oldspl;
 	int ci_mtx_count;
 
-	int ci_want_resched;
 	int ci_cpl;
 	volatile u_int ci_softints;
-	volatile u_int ci_astpending;
 	volatile u_int ci_intr_depth;
 
 	int ci_kfpu_spl;
@@ -117,18 +131,23 @@ struct cpu_info {
 } __aligned(COHERENCY_UNIT);
 
 #ifdef _KERNEL
-static inline struct cpu_info *
-curcpu(void)
+static inline struct lwp * __attribute__ ((const))
+aarch64_curlwp(void)
 {
-	struct cpu_info *ci;
-	__asm __volatile ("mrs %0, tpidr_el1" : "=r"(ci));
-	return ci;
+	struct lwp *l;
+	__asm("mrs %0, tpidr_el1" : "=r"(l));
+	return l;
 }
-#define curlwp			(curcpu()->ci_curlwp)
 
-#define setsoftast(ci)		atomic_or_uint(&(ci)->ci_astpending, __BIT(0))
-#define cpu_signotify(l)	setsoftast((l)->l_cpu)
+/* forward declaration; defined in sys/lwp.h. */
+static __inline struct cpu_info *lwp_getcpu(struct lwp *);
+
+#define	curcpu()		(lwp_getcpu(aarch64_curlwp()))
+#define	setsoftast(ci)		(cpu_signotify((ci)->ci_onproc))
+#undef curlwp
+#define	curlwp			(aarch64_curlwp())
 
+void	cpu_signotify(struct lwp *l);
 void	cpu_need_proftick(struct lwp *l);
 
 void	cpu_hatch(struct cpu_info *);
@@ -151,6 +170,7 @@ extern struct cpu_info cpu_info_store[];
 	cii = 0, __USE(cii), ci = curcpu(); ci != NULL; ci = NULL
 #endif /* MULTIPROCESSOR */
 
+#define	LWP0_CPU_INFO	(&cpu_info_store[0])
 
 static inline void
 cpu_dosoftints(void)

Index: src/sys/arch/aarch64/include/proc.h
diff -u src/sys/arch/aarch64/include/proc.h:1.7 src/sys/arch/aarch64/include/proc.h:1.8
--- src/sys/arch/aarch64/include/proc.h:1.7	Sat May 23 18:08:59 2020
+++ src/sys/arch/aarch64/include/proc.h	Wed Aug 12 13:19:35 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: proc.h,v 1.7 2020/05/23 18:08:59 ryo Exp $ */
+/* $NetBSD: proc.h,v 1.8 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -43,6 +43,7 @@ struct mdlwp {
 	struct trapframe *md_utf;
 	uint64_t md_cpacr;
 	uint32_t md_flags;
+	volatile uint32_t md_astpending;
 
 	uint64_t md_ia_kern[2]; /* APIAKey{Lo,Hi}_EL1 used in the kernel */
 	uint64_t md_ia_user[2]; /* APIAKey{Lo,Hi}_EL1 used in user-process */

Reply via email to