Module Name: src Committed By: skrll Date: Wed Aug 12 13:19:35 UTC 2020
Modified Files: src/sys/arch/aarch64/aarch64: copyinout.S cpu_machdep.c cpuswitch.S db_machdep.c fusu.S genassym.cf idle_machdep.S locore.S vectors.S src/sys/arch/aarch64/include: cpu.h proc.h Log Message: Part II of ad's aarch64 performance improvements (cpuswitch.S bugs are all mine) - Use tpidr_el1 to hold curlwp and not curcpu, because curlwp is accessed much more often by MI code. It also makes curlwp preemption-safe and allows aarch64_curlwp() to be a const function (curcpu must be volatile). - Make ASTs operate per-LWP rather than per-CPU; otherwise LWPs can sometimes see spurious ASTs (which doesn't cause a problem, it just means some time may be wasted). - Use plain stores to set/clear ASTs. Make sure ASTs are always set on the same CPU as the target LWP, and delivered via IPI if posted from a remote CPU so that they are resolved quickly. - Add some cache line padding to struct cpu_info, to match x86. - Add a memory barrier in a couple of places where ci_curlwp is set. This is needed whenever an LWP that is resuming on the CPU could hold an adaptive mutex. The barrier needs to drain the CPU's store buffer, so that the update to ci_curlwp becomes globally visible before the LWP can resume and call mutex_exit(). By my reading of the ARM docs it looks like the instruction I used (dmb st) will do the right thing, but I'm not 100% sure.
To generate a diff of this commit: cvs rdiff -u -r1.14 -r1.15 src/sys/arch/aarch64/aarch64/copyinout.S cvs rdiff -u -r1.10 -r1.11 src/sys/arch/aarch64/aarch64/cpu_machdep.c cvs rdiff -u -r1.24 -r1.25 src/sys/arch/aarch64/aarch64/cpuswitch.S cvs rdiff -u -r1.25 -r1.26 src/sys/arch/aarch64/aarch64/db_machdep.c cvs rdiff -u -r1.9 -r1.10 src/sys/arch/aarch64/aarch64/fusu.S cvs rdiff -u -r1.29 -r1.30 src/sys/arch/aarch64/aarch64/genassym.cf cvs rdiff -u -r1.5 -r1.6 src/sys/arch/aarch64/aarch64/idle_machdep.S cvs rdiff -u -r1.69 -r1.70 src/sys/arch/aarch64/aarch64/locore.S cvs rdiff -u -r1.17 -r1.18 src/sys/arch/aarch64/aarch64/vectors.S cvs rdiff -u -r1.25 -r1.26 src/sys/arch/aarch64/include/cpu.h cvs rdiff -u -r1.7 -r1.8 src/sys/arch/aarch64/include/proc.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/copyinout.S diff -u src/sys/arch/aarch64/aarch64/copyinout.S:1.14 src/sys/arch/aarch64/aarch64/copyinout.S:1.15 --- src/sys/arch/aarch64/aarch64/copyinout.S:1.14 Thu Aug 6 06:49:55 2020 +++ src/sys/arch/aarch64/aarch64/copyinout.S Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $ */ +/* $NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -33,7 +33,7 @@ #include <aarch64/asm.h> #include "assym.h" -RCSID("$NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $"); +RCSID("$NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $"); #ifdef ARMV81_PAN #define PAN_ENABLE \ @@ -80,8 +80,7 @@ RCSID("$NetBSD: copyinout.S,v 1.14 2020/ .macro exit_cpu_onfault /* curlwp->l_md.md_onfault = NULL */ - mrs x0, tpidr_el1 /* curcpu */ - ldr x0, [x0, #CI_CURLWP] /* x0 = curlwp */ + mrs x0, tpidr_el1 /* x0 = curlwp */ str xzr, [x0, #L_MD_ONFAULT] /* lwp->l_md_onfault = NULL */ 9: PAN_ENABLE /* enable PAN */ Index: src/sys/arch/aarch64/aarch64/cpu_machdep.c diff -u src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.10 src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.11 --- src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.10 Thu May 21 05:41:40 2020 +++ src/sys/arch/aarch64/aarch64/cpu_machdep.c Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $ */ +/* $NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014, 2019 The NetBSD Foundation, Inc. 
@@ -31,7 +31,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $"); +__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $"); #include "opt_multiprocessor.h" @@ -261,7 +261,7 @@ cpu_need_resched(struct cpu_info *ci, st intr_ipi_send(ci->ci_kcpuset, IPI_AST); #endif } else { - setsoftast(ci); /* force call to ast() */ + l->l_md.md_astpending = 1; } } @@ -272,7 +272,22 @@ cpu_need_proftick(struct lwp *l) KASSERT(l->l_cpu == curcpu()); l->l_pflag |= LP_OWEUPC; - setsoftast(l->l_cpu); + l->l_md.md_astpending = 1; +} + +void +cpu_signotify(struct lwp *l) +{ + + KASSERT(kpreempt_disabled()); + + if (l->l_cpu != curcpu()) { +#ifdef MULTIPROCESSOR + intr_ipi_send(l->l_cpu->ci_kcpuset, IPI_AST); +#endif + } else { + l->l_md.md_astpending = 1; + } } #ifdef __HAVE_PREEMPTION Index: src/sys/arch/aarch64/aarch64/cpuswitch.S diff -u src/sys/arch/aarch64/aarch64/cpuswitch.S:1.24 src/sys/arch/aarch64/aarch64/cpuswitch.S:1.25 --- src/sys/arch/aarch64/aarch64/cpuswitch.S:1.24 Thu Aug 6 06:49:55 2020 +++ src/sys/arch/aarch64/aarch64/cpuswitch.S Wed Aug 12 13:19:35 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $ */ +/* $NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $ */ /*- - * Copyright (c) 2014 The NetBSD Foundation, Inc. + * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -38,7 +38,7 @@ #include "opt_ddb.h" #include "opt_kasan.h" -RCSID("$NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $") +RCSID("$NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $") ARMV8_DEFINE_OPTIONS @@ -83,11 +83,9 @@ ENTRY_NP(cpu_switchto) #endif ldr x5, [x1, #L_MD_CPACR] /* get cpacr_el1 */ - mrs x3, tpidr_el1 DISABLE_INTERRUPT mov sp, x4 /* restore stack pointer */ msr cpacr_el1, x5 /* restore cpacr_el1 */ - str x1, [x3, #CI_CURLWP] /* switch curlwp to new lwp */ #ifdef ARMV83_PAC /* Switch the PAC key. */ @@ -118,6 +116,10 @@ ENTRY_NP(cpu_switchto) 1: #endif + msr tpidr_el1, x1 /* switch curlwp to new lwp */ + ldr x3, [x1, #L_CPU] + str x1, [x3, #CI_CURLWP] /* switch curlwp to new lwp */ + dmb st /* see comments in kern_mutex.c */ ENABLE_INTERRUPT /* @@ -161,8 +163,7 @@ ENTRY_NP(cpu_switchto_softint) stp x27, x28, [sp, #TF_X27] stp x29, x2, [sp, #TF_X29] /* tf->lr = softint_cleanup; */ - mrs x20, tpidr_el1 /* x20 := curcpu() */ - ldr x19, [x20, #CI_CURLWP] /* x19 := curcpu()->ci_curlwp */ + mrs x19, tpidr_el1 /* x19 := curlwp */ mov x4, sp mrs x5, cpacr_el1 @@ -180,9 +181,13 @@ ENTRY_NP(cpu_switchto_softint) ldr x4, [x0, #L_MD_UTF] DISABLE_INTERRUPT + ldr x20, [x19, #L_CPU] /* x20 := curlwp->l_cpu */ + /* onto new stack */ sub sp, x4, #TF_SIZE /* new sp := softlwp->l_md_utf - 1 */ + msr tpidr_el1, x0 /* curlwp = softlwp; */ str x0, [x20, #CI_CURLWP] /* curcpu()->ci_curlwp = softlwp; */ + /* no need for memory barrier here */ mov x5, #CPACR_FPEN_NONE msr cpacr_el1, x5 /* cpacr_el1 = CPACR_FPEN_NONE */ @@ -203,7 +208,6 @@ ENTRY_NP(cpu_switchto_softint) mov x0, x19 /* x0 := pinned_lwp */ bl _C_LABEL(softint_dispatch) - mrs x20, tpidr_el1 ldr x6, [x19, #L_PCB] /* x6 = lwp_getpcb(curlwp) */ ldr x4, [x6, #PCB_TF] /* x4 := pinned_lwp->l_addr->pcb_tf */ #ifdef DDB @@ -212,7 +216,11 @@ ENTRY_NP(cpu_switchto_softint) ldr x5, [x19, #L_MD_CPACR] /* x5 := pinned_lwp->l_md_cpacr 
*/ DISABLE_INTERRUPT - str x19, [x20, #CI_CURLWP] /* curcpu()->ci_curlwp := x19 */ + msr tpidr_el1, x19 /* curlwp = pinned_lwp */ + ldr x3, [x19, #L_CPU] /* x3 = curlwp->l_cpu */ + str x19, [x3, #CI_CURLWP] /* curlwp->l_cpu->ci_curlwp := x19 */ + dmb st /* see comments in kern_mutex.c */ + mov sp, x4 /* restore pinned_lwp sp */ msr cpacr_el1, x5 /* restore pinned_lwp cpacr */ @@ -249,10 +257,11 @@ END(cpu_switchto_softint) ENTRY_NP(softint_cleanup) mov lr, x20 /* restore original lr */ - mrs x20, tpidr_el1 /* curcpu() */ - ldr w2, [x20, #CI_MTX_COUNT]/* ->ci_mtx_count */ + mrs x20, tpidr_el1 /* curlwp */ + ldr x3, [x20, #L_CPU] /* curcpu */ + ldr w2, [x3, #CI_MTX_COUNT] /* ->ci_mtx_count */ add w2, w2, #1 - str w2, [x20, #CI_MTX_COUNT] + str w2, [x3, #CI_MTX_COUNT] msr daif, x19 /* restore interrupt mask */ ldp x19, x20, [sp], #16 /* restore */ @@ -366,15 +375,13 @@ ENTRY_NP(el0_trap) ENTRY_NP(el0_trap_exit) DISABLE_INTERRUPT /* make sure I|F marked */ 1: - /* while (curcpu()->ci_astpending & __BIT(0)) { */ + /* while (curlwp->l_md.md_astpending != 0) { */ mrs x8, tpidr_el1 - ldr w9, [x8, #CI_ASTPENDING] - tbz w9, #0, 9f + ldr w9, [x8, #L_MD_ASTPENDING] + cbz w9, 9f - /* atomic_and_uint(&curcpu()->ci_astpending, ~__BIT(0)); */ - mov w1, #~__BIT(0) - add x0, x8, #CI_ASTPENDING - bl _C_LABEL(atomic_and_uint); + /* curlwp->l_md.md_astpending = 0; */ + str xzr, [x8, #L_MD_ASTPENDING] /* trap_doast(tf); */ ENABLE_INTERRUPT @@ -384,8 +391,8 @@ ENTRY_NP(el0_trap_exit) b 1b /* } */ 9: - mrs x8, tpidr_el1 - ldr x9, [x8, #CI_CURLWP] + + mrs x9, tpidr_el1 ldr x23, [x9, #L_MD_CPACR] msr cpacr_el1, x23 /* FP unit EL0 handover */ isb /* necessary? 
*/ @@ -446,8 +453,7 @@ END(cpu_Debugger) * int cpu_set_onfault(struct faultbuf *fb) */ ENTRY_NP(cpu_set_onfault) - mrs x3, tpidr_el1 - ldr x2, [x3, #CI_CURLWP] /* curlwp = curcpu()->ci_curlwp */ + mrs x2, tpidr_el1 /* x2 = curlwp */ str x0, [x2, #L_MD_ONFAULT] /* l_md.md_onfault = fb */ stp x19, x20, [x0, #(FB_X19 * 8)] Index: src/sys/arch/aarch64/aarch64/db_machdep.c diff -u src/sys/arch/aarch64/aarch64/db_machdep.c:1.25 src/sys/arch/aarch64/aarch64/db_machdep.c:1.26 --- src/sys/arch/aarch64/aarch64/db_machdep.c:1.25 Thu Jul 2 11:10:48 2020 +++ src/sys/arch/aarch64/aarch64/db_machdep.c Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $ */ +/* $NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd32.h" @@ -322,8 +322,6 @@ show_cpuinfo(struct cpu_info *ci) &ci->ci_cpl, cpuid, cpuinfobuf.ci_cpl); db_printf("%p cpu[%lu].ci_softints = 0x%08x\n", &ci->ci_softints, cpuid, cpuinfobuf.ci_softints); - db_printf("%p cpu[%lu].ci_astpending = 0x%08x\n", - &ci->ci_astpending, cpuid, cpuinfobuf.ci_astpending); db_printf("%p cpu[%lu].ci_intr_depth = %u\n", &ci->ci_intr_depth, cpuid, cpuinfobuf.ci_intr_depth); db_printf("%p cpu[%lu].ci_biglock_count = %u\n", Index: src/sys/arch/aarch64/aarch64/fusu.S diff -u src/sys/arch/aarch64/aarch64/fusu.S:1.9 src/sys/arch/aarch64/aarch64/fusu.S:1.10 --- src/sys/arch/aarch64/aarch64/fusu.S:1.9 Thu Aug 6 06:49:55 2020 +++ src/sys/arch/aarch64/aarch64/fusu.S Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $ */ +/* $NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014, 2019 The NetBSD Foundation, 
Inc. @@ -32,7 +32,7 @@ #include <aarch64/asm.h> #include "assym.h" -RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $"); +RCSID("$NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $"); #ifdef ARMV81_PAN #define PAN_ENABLE \ @@ -73,8 +73,7 @@ RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 .macro exit_cpu_onfault /* curlwp->l_md.md_onfault = NULL */ - mrs x1, tpidr_el1 /* curcpu */ - ldr x1, [x1, #CI_CURLWP] /* x1 = curlwp */ + mrs x1, tpidr_el1 /* x1 = curlwp */ str xzr, [x1, #L_MD_ONFAULT] /* lwp->l_md_onfault = NULL */ 9: PAN_ENABLE /* enable PAN */ Index: src/sys/arch/aarch64/aarch64/genassym.cf diff -u src/sys/arch/aarch64/aarch64/genassym.cf:1.29 src/sys/arch/aarch64/aarch64/genassym.cf:1.30 --- src/sys/arch/aarch64/aarch64/genassym.cf:1.29 Thu Aug 6 06:49:55 2020 +++ src/sys/arch/aarch64/aarch64/genassym.cf Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.29 2020/08/06 06:49:55 ryo Exp $ +# $NetBSD: genassym.cf,v 1.30 2020/08/12 13:19:35 skrll Exp $ #- # Copyright (c) 2014 The NetBSD Foundation, Inc. # All rights reserved. 
@@ -154,6 +154,7 @@ define L_MD_FLAGS offsetof(struct lwp, define L_MD_UTF offsetof(struct lwp, l_md.md_utf) define L_MD_CPACR offsetof(struct lwp, l_md.md_cpacr) define L_MD_ONFAULT offsetof(struct lwp, l_md.md_onfault) +define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending) define L_MD_IA_KERN offsetof(struct lwp, l_md.md_ia_kern) define L_MD_IA_USER offsetof(struct lwp, l_md.md_ia_user) define L_MD_IB_USER offsetof(struct lwp, l_md.md_ib_user) @@ -288,7 +289,6 @@ define CI_CPL offsetof(struct cpu_info define CI_CPUID offsetof(struct cpu_info, ci_cpuid) define CI_MIDR offsetof(struct cpu_info, ci_id.ac_midr) define CI_MPIDR offsetof(struct cpu_info, ci_id.ac_mpidr) -define CI_ASTPENDING offsetof(struct cpu_info, ci_astpending) define CI_WANT_RESCHED offsetof(struct cpu_info, ci_want_resched) define CI_INTR_DEPTH offsetof(struct cpu_info, ci_intr_depth) define CI_MTX_COUNT offsetof(struct cpu_info, ci_mtx_count) Index: src/sys/arch/aarch64/aarch64/idle_machdep.S diff -u src/sys/arch/aarch64/aarch64/idle_machdep.S:1.5 src/sys/arch/aarch64/aarch64/idle_machdep.S:1.6 --- src/sys/arch/aarch64/aarch64/idle_machdep.S:1.5 Sun Jan 27 04:48:09 2019 +++ src/sys/arch/aarch64/aarch64/idle_machdep.S Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: idle_machdep.S,v 1.5 2019/01/27 04:48:09 dholland Exp $ */ +/* $NetBSD: idle_machdep.S,v 1.6 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -35,7 +35,7 @@ #include "opt_arm_intr_impl.h" #include "opt_ddb.h" -RCSID("$NetBSD: idle_machdep.S,v 1.5 2019/01/27 04:48:09 dholland Exp $"); +RCSID("$NetBSD: idle_machdep.S,v 1.6 2020/08/12 13:19:35 skrll Exp $"); #ifdef ARM_INTR_IMPL #include ARM_INTR_IMPL @@ -74,7 +74,8 @@ ENTRY(cpu_idle) adr x0, 1f str x0, [sp, #TF_PC] /* CLKF_PC refer to tf_pc */ - mrs x1, tpidr_el1 /* get curcpu() */ + mrs x1, tpidr_el1 /* get curlwp */ + ldr x1, [x1, #L_CPU] /* get curcpu */ ldr w28, [x1, #CI_INTR_DEPTH] /* w28 = ci->ci_intr_depth */ add w2, w28, #1 /* w2 = intr_depth + 1 */ @@ -86,7 +87,8 @@ ENTRY(cpu_idle) str w2, [x1, #CI_INTR_DEPTH] /* ci->ci_intr_depth++ */ bl ARM_IRQ_HANDLER /* irqhandler(trapframe) */ 1: - mrs x1, tpidr_el1 /* get curcpu() */ + mrs x1, tpidr_el1 /* get curlwp */ + ldr x1, [x1, #L_CPU] /* get curcpu */ str w28, [x1, #CI_INTR_DEPTH] /* ci->ci_intr_depth = old */ #if defined(__HAVE_FAST_SOFTINTS) && !defined(__HAVE_PIC_FAST_SOFTINTS) Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.69 src/sys/arch/aarch64/aarch64/locore.S:1.70 --- src/sys/arch/aarch64/aarch64/locore.S:1.69 Sun Aug 2 06:58:16 2020 +++ src/sys/arch/aarch64/aarch64/locore.S Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.69 2020/08/02 06:58:16 maxv Exp $ */ +/* $NetBSD: locore.S,v 1.70 2020/08/12 13:19:35 skrll Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -38,7 +38,7 @@ #include <aarch64/hypervisor.h> #include "assym.h" -RCSID("$NetBSD: locore.S,v 1.69 2020/08/02 06:58:16 maxv Exp $") +RCSID("$NetBSD: locore.S,v 1.70 2020/08/12 13:19:35 skrll Exp $") #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED #define MAIR_DEVICE_MEM MAIR_DEVICE_nGnRnE @@ -176,9 +176,9 @@ vstart: msr tpidrro_el0, xzr /* set curcpu() */ - adrl x0, cpu_info_store /* cpu_info_store is cpu_info[0] */ - msr tpidr_el1, x0 /* curcpu is cpu_info[0] */ - DPRINTREG("curcpu = ", x0); + adrl x0, lwp0 /* curlwp is lwp0 */ + msr tpidr_el1, x0 + 
DPRINTREG("curlwp = ", x0); /* init PAN if supported */ mov x0, #1 @@ -195,8 +195,8 @@ vstart: 1: /* set topology information */ - mrs x0, tpidr_el1 /* curcpu */ - mrs x1, mpidr_el1 + adrl x0, cpu_info_store /* curcpu */ + mrs x1, mpidr_el1 mov x2, #0 bl arm_cpu_topology_set @@ -459,7 +459,7 @@ mp_vstart: mul x0, x27, x0 adrl x1, _C_LABEL(cpu_info_store) add x0, x0, x1 /* x0 = &cpu_info_store[cpuindex] */ - msr tpidr_el1, x0 /* tpidr_el1 = curcpu() = x0 */ + msr tpidr_el1, x0 /* tpidr_el1 = curcpu = x0 */ mrs x1, midr_el1 str x1, [x0, #CI_MIDR] /* curcpu()->ci_cpuid = midr_el1 */ @@ -509,9 +509,9 @@ mp_vstart: 9: /* fill my cpu_info */ - mrs x0, tpidr_el1 /* curcpu() */ - - ldr x1, [x0, #CI_IDLELWP] /* x1 = curcpu()->ci_data.cpu_idlelwp */ + mrs x0, tpidr_el1 /* curcpu */ + ldr x1, [x0, #CI_IDLELWP] /* x0 = curcpu()->ci_idlelwp */ + msr tpidr_el1, x1 /* tpidr_el1 = curlwp = x1 */ str x1, [x0, #CI_CURLWP] /* curlwp is idlelwp */ /* get my stack from lwp */ @@ -534,7 +534,8 @@ mp_vstart: 1: mov fp, xzr /* trace back starts here */ - mrs x0, tpidr_el1 /* curcpu() */ + mrs x0, tpidr_el1 /* curlwp */ + ldr x0, [x0, #L_CPU] /* curlwp->l_cpu */ bl _C_LABEL(cpu_hatch) mov x0, xzr b _C_LABEL(idle_loop) /* never to return */ Index: src/sys/arch/aarch64/aarch64/vectors.S diff -u src/sys/arch/aarch64/aarch64/vectors.S:1.17 src/sys/arch/aarch64/aarch64/vectors.S:1.18 --- src/sys/arch/aarch64/aarch64/vectors.S:1.17 Sat May 23 18:08:59 2020 +++ src/sys/arch/aarch64/aarch64/vectors.S Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vectors.S,v 1.17 2020/05/23 18:08:59 ryo Exp $ */ +/* $NetBSD: vectors.S,v 1.18 2020/08/12 13:19:35 skrll Exp $ */ #include <aarch64/asm.h> #include "assym.h" @@ -81,8 +81,7 @@ ENTRY_NBTI(\func) .if \el == 0 /* curlwp->l_private = tpidr{,ro}_el0 */ - mrs x1, tpidr_el1 /* curcpu() */ - ldr x1, [x1, #CI_CURLWP] /* x1 = curcpu()->ci_curlwp */ + mrs x1, tpidr_el1 /* x1 = curlwp */ mrs x0, tpidr\tpidr\()_el0 str x0, [x1, #L_PRIVATE] /* curlwp->l_private 
= tpidr{,ro}_el0 */ Index: src/sys/arch/aarch64/include/cpu.h diff -u src/sys/arch/aarch64/include/cpu.h:1.25 src/sys/arch/aarch64/include/cpu.h:1.26 --- src/sys/arch/aarch64/include/cpu.h:1.25 Wed Jul 1 08:01:07 2020 +++ src/sys/arch/aarch64/include/cpu.h Wed Aug 12 13:19:35 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: cpu.h,v 1.25 2020/07/01 08:01:07 ryo Exp $ */ +/* $NetBSD: cpu.h,v 1.26 2020/08/12 13:19:35 skrll Exp $ */ /*- - * Copyright (c) 2014 The NetBSD Foundation, Inc. + * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -75,19 +75,33 @@ struct cpu_info { struct cpu_data ci_data; device_t ci_dev; cpuid_t ci_cpuid; - struct lwp *ci_curlwp; + + /* + * the following are in their own cache line, as they are stored to + * regularly by remote CPUs; when they were mixed with other fields + * we observed frequent cache misses. + */ + int ci_want_resched __aligned(COHERENCY_UNIT); + /* XXX pending IPIs? */ + + /* + * this is stored frequently, and is fetched by remote CPUs. + */ + struct lwp *ci_curlwp __aligned(COHERENCY_UNIT); struct lwp *ci_onproc; - struct lwp *ci_softlwps[SOFTINT_COUNT]; + + /* + * largely CPU-private. 
+ */ + struct lwp *ci_softlwps[SOFTINT_COUNT] __aligned(COHERENCY_UNIT); uint64_t ci_lastintr; int ci_mtx_oldspl; int ci_mtx_count; - int ci_want_resched; int ci_cpl; volatile u_int ci_softints; - volatile u_int ci_astpending; volatile u_int ci_intr_depth; int ci_kfpu_spl; @@ -117,18 +131,23 @@ struct cpu_info { } __aligned(COHERENCY_UNIT); #ifdef _KERNEL -static inline struct cpu_info * -curcpu(void) +static inline struct lwp * __attribute__ ((const)) +aarch64_curlwp(void) { - struct cpu_info *ci; - __asm __volatile ("mrs %0, tpidr_el1" : "=r"(ci)); - return ci; + struct lwp *l; + __asm("mrs %0, tpidr_el1" : "=r"(l)); + return l; } -#define curlwp (curcpu()->ci_curlwp) -#define setsoftast(ci) atomic_or_uint(&(ci)->ci_astpending, __BIT(0)) -#define cpu_signotify(l) setsoftast((l)->l_cpu) +/* forward declaration; defined in sys/lwp.h. */ +static __inline struct cpu_info *lwp_getcpu(struct lwp *); + +#define curcpu() (lwp_getcpu(aarch64_curlwp())) +#define setsoftast(ci) (cpu_signotify((ci)->ci_onproc)) +#undef curlwp +#define curlwp (aarch64_curlwp()) +void cpu_signotify(struct lwp *l); void cpu_need_proftick(struct lwp *l); void cpu_hatch(struct cpu_info *); @@ -151,6 +170,7 @@ extern struct cpu_info cpu_info_store[]; cii = 0, __USE(cii), ci = curcpu(); ci != NULL; ci = NULL #endif /* MULTIPROCESSOR */ +#define LWP0_CPU_INFO (&cpu_info_store[0]) static inline void cpu_dosoftints(void) Index: src/sys/arch/aarch64/include/proc.h diff -u src/sys/arch/aarch64/include/proc.h:1.7 src/sys/arch/aarch64/include/proc.h:1.8 --- src/sys/arch/aarch64/include/proc.h:1.7 Sat May 23 18:08:59 2020 +++ src/sys/arch/aarch64/include/proc.h Wed Aug 12 13:19:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: proc.h,v 1.7 2020/05/23 18:08:59 ryo Exp $ */ +/* $NetBSD: proc.h,v 1.8 2020/08/12 13:19:35 skrll Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -43,6 +43,7 @@ struct mdlwp { struct trapframe *md_utf; uint64_t md_cpacr; uint32_t md_flags; + volatile uint32_t md_astpending; uint64_t md_ia_kern[2]; /* APIAKey{Lo,Hi}_EL1 used in the kernel */ uint64_t md_ia_user[2]; /* APIAKey{Lo,Hi}_EL1 used in user-process */