Module Name:    src
Committed By:   jmcneill
Date:           Sun Oct 20 14:03:51 UTC 2019

Modified Files:
        src/sys/arch/aarch64/aarch64: cpu.c locore.S

Log Message:
Use separate cacheline-aligned arrays for the mbox and hatched state (one
bit per CPU), as before.


To generate a diff of this commit:
cvs rdiff -u -r1.24 -r1.25 src/sys/arch/aarch64/aarch64/cpu.c
cvs rdiff -u -r1.43 -r1.44 src/sys/arch/aarch64/aarch64/locore.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/cpu.c
diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.24 src/sys/arch/aarch64/aarch64/cpu.c:1.25
--- src/sys/arch/aarch64/aarch64/cpu.c:1.24	Sun Oct 20 11:17:41 2019
+++ src/sys/arch/aarch64/aarch64/cpu.c	Sun Oct 20 14:03:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.24 2019/10/20 11:17:41 jmcneill Exp $ */
+/* $NetBSD: cpu.c,v 1.25 2019/10/20 14:03:51 jmcneill Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.24 2019/10/20 11:17:41 jmcneill Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.25 2019/10/20 14:03:51 jmcneill Exp $");
 
 #include "locators.h"
 #include "opt_arm_debug.h"
@@ -69,9 +69,8 @@ static void cpu_setup_sysctl(device_t, s
 #ifdef MULTIPROCESSOR
 uint64_t cpu_mpidr[MAXCPUS];
 
-volatile u_int aarch64_cpu_mbox[MAXCPUS] __cacheline_aligned = { 0 };
-#define CPU_MBOX_HATCHED	__BIT(0)
-#define	CPU_MBOX_START		__BIT(1)
+volatile u_int aarch64_cpu_mbox[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
+volatile u_int aarch64_cpu_hatched[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
 u_int arm_cpu_max = 1;
 
 static kmutex_t cpu_hatch_lock;
@@ -500,7 +499,7 @@ cpu_setup_sysctl(device_t dv, struct cpu
 void
 cpu_boot_secondary_processors(void)
 {
-	u_int cpuno;
+	u_int n, bit;
 
 	if ((boothowto & RB_MD1) != 0)
 		return;
@@ -510,22 +509,20 @@ cpu_boot_secondary_processors(void)
 	VPRINTF("%s: starting secondary processors\n", __func__);
 
 	/* send mbox to have secondary processors do cpu_hatch() */
-	for (cpuno = 1; cpuno < ncpu; cpuno++) {
-		if (cpu_hatched_p(cpuno) == false)
-			continue;
-		atomic_or_uint(&aarch64_cpu_mbox[cpuno], CPU_MBOX_START);
-	}
+	for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++)
+		atomic_or_uint(&aarch64_cpu_mbox[n], aarch64_cpu_hatched[n]);
 	__asm __volatile ("sev; sev; sev");
 
 	/* wait all cpus have done cpu_hatch() */
-	for (cpuno = 1; cpuno < ncpu; cpuno++) {
-		if (cpu_hatched_p(cpuno) == 0)
-			continue;
-		while (membar_consumer(), aarch64_cpu_mbox[cpuno] & CPU_MBOX_START) {
+	for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++) {
+		while (membar_consumer(), aarch64_cpu_mbox[n] & aarch64_cpu_hatched[n]) {
 			__asm __volatile ("wfe");
 		}
-		/* Add processor to kcpuset */
-		kcpuset_set(kcpuset_attached, cpuno);
+		/* Add processors to kcpuset */
+		for (bit = 0; bit < 32; bit++) {
+			if (aarch64_cpu_hatched[n] & __BIT(bit))
+				kcpuset_set(kcpuset_attached, n * 32 + bit);
+		}
 	}
 
 	VPRINTF("%s: secondary processors hatched\n", __func__);
@@ -563,15 +560,18 @@ cpu_hatch(struct cpu_info *ci)
 	 * ci_index are each cpu0=0, cpu1=1, cpu2=undef, cpu3=2.
 	 * therefore we have to use device_unit instead of ci_index for mbox.
 	 */
-	const u_int cpuno = device_unit(ci->ci_dev);
-	atomic_and_uint(&aarch64_cpu_mbox[cpuno], ~(u_int)CPU_MBOX_START);
+	const u_int off = device_unit(ci->ci_dev) / 32;
+	const u_int bit = device_unit(ci->ci_dev) % 32;
+	atomic_and_uint(&aarch64_cpu_mbox[off], ~__BIT(bit));
 	__asm __volatile ("sev; sev; sev");
 }
 
 bool
 cpu_hatched_p(u_int cpuindex)
 {
-	aarch64_dcache_inv_range((vaddr_t)&aarch64_cpu_mbox[cpuindex], 4);
-	return (aarch64_cpu_mbox[cpuindex] & CPU_MBOX_HATCHED) != 0;
+	const u_int off = cpuindex / 32;
+	const u_int bit = cpuindex % 32;
+	membar_consumer();
+	return (aarch64_cpu_hatched[off] & __BIT(bit)) != 0;
 }
 #endif /* MULTIPROCESSOR */

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.43 src/sys/arch/aarch64/aarch64/locore.S:1.44
--- src/sys/arch/aarch64/aarch64/locore.S:1.43	Sun Oct 20 12:25:43 2019
+++ src/sys/arch/aarch64/aarch64/locore.S	Sun Oct 20 14:03:51 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.43 2019/10/20 12:25:43 skrll Exp $	*/
+/*	$NetBSD: locore.S,v 1.44 2019/10/20 14:03:51 jmcneill Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.43 2019/10/20 12:25:43 skrll Exp $")
+RCSID("$NetBSD: locore.S,v 1.44 2019/10/20 14:03:51 jmcneill Exp $")
 
 
 /*#define DEBUG_LOCORE			/* debug print */
@@ -361,19 +361,16 @@ ENTRY_NP(cpu_mpstart)
 	mov	x1, xzr
 1:
 	add	x1, x1, #1
-	cmp	x1, MAXCPUS		/* cpuindex >= MAXCPUS ? */
+	cmp	x1, #MAXCPUS		/* cpuindex >= MAXCPUS ? */
 	bge	toomanycpus
 	ldr	x2, [x0, x1, lsl #3]	/* cpu_mpidr[cpuindex] */
 	cmp	x2, x3			/* == mpidr_el1 & MPIDR_AFF ? */
 	bne	1b
 
 	mov	x27, x1			/* x27 = cpuindex */
-	ADDR	x0, _C_LABEL(aarch64_cpu_mbox)
-	add	x28, x0, x27, lsl #2	/* x28 = &aarch64_cpu_mbox[cpuindex] */
 
 	/*
 	 * x27 = cpuindex
-	 * x28 = &aarch64_cpu_mbox[cpuindex]
 	 */
 
 	/* set stack pointer for boot */
@@ -445,21 +442,35 @@ mp_vstart:
 	mrs	x1, mpidr_el1
 	str	x1, [x0, #CI_MPIDR]	/* curcpu()->ci_mpidr = mpidr_el1 */
 
+	mov	x0, #32
+	udiv	x1, x27, x0
+	ADDR	x0, _C_LABEL(aarch64_cpu_hatched)
+	add	x28, x0, x1, lsl #2	/* x28 = &aarch64_cpu_hatched[cpuindex/32] */
+	mov	x0, #1
+	mov	x2, #32
+	msub	x1, x1, x2, x27
+	lsl	x29, x0, x1		/* x29 = 1 << (cpuindex % 32) */
+
 	/*
-	 * atomic_or_uint(&aarch64_cpu_mbox[cpuindex], 1)
+	 * atomic_or_uint(&aarch64_cpu_hatched[cpuindex/32], 1<<cpuindex%32)
 	 * to tell my activity to primary processor.
 	 */
 	mov	x0, x28
-	mov	x1, #1
+	mov	x1, x29
 	bl	_C_LABEL(atomic_or_uint)	/* hatched! */
 	dsb	sy
 	sev
 
+	mov	x0, #32
+	udiv	x1, x27, x0
+	ADDR	x0, _C_LABEL(aarch64_cpu_mbox)
+	add	x28, x0, x1, lsl #2	/* x28 = &aarch64_cpu_mbox[cpuindex/32] */
+
 	/* wait for the mailbox start bit to become true */
 1:
 	dmb	sy
-	ldr	x20, [x28]
-	tst	x20, #2
+	ldr	w20, [x28]
+	tst	w20, w29
 	bne	9f
 	wfe
 	b	1b

Reply via email to