Module Name:    src
Committed By:   snj
Date:           Fri Jan 28 07:16:14 UTC 2011

Modified Files:
        src/sys/arch/sparc/sparc [netbsd-5]: cpu.c cpuvar.h genassym.cf intr.c
            locore.s pmap.c

Log Message:
Pull up following revision(s) (requested by mrg in ticket #1532):
        sys/arch/sparc/sparc/cpu.c: revision 1.215 via patch
        sys/arch/sparc/sparc/cpuvar.h: revision 1.78 via patch
        sys/arch/sparc/sparc/genassym.cf: revision 1.57 via patch
        sys/arch/sparc/sparc/intr.c: revision 1.103-1.105 via patch
        sys/arch/sparc/sparc/locore.s: revision 1.247, 1.250 via patch
        sys/arch/sparc/sparc/pmap.c: revision 1.329 via patch

- print the cpu_number() when we get a strayintr().

- use _MAXNCPU instead of 4
- convert xpmsg_lock from a simplelock to a kmutex (sketched after this list)
- don't wait for sparc_noop IPI calls
- remove xpmsg_func's "retval" parameter and usage
- remove the IPI at high IPL message
- rework cpu_attach() a bunch, refactoring the calls to getcpuinfo() and the
 setting of cpi, and splitting most of the non-boot CPU handling into a new
 function
- make CPU_INFO_FOREACH() work whether modular or not
- move the MP cpu_info pages earlier
- move a few things in cpu.c around to consolidate the MP code
- remove useless if (cpus == NULL) tests -- cpus is an array now
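
a minimal sketch of the simplelock-to-kmutex conversion, condensed from the
cpu.c hunks in the diff below (the IPI post/wait logic inside xcall() is
elided here):

    /* before: struct simplelock xpmsg_lock = SIMPLELOCK_INITIALIZER; */
    static kmutex_t xpmsg_mutex;   /* serialises cross-call message generation */

    void
    cpu_init_system(void)
    {

            /* spin mutex at IPL_VM, initialised once, before main() */
            mutex_init(&xpmsg_mutex, MUTEX_SPIN, IPL_VM);
    }

    void
    xcall(xcall_func_t func, xcall_trap_t trap, int arg0, int arg1, int arg2,
          u_int cpuset)
    {

            /* replaces the old splsched() + LOCK_XPMSG() pair */
            mutex_spin_enter(&xpmsg_mutex);

            /* post the message to each CPU in cpuset, call func locally if
               requested, then wait for completion (skipped for sparc_noop) */

            /* replaces UNLOCK_XPMSG() + splx(s) */
            mutex_spin_exit(&xpmsg_mutex);
    }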

with these changes, and an additional change to crazyintr() to not printf(),
i can get to a single-user shell on my SS20 again.  i can run a few commands,
but some of them cause hangs: "ps auxw" works, but "top -b" does not.

convert sparc "intrcnt" counters to evcnt(9) style.  XXX some of the names
could be better, but i just copied them from the old intrnames in locore.
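
the new counters follow the standard evcnt(9) static-attach pattern; a
condensed sketch (two of the fifteen entries shown, taken from the intr.c
hunk in the diff below):

    #include <sys/evcnt.h>

    /* one 64-bit event counter per interrupt level, replacing the old
       32-bit intrcnt[] words that lived in locore.s */
    struct evcnt intrcnt[15] = {
            EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "spur", "hard"),
            EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev1", "hard"),
            /* ... remaining levels, with "clock" at 10 and "prof" at 14 ... */
    };

    /* attach each counter so it is visible to vmstat -i and friends */
    EVCNT_ATTACH_STATIC2(intrcnt, 0);
    EVCNT_ATTACH_STATIC2(intrcnt, 1);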

i benchmarked this with a simple test of ircii ./configure && make, to see
whether the additional load/store & arithmetic would cause any noticeable
degradation, since the change also converts the 32-bit counters to 64 bits.
amusingly, the only trend i saw was a consistent (across at least 8 runs)
improvement of about 0.8% for both portions.  i.e., the newer code, with
its larger code and counter sizes, actually runs faster for some reason;
maybe there's a cacheline effect from the size of the code?

XXX the current implementation depends on a couple of things:
XXX   - ev_count member of evcnt{} is first and has offset 0
XXX   - that sizeof(struct evcnt) equals 32
XXX if these are not true, locore.s has #error checks to catch it
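
for illustration only, a hypothetical C-side equivalent of those guards (the
patch itself checks the genassym-exported EV_COUNT and EV_STRUCTSIZE values
with #error directives in locore.s, visible in the diff below; the typedef
names here are made up):

    #include <stddef.h>
    #include <sys/evcnt.h>

    /* compile-time layout checks: a negative array size is a compile error */
    typedef char assert_ev_count_is_first[
        offsetof(struct evcnt, ev_count) == 0 ? 1 : -1];
    typedef char assert_evcnt_is_32_bytes[
        sizeof(struct evcnt) == 32 ? 1 : -1];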

- remove unused ft_want_ast()
- give nmi_sun* ENTRY() points so they show up in symbols properly
- add some disabled code to use this cpu's idlelwp area when hatching
 a cpu, but right now it makes things worse, not better...


To generate a diff of this commit:
cvs rdiff -u -r1.211.8.2 -r1.211.8.3 src/sys/arch/sparc/sparc/cpu.c
cvs rdiff -u -r1.75.10.2 -r1.75.10.3 src/sys/arch/sparc/sparc/cpuvar.h
cvs rdiff -u -r1.56 -r1.56.4.1 src/sys/arch/sparc/sparc/genassym.cf
cvs rdiff -u -r1.100.20.1 -r1.100.20.2 src/sys/arch/sparc/sparc/intr.c
cvs rdiff -u -r1.244.8.2 -r1.244.8.3 src/sys/arch/sparc/sparc/locore.s
cvs rdiff -u -r1.322.20.3 -r1.322.20.4 src/sys/arch/sparc/sparc/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc/sparc/cpu.c
diff -u src/sys/arch/sparc/sparc/cpu.c:1.211.8.2 src/sys/arch/sparc/sparc/cpu.c:1.211.8.3
--- src/sys/arch/sparc/sparc/cpu.c:1.211.8.2	Sun Jan 16 12:58:23 2011
+++ src/sys/arch/sparc/sparc/cpu.c	Fri Jan 28 07:16:13 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.211.8.2 2011/01/16 12:58:23 bouyer Exp $ */
+/*	$NetBSD: cpu.c,v 1.211.8.3 2011/01/28 07:16:13 snj Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.211.8.2 2011/01/16 12:58:23 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.211.8.3 2011/01/28 07:16:13 snj Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -102,10 +102,7 @@
 extern char machine_model[];
 
 int	sparc_ncpus;			/* # of CPUs detected by PROM */
-#ifdef MULTIPROCESSOR
-struct cpu_info *cpus[4];		/* we only support 4 CPUs. */
-u_int	cpu_ready_mask;			/* the set of CPUs marked as READY */
-#endif
+struct cpu_info *cpus[_MAXNCPU];	/* we only support 4 CPUs. */
 
 /* The CPU configuration driver. */
 static void cpu_mainbus_attach(struct device *, struct device *, void *);
@@ -136,43 +133,6 @@
 #define SRMMU_VERS(mmusr)	(((mmusr) >> 24) & 0xf)
 
 int bootmid;		/* Module ID of boot CPU */
-#if defined(MULTIPROCESSOR)
-void cpu_spinup(struct cpu_info *);
-static void init_cpuinfo(struct cpu_info *, int);
-
-int go_smp_cpus = 0;	/* non-primary CPUs wait for this to go */
-
-/* lock this to send IPI's */
-struct simplelock xpmsg_lock = SIMPLELOCK_INITIALIZER;
-
-static void
-init_cpuinfo(struct cpu_info *cpi, int node)
-{
-	vaddr_t intstack, va;
-
-	/*
-	 * Finish initialising this cpu_info.
-	 */
-	getcpuinfo(cpi, node);
-
-	/*
-	 * Arrange interrupt stack.  This cpu will also abuse the bottom
-	 * half of the interrupt stack before it gets to run its idle LWP.
-	 */
-	intstack = uvm_km_alloc(kernel_map, INT_STACK_SIZE, 0, UVM_KMF_WIRED);
-	if (intstack == 0)
-		panic("%s: no uspace/intstack", __func__);
-	cpi->eintstack = (void*)(intstack + INT_STACK_SIZE);
-
-	/* Allocate virtual space for pmap page_copy/page_zero */
-	va = uvm_km_alloc(kernel_map, 2*PAGE_SIZE, 0, UVM_KMF_VAONLY);
-	if (va == 0)
-		panic("%s: no virtual space", __func__);
-
-	cpi->vpage[0] = (void *)(va + 0);
-	cpi->vpage[1] = (void *)(va + PAGE_SIZE);
-}
-#endif /* MULTIPROCESSOR */
 
 #ifdef notdef
 /*
@@ -199,6 +159,21 @@
 };
 #endif
 
+#if defined(MULTIPROCESSOR)
+u_int	cpu_ready_mask;			/* the set of CPUs marked as READY */
+void cpu_spinup(struct cpu_info *);
+static void cpu_attach_non_boot(struct cpu_softc *, struct cpu_info *, int);
+
+int go_smp_cpus = 0;	/* non-primary CPUs wait for this to go */
+
+/*
+ * This must be locked around all message transactions to ensure only
+ * one CPU is generating them.
+ */
+static kmutex_t xpmsg_mutex;
+
+#endif /* MULTIPROCESSOR */
+
 /*
  * 4/110 comment: the 4/110 chops off the top 4 bits of an OBIO address.
  *	this confuses autoconf.  for example, if you try and map
@@ -326,6 +301,7 @@
 static void
 cpu_attach(struct cpu_softc *sc, int node, int mid)
 {
+	char buf[100];
 	struct cpu_info *cpi;
 	int idx;
 	static int cpu_attach_count = 0;
@@ -335,69 +311,26 @@
 	 * (see autoconf.c and cpuunit.c)
 	 */
 	idx = cpu_attach_count++;
-	if (cpu_attach_count == 1) {
-		getcpuinfo(&cpuinfo, node);
-
-#if defined(MULTIPROCESSOR)
-		cpi = sc->sc_cpuinfo = cpus[idx];
-#else
-		/* The `local' VA is global for uniprocessor. */
-		cpi = sc->sc_cpuinfo = (struct cpu_info *)CPUINFO_VA;
-#endif
-		cpi->master = 1;
-		cpi->eintstack = eintstack;
-		/* Note: `curpcb' is set to `proc0' in locore */
-
-		/*
-		 * If we haven't been able to determine the Id of the
-		 * boot CPU, set it now. In this case we can only boot
-		 * from CPU #0 (see also the CPU attach code in autoconf.c)
-		 */
-		if (bootmid == 0)
-			bootmid = mid;
-	} else {
-#if defined(MULTIPROCESSOR)
-		int error;
-
-		/*
-		 * Initialise this cpu's cpu_info.
-		 */
-		cpi = sc->sc_cpuinfo = cpus[idx];
-		init_cpuinfo(cpi, node);
 
-		/*
-		 * Call the MI attach which creates an idle LWP for us.
-		 */
-		error = mi_cpu_attach(cpi);
-		if (error != 0) {
-			aprint_normal("\n");
-			aprint_error("%s: mi_cpu_attach failed with %d\n",
-			    sc->sc_dev.dv_xname, error);
-			return;
-		}
-
-		/*
-		 * Note: `eintstack' is set in init_cpuinfo() above.
-		 * The %wim register will be initialized in cpu_hatch().
-		 */
-		cpi->ci_curlwp = cpi->ci_data.cpu_idlelwp;
-		cpi->curpcb = (struct pcb *)cpi->ci_curlwp->l_addr;
-		cpi->curpcb->pcb_wim = 1;
-
-#else
-		sc->sc_cpuinfo = NULL;
+#if !defined(MULTIPROCESSOR)
+	if (cpu_attach_count > 1) {
 		printf(": no SMP support in kernel\n");
 		return;
-#endif
 	}
-
-#ifdef DEBUG
-	cpi->redzone = (void *)((long)cpi->eintstack + REDSIZE);
 #endif
 
+	/*
+	 * Initialise this cpu's cpu_info.
+	 */
+	cpi = sc->sc_cpuinfo = cpus[idx];
+	getcpuinfo(cpi, node);
+
 	cpi->ci_cpuid = idx;
 	cpi->mid = mid;
 	cpi->node = node;
+#ifdef DEBUG
+	cpi->redzone = (void *)((long)cpi->eintstack + REDSIZE);
+#endif
 
 	if (sparc_ncpus > 1) {
 		printf(": mid %d", mid);
@@ -405,24 +338,102 @@
 			printf(" [WARNING: mid should not be 0]");
 	}
 
+#if defined(MULTIPROCESSOR)
+	if (cpu_attach_count > 1) {
+		cpu_attach_non_boot(sc, cpi, node);
+		return;
+	}
+#endif /* MULTIPROCESSOR */
+
+	/* Stuff to only run on the boot CPU */
+	cpu_setup();
+	snprintf(buf, sizeof buf, "%s @ %s MHz, %s FPU",
+		cpi->cpu_name, clockfreq(cpi->hz), cpi->fpu_name);
+	snprintf(cpu_model, sizeof cpu_model, "%s (%s)",
+		machine_model, buf);
+	printf(": %s\n", buf);
+	cache_print(sc);
+
+	cpi->master = 1;
+	cpi->eintstack = eintstack;
+
+	/*
+	 * If we haven't been able to determine the Id of the
+	 * boot CPU, set it now. In this case we can only boot
+	 * from CPU #0 (see also the CPU attach code in autoconf.c)
+	 */
+	if (bootmid == 0)
+		bootmid = mid;
+}
+
+/*
+ * Finish CPU attach.
+ * Must be run by the CPU which is being attached.
+ */
+void
+cpu_setup(void)
+{
+ 	if (cpuinfo.hotfix)
+		(*cpuinfo.hotfix)(&cpuinfo);
+
+	/* Initialize FPU */
+	fpu_init(&cpuinfo);
+
+	/* Enable the cache */
+	cpuinfo.cache_enable();
+
+	cpuinfo.flags |= CPUFLG_HATCHED;
+}
+
+#if defined(MULTIPROCESSOR)
+/*
+ * Perform most of the tasks needed for a non-boot CPU.
+ */
+static void
+cpu_attach_non_boot(struct cpu_softc *sc, struct cpu_info *cpi, int node)
+{
+	vaddr_t intstack, va;
+	int error;
+
+	/*
+	 * Arrange interrupt stack.  This cpu will also abuse the bottom
+	 * half of the interrupt stack before it gets to run its idle LWP.
+	 */
+	intstack = uvm_km_alloc(kernel_map, INT_STACK_SIZE, 0, UVM_KMF_WIRED);
+	if (intstack == 0)
+		panic("%s: no uspace/intstack", __func__);
+	cpi->eintstack = (void*)(intstack + INT_STACK_SIZE);
 
-	if (cpi->master) {
-		char buf[100];
+	/* Allocate virtual space for pmap page_copy/page_zero */
+	va = uvm_km_alloc(kernel_map, 2*PAGE_SIZE, 0, UVM_KMF_VAONLY);
+	if (va == 0)
+		panic("%s: no virtual space", __func__);
 
-		cpu_setup();
-		snprintf(buf, sizeof buf, "%s @ %s MHz, %s FPU",
-			cpi->cpu_name, clockfreq(cpi->hz), cpi->fpu_name);
-		snprintf(cpu_model, sizeof cpu_model, "%s (%s)",
-			machine_model, buf);
-		printf(": %s\n", buf);
-		cache_print(sc);
+	cpi->vpage[0] = (void *)(va + 0);
+	cpi->vpage[1] = (void *)(va + PAGE_SIZE);
+
+	/*
+	 * Call the MI attach which creates an idle LWP for us.
+	 */
+	error = mi_cpu_attach(cpi);
+	if (error != 0) {
+		aprint_normal("\n");
+		aprint_error("%s: mi_cpu_attach failed with %d\n",
+		    sc->sc_dev.dv_xname, error);
 		return;
 	}
 
-#if defined(MULTIPROCESSOR)
+	/*
+	 * Note: `eintstack' is set in init_cpuinfo() above.
+	 * The %wim register will be initialized in cpu_hatch().
+	 */
+	cpi->ci_curlwp = cpi->ci_data.cpu_idlelwp;
+	cpi->curpcb = (struct pcb *)cpi->ci_curlwp->l_addr;
+	cpi->curpcb->pcb_wim = 1;
+
 	/* for now use the fixed virtual addresses setup in autoconf.c */
 	cpi->intreg_4m = (struct icr_pi *)
-		(PI_INTR_VA + (_MAXNBPG * CPU_MID2CPUNO(mid)));
+		(PI_INTR_VA + (_MAXNBPG * CPU_MID2CPUNO(cpi->mid)));
 
 	/* Now start this CPU */
 	cpu_spinup(cpi);
@@ -431,7 +442,10 @@
 
 	cache_print(sc);
 
-	if (sparc_ncpus > 1 && idx == sparc_ncpus-1) {
+	/*
+	 * Now we're on the last CPU to be attaching.
+	 */
+	if (sparc_ncpus > 1 && cpi->ci_cpuid == sparc_ncpus - 1) {
 		CPU_INFO_ITERATOR n;
 		/*
 		 * Install MP cache flush functions, unless the
@@ -446,10 +460,9 @@
 			SET_CACHE_FUNC(vcache_flush_context);
 		}
 	}
-#endif /* MULTIPROCESSOR */
+#undef SET_CACHE_FUNC
 }
 
-#if defined(MULTIPROCESSOR)
 /*
  * Start secondary processors in motion.
  */
@@ -479,41 +492,30 @@
 
 	printf("\n");
 }
-#endif /* MULTIPROCESSOR */
 
 /*
- * Finish CPU attach.
- * Must be run by the CPU which is being attached.
+ * Early initialisation, before main().
  */
 void
-cpu_setup(void)
+cpu_init_system(void)
 {
- 	if (cpuinfo.hotfix)
-		(*cpuinfo.hotfix)(&cpuinfo);
-
-	/* Initialize FPU */
-	fpu_init(&cpuinfo);
 
-	/* Enable the cache */
-	cpuinfo.cache_enable();
-
-	cpuinfo.flags |= CPUFLG_HATCHED;
+	mutex_init(&xpmsg_mutex, MUTEX_SPIN, IPL_VM);
 }
 
-#if defined(MULTIPROCESSOR)
-
-extern void cpu_hatch(void); /* in locore.s */
-
 /*
  * Allocate per-CPU data, then start up this CPU using PROM.
  */
 void
 cpu_spinup(struct cpu_info *cpi)
 {
+	extern void cpu_hatch(void); /* in locore.s */
 	struct openprom_addr oa;
-	void *pc = (void *)cpu_hatch;
+	void *pc;
 	int n;
 
+	pc = (void *)cpu_hatch;
+
 	/* Setup CPU-specific MMU tables */
 	pmap_alloc_cpu(cpi);
 
@@ -556,42 +558,20 @@
       u_int cpuset)
 {
 	struct cpu_info *cpi;
-	int s, n, i, done, callself, mybit;
+	int n, i, done, callself, mybit;
 	volatile struct xpmsg_func *p;
 	int fasttrap;
-
-	/* XXX - note p->retval is probably no longer useful */
+	int is_noop = func == (xcall_func_t)sparc_noop;
 
 	mybit = (1 << cpuinfo.ci_cpuid);
 	callself = func && (cpuset & mybit) != 0;
 	cpuset &= ~mybit;
 
-	/*
-	 * If no cpus are configured yet, just call ourselves.
-	 */
-	if (cpus == NULL) {
-		p = &cpuinfo.msg.u.xpmsg_func;
-		if (callself)
-			p->retval = (*func)(arg0, arg1, arg2);
-		return;
-	}
-
 	/* Mask any CPUs that are not ready */
 	cpuset &= cpu_ready_mask;
 
 	/* prevent interrupts that grab the kernel lock */
-	s = splsched();
-#ifdef DEBUG
-	if (!cold) {
-		u_int pc, lvl = ((u_int)s & PSR_PIL) >> 8;
-		if (lvl > IPL_SCHED) {
-			__asm("mov %%i7, %0" : "=r" (pc) : );
-			printf_nolog("%d: xcall at lvl %u from 0x%x\n",
-				cpu_number(), lvl, pc);
-		}
-	}
-#endif
-	LOCK_XPMSG();
+	mutex_spin_enter(&xpmsg_mutex);
 
 	/*
 	 * Firstly, call each CPU.  We do this so that they might have
@@ -621,14 +601,14 @@
 	 */
 	p = &cpuinfo.msg.u.xpmsg_func;
 	if (callself)
-		p->retval = (*func)(arg0, arg1, arg2);
+		(*func)(arg0, arg1, arg2);
 
 	/*
 	 * Lastly, start looping, waiting for all CPUs to register that they
 	 * have completed (bailing if it takes "too long", being loud about
 	 * this in the process).
 	 */
-	done = 0;
+	done = is_noop;
 	i = 100000;	/* time-out, not too long, but still an _AGE_ */
 	while (!done) {
 		if (--i < 0) {
@@ -654,8 +634,7 @@
 	if (i < 0)
 		printf_nolog("\n");
 
-	UNLOCK_XPMSG();
-	splx(s);
+	mutex_spin_exit(&xpmsg_mutex);
 }
 
 /*
@@ -667,9 +646,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpuinfo.mid == cpi->mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -694,9 +670,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpuinfo.mid == cpi->mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -720,9 +693,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		int r;
 
@@ -747,9 +717,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpi == NULL || cpi->mid == cpuinfo.mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -766,9 +733,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpi == NULL || cpuinfo.mid == cpi->mid ||
 		    (cpi->flags & CPUFLG_PAUSED) == 0)

Index: src/sys/arch/sparc/sparc/cpuvar.h
diff -u src/sys/arch/sparc/sparc/cpuvar.h:1.75.10.2 src/sys/arch/sparc/sparc/cpuvar.h:1.75.10.3
--- src/sys/arch/sparc/sparc/cpuvar.h:1.75.10.2	Sun Jan 16 12:58:23 2011
+++ src/sys/arch/sparc/sparc/cpuvar.h	Fri Jan 28 07:16:13 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpuvar.h,v 1.75.10.2 2011/01/16 12:58:23 bouyer Exp $ */
+/*	$NetBSD: cpuvar.h,v 1.75.10.3 2011/01/28 07:16:13 snj Exp $ */
 
 /*
  *  Copyright (c) 1996 The NetBSD Foundation, Inc.
@@ -37,6 +37,7 @@
 #include "opt_lockdebug.h"
 #include "opt_ddb.h"
 #include "opt_sparc_arch.h"
+#include "opt_modular.h"
 #endif
 
 #include <sys/device.h>
@@ -104,12 +105,11 @@
 		 * the trap window (see locore.s).
 		 */
 		struct xpmsg_func {
-			int	(*func)(int, int, int);
+			void	(*func)(int, int, int);
 			void	(*trap)(int, int, int);
 			int	arg0;
 			int	arg1;
 			int	arg2;
-			int	retval;
 		} xpmsg_func;
 	} u;
 	volatile int	received;
@@ -117,15 +117,6 @@
 };
 
 /*
- * This must be locked around all message transactions to ensure only
- * one CPU is generating them.
- */
-extern struct simplelock xpmsg_lock;
-
-#define LOCK_XPMSG()	simple_lock(&xpmsg_lock);
-#define UNLOCK_XPMSG()	simple_unlock(&xpmsg_lock);
-
-/*
  * The cpuinfo structure. This structure maintains information about one
  * currently installed CPU (there may be several of these if the machine
  * supports multiple CPUs, as on some Sun4m architectures). The information
@@ -415,10 +406,16 @@
 
 
 #define CPU_INFO_ITERATOR		int
-#ifdef MULTIPROCESSOR
-#define CPU_INFO_FOREACH(cii, cp)	cii = 0; cp = cpus[cii], cii < sparc_ncpus; cii++
+/*
+ * Provide two forms of CPU_INFO_FOREACH.  One fast one for non-modular
+ * non-SMP kernels, and the other for everyone else.  Both work in the
+ * non-SMP case, just involving an extra indirection through cpus[0] for
+ * the portable version.
+ */
+#if defined(MULTIPROCESSOR) || defined(MODULAR) || defined(_MODULE)
+#define	CPU_INFO_FOREACH(cii, cp)	cii = 0; (cp = cpus[cii]) && cp->eintstack && cii < sparc_ncpus; cii++
 #else
-#define	CPU_INFO_FOREACH(cii, cp)	(void)cii, cp = curcpu(); cp != NULL; cp = NULL
+#define CPU_INFO_FOREACH(cii, cp)	(void)cii, cp = curcpu(); cp != NULL; cp = NULL
 #endif
 
 /*
@@ -437,7 +434,8 @@
 #define	CPUSET_ALL	0xffffffffU	/* xcall to all configured CPUs */
 
 #if defined(MULTIPROCESSOR)
-typedef int (*xcall_func_t)(int, int, int);
+void cpu_init_system(void);
+typedef void (*xcall_func_t)(int, int, int);
 typedef void (*xcall_trap_t)(int, int, int);
 void xcall(xcall_func_t, xcall_trap_t, int, int, int, u_int);
 /* Shorthand */
@@ -472,8 +470,8 @@
 extern int bootmid;			/* Module ID of boot CPU */
 #define CPU_MID2CPUNO(mid)		((mid) != 0 ? (mid) - 8 : 0)
 
-#ifdef MULTIPROCESSOR
 extern struct cpu_info *cpus[];
+#ifdef MULTIPROCESSOR
 extern u_int cpu_ready_mask;		/* the set of CPUs marked as READY */
 #endif
 

Index: src/sys/arch/sparc/sparc/genassym.cf
diff -u src/sys/arch/sparc/sparc/genassym.cf:1.56 src/sys/arch/sparc/sparc/genassym.cf:1.56.4.1
--- src/sys/arch/sparc/sparc/genassym.cf:1.56	Sat Sep 20 18:29:05 2008
+++ src/sys/arch/sparc/sparc/genassym.cf	Fri Jan 28 07:16:13 2011
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.56 2008/09/20 18:29:05 tsutsui Exp $
+#	$NetBSD: genassym.cf,v 1.56.4.1 2011/01/28 07:16:13 snj Exp $
 
 #
 # Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -129,6 +129,7 @@
 define	V_SOFT		offsetof(struct uvmexp, softs)
 define	V_FAULTS	offsetof(struct uvmexp, faults)
 define	EV_COUNT	offsetof(struct evcnt, ev_count)
+define	EV_STRUCTSIZE	sizeof(struct evcnt)
 
 # CPU info structure
 define	CPUINFO_STRUCTSIZE	sizeof(struct cpu_info)

Index: src/sys/arch/sparc/sparc/intr.c
diff -u src/sys/arch/sparc/sparc/intr.c:1.100.20.1 src/sys/arch/sparc/sparc/intr.c:1.100.20.2
--- src/sys/arch/sparc/sparc/intr.c:1.100.20.1	Sat May 30 16:57:18 2009
+++ src/sys/arch/sparc/sparc/intr.c	Fri Jan 28 07:16:13 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.c,v 1.100.20.1 2009/05/30 16:57:18 snj Exp $ */
+/*	$NetBSD: intr.c,v 1.100.20.2 2011/01/28 07:16:13 snj Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.100.20.1 2009/05/30 16:57:18 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.100.20.2 2011/01/28 07:16:13 snj Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -80,6 +80,39 @@
 EVCNT_ATTACH_STATIC(lev14_evcnt);
 #endif
 
+struct evcnt intrcnt[15] = {
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "spur", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev1", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev2", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev3", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev4", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev5", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev6", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev7", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev8", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev9", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "clock", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev11", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev12", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "lev13", "hard"),
+   EVCNT_INITIALIZER(EVCNT_TYPE_INTR, 0, "prof", "hard"),
+};
+
+EVCNT_ATTACH_STATIC2(intrcnt, 0);
+EVCNT_ATTACH_STATIC2(intrcnt, 1);
+EVCNT_ATTACH_STATIC2(intrcnt, 2);
+EVCNT_ATTACH_STATIC2(intrcnt, 3);
+EVCNT_ATTACH_STATIC2(intrcnt, 4);
+EVCNT_ATTACH_STATIC2(intrcnt, 5);
+EVCNT_ATTACH_STATIC2(intrcnt, 6);
+EVCNT_ATTACH_STATIC2(intrcnt, 7);
+EVCNT_ATTACH_STATIC2(intrcnt, 8);
+EVCNT_ATTACH_STATIC2(intrcnt, 9);
+EVCNT_ATTACH_STATIC2(intrcnt, 10);
+EVCNT_ATTACH_STATIC2(intrcnt, 11);
+EVCNT_ATTACH_STATIC2(intrcnt, 12);
+EVCNT_ATTACH_STATIC2(intrcnt, 13);
+EVCNT_ATTACH_STATIC2(intrcnt, 14);
 
 void	strayintr(struct clockframe *);
 #ifdef DIAGNOSTIC
@@ -98,9 +131,9 @@
 	char bits[64];
 	int timesince;
 
-	printf("stray interrupt ipl 0x%x pc=0x%x npc=0x%x psr=%s\n",
-		fp->ipl, fp->pc, fp->npc, bitmask_snprintf(fp->psr,
-		       PSR_BITS, bits, sizeof(bits)));
+	printf("stray interrupt cpu%d ipl 0x%x pc=0x%x npc=0x%x psr=%s\n",
+		cpu_number(), fp->ipl, fp->pc, fp->npc,
+		bitmask_snprintf(fp->psr, PSR_BITS, bits, sizeof(bits)));
 
 	timesince = time_uptime - straytime;
 	if (timesince <= 10) {
@@ -344,7 +377,7 @@
 		volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
 
 		if (p->func)
-			p->retval = (*p->func)(p->arg0, p->arg1, p->arg2);
+			(*p->func)(p->arg0, p->arg1, p->arg2);
 		break;
 	    }
 	}

Index: src/sys/arch/sparc/sparc/locore.s
diff -u src/sys/arch/sparc/sparc/locore.s:1.244.8.2 src/sys/arch/sparc/sparc/locore.s:1.244.8.3
--- src/sys/arch/sparc/sparc/locore.s:1.244.8.2	Sun Jan 16 12:58:23 2011
+++ src/sys/arch/sparc/sparc/locore.s	Fri Jan 28 07:16:13 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.s,v 1.244.8.2 2011/01/16 12:58:23 bouyer Exp $	*/
+/*	$NetBSD: locore.s,v 1.244.8.3 2011/01/28 07:16:13 snj Exp $	*/
 
 /*
  * Copyright (c) 1996 Paul Kranenburg
@@ -118,6 +118,15 @@
 	inc	%o1; \
 	st	%o1, [%o0 + %lo(what)]
 
+#if EV_COUNT != 0
+# error "this code does not work with EV_COUNT != 0"
+#endif
+#if EV_STRUCTSIZE != 32
+# error "this code does not work with EV_STRUCTSIZE != 32"
+#else
+# define EV_STRUCTSHIFT	5
+#endif
+
 /*
  * Another handy macro: load one register window, given `base' address.
  * This can be either a simple register (e.g., %sp) or include an initial
@@ -2518,11 +2527,13 @@
 	wr	%l4, PSR_ET, %psr	! song and dance is necessary
 	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
 	sll	%l3, 2, %l5
-	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
-	ld	[%l4 + %l5], %o0
+	set	intrcnt, %l4		! intrcnt[intlev].ev_count++;
+	sll	%l3, EV_STRUCTSHIFT, %o2
+	ldd	[%l4 + %o2], %o0
 	std	%l2, [%sp + CCFSZ + 8]
-	inc	%o0
-	st	%o0, [%l4 + %l5]
+	inccc   %o1
+	addx    %o0, 0, %o0
+	std	%o0, [%l4 + %o2]
 	set	_C_LABEL(sintrhand), %l4! %l4 = sintrhand[intlev];
 	ld	[%l4 + %l5], %l4
 
@@ -2670,11 +2681,13 @@
 	wr	%l4, PSR_ET, %psr	! song and dance is necessary
 	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
 	sll	%l3, 2, %l5
-	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
-	ld	[%l4 + %l5], %o0
+	set	intrcnt, %l4		! intrcnt[intlev].ev_count++;
+	sll	%l3, EV_STRUCTSHIFT, %o2
+	ldd	[%l4 + %o2], %o0
 	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
-	inc	%o0
-	st	%o0, [%l4 + %l5]
+	inccc   %o1
+	addx    %o0, 0, %o0
+	std	%o0, [%l4 + %o2]
 
 	st	%fp, [%sp + CCFSZ + 16]
 
@@ -2715,11 +2728,13 @@
 	wr	%l4, PSR_ET, %psr	! song and dance is necessary
 	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
 	sll	%l3, 2, %l5
-	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
-	ld	[%l4 + %l5], %o0
+	set	intrcnt, %l4		! intrcnt[intlev].ev_count++;
+	sll	%l3, EV_STRUCTSHIFT, %o2
+	ldd	[%l4 + %o2], %o0
 	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
-	inc	%o0
-	st	%o0, [%l4 + %l5]
+	inccc   %o1
+	addx    %o0, 0, %o0
+	std	%o0, [%l4 + %o2]
 	set	_C_LABEL(intrhand), %l4	! %l4 = intrhand[intlev];
 	ld	[%l4 + %l5], %l4
 
@@ -2973,17 +2988,6 @@
 	b	ft_rett
 	 mov	SRMMU_CXR, %l7			! reload ctx register
 
-_ENTRY(_C_LABEL(ft_want_ast))
-	mov	1, %l4				! ack xcall in all cases
-	st	%l4, [%l6 + CPUINFO_XMSG_CMPLT]	! completed = 1
-
-	btst	PSR_PS, %l0		! if from user mode
-	be,a	slowtrap		!  call trap(T_AST)
-	 mov	T_AST, %l3
-
-	mov	%l0, %psr		! else return from trap
-	 nop				! AST will be noticed on out way out
-	RETT
 #endif /* MULTIPROCESSOR */
 
 #ifdef notyet
@@ -3026,7 +3030,7 @@
  */
 
 #if defined(SUN4)
-nmi_sun4:
+_ENTRY(_C_LABEL(nmi_sun4))
 	INTR_SETUP(-CCFSZ-80)
 	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
 	/*
@@ -3052,7 +3056,7 @@
 #endif
 
 #if defined(SUN4C)
-nmi_sun4c:
+_ENTRY(_C_LABEL(nmi_sun4c))
 	INTR_SETUP(-CCFSZ-80)
 	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
 	/*
@@ -3086,7 +3090,7 @@
 #endif /* SUN4M */
 #endif /* SUN4C */
 
-nmi_common:
+_ENTRY(_C_LABEL(nmi_common))
 	! and call C code
 	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, aer, ava)
 	 clr	%o0
@@ -3107,7 +3111,7 @@
 	 wr	%l4, 0, %y		! restore y
 
 #if defined(SUN4M)
-nmi_sun4m:
+_ENTRY(_C_LABEL(nmi_sun4m))
 	INTR_SETUP(-CCFSZ-80)
 	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
 
@@ -4601,12 +4605,25 @@
 	wr	%g6, 0, %tbr
 	nop; nop; nop			! paranoia
 
-	/* Set up a stack. We use the bottom half of the interrupt stack */
+#if 1
 	set	USRSTACK - CCFSZ, %fp	! as if called from user code
+
+	/* Set up a stack. We use the bottom half of the interrupt stack */
 	sethi	%hi(_EINTSTACKP), %o0
 	ld	[%o0 + %lo(_EINTSTACKP)], %o0
 	set	(INT_STACK_SIZE/2) + CCFSZ + 80, %sp
 	sub	%o0, %sp, %sp
+#else
+	/*
+	 * Use this CPUs idlelwp's stack
+	 */
+	sethi	%hi(cpcb), %o0
+	ld	[%o0 + %lo(cpcb)], %o0
+	set	USPACE - 80 - CCFSZ, %sp
+	add	%o0, %sp, %sp
+
+	add	80, %sp, %fp
+#endif
 
 	/* Enable traps */
 	rd	%psr, %l0
@@ -6320,30 +6337,5 @@
 _C_LABEL(proc0paddr):
 	.word	_C_LABEL(u0)	! KVA of proc0 uarea
 
-/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
-	.globl	_C_LABEL(intrcnt), _C_LABEL(eintrcnt)
-	.globl	_C_LABEL(intrnames), _C_LABEL(eintrnames)
-_C_LABEL(intrnames):
-	.asciz	"spur"
-	.asciz	"lev1"
-	.asciz	"lev2"
-	.asciz	"lev3"
-	.asciz	"lev4"
-	.asciz	"lev5"
-	.asciz	"lev6"
-	.asciz	"lev7"
-	.asciz  "lev8"
-	.asciz	"lev9"
-	.asciz	"clock"
-	.asciz	"lev11"
-	.asciz	"lev12"
-	.asciz	"lev13"
-	.asciz	"prof"
-_C_LABEL(eintrnames):
-	_ALIGN
-_C_LABEL(intrcnt):
-	.skip	4*15
-_C_LABEL(eintrcnt):
-
 	.comm	_C_LABEL(nwindows), 4
 	.comm	_C_LABEL(romp), 4

Index: src/sys/arch/sparc/sparc/pmap.c
diff -u src/sys/arch/sparc/sparc/pmap.c:1.322.20.3 src/sys/arch/sparc/sparc/pmap.c:1.322.20.4
--- src/sys/arch/sparc/sparc/pmap.c:1.322.20.3	Sun Jan 16 12:58:23 2011
+++ src/sys/arch/sparc/sparc/pmap.c	Fri Jan 28 07:16:14 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.322.20.3 2011/01/16 12:58:23 bouyer Exp $ */
+/*	$NetBSD: pmap.c,v 1.322.20.4 2011/01/28 07:16:14 snj Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.322.20.3 2011/01/16 12:58:23 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.322.20.4 2011/01/28 07:16:14 snj Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -3505,6 +3505,23 @@
 	 */
 	p = (vaddr_t)top;
 
+#if defined(MULTIPROCESSOR)
+	/*
+	 * allocate the rest of the cpu_info{} area.  note we waste the
+	 * first one to get a VA space.
+	 */
+	cpuinfo_len = ((sizeof(struct cpu_info) + NBPG - 1) & ~PGOFSET);
+	if (sparc_ncpus > 1) {
+		p = (p + NBPG - 1) & ~PGOFSET;
+		cpuinfo_data = (uint8_t *)p;
+		p += (cpuinfo_len * sparc_ncpus);
+
+		/* XXX we waste the first one */
+		memset(cpuinfo_data + cpuinfo_len, 0, cpuinfo_len * (sparc_ncpus - 1));
+	} else
+		cpuinfo_data = (uint8_t *)CPUINFO_VA;
+#endif
+
 	/*
 	 * Intialize the kernel pmap.
 	 */
@@ -3542,22 +3559,6 @@
 	p += ncontext * sizeof *ci;
 	bzero((void *)ci, (u_int)p - (u_int)ci);
 
-#if defined(MULTIPROCESSOR)
-	/*
-	 * allocate the rest of the cpu_info{} area.  note we waste the
-	 * first one to get a VA space.
-	 */
-	p = (p + NBPG - 1) & ~PGOFSET;
-	cpuinfo_data = (uint8_t *)p;
-	cpuinfo_len = ((sizeof(struct cpu_info) + NBPG - 1) & ~PGOFSET);
-	p += (cpuinfo_len * sparc_ncpus);
-	prom_printf("extra cpus: %p, p: %p, gap start: %p, gap end: %p\n",
-	    cpuinfo_data, p, etext_gap_start, etext_gap_end);
-
-	/* XXX we waste the first one */
-	memset(cpuinfo_data + cpuinfo_len, 0, cpuinfo_len * (sparc_ncpus - 1));
-#endif
-
 	/*
 	 * Set up the `constants' for the call to vm_init()
 	 * in main().  All pages beginning at p (rounded up to
@@ -3802,16 +3803,23 @@
 
 #ifdef MULTIPROCESSOR
 	/*
+	 * Initialise any cpu-specific data now.
+	 */
+	cpu_init_system();
+
+	/*
 	 * Remap cpu0 from CPUINFO_VA to the new correct value, wasting the
-	 * backing pages we allocated above XXX.
+	 * backing page we allocated above XXX.
 	 */
 	for (off = 0, va = (vaddr_t)cpuinfo_data;
-	     off < sizeof(struct cpu_info);
+	     sparc_ncpus > 1 && off < sizeof(struct cpu_info);
 	     va += NBPG, off += NBPG) {
 		paddr_t pa = PMAP_BOOTSTRAP_VA2PA(CPUINFO_VA + off);
 		prom_printf("going to pmap_kenter_pa(va=%p, pa=%p)\n", va, pa);
 		pmap_kremove(va, NBPG);
 		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
+		cache_flush_page(va, 0);
+		cache_flush_page(CPUINFO_VA, 0);
 	}
 
 	/*
@@ -3823,6 +3831,8 @@
 		cpus[i]->ci_self = cpus[i];
 		prom_printf("set cpu%d ci_self address: %p\n", i, cpus[i]);
 	}
+#else
+	cpus[0] = (struct cpu_info *)CPUINFO_VA;
 #endif
 
 	pmap_update(pmap_kernel());
