Module Name:    src
Committed By:   rmind
Date:           Fri Apr 20 22:23:25 UTC 2012

Modified Files:
        src/sys/arch/amd64/amd64: genassym.cf mptramp.S
        src/sys/arch/amd64/include: param.h
        src/sys/arch/i386/i386: genassym.cf mptramp.S
        src/sys/arch/i386/include: param.h
        src/sys/arch/x86/acpi: acpi_wakeup.c
        src/sys/arch/x86/include: cpu.h cpuvar.h pmap.h
        src/sys/arch/x86/x86: cpu.c mtrr_i686.c pmap.c pmap_tlb.c
        src/sys/arch/xen/include: xenpmap.h
        src/sys/arch/xen/x86: cpu.c x86_xpmap.c xen_pmap.c
        src/sys/kern: subr_kcpuset.c sys_sched.c
        src/sys/sys: kcpuset.h

Log Message:
- Convert x86 MD code, mainly pmap(9), e.g. the TLB shootdown code, to
  use kcpuset(9), replacing the hardcoded CPU bitmasks.  This removes
  the limit on the maximum number of CPUs.

- Support up to 256 CPUs on the amd64 architecture by default.

Bug fixes, improvements, completion of the Xen part, and testing on a
64-core AMD Opteron(tm) Processor 6282 SE (also as a Xen HVM domU with
128 CPUs) by Manuel Bouyer.
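
In outline, the conversion replaces fixed-width bitmasks and atomic bit
operations with dynamically sized CPU sets.  A minimal sketch of the
new idiom, using only the kcpuset(9) calls that appear in the diffs
below (error handling and locking elided):

	#include <sys/kcpuset.h>

	kcpuset_t *set;

	kcpuset_create(&set, true);		/* allocate, zeroed */
	/* was: atomic_or_32(&mask, ci->ci_cpumask); */
	kcpuset_atomic_set(set, cpu_index(ci));
	/* was: (mask & ci->ci_cpumask) != 0 */
	if (kcpuset_isset(set, cpu_index(ci))) {
		/* this CPU is in the set */
	}
	kcpuset_atomic_clear(set, cpu_index(ci));
	kcpuset_destroy(set);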


To generate a diff of this commit:
cvs rdiff -u -r1.49 -r1.50 src/sys/arch/amd64/amd64/genassym.cf
cvs rdiff -u -r1.13 -r1.14 src/sys/arch/amd64/amd64/mptramp.S
cvs rdiff -u -r1.17 -r1.18 src/sys/arch/amd64/include/param.h
cvs rdiff -u -r1.91 -r1.92 src/sys/arch/i386/i386/genassym.cf
cvs rdiff -u -r1.23 -r1.24 src/sys/arch/i386/i386/mptramp.S
cvs rdiff -u -r1.76 -r1.77 src/sys/arch/i386/include/param.h
cvs rdiff -u -r1.30 -r1.31 src/sys/arch/x86/acpi/acpi_wakeup.c
cvs rdiff -u -r1.49 -r1.50 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.45 -r1.46 src/sys/arch/x86/include/cpuvar.h
cvs rdiff -u -r1.51 -r1.52 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.97 -r1.98 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.25 -r1.26 src/sys/arch/x86/x86/mtrr_i686.c
cvs rdiff -u -r1.176 -r1.177 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.4 -r1.5 src/sys/arch/x86/x86/pmap_tlb.c
cvs rdiff -u -r1.33 -r1.34 src/sys/arch/xen/include/xenpmap.h
cvs rdiff -u -r1.90 -r1.91 src/sys/arch/xen/x86/cpu.c
cvs rdiff -u -r1.42 -r1.43 src/sys/arch/xen/x86/x86_xpmap.c
cvs rdiff -u -r1.20 -r1.21 src/sys/arch/xen/x86/xen_pmap.c
cvs rdiff -u -r1.4 -r1.5 src/sys/kern/subr_kcpuset.c
cvs rdiff -u -r1.41 -r1.42 src/sys/kern/sys_sched.c
cvs rdiff -u -r1.4 -r1.5 src/sys/sys/kcpuset.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/genassym.cf
diff -u src/sys/arch/amd64/amd64/genassym.cf:1.49 src/sys/arch/amd64/amd64/genassym.cf:1.50
--- src/sys/arch/amd64/amd64/genassym.cf:1.49	Wed Dec  7 15:47:41 2011
+++ src/sys/arch/amd64/amd64/genassym.cf	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.49 2011/12/07 15:47:41 cegger Exp $
+#	$NetBSD: genassym.cf,v 1.50 2012/04/20 22:23:24 rmind Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -228,12 +228,10 @@ define	CPU_INFO_RESCHED	offsetof(struct 
 define	CPU_INFO_WANT_PMAPLOAD	offsetof(struct cpu_info, ci_want_pmapload)
 define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
 define	TLBSTATE_VALID		TLBSTATE_VALID
-define	CPU_INFO_TLB_EVCNT	offsetof(struct cpu_info, ci_tlb_evcnt)
 define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
 define	CPU_INFO_CURLDT		offsetof(struct cpu_info, ci_curldt)
 define	CPU_INFO_IDLELWP	offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define	CPU_INFO_PMAP		offsetof(struct cpu_info, ci_pmap)
-define	CPU_INFO_CPUMASK	offsetof(struct cpu_info, ci_cpumask)
 define	CPU_INFO_RSP0		offsetof(struct cpu_info, ci_tss.tss_rsp0)
 define	CPU_INFO_NSYSCALL	offsetof(struct cpu_info, ci_data.cpu_nsyscall)
 define	CPU_INFO_NTRAP		offsetof(struct cpu_info, ci_data.cpu_ntrap)

Index: src/sys/arch/amd64/amd64/mptramp.S
diff -u src/sys/arch/amd64/amd64/mptramp.S:1.13 src/sys/arch/amd64/amd64/mptramp.S:1.14
--- src/sys/arch/amd64/amd64/mptramp.S:1.13	Thu Apr 19 18:00:34 2012
+++ src/sys/arch/amd64/amd64/mptramp.S	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mptramp.S,v 1.13 2012/04/19 18:00:34 jym Exp $	*/
+/*	$NetBSD: mptramp.S,v 1.14 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -108,7 +108,6 @@
 #define HALTT(x,y) /**/
 #endif
 
-	.globl	_C_LABEL(idle_loop)
 	.global _C_LABEL(cpu_spinup_trampoline)
 	.global _C_LABEL(cpu_spinup_trampoline_end)
 	.global _C_LABEL(cpu_hatch)
@@ -252,7 +251,6 @@ _C_LABEL(cpu_spinup_trampoline_end):	#en
 	movl    PCB_CR0(%rsi),%eax
 	movq    %rax,%cr0
 	call	_C_LABEL(cpu_hatch)
-	jmp	_C_LABEL(idle_loop)
 	
 	.data
 _C_LABEL(mp_pdirpa):

Index: src/sys/arch/amd64/include/param.h
diff -u src/sys/arch/amd64/include/param.h:1.17 src/sys/arch/amd64/include/param.h:1.18
--- src/sys/arch/amd64/include/param.h:1.17	Sat Feb  4 17:56:16 2012
+++ src/sys/arch/amd64/include/param.h	Fri Apr 20 22:23:24 2012
@@ -1,7 +1,12 @@
-/*	$NetBSD: param.h,v 1.17 2012/02/04 17:56:16 para Exp $	*/
+/*	$NetBSD: param.h,v 1.18 2012/04/20 22:23:24 rmind Exp $	*/
 
 #ifdef __x86_64__
 
+#ifndef XEN
+/* Must be defined before cpu.h */
+#define	MAXCPUS		256
+#endif
+
 #ifdef _KERNEL
 #include <machine/cpu.h>
 #endif

Index: src/sys/arch/i386/i386/genassym.cf
diff -u src/sys/arch/i386/i386/genassym.cf:1.91 src/sys/arch/i386/i386/genassym.cf:1.92
--- src/sys/arch/i386/i386/genassym.cf:1.91	Wed Dec  7 15:47:42 2011
+++ src/sys/arch/i386/i386/genassym.cf	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.91 2011/12/07 15:47:42 cegger Exp $
+#	$NetBSD: genassym.cf,v 1.92 2012/04/20 22:23:24 rmind Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -287,13 +287,11 @@ define	CPU_INFO_RESCHED	offsetof(struct 
 define	CPU_INFO_WANT_PMAPLOAD	offsetof(struct cpu_info, ci_want_pmapload)
 define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
 define	TLBSTATE_VALID		TLBSTATE_VALID
-define	CPU_INFO_TLB_EVCNT	offsetof(struct cpu_info, ci_tlb_evcnt)
 define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
 define	CPU_INFO_FPCURLWP	offsetof(struct cpu_info, ci_fpcurlwp)
 define	CPU_INFO_CURLDT		offsetof(struct cpu_info, ci_curldt)
 define	CPU_INFO_IDLELWP	offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define	CPU_INFO_PMAP		offsetof(struct cpu_info, ci_pmap)
-define	CPU_INFO_CPUMASK	offsetof(struct cpu_info, ci_cpumask)
 define	CPU_INFO_TSS		offsetof(struct cpu_info, ci_tss)
 define	CPU_INFO_TSS_SEL	offsetof(struct cpu_info, ci_tss_sel)
 define	CPU_INFO_ESP0		offsetof(struct cpu_info, ci_tss.tss_esp0)

Index: src/sys/arch/i386/i386/mptramp.S
diff -u src/sys/arch/i386/i386/mptramp.S:1.23 src/sys/arch/i386/i386/mptramp.S:1.24
--- src/sys/arch/i386/i386/mptramp.S:1.23	Thu Apr 19 18:00:35 2012
+++ src/sys/arch/i386/i386/mptramp.S	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mptramp.S,v 1.23 2012/04/19 18:00:35 jym Exp $	*/
+/*	$NetBSD: mptramp.S,v 1.24 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: mptramp.S,v 1.23 2012/04/19 18:00:35 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mptramp.S,v 1.24 2012/04/20 22:23:24 rmind Exp $");
 	
 #include "opt_mpbios.h"		/* for MPDEBUG */
 		
@@ -271,8 +271,6 @@ mp_cont:
 	HALTT(0x30,%ecx)	
 	pushl	%ecx
 	call	_C_LABEL(cpu_hatch)
-	HALT(0x33)
-	jmp	_C_LABEL(idle_loop)
 	
 	.data
 _C_LABEL(mp_pdirpa):

Index: src/sys/arch/i386/include/param.h
diff -u src/sys/arch/i386/include/param.h:1.76 src/sys/arch/i386/include/param.h:1.77
--- src/sys/arch/i386/include/param.h:1.76	Fri Feb 10 17:35:49 2012
+++ src/sys/arch/i386/include/param.h	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.76 2012/02/10 17:35:49 para Exp $	*/
+/*	$NetBSD: param.h,v 1.77 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -41,6 +41,13 @@
  * Machine dependent constants for Intel 386.
  */
 
+/*
+ * MAXCPUS must be defined before cpu.h inclusion.  Note: i386 might
+ * support more CPUs, but due to the limited KVA space available on
+ * i386, such support would be inefficient.  Use amd64 instead.
+ */
+#define	MAXCPUS		32
+
 #ifdef _KERNEL
 #include <machine/cpu.h>
 #endif

Index: src/sys/arch/x86/acpi/acpi_wakeup.c
diff -u src/sys/arch/x86/acpi/acpi_wakeup.c:1.30 src/sys/arch/x86/acpi/acpi_wakeup.c:1.31
--- src/sys/arch/x86/acpi/acpi_wakeup.c:1.30	Tue Apr 10 13:48:24 2012
+++ src/sys/arch/x86/acpi/acpi_wakeup.c	Fri Apr 20 22:23:24 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: acpi_wakeup.c,v 1.30 2012/04/10 13:48:24 jruoho Exp $	*/
+/*	$NetBSD: acpi_wakeup.c,v 1.31 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2002 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.30 2012/04/10 13:48:24 jruoho Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.31 2012/04/20 22:23:24 rmind Exp $");
 
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takaw...@jp.freebsd.org>
@@ -61,11 +61,15 @@ __KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.
  *      FreeBSD: src/sys/i386/acpica/acpi_wakeup.c,v 1.9 2002/01/10 03:26:46 wes Exp
  */
 
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.31 2012/04/20 22:23:24 rmind Exp $");
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
-#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/kcpuset.h>
 #include <sys/sysctl.h>
 
 #include <uvm/uvm_extern.h>
@@ -209,7 +213,7 @@ acpi_md_sleep_enter(int state)
 #ifdef MULTIPROCESSOR
 	if (!CPU_IS_PRIMARY(ci)) {
 		atomic_and_32(&ci->ci_flags, ~CPUF_RUNNING);
-		atomic_and_32(&cpus_running, ~ci->ci_cpumask);
+		kcpuset_atomic_clear(kcpuset_running, cpu_index(ci));
 
 		ACPI_FLUSH_CPU_CACHE();
 
@@ -277,7 +281,7 @@ acpi_cpu_sleep(struct cpu_info *ci)
 #endif
 
 	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
-	atomic_or_32(&cpus_running, ci->ci_cpumask);
+	kcpuset_atomic_set(kcpuset_running, cpu_index(ci));
 	tsc_sync_ap(ci);
 
 	x86_enable_intr();
@@ -291,6 +295,7 @@ acpi_md_sleep(int state)
 #ifdef MULTIPROCESSOR
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
+	cpuid_t cid;
 #endif
 
 	KASSERT(acpi_wakeup_paddr != 0);
@@ -312,10 +317,12 @@ acpi_md_sleep(int state)
 	x86_disable_intr();
 
 #ifdef MULTIPROCESSOR
-	/* Save and suspend Application Processors */
+	/* Save and suspend Application Processors. */
 	x86_broadcast_ipi(X86_IPI_ACPI_CPU_SLEEP);
-	while (cpus_running != curcpu()->ci_cpumask)
+	cid = cpu_index(curcpu());
+	while (kcpuset_isotherset(kcpuset_running, cid)) {
 		delay(1);
+	}
 #endif
 
 	if (acpi_md_sleep_prepare(state))
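
The suspend path now tracks running CPUs through the global
kcpuset_running set: each AP clears its own bit before halting, and the
BSP spins until no other bit remains.  In sketch form (condensed from
the hunks above):

	/* AP, in acpi_md_sleep_enter(): drop out of the running set. */
	kcpuset_atomic_clear(kcpuset_running, cpu_index(ci));

	/* BSP, in acpi_md_sleep(): wait until it is the only CPU left. */
	cid = cpu_index(curcpu());
	while (kcpuset_isotherset(kcpuset_running, cid)) {
		delay(1);
	}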

Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.49 src/sys/arch/x86/include/cpu.h:1.50
--- src/sys/arch/x86/include/cpu.h:1.49	Fri Mar  2 16:43:31 2012
+++ src/sys/arch/x86/include/cpu.h	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.49 2012/03/02 16:43:31 bouyer Exp $	*/
+/*	$NetBSD: cpu.h,v 1.50 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -105,7 +105,7 @@ struct cpu_info {
 	int	ci_fpsaving;		/* save in progress */
 	int	ci_fpused;		/* XEN: FPU was used by curlwp */
 	cpuid_t ci_cpuid;		/* our CPU ID */
-	int	ci_cpumask;		/* (1 << CPU ID) */
+	int	_unused;
 	uint32_t ci_acpiid;		/* our ACPI/MADT ID */
 	uint32_t ci_initapicid;		/* our initial APIC ID */
 
@@ -323,8 +323,6 @@ void cpu_load_pmap(struct pmap *, struct
 void cpu_broadcast_halt(void);
 void cpu_kick(struct cpu_info *);
 
-extern uint32_t cpus_attached;
-
 #define	curcpu()		x86_curcpu()
 #define	curlwp			x86_curlwp()
 #define	curpcb			((struct pcb *)lwp_getpcb(curlwp))

Index: src/sys/arch/x86/include/cpuvar.h
diff -u src/sys/arch/x86/include/cpuvar.h:1.45 src/sys/arch/x86/include/cpuvar.h:1.46
--- src/sys/arch/x86/include/cpuvar.h:1.45	Sat Aug 13 12:37:30 2011
+++ src/sys/arch/x86/include/cpuvar.h	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/* 	$NetBSD: cpuvar.h,v 1.45 2011/08/13 12:37:30 cherry Exp $ */
+/* 	$NetBSD: cpuvar.h,v 1.46 2012/04/20 22:23:24 rmind Exp $ */
 
 /*-
  * Copyright (c) 2000, 2007 The NetBSD Foundation, Inc.
@@ -95,13 +95,11 @@ struct cpufeature_attach_args {
 };
 
 #ifdef _KERNEL
-
+#include <sys/kcpuset.h>
 #if defined(_KERNEL_OPT)
 #include "opt_multiprocessor.h"
 #endif /* defined(_KERNEL_OPT) */
 
-extern uint32_t cpus_running;
-
 int x86_ipi(int, int, int);
 void x86_self_ipi(int);
 int x86_ipi_init(int);

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.51 src/sys/arch/x86/include/pmap.h:1.52
--- src/sys/arch/x86/include/pmap.h:1.51	Sun Mar 11 16:28:02 2012
+++ src/sys/arch/x86/include/pmap.h	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.51 2012/03/11 16:28:02 jym Exp $	*/
+/*	$NetBSD: pmap.h,v 1.52 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -108,6 +108,8 @@
 
 
 #if defined(_KERNEL)
+#include <sys/kcpuset.h>
+
 /*
  * pmap data structures: see pmap.c for details of locking.
  */
@@ -162,10 +164,10 @@ struct pmap {
 	union descriptor *pm_ldt;	/* user-set LDT */
 	size_t pm_ldt_len;		/* size of LDT in bytes */
 	int pm_ldt_sel;			/* LDT selector */
-	uint32_t pm_cpus;		/* mask of CPUs using pmap */
-	uint32_t pm_kernel_cpus;	/* mask of CPUs using kernel part
+	kcpuset_t *pm_cpus;		/* mask of CPUs using pmap */
+	kcpuset_t *pm_kernel_cpus;	/* mask of CPUs using kernel part
 					 of pmap */
-	uint32_t pm_xen_ptp_cpus;	/* mask of CPUs which have this pmap's
+	kcpuset_t *pm_xen_ptp_cpus;	/* mask of CPUs which have this pmap's
 					 ptp mapped */
 	uint64_t pm_ncsw;		/* for assertions */
 	struct vm_page *pm_gc_ptp;	/* pages from pmap g/c */
@@ -289,6 +291,7 @@ typedef enum tlbwhy {
 } tlbwhy_t;
 
 void		pmap_tlb_init(void);
+void		pmap_tlb_cpu_init(struct cpu_info *);
 void		pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, tlbwhy_t);
 void		pmap_tlb_shootnow(void);
 void		pmap_tlb_intr(void);

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.97 src/sys/arch/x86/x86/cpu.c:1.98
--- src/sys/arch/x86/x86/cpu.c:1.97	Fri Feb 17 18:40:19 2012
+++ src/sys/arch/x86/x86/cpu.c	Fri Apr 20 22:23:24 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: cpu.c,v 1.97 2012/02/17 18:40:19 bouyer Exp $	*/
+/*	$NetBSD: cpu.c,v 1.98 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.97 2012/02/17 18:40:19 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.98 2012/04/20 22:23:24 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -82,6 +82,7 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.97
 #include <sys/kmem.h>
 #include <sys/cpu.h>
 #include <sys/cpufreq.h>
+#include <sys/idle.h>
 #include <sys/atomic.h>
 #include <sys/reboot.h>
 
@@ -114,10 +115,6 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.97
 
 #include "tsc.h"
 
-#if MAXCPUS > 32
-#error cpu_info contains 32bit bitmasks
-#endif
-
 static int	cpu_match(device_t, cfdata_t, void *);
 static void	cpu_attach(device_t, device_t, void *);
 static void	cpu_defer(device_t);
@@ -157,7 +154,6 @@ struct cpu_info cpu_info_primary __align
 	.ci_idepth = -1,
 	.ci_curlwp = &lwp0,
 	.ci_curldt = -1,
-	.ci_cpumask = 1,
 #ifdef TRAPLOG
 	.ci_tlog_base = &tlog_primary,
 #endif /* !TRAPLOG */
@@ -173,9 +169,6 @@ static void	tss_init(struct i386tss *, v
 
 static void	cpu_init_idle_lwp(struct cpu_info *);
 
-uint32_t cpus_attached = 0;
-uint32_t cpus_running = 1;
-
 uint32_t cpu_feature[5]; /* X86 CPUID feature bits
 			  *	[0] basic features %edx
 			  *	[1] basic features %ecx
@@ -271,8 +264,9 @@ cpu_vm_init(struct cpu_info *ci)
 	 */
 	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
 	uvm_page_recolor(ncolors);
-}
 
+	pmap_tlb_cpu_init(ci);
+}
 
 static void
 cpu_attach(device_t parent, device_t self, void *aux)
@@ -286,8 +280,12 @@ cpu_attach(device_t parent, device_t sel
 
 	sc->sc_dev = self;
 
-	if (cpus_attached == ~0) {
-		aprint_error(": increase MAXCPUS\n");
+	if (ncpu == maxcpus) {
+#ifndef _LP64
+		aprint_error(": too many CPUs, please use NetBSD/amd64\n");
+#else
+		aprint_error(": too many CPUs\n");
+#endif
 		return;
 	}
 
@@ -356,7 +354,6 @@ cpu_attach(device_t parent, device_t sel
 		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
 	}
 
-	ci->ci_cpumask = (1 << cpu_index(ci));
 	pmap_reference(pmap_kernel());
 	ci->ci_pmap = pmap_kernel();
 	ci->ci_tlbstate = TLBSTATE_STALE;
@@ -428,7 +425,6 @@ cpu_attach(device_t parent, device_t sel
 	}
 
 	pat_init(ci);
-	atomic_or_32(&cpus_attached, ci->ci_cpumask);
 
 	if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown))
 		aprint_error_dev(self, "couldn't establish power handler\n");
@@ -579,8 +575,6 @@ cpu_init(struct cpu_info *ci)
 #endif	/* i386 */
 #endif /* MTRR */
 
-	atomic_or_32(&cpus_running, ci->ci_cpumask);
-
 	if (ci != &cpu_info_primary) {
 		/* Synchronize TSC again, and check for drift. */
 		wbinvd();
@@ -839,6 +833,9 @@ cpu_hatch(void *v)
 	x86_errata();
 
 	aprint_debug_dev(ci->ci_dev, "running\n");
+
+	idle_loop(NULL);
+	KASSERT(false);
 }
 
 #if defined(DDB)
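
A related structural change spans this file and the two mptramp.S
diffs: cpu_hatch() no longer returns to the trampoline, which formerly
ended with a jmp to idle_loop.  The hatch path now enters the idle loop
directly from C, so the assembly needs no reference to idle_loop at
all.  The tail of cpu_hatch() is simply:

	aprint_debug_dev(ci->ci_dev, "running\n");

	idle_loop(NULL);	/* enter the idle loop; never returns */
	KASSERT(false);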

Index: src/sys/arch/x86/x86/mtrr_i686.c
diff -u src/sys/arch/x86/x86/mtrr_i686.c:1.25 src/sys/arch/x86/x86/mtrr_i686.c:1.26
--- src/sys/arch/x86/x86/mtrr_i686.c:1.25	Thu Dec 15 09:38:21 2011
+++ src/sys/arch/x86/x86/mtrr_i686.c	Fri Apr 20 22:23:24 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: mtrr_i686.c,v 1.25 2011/12/15 09:38:21 abs Exp $ */
+/*	$NetBSD: mtrr_i686.c,v 1.26 2012/04/20 22:23:24 rmind Exp $ */
 
 /*-
- * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * Copyright (c) 2000, 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -30,16 +30,17 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mtrr_i686.c,v 1.25 2011/12/15 09:38:21 abs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mtrr_i686.c,v 1.26 2012/04/20 22:23:24 rmind Exp $");
 
 #include "opt_multiprocessor.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
+
 #include <sys/atomic.h>
 #include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/proc.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -133,11 +134,9 @@ struct mtrr_funcs i686_mtrr_funcs = {
 	i686_mtrr_dump
 };
 
-#ifdef MULTIPROCESSOR
-static volatile uint32_t mtrr_waiting;
-#endif
+static kcpuset_t *		mtrr_waiting;
 
-static uint64_t i686_mtrr_cap;
+static uint64_t			i686_mtrr_cap;
 
 static void
 i686_mtrr_dump(const char *tag)
@@ -174,14 +173,10 @@ i686_mtrr_reload(int synch)
 	vaddr_t cr3, cr4;
 	uint32_t origcr0;
 	vaddr_t origcr4;
-#ifdef MULTIPROCESSOR
-	uint32_t mymask = 1 << cpu_number();
-#endif
 
 	/*
 	 * 2. Disable interrupts
 	 */
-
 	x86_disable_intr();
 
 #ifdef MULTIPROCESSOR
@@ -189,11 +184,10 @@ i686_mtrr_reload(int synch)
 		/*
 		 * 3. Wait for all processors to reach this point.
 		 */
-
-		atomic_or_32(&mtrr_waiting, mymask);
-
-		while (mtrr_waiting != cpus_running)
+		kcpuset_atomic_set(mtrr_waiting, cpu_index(curcpu()));
+		while (!kcpuset_match(mtrr_waiting, kcpuset_running)) {
 			DELAY(10);
+		}
 	}
 #endif
 
@@ -289,10 +283,10 @@ i686_mtrr_reload(int synch)
 		/*
 		 * 14. Wait for all processors to reach this point.
 		 */
-		atomic_and_32(&mtrr_waiting, ~mymask);
-
-		while (mtrr_waiting != 0)
+		kcpuset_atomic_clear(mtrr_waiting, cpu_index(curcpu()));
+		while (!kcpuset_iszero(mtrr_waiting)) {
 			DELAY(10);
+		}
 	}
 #endif
 
@@ -326,25 +320,25 @@ i686_mtrr_init_first(void)
 		}
 	}
 
-	for (i = 0; i < nmtrr_raw; i++)
+	for (i = 0; i < nmtrr_raw; i++) {
 		if (mtrr_raw[i].msraddr)
 			mtrr_raw[i].msrval = rdmsr(mtrr_raw[i].msraddr);
 		else
 			mtrr_raw[i].msrval = 0;
+	}
 #if 0
 	mtrr_dump("init mtrr");
 #endif
 
-	mtrr_fixed = (struct mtrr *)
-	    malloc(MTRR_I686_NFIXED_SOFT * sizeof (struct mtrr), M_TEMP,
-		   M_NOWAIT);
-	if (mtrr_fixed == NULL)
-		panic("can't allocate fixed MTRR array");
-
-	mtrr_var = (struct mtrr *)
-	    malloc(i686_mtrr_vcnt * sizeof (struct mtrr), M_TEMP, M_NOWAIT);
-	if (mtrr_var == NULL)
-		panic("can't allocate variable MTRR array");
+	kcpuset_create(&mtrr_waiting, true);
+
+	mtrr_fixed =
+	    kmem_zalloc(MTRR_I686_NFIXED_SOFT * sizeof(struct mtrr), KM_SLEEP);
+	KASSERT(mtrr_fixed != NULL);
+
+	mtrr_var =
+	    kmem_zalloc(i686_mtrr_vcnt * sizeof(struct mtrr), KM_SLEEP);
+	KASSERT(mtrr_var != NULL);
 
 	mtrr_var_raw = &mtrr_raw[0];
 	mtrr_fixed_raw = &mtrr_raw[MTRR_I686_NVAR_MAX * 2];
@@ -767,9 +761,12 @@ i686_mtrr_get(struct mtrr *mtrrp, int *n
 static void
 i686_mtrr_commit(void)
 {
+
 	i686_soft2raw();
+	kpreempt_disable();
 #ifdef MULTIPROCESSOR
 	x86_broadcast_ipi(X86_IPI_MTRR);
 #endif
 	i686_mtrr_reload(1);
+	kpreempt_enable();
 }
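
The MTRR reload keeps its two rendezvous points (steps 3 and 14 in the
comments), now expressed with a kcpuset.  The barrier pair, condensed
into one sketch (interrupts already disabled, MULTIPROCESSOR case):

	/* Step 3: arrive, then wait for every running CPU to arrive. */
	kcpuset_atomic_set(mtrr_waiting, cpu_index(curcpu()));
	while (!kcpuset_match(mtrr_waiting, kcpuset_running)) {
		DELAY(10);
	}

	/* ... reload MTRRs with caches and paging disabled ... */

	/* Step 14: depart, then wait for every CPU to depart. */
	kcpuset_atomic_clear(mtrr_waiting, cpu_index(curcpu()));
	while (!kcpuset_iszero(mtrr_waiting)) {
		DELAY(10);
	}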

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.176 src/sys/arch/x86/x86/pmap.c:1.177
--- src/sys/arch/x86/x86/pmap.c:1.176	Sat Feb 25 20:03:58 2012
+++ src/sys/arch/x86/x86/pmap.c	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.176 2012/02/25 20:03:58 cherry Exp $	*/
+/*	$NetBSD: pmap.c,v 1.177 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.176 2012/02/25 20:03:58 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.177 2012/04/20 22:23:24 rmind Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -723,7 +723,6 @@ pmap_map_ptes(struct pmap *pmap, struct 
 {
 	struct pmap *curpmap;
 	struct cpu_info *ci;
-	uint32_t cpumask;
 	lwp_t *l;
 
 	/* The kernel's pmap is always accessible. */
@@ -765,13 +764,14 @@ pmap_map_ptes(struct pmap *pmap, struct 
 		 * The reference will be dropped by pmap_unmap_ptes().
 		 * Can happen if we block during exit().
 		 */
-		cpumask = ci->ci_cpumask;
-		atomic_and_32(&curpmap->pm_cpus, ~cpumask);
-		atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask);
+		const cpuid_t cid = cpu_index(ci);
+
+		kcpuset_atomic_clear(curpmap->pm_cpus, cid);
+		kcpuset_atomic_clear(curpmap->pm_kernel_cpus, cid);
 		ci->ci_pmap = pmap;
 		ci->ci_tlbstate = TLBSTATE_VALID;
-		atomic_or_32(&pmap->pm_cpus, cpumask);
-		atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
+		kcpuset_atomic_set(pmap->pm_cpus, cid);
+		kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
 		cpu_load_pmap(pmap, curpmap);
 	}
 	pmap->pm_ncsw = l->l_ncsw;
@@ -1048,8 +1048,7 @@ pmap_emap_sync(bool canload)
 		 */
 		pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
 		if (__predict_false(pmap == ci->ci_pmap)) {
-			const uint32_t cpumask = ci->ci_cpumask;
-			atomic_and_32(&pmap->pm_cpus, ~cpumask);
+			kcpuset_atomic_clear(pmap->pm_cpus, cpu_index(ci));
 		}
 		pmap_load();
 		KASSERT(ci->ci_want_pmapload == 0);
@@ -1234,6 +1233,9 @@ pmap_bootstrap(vaddr_t kva_start)
 	kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
 		x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
 
+	kcpuset_create(&kpm->pm_cpus, true);
+	kcpuset_create(&kpm->pm_kernel_cpus, true);
+
 	/*
 	 * the above is just a rough estimate and not critical to the proper
 	 * operation of the system.
@@ -1651,6 +1653,9 @@ pmap_init(void)
 
 	pmap_tlb_init();
 
+	/* XXX: Since cpu_hatch() is only for secondary CPUs. */
+	pmap_tlb_cpu_init(curcpu());
+
 	evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
 	    NULL, "x86", "io bitmap copy");
 	evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
@@ -1896,9 +1901,8 @@ pmap_free_ptp(struct pmap *pmap, struct 
 			/*
 			 * Update the per-cpu PD on all cpus the current
 			 * pmap is active on 
-			 */ 
+			 */
 			xen_kpm_sync(pmap, index);
-
 		}
 #  endif /*__x86_64__ */
 		invaladdr = level == 1 ? (vaddr_t)ptes :
@@ -1988,10 +1992,10 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t 
 			/*
 			 * Update the per-cpu PD on all cpus the current
 			 * pmap is active on 
-			 */ 
+			 */
 			xen_kpm_sync(pmap, index);
 		}
-#endif /* XEN && __x86_64__ */
+#endif
 		pmap_pte_flush();
 		pmap_stats_update(pmap, 1, 0);
 		/*
@@ -1999,33 +2003,26 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t 
 		 * wire count of the parent page.
 		 */
 		if (i < PTP_LEVELS) {
-			if (pptp == NULL)
+			if (pptp == NULL) {
 				pptp = pmap_find_ptp(pmap, va, ppa, i);
-#ifdef DIAGNOSTIC
-			if (pptp == NULL)
-				panic("pde page disappeared");
-#endif
+				KASSERT(pptp != NULL);
+			}
 			pptp->wire_count++;
 		}
 	}
 
 	/*
-	 * ptp is not NULL if we just allocated a new ptp. If it's
+	 * PTP is not NULL if we just allocated a new PTP.  If it is
 	 * still NULL, we must look up the existing one.
 	 */
 	if (ptp == NULL) {
 		ptp = pmap_find_ptp(pmap, va, ppa, 1);
-#ifdef DIAGNOSTIC
-		if (ptp == NULL) {
-			printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n",
-			    va, ppa);
-			panic("pmap_get_ptp: unmanaged user PTP");
-		}
-#endif
+		KASSERTMSG(ptp != NULL, "pmap_get_ptp: va %" PRIxVADDR
+		    " ppa %" PRIxPADDR "\n", va, ppa);
 	}
 
 	pmap->pm_ptphint[0] = ptp;
-	return(ptp);
+	return ptp;
 }
 
 /*
@@ -2200,12 +2197,8 @@ pmap_pdp_free(struct pool *pp, void *v)
 #endif /* PAE */
 
 /*
- * pmap_create: create a pmap
- *
- * => note: old pmap interface took a "size" args which allowed for
- *	the creation of "software only" pmaps (not in bsd).
+ * pmap_create: create a pmap object.
  */
-
 struct pmap *
 pmap_create(void)
 {
@@ -2228,11 +2221,13 @@ pmap_create(void)
 	pmap->pm_hiexec = 0;
 #endif /* !defined(__x86_64__) */
 	pmap->pm_flags = 0;
-	pmap->pm_cpus = 0;
-	pmap->pm_kernel_cpus = 0;
-	pmap->pm_xen_ptp_cpus = 0;
 	pmap->pm_gc_ptp = NULL;
 
+	kcpuset_create(&pmap->pm_cpus, true);
+	kcpuset_create(&pmap->pm_kernel_cpus, true);
+#ifdef XEN
+	kcpuset_create(&pmap->pm_xen_ptp_cpus, true);
+#endif
 	/* init the LDT */
 	pmap->pm_ldt = NULL;
 	pmap->pm_ldt_len = 0;
@@ -2287,12 +2282,8 @@ pmap_free_ptps(struct vm_page *empty_ptp
 void
 pmap_destroy(struct pmap *pmap)
 {
-	int i;
-#ifdef DIAGNOSTIC
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-#endif /* DIAGNOSTIC */
 	lwp_t *l;
+	int i;
 
 	/*
 	 * If we have torn down this pmap, process deferred frees and
@@ -2321,6 +2312,9 @@ pmap_destroy(struct pmap *pmap)
 	}
 
 #ifdef DIAGNOSTIC
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+
 	for (CPU_INFO_FOREACH(cii, ci)) {
 		if (ci->ci_pmap == pmap)
 			panic("destroying pmap being used");
@@ -2344,11 +2338,8 @@ pmap_destroy(struct pmap *pmap)
 #endif /* DIAGNOSTIC */
 
 	/*
-	 * reference count is zero, free pmap resources and then free pmap.
-	 */
-
-	/*
-	 * remove it from global list of pmaps
+	 * Reference count is zero, free pmap resources and then free pmap.
+	 * First, remove it from global list of pmaps.
 	 */
 
 	mutex_enter(&pmaps_lock);
@@ -2394,6 +2385,11 @@ pmap_destroy(struct pmap *pmap)
 		uvm_obj_destroy(&pmap->pm_obj[i], false);
 		mutex_destroy(&pmap->pm_obj_lock[i]);
 	}
+	kcpuset_destroy(pmap->pm_cpus);
+	kcpuset_destroy(pmap->pm_kernel_cpus);
+#ifdef XEN
+	kcpuset_destroy(pmap->pm_xen_ptp_cpus);
+#endif
 	pool_cache_put(&pmap_cache, pmap);
 }
 
@@ -2596,19 +2592,15 @@ pmap_activate(struct lwp *l)
 /*
  * pmap_reactivate: try to regain reference to the pmap.
  *
- * => must be called with kernel preemption disabled
+ * => Must be called with kernel preemption disabled.
  */
 
 static bool
 pmap_reactivate(struct pmap *pmap)
 {
-	struct cpu_info *ci;
-	uint32_t cpumask;
-	bool result;	
-	uint32_t oldcpus;
-
-	ci = curcpu();
-	cpumask = ci->ci_cpumask;
+	struct cpu_info * const ci = curcpu();
+	const cpuid_t cid = cpu_index(ci);
+	bool result;
 
 	KASSERT(kpreempt_disabled());
 #if defined(XEN) && defined(__x86_64__)
@@ -2620,53 +2612,48 @@ pmap_reactivate(struct pmap *pmap)
 #endif
 
 	/*
-	 * if we still have a lazy reference to this pmap,
-	 * we can assume that there was no tlb shootdown
-	 * for this pmap in the meantime.
+	 * If we still have a lazy reference to this pmap, we can assume
+	 * that there was no TLB shootdown for this pmap in the meantime.
 	 *
-	 * the order of events here is important as we must
-	 * synchronize with TLB shootdown interrupts.  declare
-	 * interest in invalidations (TLBSTATE_VALID) and then
-	 * check the cpumask, which the IPIs can change only
-	 * when the state is TLBSTATE_LAZY.
+	 * The order of events here is important as we must synchronize
+	 * with TLB shootdown interrupts.  Declare interest in invalidations
+	 * (TLBSTATE_VALID) and then check the CPU set, which the IPIs can
+	 * change only when the state is TLBSTATE_LAZY.
 	 */
 
 	ci->ci_tlbstate = TLBSTATE_VALID;
-	oldcpus = pmap->pm_cpus;
-	KASSERT((pmap->pm_kernel_cpus & cpumask) != 0);
-	if (oldcpus & cpumask) {
-		/* got it */
+	KASSERT(kcpuset_isset(pmap->pm_kernel_cpus, cid));
+
+	if (kcpuset_isset(pmap->pm_cpus, cid)) {
+		/* We have the reference, state is valid. */
 		result = true;
 	} else {
-		/* must reload */
-		atomic_or_32(&pmap->pm_cpus, cpumask);
+		/* Must reload the TLB. */
+		kcpuset_atomic_set(pmap->pm_cpus, cid);
 		result = false;
 	}
-
 	return result;
 }
 
 /*
- * pmap_load: actually switch pmap.  (fill in %cr3 and LDT info)
- *
- * ensures that the current process' pmap is loaded on the current cpu's MMU
- * and there's no stale TLB entries.
+ * pmap_load: perform the actual pmap switch, i.e. fill in %cr3 register
+ * and relevant LDT info.
  *
- * the caller should disable preemption or do check-and-retry to prevent
- * a preemption from undoing our efforts.
+ * Ensures that the current process' pmap is loaded on the current CPU's
+ * MMU and that there are no stale TLB entries.
  *
- * this function can block.
+ * => The caller should disable kernel preemption or do check-and-retry
+ *    to prevent a preemption from undoing our efforts.
+ * => This function may block.
  */
-
 void
 pmap_load(void)
 {
 	struct cpu_info *ci;
-	uint32_t cpumask;
-	struct pmap *pmap;
-	struct pmap *oldpmap;
+	struct pmap *pmap, *oldpmap;
 	struct lwp *l;
 	struct pcb *pcb;
+	cpuid_t cid;
 	uint64_t ncsw;
 
 	kpreempt_disable();
@@ -2676,7 +2663,6 @@ pmap_load(void)
 		kpreempt_enable();
 		return;
 	}
-	cpumask = ci->ci_cpumask;
 	l = ci->ci_curlwp;
 	ncsw = l->l_ncsw;
 
@@ -2714,17 +2700,14 @@ pmap_load(void)
 	}
 
 	/*
-	 * grab a reference to the new pmap.
+	 * Acquire a reference to the new pmap and perform the switch.
 	 */
 
 	pmap_reference(pmap);
 
-	/*
-	 * actually switch pmap.
-	 */
-
-	atomic_and_32(&oldpmap->pm_cpus, ~cpumask);
-	atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask);
+	cid = cpu_index(ci);
+	kcpuset_atomic_clear(oldpmap->pm_cpus, cid);
+	kcpuset_atomic_clear(oldpmap->pm_kernel_cpus, cid);
 
 #if defined(XEN) && defined(__x86_64__)
 	KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd ||
@@ -2734,19 +2717,17 @@ pmap_load(void)
 #elif !defined(XEN)
 	KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3()));
 #endif
-	KASSERT((pmap->pm_cpus & cpumask) == 0);
-	KASSERT((pmap->pm_kernel_cpus & cpumask) == 0);
+	KASSERT(!kcpuset_isset(pmap->pm_cpus, cid));
+	KASSERT(!kcpuset_isset(pmap->pm_kernel_cpus, cid));
 
 	/*
-	 * mark the pmap in use by this processor.  again we must
-	 * synchronize with TLB shootdown interrupts, so set the
-	 * state VALID first, then register us for shootdown events
-	 * on this pmap.
+	 * Mark the pmap in use by this CPU.  Again, we must synchronize
+	 * with TLB shootdown interrupts, so set the state VALID first,
+	 * then register us for shootdown events on this pmap.
 	 */
-
 	ci->ci_tlbstate = TLBSTATE_VALID;
-	atomic_or_32(&pmap->pm_cpus, cpumask);
-	atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
+	kcpuset_atomic_set(pmap->pm_cpus, cid);
+	kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
 	ci->ci_pmap = pmap;
 
 	/*
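
The ordering requirement described in the pmap_reactivate() comment is
the crux of the lazy-reference scheme: the CPU first publishes
TLBSTATE_VALID and only then tests its bit in pm_cpus, while the
shootdown IPI handler (see pmap_tlb.c below) clears that bit only while
the state is TLBSTATE_LAZY.  Both sides in sketch form:

	/* pmap_reactivate(), this CPU: */
	ci->ci_tlbstate = TLBSTATE_VALID;	/* declare interest first */
	if (kcpuset_isset(pmap->pm_cpus, cid)) {
		return true;			/* no shootdown was missed */
	}
	kcpuset_atomic_set(pmap->pm_cpus, cid);	/* stale: must reload TLB */
	return false;

	/* pmap_tlb_intr(), remote IPI handler: */
	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
		kcpuset_atomic_clear(pm->pm_cpus, cid);
		ci->ci_tlbstate = TLBSTATE_STALE;
	}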

Index: src/sys/arch/x86/x86/pmap_tlb.c
diff -u src/sys/arch/x86/x86/pmap_tlb.c:1.4 src/sys/arch/x86/x86/pmap_tlb.c:1.5
--- src/sys/arch/x86/x86/pmap_tlb.c:1.4	Sun Dec  4 04:28:41 2011
+++ src/sys/arch/x86/x86/pmap_tlb.c	Fri Apr 20 22:23:24 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: pmap_tlb.c,v 1.4 2011/12/04 04:28:41 cherry Exp $	*/
+/*	$NetBSD: pmap_tlb.c,v 1.5 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2008-2011 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -32,15 +32,15 @@
 /*
  * x86 pmap(9) module: TLB shootdowns.
  *
- * TLB shootdowns are hard interrupts that operate outside the SPL framework:
+ * TLB shootdowns are hard interrupts that operate outside the SPL framework.
  * They do not need to be blocked, provided that the pmap module gets the
  * order of events correct.  The calls are made by poking the LAPIC directly.
- * The interrupt handler is short and does one of the following:  invalidate
+ * The interrupt handler is short and does one of the following: invalidate
  * a set of pages, all user TLB entries or the entire TLB.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.4 2011/12/04 04:28:41 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.5 2012/04/20 22:23:24 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -70,19 +70,16 @@ typedef struct {
 #endif
 	uint16_t		tp_count;
 	uint16_t		tp_pte;
-	uint32_t		tp_cpumask;
-	uint32_t		tp_usermask;
+	int			tp_userpmap;
+	kcpuset_t *		tp_cpumask;
 } pmap_tlb_packet_t;
 
 /* No more than N separate invlpg. */
 #define	TP_MAXVA		6
 
 typedef struct {
-	volatile uint32_t	tm_pending;
-	volatile uint32_t	tm_gen;
-	uint32_t		tm_usergen;
-	uint32_t		tm_globalgen;
-	char			tm_pad[64 - sizeof(uintptr_t) * 4];
+	volatile u_int		tm_pendcount;
+	volatile u_int		tm_gen;
 } pmap_tlb_mailbox_t;
 
 /*
@@ -151,11 +148,21 @@ pmap_tlb_init(void)
 #endif
 }
 
+void
+pmap_tlb_cpu_init(struct cpu_info *ci)
+{
+	pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;
+
+	memset(tp, 0, sizeof(pmap_tlb_packet_t));
+	kcpuset_create(&tp->tp_cpumask, true);
+}
+
 static inline void
 pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
 {
 #ifdef TLBSTATS
-	uint32_t mask;
+	const cpuid_t cid = cpu_index(curcpu());
+	bool local = false, remote = false;
 
 	if (va != (vaddr_t)-1LL) {
 		atomic_inc_64(&tlbstat_single_req.ev_count);
@@ -164,15 +171,18 @@ pmap_tlbstat_count(struct pmap *pm, vadd
 		atomic_inc_64(&tlbstat_kernel[why].ev_count);
 		return;
 	}
+
 	if (va >= VM_MAXUSER_ADDRESS) {
-		mask = pm->pm_cpus | pm->pm_kernel_cpus;
-	} else {
-		mask = pm->pm_cpus;
+		remote = kcpuset_isotherset(pm->pm_kernel_cpus, cid);
+		local = kcpuset_isset(pm->pm_kernel_cpus, cid);
 	}
-	if ((mask & curcpu()->ci_cpumask) != 0) {
+	remote |= kcpuset_isotherset(pm->pm_cpus, cid);
+	local |= kcpuset_isset(pm->pm_cpus, cid);
+
+	if (local) {
 		atomic_inc_64(&tlbstat_local[why].ev_count);
 	}
-	if ((mask & ~curcpu()->ci_cpumask) != 0) {
+	if (remote) {
 		atomic_inc_64(&tlbstat_remote[why].ev_count);
 	}
 #endif
@@ -203,7 +213,7 @@ pmap_tlb_invalidate(pmap_tlb_packet_t *t
 }
 
 /*
- * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'
+ * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'.
  */
 void
 pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
@@ -229,7 +239,7 @@ pmap_tlb_shootdown(struct pmap *pm, vadd
 
 	/*
 	 * Add the shootdown operation to our pending set.
-	 */ 
+	 */
 	s = splvm();
 	tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;
 
@@ -250,14 +260,16 @@ pmap_tlb_shootdown(struct pmap *pm, vadd
 		tp->tp_count = (uint16_t)-1;
 	}
 
-	if (pm == pmap_kernel()) {
-		tp->tp_cpumask = cpus_running;
-	} else if (va >= VM_MAXUSER_ADDRESS) {
-		tp->tp_cpumask |= (pm->pm_cpus | pm->pm_kernel_cpus);
-		tp->tp_usermask |= (pm->pm_cpus | pm->pm_kernel_cpus);
+	if (pm != pmap_kernel()) {
+		kcpuset_copy(tp->tp_cpumask, pm->pm_cpus);
+		if (va >= VM_MAXUSER_ADDRESS) {
+			kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
+		}
+		kcpuset_intersect(tp->tp_cpumask, kcpuset_running);
+		tp->tp_userpmap = 1;
 	} else {
-		tp->tp_cpumask |= pm->pm_cpus;
-		tp->tp_usermask |= pm->pm_cpus;
+		kcpuset_copy(tp->tp_cpumask, kcpuset_running);
+		tp->tp_userpmap = 0;
 	}
 	pmap_tlbstat_count(pm, va, why);
 	splx(s);
@@ -265,59 +277,54 @@ pmap_tlb_shootdown(struct pmap *pm, vadd
 
 #ifdef MULTIPROCESSOR
 #ifdef XEN
-static inline
-void pmap_tlb_processpacket(pmap_tlb_packet_t *tp)
+
+static inline void
+pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
 {
-	struct cpu_info *self = curcpu();
-	if (tp->tp_count == (uint16_t)-1) {
-		xen_mcast_tlbflush(tp->tp_cpumask &
-				   cpus_running &
-				   ~self->ci_cpumask);
-	} else {
+	pmap_tlb_mailbox_t *tm = &pmap_tlb_mailbox;
+
+	if (tp->tp_count != (uint16_t)-1) {
 		/* Invalidating a single page or a range of pages. */
-		int i;
-		for (i = tp->tp_count - 1; i >= 0; i--) {
-			xen_mcast_invlpg(tp->tp_va[i],
-					 tp->tp_cpumask & 
-					 cpus_running &
-					 ~self->ci_cpumask);
+		for (int i = tp->tp_count - 1; i >= 0; i--) {
+			xen_mcast_invlpg(tp->tp_va[i], target);
 		}
+	} else {
+		xen_mcast_tlbflush(target);
 	}
 
-	/* Ack the request */
-	atomic_and_32(&pmap_tlb_mailbox.tm_pending, ~tp->tp_cpumask);
+	/* Remote CPUs have been synchronously flushed. */
+	tm->tm_pendcount = 0;
 }
-#else /* XEN */
-static inline 
-void pmap_tlb_processpacket(pmap_tlb_packet_t *tp)
+
+#else
+
+static inline void
+pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
 {
 	int err = 0;
-	CPU_INFO_ITERATOR cii;
-	struct cpu_info *lci;
 
-	if (tp->tp_cpumask == cpus_running) {
-		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
-		    LAPIC_DLMODE_FIXED);
-	} else {
-		struct cpu_info *self = curcpu();
+	if (!kcpuset_match(target, kcpuset_running)) {
+		const struct cpu_info * const self = curcpu();
+		CPU_INFO_ITERATOR cii;
+		struct cpu_info *lci;
+
 		for (CPU_INFO_FOREACH(cii, lci)) {
-			if (__predict_false(lci == self)) {
-				continue;
-			}
-			if ((lci->ci_cpumask & pmap_tlb_mailbox.tm_pending) == 0) {
+			const cpuid_t lcid = cpu_index(lci);
+
+			if (__predict_false(lci == self) ||
+			    !kcpuset_isset(target, lcid)) {
 				continue;
 			}
-			KASSERT(lci->ci_flags & CPUF_RUNNING);
-
 			err |= x86_ipi(LAPIC_TLB_VECTOR,
-				       lci->ci_cpuid, LAPIC_DLMODE_FIXED);
+			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
 		}
+	} else {
+		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
+		    LAPIC_DLMODE_FIXED);
 	}
-
-	if (__predict_false(err != 0)) {
-		panic("pmap_tlb_shootdown: IPI failed");
-	}
+	KASSERT(err == 0);
 }
+
 #endif /* XEN */
 #endif /* MULTIPROCESSOR */
 
@@ -332,8 +339,9 @@ pmap_tlb_shootnow(void)
 	pmap_tlb_packet_t *tp;
 	pmap_tlb_mailbox_t *tm;
 	struct cpu_info *ci;
-	uint32_t remote;
-	uintptr_t gen;
+	kcpuset_t *target;
+	u_int local, gen, rcpucount;
+	cpuid_t cid;
 	int s;
 
 	KASSERT(kpreempt_disabled());
@@ -351,22 +359,30 @@ pmap_tlb_shootnow(void)
 		splx(s);
 		return;
 	}
-	gen = 0; /* XXXgcc */
 	tm = &pmap_tlb_mailbox;
-	remote = tp->tp_cpumask & ~ci->ci_cpumask;
+	cid = cpu_index(ci);
+
+	target = tp->tp_cpumask;
+	local = kcpuset_isset(target, cid) ? 1 : 0;
+	rcpucount = kcpuset_countset(target) - local;
+	gen = 0;
 
 #ifdef MULTIPROCESSOR
-	if (remote != 0) {
+	if (rcpucount) {
 		int count;
+
 		/*
 		 * Gain ownership of the shootdown mailbox.  We must stay
 		 * at IPL_VM once we own it or could deadlock against an
 		 * interrupt on this CPU trying to do the same.
 		 */
-		while (atomic_cas_32(&tm->tm_pending, 0, remote) != 0) {
+		KASSERT(rcpucount < ncpu);
+
+		while (atomic_cas_uint(&tm->tm_pendcount, 0, rcpucount) != 0) {
 			splx(s);
 			count = SPINLOCK_BACKOFF_MIN;
-			while (tm->tm_pending != 0) {
+			while (tm->tm_pendcount != 0) {
+				KASSERT(tm->tm_pendcount < ncpu);
 				SPINLOCK_BACKOFF(count);
 			}
 			s = splvm();
@@ -388,24 +404,15 @@ pmap_tlb_shootnow(void)
 		/*
 		 * Initiate shootdowns on remote CPUs.
 		 */
-		/* Trim mailbox wait to only for CPUF_RUNNING cpus */
-		atomic_and_32(&tm->tm_pending, cpus_running);
-
-		pmap_tlb_processpacket(tp);
-#ifdef XEN
-		/* 
-		 * remote CPUs have been synchronously flushed
-		 */
-		remote = 0; 
-#endif /* XEN */
+		pmap_tlb_processpacket(tp, target);
 	}
-#endif /* MULTIPROCESSOR */
+#endif
 
 	/*
 	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
-	 * perform local shootdowns and do not forget to update emap gen.
+	 * perform local shootdown if needed.
 	 */
-	if ((tp->tp_cpumask & ci->ci_cpumask) != 0) {
+	if (local) {
 		pmap_tlb_invalidate(tp);
 	}
 
@@ -417,26 +424,28 @@ pmap_tlb_shootnow(void)
 		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
 	}
 #endif
+	kcpuset_zero(tp->tp_cpumask);
+	tp->tp_userpmap = 0;
 	tp->tp_count = 0;
 	tp->tp_pte = 0;
-	tp->tp_cpumask = 0;
-	tp->tp_usermask = 0;
 	splx(s);
 
 	/*
 	 * Now wait for the current generation of updates to be
 	 * processed by remote CPUs.
 	 */
-	if (remote != 0 && tm->tm_pending != 0) {
+	if (rcpucount && tm->tm_pendcount) {
 		int count = SPINLOCK_BACKOFF_MIN;
-		while (tm->tm_pending != 0 && tm->tm_gen == gen) {
+
+		while (tm->tm_pendcount && tm->tm_gen == gen) {
+			KASSERT(tm->tm_pendcount < ncpu);
 			SPINLOCK_BACKOFF(count);
 		}
 	}
 }
 
 /*
- * pmap_tlb_ipi: pmap shootdown interrupt handler to invalidate TLB entries.
+ * pmap_tlb_intr: pmap shootdown interrupt handler to invalidate TLB entries.
  *
  * => Called from IPI only.
  */
@@ -444,10 +453,13 @@ void
 pmap_tlb_intr(void)
 {
 	pmap_tlb_packet_t *tp = &pmap_tlb_packet;
-	pmap_tlb_mailbox_t *tm;
-	struct cpu_info *ci;
-	uint32_t cm;
+	pmap_tlb_mailbox_t *tm = &pmap_tlb_mailbox;
+	struct cpu_info *ci = curcpu();
+	cpuid_t cid = cpu_index(ci);
 
+	KASSERT(tm->tm_pendcount > 0);
+
+	/* First, TLB flush. */
 	pmap_tlb_invalidate(tp);
 
 	/*
@@ -455,16 +467,13 @@ pmap_tlb_intr(void)
 	 * invalidations for this pmap, then take the CPU out of
 	 * the pmap's bitmask.
 	 */
-	ci = curcpu();
-	cm = ci->ci_cpumask;
-	if (ci->ci_tlbstate == TLBSTATE_LAZY && (tp->tp_usermask & cm) != 0) {
+	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
 		struct pmap *pm = ci->ci_pmap;
 
-		atomic_and_32(&pm->pm_cpus, ~cm);
+		kcpuset_atomic_clear(pm->pm_cpus, cid);
 		ci->ci_tlbstate = TLBSTATE_STALE;
 	}
 
-	/* Ack the request. */
-	tm = &pmap_tlb_mailbox;
-	atomic_and_32(&tm->tm_pending, ~cm);
+	/* Finally, ack the request. */
+	atomic_dec_uint(&tm->tm_pendcount);
 }
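
The shootdown mailbox changes shape here: instead of each remote CPU
clearing its own bit in a 32-bit tm_pending mask, the initiator
publishes a count of remote CPUs and every interrupt handler decrements
it.  A condensed sketch of the handshake (ownership and backoff details
as in the hunks above):

	/* Initiator, pmap_tlb_shootnow(): */
	rcpucount = kcpuset_countset(target) - local;
	while (atomic_cas_uint(&tm->tm_pendcount, 0, rcpucount) != 0) {
		SPINLOCK_BACKOFF(count);	/* gain mailbox ownership */
	}
	pmap_tlb_processpacket(tp, target);	/* send the IPIs */
	while (tm->tm_pendcount && tm->tm_gen == gen) {
		SPINLOCK_BACKOFF(count);	/* wait for all acks */
	}

	/* Responder, pmap_tlb_intr(): */
	pmap_tlb_invalidate(tp);		/* flush, then ack */
	atomic_dec_uint(&tm->tm_pendcount);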

Index: src/sys/arch/xen/include/xenpmap.h
diff -u src/sys/arch/xen/include/xenpmap.h:1.33 src/sys/arch/xen/include/xenpmap.h:1.34
--- src/sys/arch/xen/include/xenpmap.h:1.33	Fri Dec 30 16:55:21 2011
+++ src/sys/arch/xen/include/xenpmap.h	Fri Apr 20 22:23:24 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: xenpmap.h,v 1.33 2011/12/30 16:55:21 cherry Exp $	*/
+/*	$NetBSD: xenpmap.h,v 1.34 2012/04/20 22:23:24 rmind Exp $	*/
 
 /*
  *
@@ -34,6 +34,9 @@
 #include "opt_xen.h"
 #endif
 
+#include <sys/types.h>
+#include <sys/kcpuset.h>
+
 #define	INVALID_P2M_ENTRY	(~0UL)
 
 void xpq_queue_machphys_update(paddr_t, paddr_t);
@@ -46,11 +49,11 @@ void xpq_queue_tlb_flush(void);
 void xpq_queue_pin_table(paddr_t, int);
 void xpq_queue_unpin_table(paddr_t);
 int  xpq_update_foreign(paddr_t, pt_entry_t, int);
-void xen_vcpu_mcast_invlpg(vaddr_t, vaddr_t, uint32_t);
+void xen_vcpu_mcast_invlpg(vaddr_t, vaddr_t, kcpuset_t *);
 void xen_vcpu_bcast_invlpg(vaddr_t, vaddr_t);
-void xen_mcast_tlbflush(uint32_t);
+void xen_mcast_tlbflush(kcpuset_t *);
 void xen_bcast_tlbflush(void);
-void xen_mcast_invlpg(vaddr_t, uint32_t);
+void xen_mcast_invlpg(vaddr_t, kcpuset_t *);
 void xen_bcast_invlpg(vaddr_t);
 
 void pmap_xen_resume(void);

Index: src/sys/arch/xen/x86/cpu.c
diff -u src/sys/arch/xen/x86/cpu.c:1.90 src/sys/arch/xen/x86/cpu.c:1.91
--- src/sys/arch/xen/x86/cpu.c:1.90	Sun Mar 11 16:16:44 2012
+++ src/sys/arch/xen/x86/cpu.c	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.90 2012/03/11 16:16:44 jym Exp $	*/
+/*	$NetBSD: cpu.c,v 1.91 2012/04/20 22:23:25 rmind Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.90 2012/03/11 16:16:44 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.91 2012/04/20 22:23:25 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -121,10 +121,6 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.90
 #include <dev/ic/mc146818reg.h>
 #include <dev/isa/isareg.h>
 
-#if MAXCPUS > 32
-#error cpu_info contains 32bit bitmasks
-#endif
-
 static int	cpu_match(device_t, cfdata_t, void *);
 static void	cpu_attach(device_t, device_t, void *);
 static void	cpu_defer(device_t);
@@ -167,7 +163,6 @@ struct cpu_info cpu_info_primary __align
 	.ci_idepth = -1,
 	.ci_curlwp = &lwp0,
 	.ci_curldt = -1,
-	.ci_cpumask = 1,
 #ifdef TRAPLOG
 	.ci_tlog = &tlog_primary,
 #endif
@@ -181,9 +176,6 @@ struct cpu_info phycpu_info_primary __al
 struct cpu_info *cpu_info_list = &cpu_info_primary;
 struct cpu_info *phycpu_info_list = &phycpu_info_primary;
 
-uint32_t cpus_attached = 1;
-uint32_t cpus_running = 1;
-
 uint32_t cpu_feature[5]; /* X86 CPUID feature bits
 			  *	[0] basic features %edx
 			  *	[1] basic features %ecx
@@ -370,6 +362,7 @@ cpu_vm_init(struct cpu_info *ci)
 	 */
 	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
 	uvm_page_recolor(ncolors);
+	pmap_tlb_cpu_init(ci);
 }
 
 static void
@@ -436,7 +429,6 @@ cpu_attach_common(device_t parent, devic
 	}
 
 	KASSERT(ci->ci_cpuid == ci->ci_index);
-	ci->ci_cpumask = (1 << cpu_index(ci));
 	pmap_reference(pmap_kernel());
 	ci->ci_pmap = pmap_kernel();
 	ci->ci_tlbstate = TLBSTATE_STALE;
@@ -515,8 +507,6 @@ cpu_attach_common(device_t parent, devic
 		panic("unknown processor type??\n");
 	}
 
-	atomic_or_32(&cpus_attached, ci->ci_cpumask);
-
 #ifdef MPVERBOSE
 	if (mp_verbose) {
 		struct lwp *l = ci->ci_data.cpu_idlelwp;
@@ -565,7 +555,6 @@ cpu_init(struct cpu_info *ci)
 	mutex_init(&ci->ci_kpm_mtx, MUTEX_DEFAULT, IPL_VM);
 #endif
 
-	atomic_or_32(&cpus_running, ci->ci_cpumask);
 	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
 }
 
@@ -728,8 +717,8 @@ cpu_hatch(void *v)
 
 	cpu_switchto(NULL, ci->ci_data.cpu_idlelwp, true);
 
-	panic("switch to idle_loop context returned!\n");
-	/* NOTREACHED */
+	idle_loop(NULL);
+	KASSERT(false);
 }
 
 #if defined(DDB)
@@ -1118,14 +1107,13 @@ void
 cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
 {
 	KASSERT(pmap != pmap_kernel());
-	
+
 #if defined(__x86_64__) || defined(PAE)
 	struct cpu_info *ci = curcpu();
-	uint32_t cpumask = ci->ci_cpumask;
 
 	mutex_enter(&ci->ci_kpm_mtx);
 	/* make new pmap visible to pmap_kpm_sync_xcall() */
-	atomic_or_32(&pmap->pm_xen_ptp_cpus, cpumask);
+	kcpuset_set(pmap->pm_xen_ptp_cpus, cpu_index(ci));
 #endif
 #ifdef i386
 #ifdef PAE
@@ -1178,7 +1166,9 @@ cpu_load_pmap(struct pmap *pmap, struct 
 #endif /* __x86_64__ */
 #if defined(__x86_64__) || defined(PAE)
 	/* old pmap no longer visible to pmap_kpm_sync_xcall() */
-	atomic_and_32(&oldpmap->pm_xen_ptp_cpus, ~cpumask);
+	if (oldpmap != pmap_kernel())
+		kcpuset_clear(oldpmap->pm_xen_ptp_cpus, cpu_index(ci));
+
 	mutex_exit(&ci->ci_kpm_mtx);
 #endif
 }

Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.42 src/sys/arch/xen/x86/x86_xpmap.c:1.43
--- src/sys/arch/xen/x86/x86_xpmap.c:1.42	Fri Mar  2 16:37:38 2012
+++ src/sys/arch/xen/x86/x86_xpmap.c	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_xpmap.c,v 1.42 2012/03/02 16:37:38 bouyer Exp $	*/
+/*	$NetBSD: x86_xpmap.c,v 1.43 2012/04/20 22:23:25 rmind Exp $	*/
 
 /*
  * Copyright (c) 2006 Mathieu Ropert <m...@adviseo.fr>
@@ -69,7 +69,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.42 2012/03/02 16:37:38 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.43 2012/04/20 22:23:25 rmind Exp $");
 
 #include "opt_xen.h"
 #include "opt_ddb.h"
@@ -361,11 +361,31 @@ xpq_queue_invlpg(vaddr_t va)
 		panic("xpq_queue_invlpg");
 }
 
+#if defined(_LP64) && MAXCPUS > 64
+#error "XEN/amd64 uses 64 bit masks"
+#elif !defined(_LP64) && MAXCPUS > 32
+#error "XEN/i386 uses 32 bit masks"
+#else
+/* XXX: Inefficient. */
+static u_long
+xen_kcpuset2bits(kcpuset_t *kc)
+{
+	u_long bits = 0;
+
+	for (cpuid_t i = 0; i < ncpu; i++) {
+		if (kcpuset_isset(kc, i)) {
+			bits |= 1UL << i;
+		}
+	}
+	return bits;
+}
+#endif
+
 void
-xen_mcast_invlpg(vaddr_t va, uint32_t cpumask)
+xen_mcast_invlpg(vaddr_t va, kcpuset_t *kc)
 {
+	u_long xcpumask = xen_kcpuset2bits(kc);
 	mmuext_op_t op;
-	u_long xcpumask = cpumask;
 
 	/* Flush pending page updates */
 	xpq_flush_queue();
@@ -401,10 +421,10 @@ xen_bcast_invlpg(vaddr_t va)
 
 /* This is a synchronous call. */
 void
-xen_mcast_tlbflush(uint32_t cpumask)
+xen_mcast_tlbflush(kcpuset_t *kc)
 {
+	u_long xcpumask = xen_kcpuset2bits(kc);
 	mmuext_op_t op;
-	u_long xcpumask = cpumask;
 
 	/* Flush pending page updates */
 	xpq_flush_queue();
@@ -439,7 +459,7 @@ xen_bcast_tlbflush(void)
 
 /* This is a synchronous call. */
 void
-xen_vcpu_mcast_invlpg(vaddr_t sva, vaddr_t eva, uint32_t cpumask)
+xen_vcpu_mcast_invlpg(vaddr_t sva, vaddr_t eva, kcpuset_t *kc)
 {
 	KASSERT(eva > sva);
 
@@ -451,7 +471,7 @@ xen_vcpu_mcast_invlpg(vaddr_t sva, vaddr
 	eva &= ~PAGE_MASK;
 
 	for ( ; sva <= eva; sva += PAGE_SIZE) {
-		xen_mcast_invlpg(sva, cpumask);
+		xen_mcast_invlpg(sva, kc);
 	}
 
 	return;
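
Xen's multicast hypercalls still take a flat VCPU bitmask, so the MI
sets are converted at the boundary by xen_kcpuset2bits() above - hence
the MAXCPUS limits and the XXX about efficiency.  A hypothetical
caller, built only from calls that appear in these diffs:

	/* Flush one page on every running CPU that uses 'pm' (sketch). */
	kcpuset_t *kc;

	kcpuset_create(&kc, true);
	kcpuset_copy(kc, pm->pm_cpus);
	kcpuset_intersect(kc, kcpuset_running);
	xen_mcast_invlpg(va, kc);
	kcpuset_destroy(kc);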

Index: src/sys/arch/xen/x86/xen_pmap.c
diff -u src/sys/arch/xen/x86/xen_pmap.c:1.20 src/sys/arch/xen/x86/xen_pmap.c:1.21
--- src/sys/arch/xen/x86/xen_pmap.c:1.20	Sun Mar 11 17:14:30 2012
+++ src/sys/arch/xen/x86/xen_pmap.c	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_pmap.c,v 1.20 2012/03/11 17:14:30 jym Exp $	*/
+/*	$NetBSD: xen_pmap.c,v 1.21 2012/04/20 22:23:25 rmind Exp $	*/
 
 /*
  * Copyright (c) 2007 Manuel Bouyer.
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.20 2012/03/11 17:14:30 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.21 2012/04/20 22:23:25 rmind Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -360,7 +360,7 @@ xen_kpm_sync(struct pmap *pmap, int inde
 {
 	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	
+
 	KASSERT(pmap != NULL);
 	KASSERT(kpreempt_disabled());
 
@@ -370,19 +370,19 @@ xen_kpm_sync(struct pmap *pmap, int inde
 		if (ci == NULL) {
 			continue;
 		}
+		cpuid_t cid = cpu_index(ci);
 		if (pmap != pmap_kernel() &&
-		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) == 0)
+		    !kcpuset_isset(pmap->pm_xen_ptp_cpus, cid))
 			continue;
 
 		/* take the lock and check again */
 		mutex_enter(&ci->ci_kpm_mtx);
 		if (pmap == pmap_kernel() ||
-		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) != 0) {
+		    kcpuset_isset(pmap->pm_xen_ptp_cpus, cid)) {
 			pmap_kpm_setpte(ci, pmap, index);
 		}
 		mutex_exit(&ci->ci_kpm_mtx);
 	}
-	return;
 }
 
 #endif /* PAE || __x86_64__ */

Index: src/sys/kern/subr_kcpuset.c
diff -u src/sys/kern/subr_kcpuset.c:1.4 src/sys/kern/subr_kcpuset.c:1.5
--- src/sys/kern/subr_kcpuset.c:1.4	Sun Jan 29 19:08:26 2012
+++ src/sys/kern/subr_kcpuset.c	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_kcpuset.c,v 1.4 2012/01/29 19:08:26 rmind Exp $	*/
+/*	$NetBSD: subr_kcpuset.c,v 1.5 2012/04/20 22:23:25 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_kcpuset.c,v 1.4 2012/01/29 19:08:26 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_kcpuset.c,v 1.5 2012/04/20 22:23:25 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -169,8 +169,9 @@ kcpuset_early_ptr(kcpuset_t **kcptr)
 		 * Save the pointer, return pointer to static early field.
 		 * Need to zero it out.
 		 */
-		kc_noted_early[kc_last_idx++] = kcptr;
+		kc_noted_early[kc_last_idx] = kcptr;
 		kcp = (kcpuset_t *)&kc_bits_early[kc_last_idx];
+		kc_last_idx++;
 		memset(kcp, 0, KC_BITSIZE_EARLY);
 		KASSERT(kc_bitsize == KC_BITSIZE_EARLY);
 	} else {
@@ -208,7 +209,6 @@ kcpuset_create_raw(bool zero)
 void
 kcpuset_create(kcpuset_t **retkcp, bool zero)
 {
-
 	if (__predict_false(!kc_initialised)) {
 		/* Early boot use - special case. */
 		*retkcp = kcpuset_early_ptr(retkcp);
@@ -274,33 +274,31 @@ kcpuset_unuse(kcpuset_t *kcp, kcpuset_t 
 int
 kcpuset_copyin(const cpuset_t *ucp, kcpuset_t *kcp, size_t len)
 {
-	kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);
+	kcpuset_impl_t *kc __unused = KC_GETSTRUCT(kcp);
 
 	KASSERT(kc_initialised);
 	KASSERT(kc->kc_refcnt > 0);
 	KASSERT(kc->kc_next == NULL);
-	(void)kc;
 
-	if (len != kc_bitsize) { /* XXX */
+	if (len > kc_bitsize) { /* XXX */
 		return EINVAL;
 	}
-	return copyin(ucp, kcp, kc_bitsize);
+	return copyin(ucp, kcp, len);
 }
 
 int
 kcpuset_copyout(kcpuset_t *kcp, cpuset_t *ucp, size_t len)
 {
-	kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);
+	kcpuset_impl_t *kc __unused = KC_GETSTRUCT(kcp);
 
 	KASSERT(kc_initialised);
 	KASSERT(kc->kc_refcnt > 0);
 	KASSERT(kc->kc_next == NULL);
-	(void)kc;
 
-	if (len != kc_bitsize) { /* XXX */
+	if (len > kc_bitsize) { /* XXX */
 		return EINVAL;
 	}
-	return copyout(kcp, ucp, kc_bitsize);
+	return copyout(kcp, ucp, len);
 }
 
 /*
@@ -412,6 +410,15 @@ kcpuset_merge(kcpuset_t *kcp1, kcpuset_t
 	}
 }
 
+void
+kcpuset_intersect(kcpuset_t *kcp1, kcpuset_t *kcp2)
+{
+
+	for (size_t j = 0; j < kc_nfields; j++) {
+		kcp1->bits[j] &= kcp2->bits[j];
+	}
+}
+
 int
 kcpuset_countset(kcpuset_t *kcp)
 {
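
Two independent fixes are folded into this file.  The first hunk cures
an off-by-one in early boot: kc_last_idx was incremented between
recording the pointer and taking the storage, so the two uses referred
to different slots.  Schematically:

	/* Before (buggy): pointer recorded at slot N, bits from slot N+1. */
	kc_noted_early[kc_last_idx++] = kcptr;
	kcp = (kcpuset_t *)&kc_bits_early[kc_last_idx];

	/* After: both uses refer to slot N. */
	kc_noted_early[kc_last_idx] = kcptr;
	kcp = (kcpuset_t *)&kc_bits_early[kc_last_idx];
	kc_last_idx++;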

Index: src/sys/kern/sys_sched.c
diff -u src/sys/kern/sys_sched.c:1.41 src/sys/kern/sys_sched.c:1.42
--- src/sys/kern/sys_sched.c:1.41	Fri Apr 13 15:27:13 2012
+++ src/sys/kern/sys_sched.c	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_sched.c,v 1.41 2012/04/13 15:27:13 yamt Exp $	*/
+/*	$NetBSD: sys_sched.c,v 1.42 2012/04/20 22:23:25 rmind Exp $	*/
 
 /*
  * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.41 2012/04/13 15:27:13 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.42 2012/04/20 22:23:25 rmind Exp $");
 
 #include <sys/param.h>
 
@@ -314,7 +314,7 @@ genkcpuset(kcpuset_t **dset, const cpuse
 	kcpuset_t *kset;
 	int error;
 
-	kcpuset_create(&kset, false);
+	kcpuset_create(&kset, true);
 	error = kcpuset_copyin(sset, kset, size);
 	if (error) {
 		kcpuset_unuse(kset, NULL);
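
Requesting a zeroed set here is a consequence of the relaxed length
check in kcpuset_copyin() above: when the user-supplied set is shorter
than the kernel's, only the copied prefix is written, so the remaining
bits must already be clear.  The resulting pattern:

	kcpuset_create(&kset, true);	/* zeroed: high bits stay clear */
	error = kcpuset_copyin(sset, kset, size);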

Index: src/sys/sys/kcpuset.h
diff -u src/sys/sys/kcpuset.h:1.4 src/sys/sys/kcpuset.h:1.5
--- src/sys/sys/kcpuset.h:1.4	Sun Jan 29 19:08:26 2012
+++ src/sys/sys/kcpuset.h	Fri Apr 20 22:23:25 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: kcpuset.h,v 1.4 2012/01/29 19:08:26 rmind Exp $	*/
+/*	$NetBSD: kcpuset.h,v 1.5 2012/04/20 22:23:25 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2011 The NetBSD Foundation, Inc.
@@ -61,6 +61,7 @@ bool		kcpuset_isotherset(kcpuset_t *, cp
 bool		kcpuset_iszero(kcpuset_t *);
 bool		kcpuset_match(const kcpuset_t *, const kcpuset_t *);
 void		kcpuset_merge(kcpuset_t *, kcpuset_t *);
+void		kcpuset_intersect(kcpuset_t *, kcpuset_t *);
 int		kcpuset_countset(kcpuset_t *);
 
 void		kcpuset_atomic_set(kcpuset_t *, cpuid_t);
