Module Name:    src
Committed By:   riz
Date:           Wed Feb 22 18:56:49 UTC 2012

Modified Files:
        src/sys/arch/x86/include [netbsd-6]: cpu.h pmap.h
        src/sys/arch/x86/x86 [netbsd-6]: cpu.c pmap.c
        src/sys/arch/xen/include [netbsd-6]: hypervisor.h intr.h
        src/sys/arch/xen/x86 [netbsd-6]: cpu.c x86_xpmap.c xen_ipi.c xen_pmap.c
        src/sys/uvm [netbsd-6]: uvm_km.c uvm_kmguard.c uvm_map.c

Log Message:
Pull up following revision(s) (requested by bouyer in ticket #29):
        sys/arch/xen/x86/x86_xpmap.c: revision 1.39
        sys/arch/xen/include/hypervisor.h: revision 1.37
        sys/arch/xen/include/intr.h: revision 1.34
        sys/arch/xen/x86/xen_ipi.c: revision 1.10
        sys/arch/x86/x86/cpu.c: revision 1.97
        sys/arch/x86/include/cpu.h: revision 1.48
        sys/uvm/uvm_map.c: revision 1.315
        sys/arch/x86/x86/pmap.c: revision 1.165
        sys/arch/xen/x86/cpu.c: revision 1.81
        sys/arch/x86/x86/pmap.c: revision 1.167
        sys/arch/xen/x86/cpu.c: revision 1.82
        sys/arch/x86/x86/pmap.c: revision 1.168
        sys/arch/xen/x86/xen_pmap.c: revision 1.17
        sys/uvm/uvm_km.c: revision 1.122
        sys/uvm/uvm_kmguard.c: revision 1.10
        sys/arch/x86/include/pmap.h: revision 1.50
Apply patch proposed in PR port-xen/45975 (this does not solve the exact
problem reported here but is part of the solution):
xen_kpm_sync() is not working as expected,
leading to races between CPUs.
1 the check (xpq_cpu != &x86_curcpu) is always false because we
  have different x86_curcpu symbols with different addresses in the kernel.
  Fortunately, all addresses disassemble to the same code.
  Because of this we always use the code intended for bootstrap, which doesn't
  use cross-calls or lock.
2 once 1 above is fixed, xen_kpm_sync() will use xcalls to sync other CPUs,
  which causes it to sleep and pmap.c doesn't like that. It triggers this
  KASSERT() in pmap_unmap_ptes():
  KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
3 pmap->pm_cpus is not safe for the purpose of xen_kpm_sync(), which
  needs to know on which CPU a pmap is loaded *now*:
  pmap->pm_cpus is cleared before cpu_load_pmap() is called to switch
  to a new pmap, leaving a window where a pmap is still in a CPU's
  ci_kpm_pdir but not in pm_cpus. As a virtual CPU may be preempted
  by the hypervisor at any time, this window can be large enough to let another
  CPU free the PTP and reuse it as a normal page.
To fix 2), avoid cross-calls and IPIs completely, and instead
use a mutex to update all CPU's ci_kpm_pdir from the local CPU.
It's safe because we just need to update the table page, a tlbflush IPI will
happen later. As a side effect, we don't need different code for bootstrap,
fixing 1). The mutex added to struct cpu_info needs a small header reorganisation.
To fix 3), introduce a pm_xen_ptp_cpus mask which is updated from
cpu_load_pmap(), with the ci_kpm_mtx mutex held. Checking it with
ci_kpm_mtx held will avoid overwriting the wrong pmap's ci_kpm_pdir.
While there I removed the unused pmap_is_active() function;
and added some more details to DIAGNOSTIC panics.
When using uvm_km_pgremove_intrsafe() make sure mappings are removed
before returning the pages to the free pool. Otherwise, under Xen,
a page which still has a writable mapping could be allocated for
a PDP by another CPU and the hypervisor would refuse it (this is
PR port-xen/45975).
For this, move the pmap_kremove() calls inside uvm_km_pgremove_intrsafe(),
and do pmap_kremove()/uvm_pagefree() in batches of (at most) 16 entries
(as suggested by Chuck Silvers on tech-kern@, see also
http://mail-index.netbsd.org/tech-kern/2012/02/17/msg012727.html and
followups).
Avoid early use of xen_kpm_sync(); locks are not available at this time.
Don't call cpu_init() twice.
Makes LOCKDEBUG kernels boot again
Revert pmap_pte_flush() -> xpq_flush_queue() in previous.


To generate a diff of this commit:
cvs rdiff -u -r1.47 -r1.47.2.1 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.49 -r1.49.2.1 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.96 -r1.96.8.1 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.164 -r1.164.2.1 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.36.2.1 -r1.36.2.2 src/sys/arch/xen/include/hypervisor.h
cvs rdiff -u -r1.33 -r1.33.8.1 src/sys/arch/xen/include/intr.h
cvs rdiff -u -r1.80 -r1.80.2.1 src/sys/arch/xen/x86/cpu.c
cvs rdiff -u -r1.38 -r1.38.2.1 src/sys/arch/xen/x86/x86_xpmap.c
cvs rdiff -u -r1.9 -r1.9.2.1 src/sys/arch/xen/x86/xen_ipi.c
cvs rdiff -u -r1.16 -r1.16.2.1 src/sys/arch/xen/x86/xen_pmap.c
cvs rdiff -u -r1.120 -r1.120.2.1 src/sys/uvm/uvm_km.c
cvs rdiff -u -r1.9 -r1.9.2.1 src/sys/uvm/uvm_kmguard.c
cvs rdiff -u -r1.313 -r1.313.2.1 src/sys/uvm/uvm_map.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.47 src/sys/arch/x86/include/cpu.h:1.47.2.1
--- src/sys/arch/x86/include/cpu.h:1.47	Sun Feb 12 14:38:18 2012
+++ src/sys/arch/x86/include/cpu.h	Wed Feb 22 18:56:47 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.47 2012/02/12 14:38:18 jym Exp $	*/
+/*	$NetBSD: cpu.h,v 1.47.2.1 2012/02/22 18:56:47 riz Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -70,6 +70,7 @@
 #ifdef XEN
 #include <xen/xen-public/xen.h>
 #include <xen/xen-public/event_channel.h>
+#include <sys/mutex.h>
 #endif /* XEN */
 
 struct intrsource;
@@ -185,6 +186,7 @@ struct cpu_info {
 	/* Currently active user PGD (can't use rcr3() with Xen) */
 	pd_entry_t *	ci_kpm_pdir;	/* per-cpu PMD (va) */
 	paddr_t		ci_kpm_pdirpa;  /* per-cpu PMD (pa) */
+	kmutex_t	ci_kpm_mtx;
 #if defined(__x86_64__)
 	/* per-cpu version of normal_pdes */
 	pd_entry_t *	ci_normal_pdes[3]; /* Ok to hardcode. only for x86_64 && XEN */
@@ -317,7 +319,7 @@ lwp_t   *x86_curlwp(void);
 void cpu_boot_secondary_processors(void);
 void cpu_init_idle_lwps(void);
 void cpu_init_msrs(struct cpu_info *, bool);
-void cpu_load_pmap(struct pmap *);
+void cpu_load_pmap(struct pmap *, struct pmap *);
 void cpu_broadcast_halt(void);
 void cpu_kick(struct cpu_info *);
 

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.49 src/sys/arch/x86/include/pmap.h:1.49.2.1
--- src/sys/arch/x86/include/pmap.h:1.49	Sun Dec  4 16:24:13 2011
+++ src/sys/arch/x86/include/pmap.h	Wed Feb 22 18:56:47 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.49 2011/12/04 16:24:13 chs Exp $	*/
+/*	$NetBSD: pmap.h,v 1.49.2.1 2012/02/22 18:56:47 riz Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -165,6 +165,8 @@ struct pmap {
 	uint32_t pm_cpus;		/* mask of CPUs using pmap */
 	uint32_t pm_kernel_cpus;	/* mask of CPUs using kernel part
 					 of pmap */
+	uint32_t pm_xen_ptp_cpus;	/* mask of CPUs which have this pmap's
+					 ptp mapped */
 	uint64_t pm_ncsw;		/* for assertions */
 	struct vm_page *pm_gc_ptp;	/* pages from pmap g/c */
 };

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.96 src/sys/arch/x86/x86/cpu.c:1.96.8.1
--- src/sys/arch/x86/x86/cpu.c:1.96	Tue Oct 18 05:16:02 2011
+++ src/sys/arch/x86/x86/cpu.c	Wed Feb 22 18:56:46 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.96 2011/10/18 05:16:02 jruoho Exp $	*/
+/*	$NetBSD: cpu.c,v 1.96.8.1 2012/02/22 18:56:46 riz Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.96 2011/10/18 05:16:02 jruoho Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.96.8.1 2012/02/22 18:56:46 riz Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -1228,7 +1228,7 @@ x86_cpu_idle_halt(void)
  * Loads pmap for the current CPU.
  */
 void
-cpu_load_pmap(struct pmap *pmap)
+cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
 {
 #ifdef PAE
 	int i, s;

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.164 src/sys/arch/x86/x86/pmap.c:1.164.2.1
--- src/sys/arch/x86/x86/pmap.c:1.164	Sat Feb 11 18:59:41 2012
+++ src/sys/arch/x86/x86/pmap.c	Wed Feb 22 18:56:47 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.164 2012/02/11 18:59:41 chs Exp $	*/
+/*	$NetBSD: pmap.c,v 1.164.2.1 2012/02/22 18:56:47 riz Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.164 2012/02/11 18:59:41 chs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.164.2.1 2012/02/22 18:56:47 riz Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -561,7 +561,6 @@ static void		 pmap_freepage(struct pmap 
 static void		 pmap_free_ptp(struct pmap *, struct vm_page *,
 				       vaddr_t, pt_entry_t *,
 				       pd_entry_t * const *);
-static bool		 pmap_is_active(struct pmap *, struct cpu_info *, bool);
 static bool		 pmap_remove_pte(struct pmap *, struct vm_page *,
 					 pt_entry_t *, vaddr_t,
 					 struct pv_entry **);
@@ -680,19 +679,6 @@ pmap_is_curpmap(struct pmap *pmap)
 }
 
 /*
- * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
- */
-
-inline static bool
-pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel)
-{
-
-	return (pmap == pmap_kernel() ||
-	    (pmap->pm_cpus & ci->ci_cpumask) != 0 ||
-	    (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
-}
-
-/*
  *	Add a reference to the specified pmap.
  */
 
@@ -781,7 +767,7 @@ pmap_map_ptes(struct pmap *pmap, struct 
 		ci->ci_tlbstate = TLBSTATE_VALID;
 		atomic_or_32(&pmap->pm_cpus, cpumask);
 		atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
-		cpu_load_pmap(pmap);
+		cpu_load_pmap(pmap, curpmap);
 	}
 	pmap->pm_ncsw = l->l_ncsw;
 	*pmap2 = curpmap;
@@ -2239,6 +2225,7 @@ pmap_create(void)
 	pmap->pm_flags = 0;
 	pmap->pm_cpus = 0;
 	pmap->pm_kernel_cpus = 0;
+	pmap->pm_xen_ptp_cpus = 0;
 	pmap->pm_gc_ptp = NULL;
 
 	/* init the LDT */
@@ -2329,9 +2316,26 @@ pmap_destroy(struct pmap *pmap)
 	}
 
 #ifdef DIAGNOSTIC
-	for (CPU_INFO_FOREACH(cii, ci))
+	for (CPU_INFO_FOREACH(cii, ci)) {
 		if (ci->ci_pmap == pmap)
 			panic("destroying pmap being used");
+#if defined(XEN) && defined(__x86_64__)
+		for (i = 0; i < PDIR_SLOT_PTE; i++) {
+			if (pmap->pm_pdir[i] != 0 &&
+			    ci->ci_kpm_pdir[i] == pmap->pm_pdir[i]) {
+				printf("pmap_destroy(%p) pmap_kernel %p "
+				    "curcpu %d cpu %d ci_pmap %p "
+				    "ci->ci_kpm_pdir[%d]=%" PRIx64
+				    " pmap->pm_pdir[%d]=%" PRIx64 "\n",
+				    pmap, pmap_kernel(), curcpu()->ci_index,
+				    ci->ci_index, ci->ci_pmap,
+				    i, ci->ci_kpm_pdir[i],
+				    i, pmap->pm_pdir[i]);
+				panic("pmap_destroy: used pmap");
+			}
+		}
+#endif
+	}
 #endif /* DIAGNOSTIC */
 
 	/*
@@ -2760,7 +2764,7 @@ pmap_load(void)
 	lldt(pmap->pm_ldt_sel);
 
 	u_int gen = uvm_emap_gen_return();
-	cpu_load_pmap(pmap);
+	cpu_load_pmap(pmap, oldpmap);
 	uvm_emap_update(gen);
 
 	ci->ci_want_pmapload = 0;
@@ -4179,14 +4183,30 @@ pmap_alloc_level(pd_entry_t * const *pde
 			pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
 #ifdef XEN
 			xpq_queue_pte_update(xpmap_ptetomach(&pdep[i]), pte);
-			if (level == PTP_LEVELS) {
 #if defined(PAE) || defined(__x86_64__)
-				if (i >= PDIR_SLOT_KERN) {
+			if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) {
+				if (__predict_true(
+				    cpu_info_primary.ci_flags & CPUF_PRESENT)) {
 					/* update per-cpu PMDs on all cpus */
 					xen_kpm_sync(pmap_kernel(), i);
+				} else {
+					/*
+					 * too early; update primary CPU
+					 * PMD only (without locks)
+					 */
+#ifdef PAE
+					pd_entry_t *cpu_pdep =
+					    &cpu_info_primary.ci_kpm_pdir[l2tol2(i)];
+#endif
+#ifdef __x86_64__
+					pd_entry_t *cpu_pdep =
+						&cpu_info_primary.ci_kpm_pdir[i];
+#endif
+					xpq_queue_pte_update(
+					    xpmap_ptetomach(cpu_pdep), pte);
 				}
-#endif /* PAE || __x86_64__ */
 			}
+#endif /* PAE || __x86_64__ */
 #else /* XEN */
 			pdep[i] = pte;
 #endif /* XEN */

Index: src/sys/arch/xen/include/hypervisor.h
diff -u src/sys/arch/xen/include/hypervisor.h:1.36.2.1 src/sys/arch/xen/include/hypervisor.h:1.36.2.2
--- src/sys/arch/xen/include/hypervisor.h:1.36.2.1	Wed Feb 22 18:48:45 2012
+++ src/sys/arch/xen/include/hypervisor.h	Wed Feb 22 18:56:46 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: hypervisor.h,v 1.36.2.1 2012/02/22 18:48:45 riz Exp $	*/
+/*	$NetBSD: hypervisor.h,v 1.36.2.2 2012/02/22 18:56:46 riz Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -91,7 +91,6 @@ struct xen_npx_attach_args {
 #include <xen/xen-public/io/netif.h>
 #include <xen/xen-public/io/blkif.h>
 
-#include <machine/cpu.h>
 #include <machine/hypercalls.h>
 
 #undef u8

Index: src/sys/arch/xen/include/intr.h
diff -u src/sys/arch/xen/include/intr.h:1.33 src/sys/arch/xen/include/intr.h:1.33.8.1
--- src/sys/arch/xen/include/intr.h:1.33	Thu Aug 11 17:58:59 2011
+++ src/sys/arch/xen/include/intr.h	Wed Feb 22 18:56:46 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.h,v 1.33 2011/08/11 17:58:59 cherry Exp $	*/
+/*	$NetBSD: intr.h,v 1.33.8.1 2012/02/22 18:56:46 riz Exp $	*/
 /*	NetBSD intr.h,v 1.15 2004/10/31 10:39:34 yamt Exp	*/
 
 /*-
@@ -39,12 +39,13 @@
 #include <xen/xen.h>
 #include <xen/hypervisor.h>
 #include <xen/evtchn.h>
-#include <machine/cpu.h>
 #include <machine/pic.h>
 #include <sys/evcnt.h>
 
 #include "opt_xen.h"
 
+
+struct cpu_info;
 /*
  * Struct describing an event channel. 
  */
@@ -152,8 +153,6 @@ splraiseipl(ipl_cookie_t icookie)
  * Stub declarations.
  */
 
-struct cpu_info;
-
 struct pcibus_attach_args;
 
 #ifdef MULTIPROCESSOR

Index: src/sys/arch/xen/x86/cpu.c
diff -u src/sys/arch/xen/x86/cpu.c:1.80 src/sys/arch/xen/x86/cpu.c:1.80.2.1
--- src/sys/arch/xen/x86/cpu.c:1.80	Mon Feb 13 23:54:58 2012
+++ src/sys/arch/xen/x86/cpu.c	Wed Feb 22 18:56:45 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.80 2012/02/13 23:54:58 jym Exp $	*/
+/*	$NetBSD: cpu.c,v 1.80.2.1 2012/02/22 18:56:45 riz Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.80 2012/02/13 23:54:58 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.80.2.1 2012/02/22 18:56:45 riz Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -487,7 +487,6 @@ cpu_attach_common(device_t parent, devic
 	case CPU_ROLE_BP:
 		atomic_or_32(&ci->ci_flags, CPUF_BSP);
 		cpu_identify(ci);
-		cpu_init(ci);
 #if 0
 		x86_errata();
 #endif
@@ -595,6 +594,9 @@ cpu_init(struct cpu_info *ci)
 	/* No user PGD mapped for this CPU yet */
 	ci->ci_xen_current_user_pgd = 0;
 #endif
+#if defined(__x86_64__) || defined(PAE)
+	mutex_init(&ci->ci_kpm_mtx, MUTEX_DEFAULT, IPL_VM);
+#endif
 
 	atomic_or_32(&cpus_running, ci->ci_cpumask);
 	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
@@ -1172,62 +1174,76 @@ x86_cpu_idle_xen(void)
  * Loads pmap for the current CPU.
  */
 void
-cpu_load_pmap(struct pmap *pmap)
+cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
 {
+#if defined(__x86_64__) || defined(PAE)
+	struct cpu_info *ci = curcpu();
+	uint32_t cpumask = ci->ci_cpumask;
+
+	mutex_enter(&ci->ci_kpm_mtx);
+	/* make new pmap visible to pmap_kpm_sync_xcall() */
+	atomic_or_32(&pmap->pm_xen_ptp_cpus, cpumask);
+#endif
 #ifdef i386
 #ifdef PAE
-	int i, s;
-	struct cpu_info *ci;
-
-	s = splvm(); /* just to be safe */
-	ci = curcpu();
-	paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa);
-	/* don't update the kernel L3 slot */
-	for (i = 0 ; i < PDP_SIZE - 1; i++) {
-		xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t),
-		    xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V);
+	{
+		int i;
+		paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa);
+		/* don't update the kernel L3 slot */
+		for (i = 0 ; i < PDP_SIZE - 1; i++) {
+			xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t),
+			    xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V);
+		}
+		tlbflush();
 	}
-	splx(s);
-	tlbflush();
 #else /* PAE */
 	lcr3(pmap_pdirpa(pmap, 0));
 #endif /* PAE */
 #endif /* i386 */
 
 #ifdef __x86_64__
-	int i, s;
-	pd_entry_t *new_pgd;
-	struct cpu_info *ci;
-	paddr_t l4_pd_ma;
+	{
+		int i;
+		pd_entry_t *new_pgd;
+		paddr_t l4_pd_ma;
 
-	ci = curcpu();
-	l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
+		l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
 
-	/*
-	 * Map user space address in kernel space and load
-	 * user cr3
-	 */
-	s = splvm();
-	new_pgd = pmap->pm_pdir;
+		/*
+		 * Map user space address in kernel space and load
+		 * user cr3
+		 */
+		new_pgd = pmap->pm_pdir;
+		KASSERT(pmap == ci->ci_pmap);
 
-	/* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
-	for (i = 0; i < PDIR_SLOT_PTE; i++) {
-		xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t), new_pgd[i]);
-	}
+		/* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
+		for (i = 0; i < PDIR_SLOT_PTE; i++) {
+			KASSERT(pmap != pmap_kernel() || new_pgd[i] == 0);
+			if (ci->ci_kpm_pdir[i] != new_pgd[i]) {
+				xpq_queue_pte_update(
+				   l4_pd_ma + i * sizeof(pd_entry_t),
+				    new_pgd[i]);
+			}
+		}
 
-	if (__predict_true(pmap != pmap_kernel())) {
-		xen_set_user_pgd(pmap_pdirpa(pmap, 0));
-		ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
-	}
-	else {
-		xpq_queue_pt_switch(l4_pd_ma);
-		ci->ci_xen_current_user_pgd = 0;
-	}
+		if (__predict_true(pmap != pmap_kernel())) {
+			xen_set_user_pgd(pmap_pdirpa(pmap, 0));
+			ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
+		}
+		else {
+			xpq_queue_pt_switch(l4_pd_ma);
+			ci->ci_xen_current_user_pgd = 0;
+		}
 
-	tlbflush();
-	splx(s);
+		tlbflush();
+	}
 
 #endif /* __x86_64__ */
+#if defined(__x86_64__) || defined(PAE)
+	/* old pmap no longer visible to pmap_kpm_sync_xcall() */
+	atomic_and_32(&oldpmap->pm_xen_ptp_cpus, ~cpumask);
+	mutex_exit(&ci->ci_kpm_mtx);
+#endif
 }
 
  /*

Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.38 src/sys/arch/xen/x86/x86_xpmap.c:1.38.2.1
--- src/sys/arch/xen/x86/x86_xpmap.c:1.38	Thu Jan 12 19:49:37 2012
+++ src/sys/arch/xen/x86/x86_xpmap.c	Wed Feb 22 18:56:45 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_xpmap.c,v 1.38 2012/01/12 19:49:37 cherry Exp $	*/
+/*	$NetBSD: x86_xpmap.c,v 1.38.2.1 2012/02/22 18:56:45 riz Exp $	*/
 
 /*
  * Copyright (c) 2006 Mathieu Ropert <[email protected]>
@@ -69,7 +69,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.38 2012/01/12 19:49:37 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.38.2.1 2012/02/22 18:56:45 riz Exp $");
 
 #include "opt_xen.h"
 #include "opt_ddb.h"
@@ -185,8 +185,12 @@ retry:
 	ret = HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok);
 
 	if (xpq_idx != 0 && ret < 0) {
-		printf("xpq_flush_queue: %d entries (%d successful)\n",
-		    xpq_idx, ok);
+		struct cpu_info *ci;
+		CPU_INFO_ITERATOR cii;
+
+		printf("xpq_flush_queue: %d entries (%d successful) on "
+		    "cpu%d (%ld)\n",
+		    xpq_idx, ok, xpq_cpu()->ci_index, xpq_cpu()->ci_cpuid);
 
 		if (ok != 0) {
 			xpq_queue += ok;
@@ -195,9 +199,23 @@ retry:
 			goto retry;
 		}
 
-		for (i = 0; i < xpq_idx; i++)
-			printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
-			   xpq_queue[i].ptr, xpq_queue[i].val);
+		for (CPU_INFO_FOREACH(cii, ci)) {
+			xpq_queue = xpq_queue_array[ci->ci_cpuid];
+			xpq_idx = xpq_idx_array[ci->ci_cpuid];
+			printf("cpu%d (%ld):\n", ci->ci_index, ci->ci_cpuid);
+			for (i = 0; i < xpq_idx; i++) {
+				printf("  0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
+				   xpq_queue[i].ptr, xpq_queue[i].val);
+			}
+#ifdef __x86_64__
+			for (i = 0; i < PDIR_SLOT_PTE; i++) {
+				if (ci->ci_kpm_pdir[i] == 0)
+					continue;
+				printf(" kpm_pdir[%d]: 0x%" PRIx64 "\n",
+				    i, ci->ci_kpm_pdir[i]);
+			}
+#endif
+		}
 		panic("HYPERVISOR_mmu_update failed, ret: %d\n", ret);
 	}
 	xpq_idx_array[xpq_cpu()->ci_cpuid] = 0;

Index: src/sys/arch/xen/x86/xen_ipi.c
diff -u src/sys/arch/xen/x86/xen_ipi.c:1.9 src/sys/arch/xen/x86/xen_ipi.c:1.9.2.1
--- src/sys/arch/xen/x86/xen_ipi.c:1.9	Fri Dec 30 12:16:19 2011
+++ src/sys/arch/xen/x86/xen_ipi.c	Wed Feb 22 18:56:45 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: xen_ipi.c,v 1.9 2011/12/30 12:16:19 cherry Exp $ */
+/* $NetBSD: xen_ipi.c,v 1.9.2.1 2012/02/22 18:56:45 riz Exp $ */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -33,22 +33,21 @@
 
 /* 
  * Based on: x86/ipi.c
- * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.9 2011/12/30 12:16:19 cherry Exp $"); 
+ * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.9.2.1 2012/02/22 18:56:45 riz Exp $"); 
  */
 
-__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.9 2011/12/30 12:16:19 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.9.2.1 2012/02/22 18:56:45 riz Exp $");
 
 #include <sys/types.h>
 
 #include <sys/atomic.h>
-#include <sys/mutex.h>
 #include <sys/cpu.h>
+#include <sys/mutex.h>
 #include <sys/device.h>
 #include <sys/xcall.h>
 #include <sys/errno.h>
 #include <sys/systm.h>
 
-#include <machine/cpu.h>
 #ifdef __x86_64__
 #include <machine/fpu.h>
 #else

Index: src/sys/arch/xen/x86/xen_pmap.c
diff -u src/sys/arch/xen/x86/xen_pmap.c:1.16 src/sys/arch/xen/x86/xen_pmap.c:1.16.2.1
--- src/sys/arch/xen/x86/xen_pmap.c:1.16	Sat Jan 28 07:19:17 2012
+++ src/sys/arch/xen/x86/xen_pmap.c	Wed Feb 22 18:56:45 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_pmap.c,v 1.16 2012/01/28 07:19:17 cherry Exp $	*/
+/*	$NetBSD: xen_pmap.c,v 1.16.2.1 2012/02/22 18:56:45 riz Exp $	*/
 
 /*
  * Copyright (c) 2007 Manuel Bouyer.
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.16 2012/01/28 07:19:17 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.16.2.1 2012/02/22 18:56:45 riz Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -350,34 +350,7 @@ pmap_kpm_setpte(struct cpu_info *ci, str
 		xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
 		pmap->pm_pdir[index]);
 #endif /* PAE */
-}
-
-static void
-pmap_kpm_sync_xcall(void *arg1, void *arg2)
-{
-	KASSERT(arg1 != NULL);
-	KASSERT(arg2 != NULL);
-
-	struct pmap *pmap = arg1;
-	int index = *(int *)arg2;
-	KASSERT(pmap == pmap_kernel() || index < PDIR_SLOT_PTE);
-	
-	struct cpu_info *ci = xpq_cpu();
-
-#ifdef PAE
-	KASSERTMSG(pmap == pmap_kernel(), "%s not allowed for PAE user pmaps", __func__);
-#endif /* PAE */
-
-	if (__predict_true(pmap != pmap_kernel()) &&
-	    pmap != ci->ci_pmap) {
-		/* User pmap changed. Nothing to do. */
-		return;
-	}
-
-	/* Update per-cpu kpm */
-	pmap_kpm_setpte(ci, pmap, index);
-	pmap_pte_flush();
-	return;
+	xpq_flush_queue();
 }
 
 /*
@@ -387,68 +360,30 @@ pmap_kpm_sync_xcall(void *arg1, void *ar
 void
 xen_kpm_sync(struct pmap *pmap, int index)
 {
-	uint64_t where;
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
 	
 	KASSERT(pmap != NULL);
 
 	pmap_pte_flush();
 
-	if (__predict_false(xpq_cpu != &x86_curcpu)) { /* Too early to xcall */
-		CPU_INFO_ITERATOR cii;
-		struct cpu_info *ci;
-		int s = splvm();
-		for (CPU_INFO_FOREACH(cii, ci)) {
-			if (ci == NULL) {
-				continue;
-			}
-			if (pmap == pmap_kernel() ||
-			    ci->ci_cpumask & pmap->pm_cpus) {
-				pmap_kpm_setpte(ci, pmap, index);
-			}
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		if (ci == NULL) {
+			continue;
 		}
-		pmap_pte_flush();
-		splx(s);
-		return;
-	}
-
-	if (pmap == pmap_kernel()) {
-		where = xc_broadcast(XC_HIGHPRI,
-		    pmap_kpm_sync_xcall, pmap, &index);
-		xc_wait(where);
-	} else {
-		KASSERT(mutex_owned(pmap->pm_lock));
-		KASSERT(kpreempt_disabled());
-
-		CPU_INFO_ITERATOR cii;
-		struct cpu_info *ci;
-		for (CPU_INFO_FOREACH(cii, ci)) {
-			if (ci == NULL) {
-				continue;
-			}
-			while (ci->ci_cpumask & pmap->pm_cpus) {
-#ifdef MULTIPROCESSOR
-#define CPU_IS_CURCPU(ci) __predict_false((ci) == curcpu())
-#else /* MULTIPROCESSOR */
-#define CPU_IS_CURCPU(ci) __predict_true((ci) == curcpu())
-#endif /* MULTIPROCESSOR */
-#if 0 /* XXX: Race with remote pmap_load() */
-				if (ci->ci_want_pmapload &&
-				    !CPU_IS_CURCPU(ci)) {
-					/*
-					 * XXX: make this more cpu
-					 *  cycle friendly/co-operate
-					 *  with pmap_load()
-					 */
-					continue;
-				    }
-#endif /* 0 */
-				where = xc_unicast(XC_HIGHPRI, pmap_kpm_sync_xcall,
-				    pmap, &index, ci);
-				xc_wait(where);
-				break;
-			}
+		if (pmap != pmap_kernel() &&
+		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) == 0)
+			continue;
+
+		/* take the lock and check again */
+		mutex_enter(&ci->ci_kpm_mtx);
+		if (pmap == pmap_kernel() ||
+		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) != 0) {
+			pmap_kpm_setpte(ci, pmap, index);
 		}
+		mutex_exit(&ci->ci_kpm_mtx);
 	}
+	return;
 }
 
 #endif /* PAE || __x86_64__ */

Index: src/sys/uvm/uvm_km.c
diff -u src/sys/uvm/uvm_km.c:1.120 src/sys/uvm/uvm_km.c:1.120.2.1
--- src/sys/uvm/uvm_km.c:1.120	Fri Feb 10 17:35:47 2012
+++ src/sys/uvm/uvm_km.c	Wed Feb 22 18:56:48 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_km.c,v 1.120 2012/02/10 17:35:47 para Exp $	*/
+/*	$NetBSD: uvm_km.c,v 1.120.2.1 2012/02/22 18:56:48 riz Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -120,7 +120,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.120 2012/02/10 17:35:47 para Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.120.2.1 2012/02/22 18:56:48 riz Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -459,8 +459,12 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t
 void
 uvm_km_pgremove_intrsafe(struct vm_map *map, vaddr_t start, vaddr_t end)
 {
+#define __PGRM_BATCH 16
 	struct vm_page *pg;
-	paddr_t pa;
+	paddr_t pa[__PGRM_BATCH];
+	int npgrm, i;
+	vaddr_t va, batch_vastart;
+
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
 
 	KASSERT(VM_MAP_IS_KERNEL(map));
@@ -468,16 +472,30 @@ uvm_km_pgremove_intrsafe(struct vm_map *
 	KASSERT(start < end);
 	KASSERT(end <= vm_map_max(map));
 
-	for (; start < end; start += PAGE_SIZE) {
-		if (!pmap_extract(pmap_kernel(), start, &pa)) {
-			continue;
+	for (va = start; va < end;) {
+		batch_vastart = va;
+		/* create a batch of at most __PGRM_BATCH pages to free */
+		for (i = 0;
+		     i < __PGRM_BATCH && va < end;
+		     va += PAGE_SIZE) {
+			if (!pmap_extract(pmap_kernel(), va, &pa[i])) {
+				continue;
+			}
+			i++;
+		}
+		npgrm = i;
+		/* now remove the mappings */
+		pmap_kremove(batch_vastart, PAGE_SIZE * npgrm);
+		/* and free the pages */
+		for (i = 0; i < npgrm; i++) {
+			pg = PHYS_TO_VM_PAGE(pa[i]);
+			KASSERT(pg);
+			KASSERT(pg->uobject == NULL && pg->uanon == NULL);
+			KASSERT((pg->flags & PG_BUSY) == 0);
+			uvm_pagefree(pg);
 		}
-		pg = PHYS_TO_VM_PAGE(pa);
-		KASSERT(pg);
-		KASSERT(pg->uobject == NULL && pg->uanon == NULL);
-		KASSERT((pg->flags & PG_BUSY) == 0);
-		uvm_pagefree(pg);
 	}
+#undef __PGRM_BATCH
 }
 
 #if defined(DEBUG)
@@ -670,7 +688,6 @@ uvm_km_free(struct vm_map *map, vaddr_t 
 		 * remove it after.  See comment below about KVA visibility.
 		 */
 		uvm_km_pgremove_intrsafe(map, addr, addr + size);
-		pmap_kremove(addr, size);
 	}
 
 	/*
@@ -747,7 +764,6 @@ again:
 			} else {
 				uvm_km_pgremove_intrsafe(kernel_map, va,
 				    va + size);
-				pmap_kremove(va, size);
 				vmem_free(kmem_va_arena, va, size);
 				return ENOMEM;
 			}
@@ -783,7 +799,6 @@ uvm_km_kmem_free(vmem_t *vm, vmem_addr_t
 	}
 #endif /* PMAP_UNMAP_POOLPAGE */
 	uvm_km_pgremove_intrsafe(kernel_map, addr, addr + size);
-	pmap_kremove(addr, size);
 	pmap_update(pmap_kernel());
 
 	vmem_free(vm, addr, size);

Index: src/sys/uvm/uvm_kmguard.c
diff -u src/sys/uvm/uvm_kmguard.c:1.9 src/sys/uvm/uvm_kmguard.c:1.9.2.1
--- src/sys/uvm/uvm_kmguard.c:1.9	Sun Feb  5 11:08:06 2012
+++ src/sys/uvm/uvm_kmguard.c	Wed Feb 22 18:56:49 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_kmguard.c,v 1.9 2012/02/05 11:08:06 rmind Exp $	*/
+/*	$NetBSD: uvm_kmguard.c,v 1.9.2.1 2012/02/22 18:56:49 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_kmguard.c,v 1.9 2012/02/05 11:08:06 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_kmguard.c,v 1.9.2.1 2012/02/22 18:56:49 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -180,7 +180,6 @@ uvm_kmguard_free(struct uvm_kmguard *kg,
 	 */
 
 	uvm_km_pgremove_intrsafe(kernel_map, va, va + PAGE_SIZE * 2);
-	pmap_kremove(va, PAGE_SIZE * 2);
 	pmap_update(pmap_kernel());
 
 	/*

Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.313 src/sys/uvm/uvm_map.c:1.313.2.1
--- src/sys/uvm/uvm_map.c:1.313	Sun Feb 12 20:28:14 2012
+++ src/sys/uvm/uvm_map.c	Wed Feb 22 18:56:48 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.313 2012/02/12 20:28:14 martin Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.313.2.1 2012/02/22 18:56:48 riz Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.313 2012/02/12 20:28:14 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.313.2.1 2012/02/22 18:56:48 riz Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvmhist.h"
@@ -2246,7 +2246,6 @@ uvm_unmap_remove(struct vm_map *map, vad
 			if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
 				uvm_km_pgremove_intrsafe(map, entry->start,
 				    entry->end);
-				pmap_kremove(entry->start, len);
 			}
 		} else if (UVM_ET_ISOBJ(entry) &&
 			   UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {

Reply via email to