Module Name:    src
Committed By:   cherry
Date:           Sun Nov  6 15:18:19 UTC 2011

Modified Files:
        src/sys/arch/amd64/include: pmap.h
        src/sys/arch/i386/include: pmap.h
        src/sys/arch/x86/include: cpu.h
        src/sys/arch/x86/x86: pmap.c
        src/sys/arch/xen/x86: cpu.c x86_xpmap.c

Log Message:
[merging from cherry-xenmp] make pmap_kernel() shadow PMD per-cpu and MP aware.


To generate a diff of this commit:
cvs rdiff -u -r1.27 -r1.28 src/sys/arch/amd64/include/pmap.h
cvs rdiff -u -r1.111 -r1.112 src/sys/arch/i386/include/pmap.h
cvs rdiff -u -r1.40 -r1.41 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.137 -r1.138 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.69 -r1.70 src/sys/arch/xen/x86/cpu.c
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/xen/x86/x86_xpmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/include/pmap.h
diff -u src/sys/arch/amd64/include/pmap.h:1.27 src/sys/arch/amd64/include/pmap.h:1.28
--- src/sys/arch/amd64/include/pmap.h:1.27	Sun Nov  6 11:40:46 2011
+++ src/sys/arch/amd64/include/pmap.h	Sun Nov  6 15:18:18 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.27 2011/11/06 11:40:46 cherry Exp $	*/
+/*	$NetBSD: pmap.h,v 1.28 2011/11/06 15:18:18 cherry Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -192,7 +192,8 @@
 #define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
 
 #define PDP_PDE		(L4_BASE + PDIR_SLOT_PTE)
-#define APDP_PDE	(L4_BASE + PDIR_SLOT_APTE)
+#define APDP_PDE	(&curcpu()->ci_kpm_pdir[PDIR_SLOT_APTE])
+#define APDP_PDE_SHADOW	(L4_BASE + PDIR_SLOT_APTE)
 
 #define PDP_BASE	L4_BASE
 #define APDP_BASE	AL4_BASE

Index: src/sys/arch/i386/include/pmap.h
diff -u src/sys/arch/i386/include/pmap.h:1.111 src/sys/arch/i386/include/pmap.h:1.112
--- src/sys/arch/i386/include/pmap.h:1.111	Sun Nov  6 11:40:46 2011
+++ src/sys/arch/i386/include/pmap.h	Sun Nov  6 15:18:18 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.111 2011/11/06 11:40:46 cherry Exp $	*/
+/*	$NetBSD: pmap.h,v 1.112 2011/11/06 15:18:18 cherry Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -277,7 +277,7 @@
  * mapping, because it points to the shadow PD. Use the kernel PD instead,
  * which is static
  */
-#define APDP_PDE	(&pmap_kl2pd[l2tol2(PDIR_SLOT_APTE)])
+#define APDP_PDE	(&curcpu()->ci_kpm_pdir[l2tol2(PDIR_SLOT_APTE)])
 #define APDP_PDE_SHADOW	(L2_BASE + PDIR_SLOT_APTE)
 #else /* PAE && XEN */
 #define APDP_PDE	(L2_BASE + PDIR_SLOT_APTE)
@@ -428,13 +428,6 @@ pmap_pte_flush(void)
 
 #endif
 
-#ifdef PAE
-/* Address of the static kernel's L2 page */
-pd_entry_t *pmap_kl2pd;
-paddr_t pmap_kl2paddr;
-#endif
-
-
 struct trapframe;
 
 int	pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *);

Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.40 src/sys/arch/x86/include/cpu.h:1.41
--- src/sys/arch/x86/include/cpu.h:1.40	Tue Nov  1 21:21:32 2011
+++ src/sys/arch/x86/include/cpu.h	Sun Nov  6 15:18:18 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.40 2011/11/01 21:21:32 joerg Exp $	*/
+/*	$NetBSD: cpu.h,v 1.41 2011/11/06 15:18:18 cherry Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -176,16 +176,19 @@ struct cpu_info {
 #endif
 
 #ifdef PAE
-	uint32_t	ci_pae_l3_pdirpa; /* PA of L3 PD */
+	paddr_t	ci_pae_l3_pdirpa; /* PA of L3 PD */
 	pd_entry_t *	ci_pae_l3_pdir; /* VA pointer to L3 PD */
 #endif
 
-#if defined(XEN) && defined(__x86_64__)
+#if defined(XEN) && (defined(PAE) || defined(__x86_64__))
 	/* Currently active user PGD (can't use rcr3() with Xen) */
-	pd_entry_t *	ci_kpm_pdir;	/* per-cpu L4 PD (va) */
-	paddr_t		ci_kpm_pdirpa; /* per-cpu L4 PD (pa) */
+	pd_entry_t *	ci_kpm_pdir;	/* per-cpu PMD (va) */
+	paddr_t		ci_kpm_pdirpa; /* per-cpu PMD (pa) */
+#if defined(__x86_64__)
 	paddr_t		ci_xen_current_user_pgd;
-#endif
+#endif /* __x86_64__ */
+#endif /* XEN et.al */
+
 
 	char *ci_doubleflt_stack;
 	char *ci_ddbipi_stack;
@@ -233,11 +236,6 @@ struct cpu_info {
 	int		ci_padout __aligned(64);
 };
 
-#ifdef __x86_64__
-#define ci_pdirpa(ci, index) \
-	((ci)->ci_kpm_pdirpa + (index) * sizeof(pd_entry_t))
-#endif /* __x86_64__ */
-
 /*
  * Macros to handle (some) trapframe registers for common x86 code.
  */

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.137 src/sys/arch/x86/x86/pmap.c:1.138
--- src/sys/arch/x86/x86/pmap.c:1.137	Tue Oct 18 23:43:06 2011
+++ src/sys/arch/x86/x86/pmap.c	Sun Nov  6 15:18:18 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.137 2011/10/18 23:43:06 jym Exp $	*/
+/*	$NetBSD: pmap.c,v 1.138 2011/11/06 15:18:18 cherry Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.137 2011/10/18 23:43:06 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.138 2011/11/06 15:18:18 cherry Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -211,13 +211,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1
 #include <xen/hypervisor.h>
 #endif
 
-/* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */
-#if defined(XEN) && defined(__x86_64__)
-#define PG_k PG_u
-#else
-#define PG_k 0
-#endif
-
 /*
  * general info:
  *
@@ -1513,10 +1506,17 @@ pmap_prealloc_lowmem_ptps(void)
 		if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2))
 			HYPERVISOR_update_va_mapping (newp + KERNBASE,
 			    xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
+		/* Update the pmap_kernel() L4 shadow */
 		xpq_queue_pte_update (
 		    xpmap_ptom_masked(pdes_pa)
 		    + (pl_i(0, level) * sizeof (pd_entry_t)),
 		    xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V);
+		/* sync to per-cpu PD */
+		xpq_queue_pte_update(
+			xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa +
+			    pl_i(0, PTP_LEVELS) *
+			    sizeof(pd_entry_t)),
+			pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]);
 		pmap_pte_flush();
 		level--;
 		if (level <= 1)
@@ -1580,18 +1580,22 @@ pmap_init(void)
  * pmap_cpu_init_late: perform late per-CPU initialization.
  */
 
+#ifndef XEN
 void
 pmap_cpu_init_late(struct cpu_info *ci)
 {
+	/*
+	 * The BP has already its own PD page allocated during early
+	 * MD startup.
+	 */
+	if (ci == &cpu_info_primary)
+		return;
+
 #ifdef PAE
 	int ret;
 	struct pglist pg;
 	struct vm_page *vmap;
 
-	/* The BP has already its own L3 page allocated in locore.S. */
-	if (ci == &cpu_info_primary)
-		return;
-
 	/*
 	 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, PA musts
 	 * resides below the 4GB boundary.
@@ -1617,6 +1621,7 @@ pmap_cpu_init_late(struct cpu_info *ci)
 	pmap_update(pmap_kernel());
 #endif
 }
+#endif
 
 /*
  * p v _ e n t r y   f u n c t i o n s
@@ -1833,8 +1838,23 @@ pmap_free_ptp(struct pmap *pmap, struct 
 		 * clear it before freeing
 		 */
 		if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
-		    && level == PTP_LEVELS - 1)
+		    && level == PTP_LEVELS - 1) {
 			pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
+			/*
+			 * Update the per-cpu PD on all cpus the current
+			 * pmap is active on 
+			 */ 
+			CPU_INFO_ITERATOR cii;
+			struct cpu_info *ci;
+			for (CPU_INFO_FOREACH(cii, ci)) {
+				if (ci == NULL) {
+					continue;
+				}
+				if (ci->ci_cpumask & pmap->pm_cpus) {
+					pmap_pte_set(&ci->ci_kpm_pdir[index], 0);
+				}
+			}
+		}
 #  endif /*__x86_64__ */
 		invaladdr = level == 1 ? (vaddr_t)ptes :
 		    (vaddr_t)pdes[level - 2];
@@ -1934,6 +1954,21 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t 
 		        pmap_pte_set(&pmap_kernel()->pm_pdir[index],
 		                (pd_entry_t) (pmap_pa2pte(pa)
 		                        | PG_u | PG_RW | PG_V));
+			/*
+			 * Update the per-cpu PD on all cpus the current
+			 * pmap is active on 
+			 */ 
+			CPU_INFO_ITERATOR cii;
+			struct cpu_info *ci;
+			for (CPU_INFO_FOREACH(cii, ci)) {
+				if (ci == NULL) {
+					continue;
+				}
+				if (ci->ci_cpumask & pmap->pm_cpus) {
+					pmap_pte_set(&ci->ci_kpm_pdir[index],
+						     (pd_entry_t) (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
+				}
+			}
 		}
 #endif /* XEN && __x86_64__ */
 		pmap_pte_flush();
@@ -2621,7 +2656,8 @@ pmap_load(void)
 	/* should be able to take ipis. */
 	KASSERT(ci->ci_ilevel < IPL_HIGH); 
 #ifdef XEN
-	/* XXX not yet KASSERT(x86_read_psl() != 0); */
+	/* Check to see if interrupts are enabled (ie; no events are masked) */
+	KASSERT(x86_read_psl() == 0);
 #else
 	KASSERT((x86_read_psl() & PSL_I) != 0);
 #endif
@@ -4093,23 +4129,42 @@ pmap_alloc_level(pd_entry_t * const *pde
 
 
 		for (i = index; i <= endindex; i++) {
+			pt_entry_t pte;
+
 			KASSERT(!pmap_valid_entry(pdep[i]));
 			pmap_get_physpage(va, level - 1, &pa);
+			pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
 #ifdef XEN
-			xpq_queue_pte_update((level == PTP_LEVELS) ?
-			    xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)) :
-			    xpmap_ptetomach(&pdep[i]),
-			    pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+			switch (level) {
+			case PTP_LEVELS: 
+#if defined(PAE) || defined(__x86_64__)
+				if (i >= PDIR_SLOT_KERN) {
+					/* update per-cpu PMDs on all cpus */
+					CPU_INFO_ITERATOR cii;
+					struct cpu_info *ci;
+					for (CPU_INFO_FOREACH(cii, ci)) {
+						if (ci == NULL) {
+							continue;
+						}
 #ifdef PAE
-			if (level == PTP_LEVELS &&  i > L2_SLOT_KERN) {
-				/* update real kernel PD too */
+						xpq_queue_pte_update(
+							xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(i)]), pte);
+#elif defined(__x86_64__)
+						xpq_queue_pte_update(
+							xpmap_ptetomach(&ci->ci_kpm_pdir[i]), pte);
+#endif /* PAE */
+					}
+				}
+#endif /* PAE || __x86_64__ */
+				/* FALLTHROUGH */
+
+			default: /* All other levels */
 				xpq_queue_pte_update(
-				    xpmap_ptetomach(&pmap_kl2pd[l2tol2(i)]),
-				    pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+					xpmap_ptetomach(&pdep[i]), 
+					pte);
 			}
-#endif
 #else /* XEN */
-			pdep[i] = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
+			pdep[i] = pte;
 #endif /* XEN */
 			KASSERT(level != PTP_LEVELS || nkptp[level - 1] +
 			    pl_i(VM_MIN_KERNEL_ADDRESS, level) == i);

Index: src/sys/arch/xen/x86/cpu.c
diff -u src/sys/arch/xen/x86/cpu.c:1.69 src/sys/arch/xen/x86/cpu.c:1.70
--- src/sys/arch/xen/x86/cpu.c:1.69	Sun Nov  6 11:40:47 2011
+++ src/sys/arch/xen/x86/cpu.c	Sun Nov  6 15:18:19 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.69 2011/11/06 11:40:47 cherry Exp $	*/
+/*	$NetBSD: cpu.c,v 1.70 2011/11/06 15:18:19 cherry Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.69 2011/11/06 11:40:47 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.70 2011/11/06 15:18:19 cherry Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -915,7 +915,7 @@ xen_init_amd64_vcpuctxt(struct cpu_info 
 	 * Use pmap_kernel() L4 PD directly, until we setup the
 	 * per-cpu L4 PD in pmap_cpu_init_late()
 	 */
-	initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3);
+	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa)));
 	initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
 
 
@@ -1015,11 +1015,11 @@ xen_init_i386_vcpuctxt(struct cpu_info *
 	initctx->ctrlreg[0] = pcb->pcb_cr0;
 	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
 	initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */
-	/* 
-	 * Use pmap_kernel() L4 PD directly, until we setup the
-	 * per-cpu L4 PD in pmap_cpu_init_late()
-	 */
-	initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3);
+#ifdef PAE
+	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa)));
+#else /* PAE */
+	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(pcb->pcb_cr3)));
+#endif /* PAE */
 	initctx->ctrlreg[4] = /* CR4_PAE |  */CR4_OSFXSR | CR4_OSXMMEXCPT;
 
 
@@ -1058,7 +1058,7 @@ mp_cpu_start(struct cpu_info *ci, vaddr_
 
 	/* Start it up */
 
-	/* First bring it down - the Xen documentation conveniently omits this slight detail. */
+	/* First bring it down */
 	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_cpuid, NULL))) {
 		aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror);
 		return hyperror;
@@ -1196,34 +1196,147 @@ cpu_load_pmap(struct pmap *pmap)
 
 #ifdef __x86_64__
 	int i, s;
-	pd_entry_t *old_pgd, *new_pgd;
-	paddr_t addr;
+	pd_entry_t *new_pgd;
 	struct cpu_info *ci;
+	paddr_t l4_pd_ma;
 
-	/* kernel pmap always in cr3 and should never go in user cr3 */
-	if (pmap_pdirpa(pmap, 0) != pmap_pdirpa(pmap_kernel(), 0)) {
-		ci = curcpu();
-		/*
-		 * Map user space address in kernel space and load
-		 * user cr3
-		 */
-		s = splvm();
-		new_pgd = pmap->pm_pdir;
-		old_pgd = pmap_kernel()->pm_pdir;
-		addr = xpmap_ptom(pmap_pdirpa(pmap_kernel(), 0));
-		for (i = 0; i < PDIR_SLOT_PTE;
-		    i++, addr += sizeof(pd_entry_t)) {
-			if ((new_pgd[i] & PG_V) || (old_pgd[i] & PG_V))
-				xpq_queue_pte_update(addr, new_pgd[i]);
-		}
+	ci = curcpu();
+	l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
+
+	/*
+	 * Map user space address in kernel space and load
+	 * user cr3
+	 */
+	s = splvm();
+	new_pgd = pmap->pm_pdir;
+
+	/* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
+	for (i = 0; i < PDIR_SLOT_PTE; i++) {
+		xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t), new_pgd[i]);
+	}
+
+	if (__predict_true(pmap != pmap_kernel())) {
 		xen_set_user_pgd(pmap_pdirpa(pmap, 0));
 		ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
-		tlbflush();
-		splx(s);
 	}
+	else {
+		xpq_queue_pt_switch(l4_pd_ma);
+		ci->ci_xen_current_user_pgd = 0;
+	}
+
+	tlbflush();
+	splx(s);
+
 #endif /* __x86_64__ */
 }
 
+ /*
+  * pmap_cpu_init_late: perform late per-CPU initialization.
+  * Short note about percpu PDIR pages:
+  * Both the PAE and __x86_64__ architectures have per-cpu PDIR
+  * tables. This is to get around Xen's pagetable setup constraints for
+  * PAE (multiple L3[3]s cannot point to the same L2 - Xen
+  * will refuse to pin a table setup this way.) and for multiple cpus
+  * to map in different user pmaps on __x86_64__ (see: cpu_load_pmap())
+  *
+  * What this means for us is that the PDIR of the pmap_kernel() is
+  * considered to be a canonical "SHADOW" PDIR with the following
+  * properties: 
+  * - Its recursive mapping points to itself
+  * - per-cpu recursive mappings point to themselves
+  * - per-cpu L4 pages' kernel entries are expected to be in sync with
+  *   the shadow
+  * - APDP_PDE_SHADOW accesses the shadow pdir
+  * - APDP_PDE accesses the per-cpu pdir
+  * - alternate mappings are considered per-cpu - however, x86 pmap
+  *   currently partially consults the shadow - this works because the
+  *   shadow PDE is updated together with the per-cpu entry (see:
+  *   xen_pmap.c: pmap_map_ptes()), and the pmap is locked while the
+  *   alternate ptes are mapped in.
+  */
+
+void
+pmap_cpu_init_late(struct cpu_info *ci)
+{
+#if defined(PAE) || defined(__x86_64__)
+	/*
+	 * The boot processor (BP) already has its own PD page, allocated
+	 * during early MD startup, so there is nothing to do for it here.
+	 */
+
+	if (ci == &cpu_info_primary)
+		return;
+
+	KASSERT(ci != NULL);
+
+#if defined(PAE)
+	ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
+
+	if (ci->ci_pae_l3_pdir == NULL) {
+		panic("%s: failed to allocate L3 per-cpu PD for CPU %d\n",
+		      __func__, cpu_index(ci));
+	}
+	ci->ci_pae_l3_pdirpa = vtophys((vaddr_t) ci->ci_pae_l3_pdir);
+	KASSERT(ci->ci_pae_l3_pdirpa != 0);
+
+	/* Initialise L3 entries 0 - 2 from pmap_kernel()'s pm_pdirpa[0 - 2] */
+	ci->ci_pae_l3_pdir[0] =
+	    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[0]) | PG_V;
+	ci->ci_pae_l3_pdir[1] =
+	    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[1]) | PG_V;
+	ci->ci_pae_l3_pdir[2] =
+	    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[2]) | PG_V;
+#endif /* PAE */
+
+	ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
+
+	if (ci->ci_kpm_pdir == NULL) {
+		panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
+		      __func__, cpu_index(ci));
+	}
+	ci->ci_kpm_pdirpa = vtophys((vaddr_t) ci->ci_kpm_pdir);
+	KASSERT(ci->ci_kpm_pdirpa != 0);
+
+#if defined(__x86_64__)
+	/*
+	 * Start from a full-page copy of the pmap_kernel() shadow L4 entries
+	 */
+
+	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE);
+
+	/* Recursive mapping: the per-cpu PD's PTE slot points at itself */
+	ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;
+#elif defined(PAE)
+	/* Copy over the pmap_kernel() shadow L2 entries that map the kernel */
+	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN, nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t));
+#endif /* __x86_64__ else PAE */
+
+	/* Re-enter the per-cpu PD page read-only: Xen wants R/O */
+	pmap_kenter_pa((vaddr_t)ci->ci_kpm_pdir, ci->ci_kpm_pdirpa,
+	    VM_PROT_READ, 0);
+
+#if defined(PAE)
+	/* Initialise L3 entry 3 to point at the per-cpu PD. This mapping
+	 * is shared across all pmaps and is static, i.e. loading a new
+	 * pmap will not update this entry.
+	 */
+	
+	ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;
+
+	/* Mark L3 R/O (Xen wants this) */
+	pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
+		VM_PROT_READ, 0);
+
+	xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));
+
+#elif defined(__x86_64__)	
+	xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
+#endif /* PAE */
+#endif /* defined(PAE) || defined(__x86_64__) */
+}
+
 /*
  * Notify all other cpus to halt.
  */

Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.35 src/sys/arch/xen/x86/x86_xpmap.c:1.36
--- src/sys/arch/xen/x86/x86_xpmap.c:1.35	Sun Nov  6 11:40:47 2011
+++ src/sys/arch/xen/x86/x86_xpmap.c	Sun Nov  6 15:18:19 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_xpmap.c,v 1.35 2011/11/06 11:40:47 cherry Exp $	*/
+/*	$NetBSD: x86_xpmap.c,v 1.36 2011/11/06 15:18:19 cherry Exp $	*/
 
 /*
  * Copyright (c) 2006 Mathieu Ropert <[email protected]>
@@ -69,7 +69,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.35 2011/11/06 11:40:47 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.36 2011/11/06 15:18:19 cherry Exp $");
 
 #include "opt_xen.h"
 #include "opt_ddb.h"
@@ -543,6 +543,8 @@ vaddr_t xen_pmap_bootstrap (void);
  * for L3[3].
  */
 static const int l2_4_count = 6;
+#elif defined(__x86_64__)
+static const int l2_4_count = PTP_LEVELS;
 #else
 static const int l2_4_count = PTP_LEVELS - 1;
 #endif
@@ -725,13 +727,20 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
 	memset (bt_pgd, 0, PAGE_SIZE);
 	avail = new_pgd + PAGE_SIZE;
 #if PTP_LEVELS > 3
+	/* per-cpu L4 PD */
+	pd_entry_t *bt_cpu_pgd = bt_pgd;
+	/* pmap_kernel() "shadow" L4 PD */
+	bt_pgd = (pd_entry_t *) avail;
+	memset(bt_pgd, 0, PAGE_SIZE);
+	avail += PAGE_SIZE;
+
 	/* Install level 3 */
 	pdtpe = (pd_entry_t *) avail;
 	memset (pdtpe, 0, PAGE_SIZE);
 	avail += PAGE_SIZE;
 
 	addr = ((u_long) pdtpe) - KERNBASE;
-	bt_pgd[pl4_pi(KERNTEXTOFF)] =
+	bt_pgd[pl4_pi(KERNTEXTOFF)] = bt_cpu_pgd[pl4_pi(KERNTEXTOFF)] =
 	    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
 
 	__PRINTK(("L3 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
@@ -877,8 +886,9 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
 	 * pde[L2_SLOT_KERN] always point to the shadow.
 	 */
 	memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
-	pmap_kl2pd = &pde[L2_SLOT_KERN + NPDPG];
-	pmap_kl2paddr = (u_long)pmap_kl2pd - KERNBASE;
+	cpu_info_primary.ci_kpm_pdir = &pde[L2_SLOT_KERN + NPDPG];
+	cpu_info_primary.ci_kpm_pdirpa =
+	    (vaddr_t) cpu_info_primary.ci_kpm_pdir - KERNBASE;
 
 	/*
 	 * We don't enter a recursive entry from the L3 PD. Instead,
@@ -923,9 +933,12 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
 	xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
 #endif
 #else /* PAE */
-	/* recursive entry in higher-level PD */
-	bt_pgd[PDIR_SLOT_PTE] =
-	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
+	/* recursive entry in higher-level per-cpu PD and pmap_kernel() */
+	bt_pgd[PDIR_SLOT_PTE] = xpmap_ptom_masked((paddr_t)bt_pgd - KERNBASE) | PG_k | PG_V;
+#ifdef __x86_64__
+	   bt_cpu_pgd[PDIR_SLOT_PTE] =
+		   xpmap_ptom_masked((paddr_t)bt_cpu_pgd - KERNBASE) | PG_k | PG_V;
+#endif /* __x86_64__ */
 	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
 	    " entry %#" PRIxPADDR "\n", new_pgd, (paddr_t)new_pgd - KERNBASE,
 	    bt_pgd[PDIR_SLOT_PTE]));
@@ -952,7 +965,7 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
 #ifdef PAE
 	PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
 #else
-	PDPpaddr = (u_long)new_pgd - KERNBASE;
+	PDPpaddr = (u_long)bt_pgd - KERNBASE;
 #endif
 
 	/* Switch to new tables */
@@ -974,6 +987,12 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
 		    xpmap_ptom_masked(addr) | PG_k | PG_V);
 		xpq_flush_queue();
 	}
+#elif defined(__x86_64__)
+	if (final) {
+		/* save the address of the real per-cpu L4 pgd page */
+		cpu_info_primary.ci_kpm_pdir = bt_cpu_pgd;
+		cpu_info_primary.ci_kpm_pdirpa = ((paddr_t) bt_cpu_pgd - KERNBASE);
+	}
 #endif
 
 	/* Now we can safely reclaim space taken by old tables */

Reply via email to