Module Name: src
Committed By: cherry
Date: Sun Nov 6 15:18:19 UTC 2011
Modified Files:
src/sys/arch/amd64/include: pmap.h
src/sys/arch/i386/include: pmap.h
src/sys/arch/x86/include: cpu.h
src/sys/arch/x86/x86: pmap.c
src/sys/arch/xen/x86: cpu.c x86_xpmap.c
Log Message:
[merging from cherry-xenmp] make pmap_kernel() shadow PMD per-cpu and MP aware.
To generate a diff of this commit:
cvs rdiff -u -r1.27 -r1.28 src/sys/arch/amd64/include/pmap.h
cvs rdiff -u -r1.111 -r1.112 src/sys/arch/i386/include/pmap.h
cvs rdiff -u -r1.40 -r1.41 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.137 -r1.138 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.69 -r1.70 src/sys/arch/xen/x86/cpu.c
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/xen/x86/x86_xpmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/amd64/include/pmap.h
diff -u src/sys/arch/amd64/include/pmap.h:1.27 src/sys/arch/amd64/include/pmap.h:1.28
--- src/sys/arch/amd64/include/pmap.h:1.27 Sun Nov 6 11:40:46 2011
+++ src/sys/arch/amd64/include/pmap.h Sun Nov 6 15:18:18 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.27 2011/11/06 11:40:46 cherry Exp $ */
+/* $NetBSD: pmap.h,v 1.28 2011/11/06 15:18:18 cherry Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -192,7 +192,8 @@
#define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
#define PDP_PDE (L4_BASE + PDIR_SLOT_PTE)
-#define APDP_PDE (L4_BASE + PDIR_SLOT_APTE)
+#define APDP_PDE (&curcpu()->ci_kpm_pdir[PDIR_SLOT_APTE])
+#define APDP_PDE_SHADOW (L4_BASE + PDIR_SLOT_APTE)
#define PDP_BASE L4_BASE
#define APDP_BASE AL4_BASE
Index: src/sys/arch/i386/include/pmap.h
diff -u src/sys/arch/i386/include/pmap.h:1.111 src/sys/arch/i386/include/pmap.h:1.112
--- src/sys/arch/i386/include/pmap.h:1.111 Sun Nov 6 11:40:46 2011
+++ src/sys/arch/i386/include/pmap.h Sun Nov 6 15:18:18 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.111 2011/11/06 11:40:46 cherry Exp $ */
+/* $NetBSD: pmap.h,v 1.112 2011/11/06 15:18:18 cherry Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -277,7 +277,7 @@
* mapping, because it points to the shadow PD. Use the kernel PD instead,
* which is static
*/
-#define APDP_PDE (&pmap_kl2pd[l2tol2(PDIR_SLOT_APTE)])
+#define APDP_PDE (&curcpu()->ci_kpm_pdir[l2tol2(PDIR_SLOT_APTE)])
#define APDP_PDE_SHADOW (L2_BASE + PDIR_SLOT_APTE)
#else /* PAE && XEN */
#define APDP_PDE (L2_BASE + PDIR_SLOT_APTE)
@@ -428,13 +428,6 @@ pmap_pte_flush(void)
#endif
-#ifdef PAE
-/* Address of the static kernel's L2 page */
-pd_entry_t *pmap_kl2pd;
-paddr_t pmap_kl2paddr;
-#endif
-
-
struct trapframe;
int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *);
Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.40 src/sys/arch/x86/include/cpu.h:1.41
--- src/sys/arch/x86/include/cpu.h:1.40 Tue Nov 1 21:21:32 2011
+++ src/sys/arch/x86/include/cpu.h Sun Nov 6 15:18:18 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.40 2011/11/01 21:21:32 joerg Exp $ */
+/* $NetBSD: cpu.h,v 1.41 2011/11/06 15:18:18 cherry Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@@ -176,16 +176,19 @@ struct cpu_info {
#endif
#ifdef PAE
- uint32_t ci_pae_l3_pdirpa; /* PA of L3 PD */
+ paddr_t ci_pae_l3_pdirpa; /* PA of L3 PD */
pd_entry_t * ci_pae_l3_pdir; /* VA pointer to L3 PD */
#endif
-#if defined(XEN) && defined(__x86_64__)
+#if defined(XEN) && (defined(PAE) || defined(__x86_64__))
/* Currently active user PGD (can't use rcr3() with Xen) */
- pd_entry_t * ci_kpm_pdir; /* per-cpu L4 PD (va) */
- paddr_t ci_kpm_pdirpa; /* per-cpu L4 PD (pa) */
+ pd_entry_t * ci_kpm_pdir; /* per-cpu PMD (va) */
+ paddr_t ci_kpm_pdirpa; /* per-cpu PMD (pa) */
+#if defined(__x86_64__)
paddr_t ci_xen_current_user_pgd;
-#endif
+#endif /* __x86_64__ */
+#endif /* XEN && (PAE || __x86_64__) */
+
char *ci_doubleflt_stack;
char *ci_ddbipi_stack;
@@ -233,11 +236,6 @@ struct cpu_info {
int ci_padout __aligned(64);
};
-#ifdef __x86_64__
-#define ci_pdirpa(ci, index) \
- ((ci)->ci_kpm_pdirpa + (index) * sizeof(pd_entry_t))
-#endif /* __x86_64__ */
-
/*
* Macros to handle (some) trapframe registers for common x86 code.
*/
Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.137 src/sys/arch/x86/x86/pmap.c:1.138
--- src/sys/arch/x86/x86/pmap.c:1.137 Tue Oct 18 23:43:06 2011
+++ src/sys/arch/x86/x86/pmap.c Sun Nov 6 15:18:18 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.137 2011/10/18 23:43:06 jym Exp $ */
+/* $NetBSD: pmap.c,v 1.138 2011/11/06 15:18:18 cherry Exp $ */
/*-
* Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.137 2011/10/18 23:43:06 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.138 2011/11/06 15:18:18 cherry Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -211,13 +211,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1
#include <xen/hypervisor.h>
#endif
-/* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */
-#if defined(XEN) && defined(__x86_64__)
-#define PG_k PG_u
-#else
-#define PG_k 0
-#endif
-
/*
* general info:
*
@@ -1513,10 +1506,17 @@ pmap_prealloc_lowmem_ptps(void)
if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2))
HYPERVISOR_update_va_mapping (newp + KERNBASE,
xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
+ /* Update the pmap_kernel() L4 shadow */
xpq_queue_pte_update (
xpmap_ptom_masked(pdes_pa)
+ (pl_i(0, level) * sizeof (pd_entry_t)),
xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V);
+ /* sync to per-cpu PD */
+ xpq_queue_pte_update(
+ xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa +
+ pl_i(0, PTP_LEVELS) *
+ sizeof(pd_entry_t)),
+ pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]);
pmap_pte_flush();
level--;
if (level <= 1)
@@ -1580,18 +1580,22 @@ pmap_init(void)
* pmap_cpu_init_late: perform late per-CPU initialization.
*/
+#ifndef XEN
void
pmap_cpu_init_late(struct cpu_info *ci)
{
+ /*
+ * The BP already has its own PD page allocated during early
+ * MD startup.
+ */
+ if (ci == &cpu_info_primary)
+ return;
+
#ifdef PAE
int ret;
struct pglist pg;
struct vm_page *vmap;
- /* The BP has already its own L3 page allocated in locore.S. */
- if (ci == &cpu_info_primary)
- return;
-
/*
* Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, PA must
* reside below the 4GB boundary.
@@ -1617,6 +1621,7 @@ pmap_cpu_init_late(struct cpu_info *ci)
pmap_update(pmap_kernel());
#endif
}
+#endif
/*
* p v _ e n t r y f u n c t i o n s
@@ -1833,8 +1838,23 @@ pmap_free_ptp(struct pmap *pmap, struct
* clear it before freeing
*/
if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
- && level == PTP_LEVELS - 1)
+ && level == PTP_LEVELS - 1) {
pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
+ /*
+ * Update the per-cpu PD on all cpus the current
+ * pmap is active on
+ */
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == NULL) {
+ continue;
+ }
+ if (ci->ci_cpumask & pmap->pm_cpus) {
+ pmap_pte_set(&ci->ci_kpm_pdir[index], 0);
+ }
+ }
+ }
# endif /*__x86_64__ */
invaladdr = level == 1 ? (vaddr_t)ptes :
(vaddr_t)pdes[level - 2];
@@ -1934,6 +1954,21 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t
pmap_pte_set(&pmap_kernel()->pm_pdir[index],
(pd_entry_t) (pmap_pa2pte(pa)
| PG_u | PG_RW | PG_V));
+ /*
+ * Update the per-cpu PD on all cpus the current
+ * pmap is active on
+ */
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == NULL) {
+ continue;
+ }
+ if (ci->ci_cpumask & pmap->pm_cpus) {
+ pmap_pte_set(&ci->ci_kpm_pdir[index],
+ (pd_entry_t) (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
+ }
+ }
}
#endif /* XEN && __x86_64__ */
pmap_pte_flush();
@@ -2621,7 +2656,8 @@ pmap_load(void)
/* should be able to take ipis. */
KASSERT(ci->ci_ilevel < IPL_HIGH);
#ifdef XEN
- /* XXX not yet KASSERT(x86_read_psl() != 0); */
+ /* Check to see if interrupts are enabled (i.e., no events are masked) */
+ KASSERT(x86_read_psl() == 0);
#else
KASSERT((x86_read_psl() & PSL_I) != 0);
#endif
@@ -4093,23 +4129,42 @@ pmap_alloc_level(pd_entry_t * const *pde
for (i = index; i <= endindex; i++) {
+ pt_entry_t pte;
+
KASSERT(!pmap_valid_entry(pdep[i]));
pmap_get_physpage(va, level - 1, &pa);
+ pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
#ifdef XEN
- xpq_queue_pte_update((level == PTP_LEVELS) ?
- xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)) :
- xpmap_ptetomach(&pdep[i]),
- pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+ switch (level) {
+ case PTP_LEVELS:
+#if defined(PAE) || defined(__x86_64__)
+ if (i >= PDIR_SLOT_KERN) {
+ /* update per-cpu PMDs on all cpus */
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == NULL) {
+ continue;
+ }
#ifdef PAE
- if (level == PTP_LEVELS && i > L2_SLOT_KERN) {
- /* update real kernel PD too */
+ xpq_queue_pte_update(
+ xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(i)]), pte);
+#elif defined(__x86_64__)
+ xpq_queue_pte_update(
+ xpmap_ptetomach(&ci->ci_kpm_pdir[i]), pte);
+#endif /* PAE */
+ }
+ }
+#endif /* PAE || __x86_64__ */
+ /* FALLTHROUGH */
+
+ default: /* All other levels */
xpq_queue_pte_update(
- xpmap_ptetomach(&pmap_kl2pd[l2tol2(i)]),
- pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+ xpmap_ptetomach(&pdep[i]),
+ pte);
}
-#endif
#else /* XEN */
- pdep[i] = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
+ pdep[i] = pte;
#endif /* XEN */
KASSERT(level != PTP_LEVELS || nkptp[level - 1] +
pl_i(VM_MIN_KERNEL_ADDRESS, level) == i);
Index: src/sys/arch/xen/x86/cpu.c
diff -u src/sys/arch/xen/x86/cpu.c:1.69 src/sys/arch/xen/x86/cpu.c:1.70
--- src/sys/arch/xen/x86/cpu.c:1.69 Sun Nov 6 11:40:47 2011
+++ src/sys/arch/xen/x86/cpu.c Sun Nov 6 15:18:19 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.69 2011/11/06 11:40:47 cherry Exp $ */
+/* $NetBSD: cpu.c,v 1.70 2011/11/06 15:18:19 cherry Exp $ */
/* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */
/*-
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.69 2011/11/06 11:40:47 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.70 2011/11/06 15:18:19 cherry Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@@ -915,7 +915,7 @@ xen_init_amd64_vcpuctxt(struct cpu_info
* Use pmap_kernel() L4 PD directly, until we setup the
* per-cpu L4 PD in pmap_cpu_init_late()
*/
- initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3);
+ initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa)));
initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
@@ -1015,11 +1015,11 @@ xen_init_i386_vcpuctxt(struct cpu_info *
initctx->ctrlreg[0] = pcb->pcb_cr0;
initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */
- /*
- * Use pmap_kernel() L4 PD directly, until we setup the
- * per-cpu L4 PD in pmap_cpu_init_late()
- */
- initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3);
+#ifdef PAE
+ initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa)));
+#else /* PAE */
+ initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(pcb->pcb_cr3)));
+#endif /* PAE */
initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT;
@@ -1058,7 +1058,7 @@ mp_cpu_start(struct cpu_info *ci, vaddr_
/* Start it up */
- /* First bring it down - the Xen documentation conveniently omits this slight detail. */
+ /* First bring it down */
if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_cpuid, NULL))) {
aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror);
return hyperror;
@@ -1196,34 +1196,147 @@ cpu_load_pmap(struct pmap *pmap)
#ifdef __x86_64__
int i, s;
- pd_entry_t *old_pgd, *new_pgd;
- paddr_t addr;
+ pd_entry_t *new_pgd;
struct cpu_info *ci;
+ paddr_t l4_pd_ma;
- /* kernel pmap always in cr3 and should never go in user cr3 */
- if (pmap_pdirpa(pmap, 0) != pmap_pdirpa(pmap_kernel(), 0)) {
- ci = curcpu();
- /*
- * Map user space address in kernel space and load
- * user cr3
- */
- s = splvm();
- new_pgd = pmap->pm_pdir;
- old_pgd = pmap_kernel()->pm_pdir;
- addr = xpmap_ptom(pmap_pdirpa(pmap_kernel(), 0));
- for (i = 0; i < PDIR_SLOT_PTE;
- i++, addr += sizeof(pd_entry_t)) {
- if ((new_pgd[i] & PG_V) || (old_pgd[i] & PG_V))
- xpq_queue_pte_update(addr, new_pgd[i]);
- }
+ ci = curcpu();
+ l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
+
+ /*
+ * Map user space address in kernel space and load
+ * user cr3
+ */
+ s = splvm();
+ new_pgd = pmap->pm_pdir;
+
+ /* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
+ for (i = 0; i < PDIR_SLOT_PTE; i++) {
+ xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t), new_pgd[i]);
+ }
+
+ if (__predict_true(pmap != pmap_kernel())) {
xen_set_user_pgd(pmap_pdirpa(pmap, 0));
ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
- tlbflush();
- splx(s);
}
+ else {
+ xpq_queue_pt_switch(l4_pd_ma);
+ ci->ci_xen_current_user_pgd = 0;
+ }
+
+ tlbflush();
+ splx(s);
+
#endif /* __x86_64__ */
}
+ /*
+ * pmap_cpu_init_late: perform late per-CPU initialization.
+ * Short note about percpu PDIR pages:
+ * Both the PAE and __x86_64__ architectures have per-cpu PDIR
+ * tables. This is to get around Xen's pagetable setup constraints for
+ * PAE (multiple L3[3]s cannot point to the same L2 - Xen
+ * will refuse to pin a table set up this way.) and for multiple cpus
+ * to map in different user pmaps on __x86_64__ (see: cpu_load_pmap())
+ *
+ * What this means for us is that the PDIR of the pmap_kernel() is
+ * considered to be a canonical "SHADOW" PDIR with the following
+ * properties:
+ * - Its recursive mapping points to itself
+ * - per-cpu recursive mappings point to themselves
+ * - per-cpu L4 pages' kernel entries are expected to be in sync with
+ * the shadow
+ * - APDP_PDE_SHADOW accesses the shadow pdir
+ * - APDP_PDE accesses the per-cpu pdir
+ * - alternate mappings are considered per-cpu - however, x86 pmap
+ * currently partially consults the shadow - this works because the
+ * shadow PDE is updated together with the per-cpu entry (see:
+ * xen_pmap.c: pmap_map_ptes()), and the pmap is locked while the
+ * alternate ptes are mapped in.
+ */
+
+void
+pmap_cpu_init_late(struct cpu_info *ci)
+{
+#if defined(PAE) || defined(__x86_64__)
+ /*
+ * The BP already has its own PD page allocated during early
+ * MD startup.
+ */
+
+ if (ci == &cpu_info_primary)
+ return;
+
+ KASSERT(ci != NULL);
+
+#if defined(PAE)
+ ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+ UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
+
+ if (ci->ci_pae_l3_pdir == NULL) {
+ panic("%s: failed to allocate L3 per-cpu PD for CPU %d\n",
+ __func__, cpu_index(ci));
+ }
+ ci->ci_pae_l3_pdirpa = vtophys((vaddr_t) ci->ci_pae_l3_pdir);
+ KASSERT(ci->ci_pae_l3_pdirpa != 0);
+
+ /* Initialise L3 entries 0 - 2: Point them to pmap_kernel() */
+ ci->ci_pae_l3_pdir[0] =
+ xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[0]) | PG_V;
+ ci->ci_pae_l3_pdir[1] =
+ xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[1]) | PG_V;
+ ci->ci_pae_l3_pdir[2] =
+ xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[2]) | PG_V;
+#endif /* PAE */
+
+ ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+ UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
+
+ if (ci->ci_kpm_pdir == NULL) {
+ panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
+ __func__, cpu_index(ci));
+ }
+ ci->ci_kpm_pdirpa = vtophys((vaddr_t) ci->ci_kpm_pdir);
+ KASSERT(ci->ci_kpm_pdirpa != 0);
+
+#if defined(__x86_64__)
+ /*
+ * Copy over the pmap_kernel() shadow L4 entries
+ */
+
+ memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE);
+
+ /* Recursive kernel mapping */
+ ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;
+#elif defined(PAE)
+ /* Copy over the pmap_kernel() shadow L2 entries that map the kernel */
+ memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN, nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t));
+#endif /* __x86_64__ else PAE */
+
+ /* Xen wants R/O */
+ pmap_kenter_pa((vaddr_t)ci->ci_kpm_pdir, ci->ci_kpm_pdirpa,
+ VM_PROT_READ, 0);
+
+#if defined(PAE)
+ /* Initialise L3 entry 3. This mapping is shared across all
+ * pmaps and is static, i.e., loading a new pmap will not update
+ * this entry.
+ */
+
+ ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;
+
+ /* Mark L3 R/O (Xen wants this) */
+ pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
+ VM_PROT_READ, 0);
+
+ xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));
+
+#elif defined(__x86_64__)
+ xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
+#endif /* PAE */
+#endif /* defined(PAE) || defined(__x86_64__) */
+}
+
/*
* Notify all other cpus to halt.
*/
Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.35 src/sys/arch/xen/x86/x86_xpmap.c:1.36
--- src/sys/arch/xen/x86/x86_xpmap.c:1.35 Sun Nov 6 11:40:47 2011
+++ src/sys/arch/xen/x86/x86_xpmap.c Sun Nov 6 15:18:19 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: x86_xpmap.c,v 1.35 2011/11/06 11:40:47 cherry Exp $ */
+/* $NetBSD: x86_xpmap.c,v 1.36 2011/11/06 15:18:19 cherry Exp $ */
/*
* Copyright (c) 2006 Mathieu Ropert <[email protected]>
@@ -69,7 +69,7 @@
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.35 2011/11/06 11:40:47 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.36 2011/11/06 15:18:19 cherry Exp $");
#include "opt_xen.h"
#include "opt_ddb.h"
@@ -543,6 +543,8 @@ vaddr_t xen_pmap_bootstrap (void);
* for L3[3].
*/
static const int l2_4_count = 6;
+#elif defined(__x86_64__)
+static const int l2_4_count = PTP_LEVELS;
#else
static const int l2_4_count = PTP_LEVELS - 1;
#endif
@@ -725,13 +727,20 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
memset (bt_pgd, 0, PAGE_SIZE);
avail = new_pgd + PAGE_SIZE;
#if PTP_LEVELS > 3
+ /* per-cpu L4 PD */
+ pd_entry_t *bt_cpu_pgd = bt_pgd;
+ /* pmap_kernel() "shadow" L4 PD */
+ bt_pgd = (pd_entry_t *) avail;
+ memset(bt_pgd, 0, PAGE_SIZE);
+ avail += PAGE_SIZE;
+
/* Install level 3 */
pdtpe = (pd_entry_t *) avail;
memset (pdtpe, 0, PAGE_SIZE);
avail += PAGE_SIZE;
addr = ((u_long) pdtpe) - KERNBASE;
- bt_pgd[pl4_pi(KERNTEXTOFF)] =
+ bt_pgd[pl4_pi(KERNTEXTOFF)] = bt_cpu_pgd[pl4_pi(KERNTEXTOFF)] =
xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
__PRINTK(("L3 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
@@ -877,8 +886,9 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
* pde[L2_SLOT_KERN] always point to the shadow.
*/
memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
- pmap_kl2pd = &pde[L2_SLOT_KERN + NPDPG];
- pmap_kl2paddr = (u_long)pmap_kl2pd - KERNBASE;
+ cpu_info_primary.ci_kpm_pdir = &pde[L2_SLOT_KERN + NPDPG];
+ cpu_info_primary.ci_kpm_pdirpa =
+ (vaddr_t) cpu_info_primary.ci_kpm_pdir - KERNBASE;
/*
* We don't enter a recursive entry from the L3 PD. Instead,
@@ -923,9 +933,12 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
#endif
#else /* PAE */
- /* recursive entry in higher-level PD */
- bt_pgd[PDIR_SLOT_PTE] =
- xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
+ /* recursive entry in higher-level per-cpu PD and pmap_kernel() */
+ bt_pgd[PDIR_SLOT_PTE] = xpmap_ptom_masked((paddr_t)bt_pgd - KERNBASE) | PG_k | PG_V;
+#ifdef __x86_64__
+ bt_cpu_pgd[PDIR_SLOT_PTE] =
+ xpmap_ptom_masked((paddr_t)bt_cpu_pgd - KERNBASE) | PG_k | PG_V;
+#endif /* __x86_64__ */
__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
" entry %#" PRIxPADDR "\n", new_pgd, (paddr_t)new_pgd - KERNBASE,
bt_pgd[PDIR_SLOT_PTE]));
@@ -952,7 +965,7 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
#ifdef PAE
PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
#else
- PDPpaddr = (u_long)new_pgd - KERNBASE;
+ PDPpaddr = (u_long)bt_pgd - KERNBASE;
#endif
/* Switch to new tables */
@@ -974,6 +987,12 @@ xen_bootstrap_tables (vaddr_t old_pgd, v
xpmap_ptom_masked(addr) | PG_k | PG_V);
xpq_flush_queue();
}
+#elif defined(__x86_64__)
+ if (final) {
+ /* save the address of the real per-cpu L4 pgd page */
+ cpu_info_primary.ci_kpm_pdir = bt_cpu_pgd;
+ cpu_info_primary.ci_kpm_pdirpa = ((paddr_t) bt_cpu_pgd - KERNBASE);
+ }
#endif
/* Now we can safely reclaim space taken by old tables */