Module Name: src
Committed By: ad
Date: Tue Mar 17 21:02:56 UTC 2020
Modified Files:
src/sys/arch/x86/include: pmap.h pmap_pv.h
src/sys/arch/x86/x86: pmap.c
Log Message:
Back out the recent pmap changes, returning the code to its state as of
pmap.c rev 1.365, until I can figure out what is going on with
pmap_page_remove().
To generate a diff of this commit:
cvs rdiff -u -r1.113 -r1.114 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.15 -r1.16 src/sys/arch/x86/include/pmap_pv.h
cvs rdiff -u -r1.372 -r1.373 src/sys/arch/x86/x86/pmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.113 src/sys/arch/x86/include/pmap.h:1.114
--- src/sys/arch/x86/include/pmap.h:1.113 Sat Mar 14 18:24:10 2020
+++ src/sys/arch/x86/include/pmap.h Tue Mar 17 21:02:56 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.113 2020/03/14 18:24:10 ad Exp $ */
+/* $NetBSD: pmap.h,v 1.114 2020/03/17 21:02:56 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -248,8 +248,6 @@ extern struct pool_cache pmap_cache;
* (the other object locks are only used when uvm_pagealloc is called)
*/
-struct pv_page;
-
struct pmap {
struct uvm_object pm_obj[PTP_LEVELS-1];/* objects for lvl >= 1) */
LIST_ENTRY(pmap) pm_list; /* list of all pmaps */
@@ -258,11 +256,11 @@ struct pmap {
struct vm_page *pm_ptphint[PTP_LEVELS-1];
/* pointer to a PTP in our pmap */
struct pmap_statistics pm_stats; /* pmap stats */
- struct pv_entry *pm_pve; /* spare pv_entry */
#if !defined(__x86_64__)
vaddr_t pm_hiexec; /* highest executable mapping */
#endif /* !defined(__x86_64__) */
+ struct lwp *pm_remove_all; /* who's emptying the pmap */
union descriptor *pm_ldt; /* user-set LDT */
size_t pm_ldt_len; /* size of LDT in bytes */
Index: src/sys/arch/x86/include/pmap_pv.h
diff -u src/sys/arch/x86/include/pmap_pv.h:1.15 src/sys/arch/x86/include/pmap_pv.h:1.16
--- src/sys/arch/x86/include/pmap_pv.h:1.15 Sun Mar 15 15:58:24 2020
+++ src/sys/arch/x86/include/pmap_pv.h Tue Mar 17 21:02:56 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap_pv.h,v 1.15 2020/03/15 15:58:24 ad Exp $ */
+/* $NetBSD: pmap_pv.h,v 1.16 2020/03/17 21:02:56 ad Exp $ */
/*-
* Copyright (c)2008 YAMAMOTO Takashi,
@@ -34,7 +34,6 @@
#include <sys/rbtree.h>
struct vm_page;
-struct pmap_page;
/*
* structures to track P->V mapping
@@ -52,14 +51,14 @@ struct pv_pte {
};
/*
- * pv_entry: plug pv_pte into lists. 32 bytes on i386, 64 on amd64.
+ * pv_entry: plug pv_pte into lists.
*/
struct pv_entry {
struct pv_pte pve_pte; /* should be the first member */
LIST_ENTRY(pv_entry) pve_list; /* on pmap_page::pp_pvlist */
rb_node_t pve_rb; /* red-black tree node */
- struct pmap_page *pve_pp; /* backpointer to mapped page */
+ uintptr_t pve_padding; /* unused */
};
#define pve_next pve_list.le_next
@@ -72,13 +71,16 @@ struct pmap_page {
/* PTPs */
rb_tree_t rb;
- /* PTPs, when being freed */
+ /* PTPs */
LIST_ENTRY(vm_page) link;
- /* Non-PTPs (i.e. normal pages) */
+ /* Non-PTPs */
struct {
+ /* PP_EMBEDDED */
struct pv_pte pte;
+
LIST_HEAD(, pv_entry) pvlist;
+ uint8_t flags;
uint8_t attrs;
} s;
} pp_u;
@@ -87,6 +89,7 @@ struct pmap_page {
#define pp_link pp_u.link
#define pp_pte pp_u.s.pte
#define pp_pvlist pp_u.s.pvlist
+#define pp_pflags pp_u.s.flags
#define pp_attrs pp_u.s.attrs
};
@@ -94,6 +97,10 @@ struct pmap_page {
#define PP_ATTRS_A 0x02 /* Accessed */
#define PP_ATTRS_W 0x04 /* Writable */
+/* pp_flags */
+#define PP_EMBEDDED 1
+#define PP_FREEING 2
+
#define PMAP_PAGE_INIT(pp) \
do { \
LIST_INIT(&(pp)->pp_pvlist); \
Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.372 src/sys/arch/x86/x86/pmap.c:1.373
--- src/sys/arch/x86/x86/pmap.c:1.372 Tue Mar 17 18:40:35 2020
+++ src/sys/arch/x86/x86/pmap.c Tue Mar 17 21:02:56 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.372 2020/03/17 18:40:35 ad Exp $ */
+/* $NetBSD: pmap.c,v 1.373 2020/03/17 21:02:56 ad Exp $ */
/*
* Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.372 2020/03/17 18:40:35 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.373 2020/03/17 21:02:56 ad Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -139,8 +139,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3
#include "opt_svs.h"
#include "opt_kaslr.h"
-#define __MUTEX_PRIVATE /* for assertions */
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
@@ -226,39 +224,23 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3
/*
* Locking
*
- * We have the following locks that we must deal with, listed in the order
- * that they are acquired:
- *
- * pg->uobject->vmobjlock, pg->uanon->an_lock
+ * We have the following locks that we must contend with, listed in the
+ * order that they must be acquired:
*
- * For managed pages, these per-object locks are taken by the VM system
- * before calling into the pmap module - either a read or write hold.
- * The lock hold prevent pages from changing identity while the pmap is
- * operating on them. For example, the same lock is held across a call
- * to pmap_remove() and the following call to pmap_update(), so that a
- * page does not gain a new identity while its TLB visibility is stale.
- *
- * pmap->pm_lock
- *
- * This lock protects the fields in the pmap structure including the
- * non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data
- * structures. For modifying unmanaged kernel PTEs it is not needed as
- * kernel PDEs are never freed, and the kernel is expected to be self
- * consistent (and the lock can't be taken for unmanaged kernel PTEs,
- * because they can be modified from interrupt context).
- *
- * pmaps_lock
- *
- * This lock protects the list of active pmaps (headed by "pmaps").
- * It's acqired when adding or removing pmaps or adjusting kernel PDEs.
- *
- * pp_lock
- *
- * This per-page lock protects PV entry lists and the embedded PV entry
- * in each vm_page, allowing for concurrent operation on pages by
- * different pmaps. This is a spin mutex at IPL_VM, because at the
- * points it is taken context switching is usually not tolerable, and
- * spin mutexes must block out interrupts that could take kernel_lock.
+ * - pg->uobject->vmobjlock, pg->uanon->an_lock
+ * These per-object locks are taken by the VM system before calling into
+ * the pmap module. Holding them prevents concurrent operations on the
+ * given page or set of pages.
+ *
+ * - pmap->pm_lock (per pmap)
+ * This lock protects the fields in the pmap structure including the
+ * non-kernel PDEs in the PDP, the PTEs, and the PVE radix tree. For
+ * modifying kernel PTEs it is not required as kernel PDEs are never
+ * freed, and the kernel is expected to be self consistent.
+ *
+ * - pmaps_lock
+ * This lock protects the list of active pmaps (headed by "pmaps"). We
+ * lock it when adding or removing pmaps from this list.
*/
/* uvm_object is abused here to index pmap_pages; make assertions happy. */
@@ -335,8 +317,6 @@ paddr_t pmap_pa_end; /* PA of last phy
#endif
#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
-#define PMAP_CHECK_PP(pp) \
- KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)
/*
* Other data structures
@@ -543,17 +523,6 @@ pvpte_to_pve(struct pv_pte *pvpte)
}
/*
- * Return true if the pmap page has an embedded PV entry.
- */
-static inline bool
-pv_pte_embedded(struct pmap_page *pp)
-{
-
- KASSERT(mutex_owned(&pp->pp_lock));
- return (bool)((vaddr_t)pp->pp_pte.pte_ptp | pp->pp_pte.pte_va);
-}
-
-/*
* pv_pte_first, pv_pte_next: PV list iterator.
*/
static struct pv_pte *
@@ -561,7 +530,7 @@ pv_pte_first(struct pmap_page *pp)
{
KASSERT(mutex_owned(&pp->pp_lock));
- if (pv_pte_embedded(pp)) {
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0) {
return &pp->pp_pte;
}
return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
@@ -574,6 +543,7 @@ pv_pte_next(struct pmap_page *pp, struct
KASSERT(mutex_owned(&pp->pp_lock));
KASSERT(pvpte != NULL);
if (pvpte == &pp->pp_pte) {
+ KASSERT((pp->pp_pflags & PP_EMBEDDED) != 0);
return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
}
return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
@@ -635,61 +605,6 @@ pmap_compare_key(void *context, const vo
}
/*
- * pmap_ptp_init: initialize new page table page
- */
-static inline void
-pmap_ptp_init(struct vm_page *ptp)
-{
-
- ptp->uanon = (struct vm_anon *)(vaddr_t)~0L;
- rb_tree_init(&VM_PAGE_TO_PP(ptp)->pp_rb, &pmap_rbtree_ops);
- PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
-}
-
-/*
- * pmap_ptp_fini: finalize a page table page
- */
-static inline void
-pmap_ptp_fini(struct vm_page *ptp)
-{
-
- KASSERT(RB_TREE_MIN(&VM_PAGE_TO_PP(ptp)->pp_rb) == NULL);
- PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
- ptp->uanon = NULL;
-}
-
-/*
- * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE
- */
-static inline void
-pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
-{
- vaddr_t *min = (vaddr_t *)&ptp->uanon;
-
- if (va < *min) {
- *min = va;
- }
-}
-
-/*
- * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
- */
-static inline void
-pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte)
-{
- vaddr_t sclip;
-
- if (ptp == NULL) {
- return;
- }
-
- sclip = (vaddr_t)ptp->uanon;
- sclip = (*startva < sclip ? sclip : *startva);
- *pte += (sclip - *startva) / PAGE_SIZE;
- *startva = sclip;
-}
-
-/*
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
*
* there are several pmaps involved. some or all of them might be same.
@@ -741,9 +656,7 @@ pmap_map_ptes(struct pmap *pmap, struct
* often the case during exit(), when we have switched
* to the kernel pmap in order to destroy a user pmap.
*/
- if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) {
- pmap_reactivate(pmap);
- }
+ pmap_reactivate(pmap);
*pmap2 = NULL;
} else {
/*
@@ -1858,7 +1771,7 @@ pmap_init(void)
* The kernel doesn't keep track of PTPs, so there's nowhere handy
* to hang a tree of pv_entry records. Dynamically allocated
* pv_entry lists are not heavily used in the kernel's pmap (the
- * usual case is embedded), so cop out and use a single RB tree
+ * usual case is PP_EMBEDDED), so cop out and use a single RB tree
* to cover them.
*/
rb_tree_init(&pmap_kernel_rb, &pmap_rbtree_ops);
@@ -1944,6 +1857,28 @@ pmap_vpage_cpu_init(struct cpu_info *ci)
* p v _ e n t r y f u n c t i o n s
*/
+
+/*
+ * pmap_pp_needs_pve: return true if we need to allocate a pv entry.
+ */
+static bool
+pmap_pp_needs_pve(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
+{
+
+ /*
+ * Adding a pv entry for this page only needs to allocate a pv_entry
+ * structure if the page already has at least one pv entry, since
+ * the first pv entry is stored in the pmap_page. However, because
+ * of subsequent removal(s), PP_EMBEDDED can be false and there can
+ * still be pv entries on the list.
+ */
+
+ if (pp == NULL || (pp->pp_pflags & PP_EMBEDDED) == 0) {
+ return false;
+ }
+ return pp->pp_pte.pte_ptp != ptp || pp->pp_pte.pte_va != va;
+}
+
/*
* pmap_free_pvs: free a linked list of pv entries. the pv entries have
* been removed from their respective pages, but are still entered into the
@@ -1965,57 +1900,49 @@ pmap_free_pvs(struct pmap *pmap, struct
}
/*
- * pmap_check_pv: verify {VA, PTP} pair is either tracked/untracked by page
+ * pmap_lookup_pv: look up a non-PP_EMBEDDED pv entry for the given pmap
+ *
+ * => pmap must be locked
*/
-static void
-pmap_check_pv(struct pmap *pmap, struct vm_page *ptp, struct pmap_page *pp,
- vaddr_t va, bool tracked)
+
+static struct pv_entry *
+pmap_lookup_pv(struct pmap *pmap, struct vm_page *ptp,
+ struct pmap_page *pp, vaddr_t va)
{
-#ifdef DIAGNOSTIC /* XXX too slow make this DEBUG before April 2020 */
- struct pv_pte *pvpte;
+ struct rb_node *node;
+ struct pv_entry *pve;
- PMAP_CHECK_PP(pp);
+ KASSERT(mutex_owned(&pmap->pm_lock));
- mutex_spin_enter(&pp->pp_lock);
- for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
- if (pvpte->pte_ptp == ptp && pvpte->pte_va == va) {
- break;
- }
+ /*
+ * Do an unlocked check on the page: if tracked with PP_EMBEDDED we
+ * can avoid touching the tree.
+ */
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0 &&
+ pp->pp_pte.pte_ptp == ptp &&
+ pp->pp_pte.pte_va == va) {
+ return NULL;
}
- mutex_spin_exit(&pp->pp_lock);
- if (pvpte && !tracked) {
- panic("pmap_check_pv: %p/%lx found on pp %p", ptp, va, pp);
- } else if (!pvpte && tracked) {
- panic("pmap_check_pv: %p/%lx missing on pp %p", ptp, va, pp);
+ if (ptp != NULL) {
+ node = VM_PAGE_TO_PP(ptp)->pp_rb.rbt_root;
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ node = pmap_kernel_rb.rbt_root;
}
-#endif
-}
-
-/*
- * pmap_treelookup_pv: search the PV tree for a dynamic entry
- *
- * => pmap must be locked
- */
-static struct pv_entry *
-pmap_treelookup_pv(const struct pmap *pmap, const struct vm_page *ptp,
- const rb_tree_t *tree, const vaddr_t va)
-{
- struct pv_entry *pve;
- rb_node_t *node;
/*
- * Inlined lookup tailored for exactly what's needed here that is
- * quite a bit faster than using rb_tree_find_node().
+ * Search the RB tree for the key. This is an inlined lookup
+ * tailored for exactly what's needed here that is quite a bit
+ * faster than using rb_tree_find_node().
*/
- for (node = tree->rbt_root;;) {
+ for (;;) {
if (__predict_false(RB_SENTINEL_P(node))) {
return NULL;
}
pve = (struct pv_entry *)
((uintptr_t)node - offsetof(struct pv_entry, pve_rb));
if (pve->pve_pte.pte_va == va) {
- KASSERT(pve->pve_pte.pte_ptp == ptp);
return pve;
}
node = node->rb_nodes[pve->pve_pte.pte_va < va];
@@ -2023,194 +1950,91 @@ pmap_treelookup_pv(const struct pmap *pm
}
/*
- * pmap_lookup_pv: look up a non-embedded pv entry for the given pmap
- *
- * => a PV entry must be known present (doesn't check for existence)
- * => pmap must be locked
- */
-static struct pv_entry *
-pmap_lookup_pv(const struct pmap *pmap, const struct vm_page *ptp,
- const struct pmap_page * const old_pp, const vaddr_t va)
-{
- struct pv_entry *pve;
- const rb_tree_t *tree;
-
- KASSERT(mutex_owned(&pmap->pm_lock));
- KASSERT(ptp != NULL || pmap == pmap_kernel());
-
- /*
- * [This mostly deals with the case of process-private pages, i.e.
- * anonymous memory allocations or COW.]
- *
- * If the page is tracked with an embedded entry then the tree
- * lookup can be avoided. It's safe to check for this specific
- * set of values without pp_lock because both will only ever be
- * set together for this pmap.
- *
- */
- if (atomic_load_relaxed(&old_pp->pp_pte.pte_ptp) == ptp &&
- atomic_load_relaxed(&old_pp->pp_pte.pte_va) == va) {
- return NULL;
- }
-
- /*
- * [This mostly deals with shared mappings, for example shared libs
- * and executables.]
- *
- * Optimise for pmap_remove_all() which works by ascending scan:
- * look at the lowest numbered node in the tree first. The tree is
- * known non-empty because of the check above. For short lived
- * processes where pmap_remove() isn't used much this gets close to
- * a 100% hit rate.
- */
- tree = (ptp != NULL ? &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
- KASSERT(!RB_SENTINEL_P(tree->rbt_root));
- pve = (struct pv_entry *)
- ((uintptr_t)tree->rbt_minmax[RB_DIR_LEFT] -
- offsetof(struct pv_entry, pve_rb));
- if (__predict_true(pve->pve_pte.pte_va == va)) {
- KASSERT(pve->pve_pte.pte_ptp == ptp);
- return pve;
- }
-
- /* Search the RB tree for the key (uncommon). */
- return pmap_treelookup_pv(pmap, ptp, tree, va);
-}
-
-/*
* pmap_enter_pv: enter a mapping onto a pmap_page lst
*
- * => pmap must be locked
- * => does NOT insert dynamic entries to tree (pmap_enter() does later)
+ * => caller should adjust ptp's wire_count before calling
+ * => caller has preallocated pve for us
+ * => if not embedded, tree node must be in place beforehand
*/
-static int
-pmap_enter_pv(struct pmap *pmap, struct pmap_page *pp, struct vm_page *ptp,
- vaddr_t va, struct pv_entry **new_pve, struct pv_entry **old_pve,
- bool *samepage, bool *new_embedded, rb_tree_t *tree)
+static struct pv_entry *
+pmap_enter_pv(struct pmap *pmap, struct pmap_page *pp, struct pv_entry *pve,
+ struct vm_page *ptp, vaddr_t va)
{
- struct pv_entry *pve;
- int error;
KASSERT(mutex_owned(&pmap->pm_lock));
KASSERT(ptp_to_pmap(ptp) == pmap);
+ KASSERT(ptp == NULL || ptp->wire_count >= 2);
KASSERT(ptp == NULL || ptp->uobject != NULL);
KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
- PMAP_CHECK_PP(pp);
- /*
- * If entering the same page and it's already tracked with an
- * embedded entry, we can avoid the expense below. It's safe
- * to check for this very specific set of values without a lock
- * because both will only ever be set together for this pmap.
- */
- if (atomic_load_relaxed(&pp->pp_pte.pte_ptp) == ptp &&
- atomic_load_relaxed(&pp->pp_pte.pte_va) == va) {
- *samepage = true;
- pmap_check_pv(pmap, ptp, pp, va, true);
- return 0;
- }
-
- /*
- * Check for an existing dynamic mapping at this address. If it's
- * for the same page, then it will be reused and nothing needs to be
- * changed.
- */
- *old_pve = pmap_treelookup_pv(pmap, ptp, tree, va);
- if (*old_pve != NULL && (*old_pve)->pve_pp == pp) {
- *samepage = true;
- pmap_check_pv(pmap, ptp, pp, va, true);
- return 0;
- }
-
- /*
- * Need to put a new mapping in place. Grab a spare pv_entry in
- * case it's needed; won't know for sure until the lock is taken.
- */
- if (pmap->pm_pve == NULL) {
- pmap->pm_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
- }
-
- error = 0;
- pmap_check_pv(pmap, ptp, pp, va, false);
mutex_spin_enter(&pp->pp_lock);
- if (!pv_pte_embedded(pp)) {
- /*
- * Embedded PV tracking available - easy.
- */
+ if ((pp->pp_pflags & PP_EMBEDDED) == 0) {
+ pp->pp_pflags |= PP_EMBEDDED;
pp->pp_pte.pte_ptp = ptp;
pp->pp_pte.pte_va = va;
- *new_embedded = true;
- } else if (__predict_false(pmap->pm_pve == NULL)) {
- /*
- * No memory.
- */
- error = ENOMEM;
- } else {
- /*
- * Install new pv_entry on the page.
- */
- pve = pmap->pm_pve;
- pmap->pm_pve = NULL;
- *new_pve = pve;
- pve->pve_pte.pte_ptp = ptp;
- pve->pve_pte.pte_va = va;
- pve->pve_pp = pp;
- LIST_INSERT_HEAD(&pp->pp_pvlist, pve, pve_list);
+ mutex_spin_exit(&pp->pp_lock);
+ return pve;
}
+
+ KASSERT(pve != NULL);
+ pve->pve_pte.pte_ptp = ptp;
+ pve->pve_pte.pte_va = va;
+ KASSERT(pmap_lookup_pv(pmap, ptp, pp, va) == NULL);
+ LIST_INSERT_HEAD(&pp->pp_pvlist, pve, pve_list);
mutex_spin_exit(&pp->pp_lock);
- pmap_check_pv(pmap, ptp, pp, va, true);
- return error;
+ if (ptp != NULL) {
+ rb_tree_insert_node(&VM_PAGE_TO_PP(ptp)->pp_rb, pve);
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ rb_tree_insert_node(&pmap_kernel_rb, pve);
+ }
+ return NULL;
}
/*
* pmap_remove_pv: try to remove a mapping from a pv_list
*
- * => pmap must be locked
- * => removes dynamic entries from tree
* => caller should adjust ptp's wire_count and free PTP if needed
+ * => we don't remove radix tree entry; defer till later (it could block)
+ * => we return the removed pve
+ * => caller can optionally supply pve, if looked up already
*/
static void
pmap_remove_pv(struct pmap *pmap, struct pmap_page *pp, struct vm_page *ptp,
vaddr_t va, struct pv_entry *pve, uint8_t oattrs)
{
- rb_tree_t *tree = (ptp != NULL ?
- &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
KASSERT(mutex_owned(&pmap->pm_lock));
KASSERT(ptp_to_pmap(ptp) == pmap);
KASSERT(ptp == NULL || ptp->uobject != NULL);
KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
- KASSERT(ptp != NULL || pmap == pmap_kernel());
-
- pmap_check_pv(pmap, ptp, pp, va, true);
mutex_spin_enter(&pp->pp_lock);
pp->pp_attrs |= oattrs;
- if (pve == NULL) {
- KASSERT(pp->pp_pte.pte_ptp == ptp);
- KASSERT(pp->pp_pte.pte_va == va);
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0 &&
+ pp->pp_pte.pte_ptp == ptp &&
+ pp->pp_pte.pte_va == va) {
+ KASSERT(pve == NULL);
+ pp->pp_pflags &= ~PP_EMBEDDED;
pp->pp_pte.pte_ptp = NULL;
pp->pp_pte.pte_va = 0;
mutex_spin_exit(&pp->pp_lock);
} else {
- KASSERT(pp->pp_pte.pte_ptp != ptp ||
- pp->pp_pte.pte_va != va);
+ KASSERT(pve != NULL);
+ KASSERT(pve == pmap_lookup_pv(pmap, ptp, pp, va));
KASSERT(pve->pve_pte.pte_ptp == ptp);
KASSERT(pve->pve_pte.pte_va == va);
- KASSERT(pve->pve_pp == pp);
LIST_REMOVE(pve, pve_list);
mutex_spin_exit(&pp->pp_lock);
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == pve);
- rb_tree_remove_node(tree, pve);
-#ifdef DIAGNOSTIC
- memset(pve, 0, sizeof(*pve));
-#endif
+ if (ptp != NULL) {
+ rb_tree_remove_node(&VM_PAGE_TO_PP(ptp)->pp_rb, pve);
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ rb_tree_remove_node(&pmap_kernel_rb, pve);
+ }
}
-
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
- pmap_check_pv(pmap, ptp, pp, va, false);
}
/*
@@ -2228,9 +2052,7 @@ pmap_find_ptp(struct pmap *pmap, vaddr_t
if (pmap->pm_ptphint[lidx] && off == pmap->pm_ptphint[lidx]->offset) {
KASSERT(pmap->pm_ptphint[lidx]->wire_count > 0);
- pg = pmap->pm_ptphint[lidx];
- PMAP_CHECK_PP(VM_PAGE_TO_PP(pg));
- return pg;
+ return pmap->pm_ptphint[lidx];
}
PMAP_DUMMY_LOCK(pmap);
pg = uvm_pagelookup(&pmap->pm_obj[lidx], off);
@@ -2239,9 +2061,6 @@ pmap_find_ptp(struct pmap *pmap, vaddr_t
/* This page is queued to be freed - ignore. */
pg = NULL;
}
- if (pg != NULL) {
- PMAP_CHECK_PP(VM_PAGE_TO_PP(pg));
- }
pmap->pm_ptphint[lidx] = pg;
return pg;
}
@@ -2258,7 +2077,6 @@ pmap_freepage(struct pmap *pmap, struct
if (pmap->pm_ptphint[lidx] == ptp)
pmap->pm_ptphint[lidx] = NULL;
ptp->wire_count = 0;
- pmap_ptp_fini(ptp);
/*
* Enqueue the PTP to be freed by pmap_update(). We can't remove
@@ -2267,6 +2085,7 @@ pmap_freepage(struct pmap *pmap, struct
* Instead mark the PTP as free and if we bump into it again, we'll
* either ignore or reuse (depending on what's useful at the time).
*/
+ KASSERT(RB_TREE_MIN(&VM_PAGE_TO_PP(ptp)->pp_rb) == NULL);
LIST_INSERT_HEAD(&pmap->pm_gc_ptp, ptp, mdpage.mp_pp.pp_link);
}
@@ -2359,12 +2178,14 @@ pmap_get_ptp(struct pmap *pmap, struct p
pt->pg[i] = uvm_pagealloc(obj, off, NULL, aflags);
pt->alloced[i] = true;
if (pt->pg[i] != NULL) {
- pmap_ptp_init(pt->pg[i]);
+ rb_tree_init(&VM_PAGE_TO_PP(pt->pg[i])->pp_rb,
+ &pmap_rbtree_ops);
}
} else if (pt->pg[i]->wire_count == 0) {
/* This page was queued to be freed; dequeue it. */
LIST_REMOVE(pt->pg[i], mdpage.mp_pp.pp_link);
- pmap_ptp_init(pt->pg[i]);
+ rb_tree_init(&VM_PAGE_TO_PP(pt->pg[i])->pp_rb,
+ &pmap_rbtree_ops);
}
PMAP_DUMMY_UNLOCK(pmap);
if (pt->pg[i] == NULL) {
@@ -2471,10 +2292,8 @@ pmap_unget_ptp(struct pmap *pmap, struct
continue;
}
KASSERT(pt->pg[i]->wire_count == 0);
- PMAP_CHECK_PP(VM_PAGE_TO_PP(pt->pg[i]));
/* pmap zeros all pages before freeing. */
pt->pg[i]->flags |= PG_ZERO;
- pmap_ptp_fini(pt->pg[i]);
PMAP_DUMMY_LOCK(pmap);
uvm_pagefree(pt->pg[i]);
PMAP_DUMMY_UNLOCK(pmap);
@@ -2669,7 +2488,7 @@ pmap_ctor(void *arg, void *obj, int flag
kcpuset_create(&pmap->pm_xen_ptp_cpus, true);
#endif
LIST_INIT(&pmap->pm_gc_ptp);
- pmap->pm_pve = NULL;
+ pmap->pm_remove_all = NULL;
/* allocate and init PDP */
pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK);
@@ -2702,10 +2521,6 @@ pmap_dtor(void *arg, void *obj)
{
struct pmap *pmap = obj;
- if (pmap->pm_pve != NULL) {
- pool_cache_put(&pmap_pv_cache, pmap->pm_pve);
- }
-
mutex_enter(&pmaps_lock);
LIST_REMOVE(pmap, pm_list);
mutex_exit(&pmaps_lock);
@@ -2822,28 +2637,26 @@ pmap_destroy(struct pmap *pmap)
{
int i;
+ /* Undo pmap_remove_all(). */
+ if (pmap->pm_remove_all == curlwp) {
+ pmap_update(pmap);
+ }
+
/*
- * drop reference count and verify not in use.
+ * drop reference count
*/
if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) {
return;
}
- pmap_check_inuse(pmap);
- /*
- * XXX handle deferred PTP page free for EPT. ordinarily this is
- * taken care of by pmap_remove_all(). once shared with EPT this
- * can go away.
- */
- if (__predict_false(!LIST_EMPTY(&pmap->pm_gc_ptp))) {
- pmap_update(pmap);
- }
+ pmap_check_inuse(pmap);
/*
* Reference count is zero, free pmap resources and then free pmap.
*/
+ KASSERT(pmap->pm_remove_all == NULL);
pmap_check_ptps(pmap);
KASSERT(LIST_EMPTY(&pmap->pm_gc_ptp));
@@ -2884,85 +2697,20 @@ pmap_destroy(struct pmap *pmap)
}
/*
- * pmap_remove_all: remove all mappings from pmap in bulk.
- *
- * Ordinarily when removing mappings it's important to hold the UVM object's
- * lock, so that pages do not gain a new identity while retaining stale TLB
- * entries (the same lock hold covers both pmap_remove() and pmap_update()).
- * Here it's known that the address space is no longer visible to any user
- * process, so we don't need to worry about that.
+ * pmap_remove_all: pmap is being torn down by the current thread.
+ * avoid unnecessary invalidations.
*/
bool
pmap_remove_all(struct pmap *pmap)
{
- struct vm_page *ptps[32];
- vaddr_t va, blkendva;
- struct pmap *pmap2;
- pt_entry_t *ptes;
- pd_entry_t pde __diagused;
- pd_entry_t * const *pdes;
- struct pv_entry *pv_tofree;
- int lvl __diagused, i, n;
- /* XXX Can't handle EPT just yet. */
- if (pmap->pm_remove != NULL) {
- return false;
- }
-
- for (;;) {
- /* Fetch a block of PTPs from tree. */
- mutex_enter(&pmap->pm_lock);
- n = radix_tree_gang_lookup_node(&pmap->pm_obj[0].uo_pages, 0,
- (void **)ptps, __arraycount(ptps), false);
- if (n == 0) {
- mutex_exit(&pmap->pm_lock);
- break;
- }
-
- /* Remove all mappings in the set of PTPs. */
- pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
- pv_tofree = NULL;
- for (i = 0; i < n; i++) {
- if (ptps[i]->wire_count == 0) {
- /* It's dead: pmap_update() will expunge. */
- continue;
- }
-
- /* Determine range of block. */
- va = ptps[i]->offset * PAGE_SIZE / sizeof(pt_entry_t);
- blkendva = x86_round_pdr(va + 1);
-
- /* Make sure everything squares up... */
- KASSERT(pmap_pdes_valid(va, pdes, &pde, &lvl));
- KASSERT(lvl == 1);
- KASSERT(pmap_find_ptp(pmap, va, 1) == ptps[i]);
-
- /* Zap! */
- pmap_remove_ptes(pmap, ptps[i],
- (vaddr_t)&ptes[pl1_i(va)], va,
- blkendva, &pv_tofree);
-
- /* PTP should now be unused - free it. */
- KASSERT(ptps[i]->wire_count == 1);
- pmap_free_ptp(pmap, ptps[i], va, ptes, pdes);
- }
- pmap_unmap_ptes(pmap, pmap2);
- pmap_free_pvs(pmap, pv_tofree);
- mutex_exit(&pmap->pm_lock);
-
- /* Process deferred frees. */
- pmap_update(pmap);
-
- /* A breathing point. */
- preempt_point();
- }
-
- /* Verify that the pmap is now completely empty. */
- pmap_check_ptps(pmap);
- KASSERTMSG(pmap->pm_stats.resident_count == PDP_SIZE,
- "pmap %p not empty", pmap);
-
- return true;
+ /*
+ * No locking needed; at this point it should only ever be checked
+ * by curlwp.
+ */
+ KASSERT(pmap->pm_remove_all == NULL);
+ pmap->pm_remove_all = curlwp;
+ return false;
}
#if defined(PMAP_FORK)
@@ -3204,7 +2952,7 @@ pmap_reactivate(struct pmap *pmap)
ci->ci_tlbstate = TLBSTATE_VALID;
KASSERT(kcpuset_isset(pmap->pm_kernel_cpus, cid));
- if (__predict_true(kcpuset_isset(pmap->pm_cpus, cid))) {
+ if (kcpuset_isset(pmap->pm_cpus, cid)) {
/* We have the reference, state is valid. */
} else {
/*
@@ -3794,12 +3542,6 @@ pmap_remove_ptes(struct pmap *pmap, stru
KASSERT(kpreempt_disabled());
/*
- * mappings are very often sparse, so clip the given range to the
- * range of PTEs that are known present in the PTP.
- */
- pmap_ptp_range_clip(ptp, &startva, &pte);
-
- /*
* note that ptpva points to the PTE that maps startva. this may
* or may not be the first PTE in the PTP.
*
@@ -3899,8 +3641,6 @@ pmap_remove_pte(struct pmap *pmap, struc
KASSERTMSG((pmap_pv_tracked(pmap_pte2pa(opte)) == NULL),
"pv-tracked page without PTE_PVLIST for %#"PRIxVADDR, va);
#endif
- KASSERT(pmap_treelookup_pv(pmap, ptp, (ptp != NULL ?
- &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb), va) == NULL);
return true;
}
@@ -4013,7 +3753,8 @@ pmap_remove(struct pmap *pmap, vaddr_t s
pmap_unmap_ptes(pmap, pmap2);
/*
* Now safe to free, as we no longer have the PTEs mapped and can
- * block again.
+ * block again. Radix tree nodes are removed here, so we need to
+ * continue holding the pmap locked until complete.
*/
if (pv_tofree != NULL) {
pmap_free_pvs(pmap, pv_tofree);
@@ -4148,36 +3889,20 @@ pmap_pp_remove(struct pmap_page *pp, pad
{
struct pv_pte *pvpte;
struct vm_page *ptp;
- uintptr_t sum;
uint8_t oattrs;
bool locked;
+ int count;
- /*
- * Do an unlocked check to see if the page has no mappings, eg when
- * pmap_remove_all() was called before amap_wipeout() for a process
- * private amap - common. The page being removed must be on the way
- * out, so we don't have to worry about concurrent attempts to enter
- * it (otherwise the caller either doesn't care or has screwed up).
- */
- sum = (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_va);
- sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_ptp);
- sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pvlist.lh_first);
- if (sum == 0) {
- return;
- }
-
+ count = SPINLOCK_BACKOFF_MIN;
kpreempt_disable();
- for (;;) {
+startover:
+ mutex_spin_enter(&pp->pp_lock);
+ while ((pvpte = pv_pte_first(pp)) != NULL) {
struct pmap *pmap;
struct pv_entry *pve;
pt_entry_t opte;
vaddr_t va;
-
- mutex_spin_enter(&pp->pp_lock);
- if ((pvpte = pv_pte_first(pp)) == NULL) {
- mutex_spin_exit(&pp->pp_lock);
- break;
- }
+ int error;
/*
* Add a reference to the pmap before clearing the pte.
@@ -4205,37 +3930,23 @@ pmap_pp_remove(struct pmap_page *pp, pad
if (ptp != NULL) {
pmap_destroy(pmap);
}
- continue;
- }
- va = pvpte->pte_va;
-
- KASSERTMSG(pmap->pm_stats.resident_count > PDP_SIZE,
- "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
- KASSERTMSG(ptp == NULL || (ptp->flags & PG_FREE) == 0,
- "va %lx pmap %p ptp %p is free", va, pmap, ptp);
- KASSERTMSG(ptp == NULL || ptp->wire_count > 1,
- "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
-
-#ifdef DIAGNOSTIC /* XXX Too expensive make DEBUG before April 2020 */
- pmap_check_pv(pmap, ptp, pp, pvpte->pte_va, true);
- rb_tree_t *tree = (ptp != NULL ?
- &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
- pve = pmap_treelookup_pv(pmap, ptp, tree, va);
- if (pve == NULL) {
- KASSERTMSG(&pp->pp_pte == pvpte,
- "va %lx pmap %p ptp %p pvpte %p pve %p oops 1",
- va, pmap, ptp, pvpte, pve);
- } else {
- KASSERTMSG(&pve->pve_pte == pvpte,
- "va %lx pmap %p ptp %p pvpte %p pve %p oops 2",
- va, pmap, ptp, pvpte, pve);
+ goto startover;
}
-#endif
-
- if (pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte)) {
- panic("pmap_pp_remove: mapping not present");
+
+ error = pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte);
+ if (error == EAGAIN) {
+ int hold_count;
+ KERNEL_UNLOCK_ALL(curlwp, &hold_count);
+ mutex_exit(&pmap->pm_lock);
+ if (ptp != NULL) {
+ pmap_destroy(pmap);
+ }
+ SPINLOCK_BACKOFF(count);
+ KERNEL_LOCK(hold_count, curlwp);
+ goto startover;
}
+ va = pvpte->pte_va;
pve = pmap_lookup_pv(pmap, ptp, pp, va);
pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs);
@@ -4253,15 +3964,21 @@ pmap_pp_remove(struct pmap_page *pp, pad
pmap_stats_update_bypte(pmap, 0, opte);
}
if (pve != NULL) {
+ /*
+ * Must free pve, and remove from PV tree with the
+ * pmap's lock still held.
+ */
pve->pve_next = NULL;
pmap_free_pvs(pmap, pve);
}
- pmap_tlb_shootnow();
mutex_exit(&pmap->pm_lock);
if (ptp != NULL) {
pmap_destroy(pmap);
}
+ mutex_spin_enter(&pp->pp_lock);
}
+ mutex_spin_exit(&pp->pp_lock);
+ pmap_tlb_shootnow();
kpreempt_enable();
}
@@ -4311,7 +4028,6 @@ pmap_test_attrs(struct vm_page *pg, unsi
{
struct pmap_page *pp;
struct pv_pte *pvpte;
- struct pmap *pmap;
uint8_t oattrs;
u_int result;
paddr_t pa;
@@ -4321,29 +4037,17 @@ pmap_test_attrs(struct vm_page *pg, unsi
return true;
}
pa = VM_PAGE_TO_PHYS(pg);
- startover:
mutex_spin_enter(&pp->pp_lock);
for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
+ int error;
+
if ((pp->pp_attrs & testbits) != 0) {
break;
}
- if (pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL)) {
- /*
- * raced with a V->P operation. wait for the other
- * side to finish by acquring pmap's lock. if no
- * wait, updates to pp_attrs by the other side may
- * go unseen.
- */
- pmap = ptp_to_pmap(pvpte->pte_ptp);
- pmap_reference(pmap);
- mutex_spin_exit(&pp->pp_lock);
- mutex_enter(&pmap->pm_lock);
- /* nothing. */
- mutex_exit(&pmap->pm_lock);
- pmap_destroy(pmap);
- goto startover;
+ error = pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL);
+ if (error == 0) {
+ pp->pp_attrs |= oattrs;
}
- pp->pp_attrs |= oattrs;
}
result = pp->pp_attrs & testbits;
mutex_spin_exit(&pp->pp_lock);
@@ -4360,27 +4064,23 @@ static bool
pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits)
{
struct pv_pte *pvpte;
- struct pmap *pmap;
uint8_t oattrs;
u_int result;
+ int count;
-startover:
+ count = SPINLOCK_BACKOFF_MIN;
mutex_spin_enter(&pp->pp_lock);
+startover:
for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
- if (pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL)) {
- /*
- * raced with a V->P operation. wait for the other
- * side to finish by acquring pmap's lock. it is
- * probably unmapping the page, and it will be gone
- * when the loop is restarted.
- */
- pmap = ptp_to_pmap(pvpte->pte_ptp);
- pmap_reference(pmap);
+ int error;
+
+ error = pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL);
+ if (error == EAGAIN) {
+ int hold_count;
mutex_spin_exit(&pp->pp_lock);
- mutex_enter(&pmap->pm_lock);
- /* nothing. */
- mutex_exit(&pmap->pm_lock);
- pmap_destroy(pmap);
+ KERNEL_UNLOCK_ALL(curlwp, &hold_count);
+ SPINLOCK_BACKOFF(count);
+ KERNEL_LOCK(hold_count, curlwp);
goto startover;
}
pp->pp_attrs |= oattrs;
@@ -4475,6 +4175,8 @@ pmap_write_protect(struct pmap *pmap, va
vaddr_t blockend, va;
int lvl, i;
+ KASSERT(pmap->pm_remove_all == NULL);
+
if (__predict_false(pmap->pm_write_protect != NULL)) {
(*pmap->pm_write_protect)(pmap, sva, eva, prot);
return;
@@ -4493,8 +4195,7 @@ pmap_write_protect(struct pmap *pmap, va
/*
* Acquire pmap. No need to lock the kernel pmap as we won't
- * be touching PV entries nor stats and kernel PDEs aren't
- * freed.
+ * be touching the pvmap nor the stats.
*/
if (pmap != pmap_kernel()) {
mutex_enter(&pmap->pm_lock);
@@ -4634,14 +4335,14 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
struct vm_page *new_pg, *old_pg;
struct pmap_page *new_pp, *old_pp;
struct pv_entry *old_pve, *new_pve;
+ int error;
bool wired = (flags & PMAP_WIRED) != 0;
struct pmap *pmap2;
struct pmap_ptparray pt;
- int error;
- bool getptp, samepage, new_embedded;
- rb_tree_t *tree;
+ bool getptp;
KASSERT(pmap_initialized);
+ KASSERT(pmap->pm_remove_all == NULL);
KASSERT(va < VM_MAX_KERNEL_ADDRESS);
KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#"
PRIxVADDR " over PDP!", __func__, va);
@@ -4676,16 +4377,13 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
else
#endif
new_pg = PHYS_TO_VM_PAGE(pa);
-
if (new_pg != NULL) {
/* This is a managed page */
npte |= PTE_PVLIST;
new_pp = VM_PAGE_TO_PP(new_pg);
- PMAP_CHECK_PP(new_pp);
} else if ((new_pp = pmap_pv_tracked(pa)) != NULL) {
/* This is an unmanaged pv-tracked page */
npte |= PTE_PVLIST;
- PMAP_CHECK_PP(new_pp);
} else {
new_pp = NULL;
}
@@ -4710,36 +4408,18 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
error);
}
}
- tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
- } else {
- /* Embedded PV entries rely on this. */
- KASSERT(va != 0);
- tree = &pmap_kernel_rb;
}
/*
- * Look up the old PV entry at this VA (if any), and insert a new PV
- * entry if required for the new mapping. Temporarily track the old
- * and new mappings concurrently. Only after the old mapping is
- * evicted from the pmap will we remove its PV entry. Otherwise,
- * our picture of modified/accessed state for either page could get
- * out of sync (we need any P->V operation for either page to stall
- * on pmap->pm_lock until done here).
+ * Now check to see if we need a pv entry for this VA. If we do,
+ * allocate and install in the PV tree. In any case look up the
+ * pv entry in case the old mapping used it.
*/
- new_pve = NULL;
old_pve = NULL;
- samepage = false;
- new_embedded = false;
-
- if (new_pp != NULL) {
- error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve,
- &old_pve, &samepage, &new_embedded, tree);
-
- /*
- * If a new pv_entry was needed and none was available, we
- * can go no further.
- */
- if (error != 0) {
+ new_pve = NULL;
+ if (pmap_pp_needs_pve(new_pp, ptp, va)) {
+ new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
+ if (new_pve == NULL) {
if (flags & PMAP_CANFAIL) {
if (getptp) {
pmap_unget_ptp(pmap, &pt);
@@ -4749,8 +4429,6 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
}
panic("%s: alloc pve failed", __func__);
}
- } else {
- old_pve = pmap_treelookup_pv(pmap, ptp, tree, va);
}
/* Map PTEs into address space. */
@@ -4791,27 +4469,11 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
vtomach((vaddr_t)ptep), npte, domid);
splx(s);
if (error) {
- /* Undo pv_entry tracking - oof. */
- if (new_pp != NULL) {
- mutex_spin_enter(&new_pp->pp_lock);
- if (new_pve != NULL) {
- LIST_REMOVE(new_pve, pve_list);
- KASSERT(pmap->pm_pve == NULL);
- pmap->pm_pve = new_pve;
- } else if (new_embedded) {
- new_pp->pp_pte.pte_ptp = NULL;
- new_pp->pp_pte.pte_va = 0;
- }
- mutex_spin_exit(&new_pp->pp_lock);
- }
- pmap_unmap_ptes(pmap, pmap2);
- /* Free new PTP. */
if (ptp != NULL && ptp->wire_count <= 1) {
pmap_free_ptp(pmap, ptp, va, ptes,
pdes);
}
- mutex_exit(&pmap->pm_lock);
- return error;
+ goto out;
}
break;
}
@@ -4819,20 +4481,11 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
} while (pmap_pte_cas(ptep, opte, npte) != opte);
/*
- * Done with the PTEs: they can now be unmapped.
- */
- pmap_unmap_ptes(pmap, pmap2);
-
- /*
* Update statistics and PTP's reference count.
*/
pmap_stats_update_bypte(pmap, npte, opte);
- if (ptp != NULL) {
- if (!have_oldpa) {
- ptp->wire_count++;
- }
- /* Remember minimum VA in PTP. */
- pmap_ptp_range_set(ptp, va);
+ if (ptp != NULL && !have_oldpa) {
+ ptp->wire_count++;
}
KASSERT(ptp == NULL || ptp->wire_count > 1);
@@ -4841,13 +4494,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
*/
if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) {
KASSERT(((opte ^ npte) & PTE_PVLIST) == 0);
- if ((npte & PTE_PVLIST) != 0) {
- KASSERT(samepage);
- pmap_check_pv(pmap, ptp, new_pp, va, true);
- }
goto same_pa;
- } else if ((npte & PTE_PVLIST) != 0) {
- KASSERT(!samepage);
}
/*
@@ -4863,28 +4510,16 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
__func__, va, oldpa, atop(pa));
}
+ old_pve = pmap_lookup_pv(pmap, ptp, old_pp, va);
pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
pmap_pte_to_pp_attrs(opte));
- if (old_pve != NULL) {
- if (pmap->pm_pve == NULL) {
- pmap->pm_pve = old_pve;
- } else {
- pool_cache_put(&pmap_pv_cache, old_pve);
- }
- }
- } else {
- KASSERT(old_pve == NULL);
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
}
/*
- * If new page is dynamically PV tracked, insert to tree.
+ * If new page is pv-tracked, insert pv_entry into its list.
*/
- if (new_pve != NULL) {
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
- old_pve = rb_tree_insert_node(tree, new_pve);
- KASSERT(old_pve == new_pve);
- pmap_check_pv(pmap, ptp, new_pp, va, true);
+ if (new_pp) {
+ new_pve = pmap_enter_pv(pmap, new_pp, new_pve, ptp, va);
}
same_pa:
@@ -4896,8 +4531,20 @@ same_pa:
((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) {
pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
}
+
+ error = 0;
+#if defined(XENPV)
+out:
+#endif
+ pmap_unmap_ptes(pmap, pmap2);
+ if (old_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, old_pve);
+ }
+ if (new_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, new_pve);
+ }
mutex_exit(&pmap->pm_lock);
- return 0;
+ return error;
}
paddr_t
@@ -5216,10 +4863,20 @@ pmap_update(struct pmap *pmap)
struct vm_page *ptp;
/*
+ * If pmap_remove_all() was in effect, re-enable invalidations from
+ * this point on; issue a shootdown for all the mappings just
+ * removed.
+ */
+ kpreempt_disable();
+ if (pmap->pm_remove_all == curlwp) {
+ pmap->pm_remove_all = NULL;
+ pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE);
+ }
+
+ /*
* Initiate any pending TLB shootdowns. Wait for them to
* complete before returning control to the caller.
*/
- kpreempt_disable();
pmap_tlb_shootnow();
kpreempt_enable();
@@ -5228,7 +4885,7 @@ pmap_update(struct pmap *pmap)
* is an unlocked check, but is safe as we're only interested in
* work done in this LWP - we won't get a false negative.
*/
- if (__predict_false(!LIST_EMPTY(&pmap->pm_gc_ptp))) {
+ if (!LIST_EMPTY(&pmap->pm_gc_ptp)) {
mutex_enter(&pmap->pm_lock);
while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
KASSERT(ptp->wire_count == 0);
@@ -5236,9 +4893,7 @@ pmap_update(struct pmap *pmap)
pp = VM_PAGE_TO_PP(ptp);
LIST_INIT(&pp->pp_pvlist);
pp->pp_attrs = 0;
- pp->pp_pte.pte_ptp = NULL;
- pp->pp_pte.pte_va = 0;
- PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
+ pp->pp_pflags = 0;
/*
* XXX Hack to avoid extra locking, and lock
@@ -5593,10 +5248,10 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
bool accessed;
struct pmap_ptparray pt;
int error;
- bool getptp, samepage, new_embedded;
- rb_tree_t *tree;
+ bool getptp;
KASSERT(pmap_initialized);
+ KASSERT(pmap->pm_remove_all == NULL);
KASSERT(va < VM_MAXUSER_ADDRESS);
npte = pa | pmap_ept_prot(prot) | pmap_ept_type(flags);
@@ -5643,36 +5298,18 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
error);
}
}
- tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
- } else {
- /* Embedded PV entries rely on this. */
- KASSERT(va != 0);
- tree = &pmap_kernel_rb;
}
/*
- * Look up the old PV entry at this VA (if any), and insert a new PV
- * entry if required for the new mapping. Temporarily track the old
- * and new mappings concurrently. Only after the old mapping is
- * evicted from the pmap will we remove its PV entry. Otherwise,
- * our picture of modified/accessed state for either page could get
- * out of sync (we need any P->V operation for either page to stall
- * on pmap->pm_lock until done here).
+ * Now check to see if we need a pv entry for this VA. If we do,
+ * allocate and install in the radix tree. In any case look up the
+ * pv entry in case the old mapping used it.
*/
- new_pve = NULL;
old_pve = NULL;
- samepage = false;
- new_embedded = false;
-
- if (new_pp != NULL) {
- error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve,
- &old_pve, &samepage, &new_embedded, tree);
-
- /*
- * If a new pv_entry was needed and none was available, we
- * can go no further.
- */
- if (error != 0) {
+ new_pve = NULL;
+ if (pmap_pp_needs_pve(new_pp, ptp, va)) {
+ new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
+ if (new_pve == NULL) {
if (flags & PMAP_CANFAIL) {
if (getptp) {
pmap_unget_ptp(pmap, &pt);
@@ -5681,9 +5318,7 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
return error;
}
panic("%s: alloc pve failed", __func__);
- }
- } else {
- old_pve = pmap_treelookup_pv(pmap, ptp, tree, va);
+ }
}
/* Map PTEs into address space. */
@@ -5694,7 +5329,12 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
pmap_ept_install_ptp(pmap, &pt, va);
}
- /* Check if there is an existing mapping. */
+ /*
+ * Check if there is an existing mapping. If we are now sure that
+ * we need pves and we failed to allocate them earlier, handle that.
+ * Caching the value of oldpa here is safe because only the mod/ref
+ * bits can change while the pmap is locked.
+ */
ptes = (pt_entry_t *)PMAP_DIRECT_MAP(VM_PAGE_TO_PHYS(ptp));
ptep = &ptes[pl1_pi(va)];
opte = *ptep;
@@ -5716,20 +5356,11 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
} while (pmap_pte_cas(ptep, opte, npte) != opte);
/*
- * Done with the PTEs: they can now be unmapped.
- */
- kpreempt_enable();
-
- /*
* Update statistics and PTP's reference count.
*/
pmap_ept_stats_update_bypte(pmap, npte, opte);
- if (ptp != NULL) {
- if (!have_oldpa) {
- ptp->wire_count++;
- }
- /* Remember minimum VA in PTP. */
- pmap_ptp_range_set(ptp, va);
+ if (ptp != NULL && !have_oldpa) {
+ ptp->wire_count++;
}
KASSERT(ptp == NULL || ptp->wire_count > 1);
@@ -5738,17 +5369,11 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
*/
if (((opte ^ npte) & (PTE_FRAME | EPT_R)) == 0) {
KASSERT(((opte ^ npte) & EPT_PVLIST) == 0);
- if ((npte & EPT_PVLIST) != 0) {
- KASSERT(samepage);
- pmap_check_pv(pmap, ptp, new_pp, va, true);
- }
goto same_pa;
- } else if ((npte & EPT_PVLIST) != 0) {
- KASSERT(!samepage);
}
/*
- * If old page is pv-tracked, remove pv_entry from its list.
+ * If old page is pv-tracked, replace pv_entry from its list.
*/
if ((~opte & (EPT_R | EPT_PVLIST)) == 0) {
if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) {
@@ -5760,35 +5385,19 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
__func__, va, oldpa, atop(pa));
}
+ old_pve = pmap_lookup_pv(pmap, ptp, old_pp, va);
pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
pmap_ept_to_pp_attrs(opte));
- if (old_pve != NULL) {
- if (pmap->pm_pve == NULL) {
- pmap->pm_pve = old_pve;
- } else {
- pool_cache_put(&pmap_pv_cache, old_pve);
- }
- }
- } else {
- KASSERT(old_pve == NULL);
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
}
/*
- * If new page is dynamically PV tracked, insert to tree.
+ * If new page is pv-tracked, insert pv_entry into its list.
*/
- if (new_pve != NULL) {
- KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
- old_pve = rb_tree_insert_node(tree, new_pve);
- KASSERT(old_pve == new_pve);
- pmap_check_pv(pmap, ptp, new_pp, va, true);
+ if (new_pp) {
+ new_pve = pmap_enter_pv(pmap, new_pp, new_pve, ptp, va);
}
same_pa:
- /*
- * shootdown tlb if necessary.
- */
-
if (pmap_ept_has_ad) {
accessed = (~opte & (EPT_R | EPT_A)) == 0;
} else {
@@ -5797,8 +5406,18 @@ same_pa:
if (accessed && ((opte ^ npte) & (PTE_FRAME | EPT_W)) != 0) {
pmap_tlb_shootdown(pmap, va, 0, TLBSHOOT_ENTER);
}
+
+ error = 0;
+ kpreempt_enable();
+ if (old_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, old_pve);
+ }
+ if (new_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, new_pve);
+ }
mutex_exit(&pmap->pm_lock);
- return 0;
+
+ return error;
}
/* Pay close attention, this returns L2. */
@@ -5922,8 +5541,6 @@ pmap_ept_remove_pte(struct pmap *pmap, s
"managed page without EPT_PVLIST for %#"PRIxVADDR, va);
KASSERTMSG((pmap_pv_tracked(pmap_pte2pa(opte)) == NULL),
"pv-tracked page without EPT_PVLIST for %#"PRIxVADDR, va);
- KASSERT(pmap_treelookup_pv(pmap, ptp, (ptp != NULL ?
- &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb), va) == NULL);
return true;
}
@@ -5958,12 +5575,6 @@ pmap_ept_remove_ptes(struct pmap *pmap,
KASSERT(kpreempt_disabled());
/*
- * mappings are very often sparse, so clip the given range to the
- * range of PTEs that are known present in the PTP.
- */
- pmap_ptp_range_clip(ptp, &startva, &pte);
-
- /*
* note that ptpva points to the PTE that maps startva. this may
* or may not be the first PTE in the PTP.
*
@@ -6025,6 +5636,10 @@ pmap_ept_remove(struct pmap *pmap, vaddr
}
kpreempt_enable();
+ /*
+ * Radix tree nodes are removed here, so we need to continue holding
+ * the pmap locked until complete.
+ */
if (pv_tofree != NULL) {
pmap_free_pvs(pmap, pv_tofree);
}