Module Name: src
Committed By: ad
Date: Tue Mar 10 22:38:42 UTC 2020
Modified Files:
src/sys/arch/x86/include: pmap.h pmap_pv.h
src/sys/arch/x86/x86: pmap.c
Log Message:
- pmap_check_inuse() is expensive so make it DEBUG not DIAGNOSTIC.
- Put PV locking back in place with only a minor performance impact.
  pmap_enter() still needs more work - it's not easy to satisfy all the
  competing requirements so I'll do that with another change.
- Use pmap_find_ptp() (lookup only) in preference to pmap_get_ptp() (alloc).
  Make pm_ptphint indexed by VA not PA. Replace the per-pmap radixtree for
  dynamic PV entries with a per-PTP rbtree. Cuts system time during kernel
  build by ~10% for me.
To generate a diff of this commit:
cvs rdiff -u -r1.110 -r1.111 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.12 -r1.13 src/sys/arch/x86/include/pmap_pv.h
cvs rdiff -u -r1.362 -r1.363 src/sys/arch/x86/x86/pmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.110 src/sys/arch/x86/include/pmap.h:1.111
--- src/sys/arch/x86/include/pmap.h:1.110 Sun Feb 23 15:46:39 2020
+++ src/sys/arch/x86/include/pmap.h Tue Mar 10 22:38:41 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.110 2020/02/23 15:46:39 ad Exp $ */
+/* $NetBSD: pmap.h,v 1.111 2020/03/10 22:38:41 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -67,8 +67,6 @@
#ifndef _X86_PMAP_H_
#define _X86_PMAP_H_
-#include <sys/radixtree.h>
-
/*
* pl*_pi: index in the ptp page for a pde mapping a VA.
* (pl*_i below is the index in the virtual array of all pdes per level)
@@ -257,7 +255,6 @@ struct pmap {
paddr_t pm_pdirpa[PDP_SIZE]; /* PA of PDs (read-only after create) */
struct vm_page *pm_ptphint[PTP_LEVELS-1];
/* pointer to a PTP in our pmap */
- struct radix_tree pm_pvtree; /* tree of non-embedded pv entries */
struct pmap_statistics pm_stats; /* pmap stats */
#if !defined(__x86_64__)
Index: src/sys/arch/x86/include/pmap_pv.h
diff -u src/sys/arch/x86/include/pmap_pv.h:1.12 src/sys/arch/x86/include/pmap_pv.h:1.13
--- src/sys/arch/x86/include/pmap_pv.h:1.12 Sun Feb 23 22:28:53 2020
+++ src/sys/arch/x86/include/pmap_pv.h Tue Mar 10 22:38:41 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap_pv.h,v 1.12 2020/02/23 22:28:53 ad Exp $ */
+/* $NetBSD: pmap_pv.h,v 1.13 2020/03/10 22:38:41 ad Exp $ */
/*-
* Copyright (c)2008 YAMAMOTO Takashi,
@@ -31,6 +31,7 @@
#include <sys/mutex.h>
#include <sys/queue.h>
+#include <sys/rbtree.h>
struct vm_page;
@@ -56,6 +57,8 @@ struct pv_pte {
struct pv_entry {
struct pv_pte pve_pte; /* should be the first member */
LIST_ENTRY(pv_entry) pve_list; /* on pmap_page::pp_pvlist */
+ rb_node_t pve_rb; /* red-black tree node */
+ uintptr_t pve_padding; /* unused */
};
#define pve_next pve_list.le_next
@@ -65,26 +68,43 @@ struct pv_entry {
struct pmap_page {
union {
- /* PP_EMBEDDED */
- struct pv_pte u_pte;
+ /* PTPs */
+ rb_tree_t rb;
/* PTPs */
- LIST_ENTRY(vm_page) u_link;
+ LIST_ENTRY(vm_page) link;
+
+ /* Non-PTPs */
+ struct {
+ /* PP_EMBEDDED */
+ struct pv_pte pte;
+
+ LIST_HEAD(, pv_entry) pvlist;
+ uint8_t flags;
+ uint8_t attrs;
+ } s;
} pp_u;
- LIST_HEAD(, pv_entry) pp_pvlist;
-#define pp_pte pp_u.u_pte
-#define pp_link pp_u.u_link
- uint8_t pp_flags;
- uint8_t pp_attrs;
+ kmutex_t pp_lock;
+#define pp_rb pp_u.rb
+#define pp_link pp_u.link
+#define pp_pte pp_u.s.pte
+#define pp_pvlist pp_u.s.pvlist
+#define pp_pflags pp_u.s.flags
+#define pp_attrs pp_u.s.attrs
+};
+
#define PP_ATTRS_D 0x01 /* Dirty */
#define PP_ATTRS_A 0x02 /* Accessed */
#define PP_ATTRS_W 0x04 /* Writable */
-};
/* pp_flags */
#define PP_EMBEDDED 1
#define PP_FREEING 2
-#define PMAP_PAGE_INIT(pp) LIST_INIT(&(pp)->pp_pvlist)
+/*
+ * PMAP_PAGE_INIT: initialise the PV list head and pp_lock of a pmap_page.
+ *
+ * Deliberately no semicolon after while (0): the caller supplies it, so
+ * the macro expands to exactly one statement and stays usable in an
+ * unbraced if/else.
+ */
+#define PMAP_PAGE_INIT(pp) \
+do { \
+ LIST_INIT(&(pp)->pp_pvlist); \
+ mutex_init(&(pp)->pp_lock, MUTEX_NODEBUG, IPL_VM); \
+} while (/* CONSTCOND */ 0)
#endif /* !_X86_PMAP_PV_H_ */
Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.362 src/sys/arch/x86/x86/pmap.c:1.363
--- src/sys/arch/x86/x86/pmap.c:1.362 Wed Mar 4 22:00:03 2020
+++ src/sys/arch/x86/x86/pmap.c Tue Mar 10 22:38:41 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.362 2020/03/04 22:00:03 ad Exp $ */
+/* $NetBSD: pmap.c,v 1.363 2020/03/10 22:38:41 ad Exp $ */
/*
* Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.362 2020/03/04 22:00:03 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.363 2020/03/10 22:38:41 ad Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -293,6 +293,7 @@ static bool cpu_pat_enabled __read_mostl
static struct pmap kernel_pmap_store __cacheline_aligned; /* kernel's pmap */
struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
+static rb_tree_t pmap_kernel_rb __cacheline_aligned;
struct bootspace bootspace __read_mostly;
struct slotspace slotspace __read_mostly;
@@ -409,6 +410,21 @@ struct pmap_ptparray {
};
/*
+ * PV tree prototypes
+ */
+
+static int pmap_compare_key(void *, const void *, const void *);
+static int pmap_compare_nodes(void *, const void *, const void *);
+
+/* Red-black tree ops for pv_entry records (linked through pve_rb) */
+static const rb_tree_ops_t pmap_rbtree_ops = {
+ .rbto_compare_nodes = pmap_compare_nodes,
+ .rbto_compare_key = pmap_compare_key,
+ .rbto_node_offset = offsetof(struct pv_entry, pve_rb),
+ .rbto_context = NULL
+};
+
+/*
* Local prototypes
*/
@@ -431,7 +447,7 @@ static int pmap_get_ptp(struct pmap *, s
static void pmap_unget_ptp(struct pmap *, struct pmap_ptparray *);
static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t,
pd_entry_t * const *);
-static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
+static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, int);
static void pmap_freepage(struct pmap *, struct vm_page *, int);
static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
pt_entry_t *, pd_entry_t * const *);
@@ -440,10 +456,6 @@ static bool pmap_remove_pte(struct pmap
static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
vaddr_t, struct pv_entry **);
-static int pmap_pvmap_insert(struct pmap *, vaddr_t, struct pv_entry *);
-static struct pv_entry *pmap_pvmap_lookup(struct pmap *, vaddr_t);
-static void pmap_pvmap_remove(struct pmap *, vaddr_t, struct pv_entry *);
-
static void pmap_alloc_level(struct pmap *, vaddr_t, long *);
static void pmap_load1(struct lwp *, struct pmap *, struct pmap *);
@@ -517,7 +529,8 @@ static struct pv_pte *
pv_pte_first(struct pmap_page *pp)
{
- if ((pp->pp_flags & PP_EMBEDDED) != 0) {
+ KASSERT(mutex_owned(&pp->pp_lock));
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0) {
return &pp->pp_pte;
}
return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
@@ -527,9 +540,10 @@ static struct pv_pte *
pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
{
+ KASSERT(mutex_owned(&pp->pp_lock));
KASSERT(pvpte != NULL);
if (pvpte == &pp->pp_pte) {
- KASSERT((pp->pp_flags & PP_EMBEDDED) != 0);
+ KASSERT((pp->pp_pflags & PP_EMBEDDED) != 0);
return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
}
return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
@@ -553,6 +567,44 @@ pmap_reference(struct pmap *pmap)
}
/*
+ * rbtree: compare two nodes.
+ *
+ * Both nodes are pv_entry records and are ordered by pte_va alone;
+ * they are asserted to carry the same pte_ptp.  Returns -1 if n1 sorts
+ * before n2, 1 if after, 0 if the VAs are equal.  The context argument
+ * is not used.
+ */
+static int
+pmap_compare_nodes(void *context, const void *n1, const void *n2)
+{
+ const struct pv_entry *pve1 = n1;
+ const struct pv_entry *pve2 = n2;
+
+ KASSERT(pve1->pve_pte.pte_ptp == pve2->pve_pte.pte_ptp);
+
+ if (pve1->pve_pte.pte_va < pve2->pve_pte.pte_va) {
+ return -1;
+ }
+ if (pve1->pve_pte.pte_va > pve2->pve_pte.pte_va) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * rbtree: compare a node and a key.
+ *
+ * The key is not dereferenced: the pointer value k is itself cast to a
+ * vaddr_t and compared against the node's pte_va.  Returns -1 if the
+ * node's VA is below the key, 1 if above, 0 on a match.  The context
+ * argument is not used.
+ */
+static int
+pmap_compare_key(void *context, const void *n, const void *k)
+{
+ const struct pv_entry *pve = n;
+ const vaddr_t key = (vaddr_t)k;
+
+ if (pve->pve_pte.pte_va < key) {
+ return -1;
+ }
+ if (pve->pve_pte.pte_va > key) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
*
* there are several pmaps involved. some or all of them might be same.
@@ -1696,9 +1748,14 @@ pmap_init(void)
pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, flags,
"pdppl", NULL, IPL_NONE);
#endif
- pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
- PR_LARGECACHE, "pvpl", &pool_allocator_kmem, IPL_NONE, NULL,
- NULL, NULL);
+ pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry),
+#ifdef _LP64
+ coherency_unit,
+#else
+ coherency_unit / 2,
+#endif
+ 0, PR_LARGECACHE, "pvpl", &pool_allocator_kmem,
+ IPL_NONE, NULL, NULL, NULL);
pmap_tlb_init();
@@ -1710,7 +1767,14 @@ pmap_init(void)
evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
NULL, "x86", "ldt sync");
- radix_tree_init_tree(&pmap_kernel()->pm_pvtree);
+ /*
+ * The kernel doesn't keep track of PTPs, so there's nowhere handy
+ * to hang a tree of pv_entry records. Dynamically allocated
+ * pv_entry lists are not heavily used in the kernel's pmap (the
+ * usual case is PP_EMBEDDED), so cop out and use a single RB tree
+ * to cover them.
+ */
+ rb_tree_init(&pmap_kernel_rb, &pmap_rbtree_ops);
/*
* done: pmap module is up (and ready for business)
@@ -1795,8 +1859,7 @@ pmap_vpage_cpu_init(struct cpu_info *ci)
/*
- * pmap_pp_needs_pve: return true if we need to allocate a pv entry and
- * corresponding radix tree entry for the page.
+ * pmap_pp_needs_pve: return true if we need to allocate a pv entry.
*/
static bool
pmap_pp_needs_pve(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
@@ -1810,7 +1873,7 @@ pmap_pp_needs_pve(struct pmap_page *pp,
* still be pv entries on the list.
*/
- if (pp == NULL || (pp->pp_flags & PP_EMBEDDED) == 0) {
+ if (pp == NULL || (pp->pp_pflags & PP_EMBEDDED) == 0) {
return false;
}
return pp->pp_pte.pte_ptp != ptp || pp->pp_pte.pte_va != va;
@@ -1831,60 +1894,59 @@ pmap_free_pvs(struct pmap *pmap, struct
KASSERT(mutex_owned(&pmap->pm_lock));
for ( /* null */ ; pve != NULL ; pve = next) {
- pmap_pvmap_remove(pmap, pve->pve_pte.pte_va, pve);
next = pve->pve_next;
pool_cache_put(&pmap_pv_cache, pve);
}
}
/*
- * pmap_pvmap_lookup: look up a non-PP_EMBEDDED pv entry for the given pmap
+ * pmap_lookup_pv: look up a non-PP_EMBEDDED pv entry for the given pmap
*
* => pmap must be locked
*/
static struct pv_entry *
-pmap_pvmap_lookup(struct pmap *pmap, vaddr_t va)
-{
-
- KASSERT(mutex_owned(&pmap->pm_lock));
-
- return radix_tree_lookup_node(&pmap->pm_pvtree, va >> PAGE_SHIFT);
-}
-
-/*
- * pmap_pvmap_insert: insert a non-PP_EMBEDDED pv entry for the given pmap
- *
- * => pmap must be locked
- * => an error can be returned
- */
-
-static int
-pmap_pvmap_insert(struct pmap *pmap, vaddr_t va, struct pv_entry *pve)
+pmap_lookup_pv(struct pmap *pmap, struct vm_page *ptp,
+ struct pmap_page *pp, vaddr_t va)
{
+ struct rb_node *node;
+ struct pv_entry *pve;
KASSERT(mutex_owned(&pmap->pm_lock));
- KASSERT(pmap_pvmap_lookup(pmap, va) == NULL);
-
- return radix_tree_insert_node(&pmap->pm_pvtree, va >> PAGE_SHIFT, pve);
-}
-/*
- * pmap_pvmap_remove: look up a non-PP_EMBEDDED pv entry for the given pmap
- *
- * => pmap must be locked
- */
-
-static void
-pmap_pvmap_remove(struct pmap *pmap, vaddr_t va, struct pv_entry *pve)
-{
- struct pv_entry *pve2 __diagused;
-
- KASSERT(mutex_owned(&pmap->pm_lock));
+ /*
+ * Do an unlocked check on the page: if tracked with PP_EMBEDDED we
+ * can avoid touching the tree.
+ */
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0 &&
+ pp->pp_pte.pte_ptp == ptp &&
+ pp->pp_pte.pte_va == va) {
+ return NULL;
+ }
- pve2 = radix_tree_remove_node(&pmap->pm_pvtree, va >> PAGE_SHIFT);
+ if (ptp != NULL) {
+ node = VM_PAGE_TO_PP(ptp)->pp_rb.rbt_root;
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ node = pmap_kernel_rb.rbt_root;
+ }
- KASSERT(pve2 == pve);
+ /*
+ * Search the RB tree for the key. This is an inlined lookup
+ * tailored for exactly what's needed here that is quite a bit
+ * faster than using rb_tree_find_node().
+ */
+ for (;;) {
+ if (__predict_false(RB_SENTINEL_P(node))) {
+ return NULL;
+ }
+ pve = (struct pv_entry *)
+ ((uintptr_t)node - offsetof(struct pv_entry, pve_rb));
+ if (pve->pve_pte.pte_va == va) {
+ return pve;
+ }
+ node = node->rb_nodes[pve->pve_pte.pte_va < va];
+ }
}
/*
@@ -1905,19 +1967,28 @@ pmap_enter_pv(struct pmap *pmap, struct
KASSERT(ptp == NULL || ptp->uobject != NULL);
KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
- if ((pp->pp_flags & PP_EMBEDDED) == 0) {
- pp->pp_flags |= PP_EMBEDDED;
+ mutex_spin_enter(&pp->pp_lock);
+ if ((pp->pp_pflags & PP_EMBEDDED) == 0) {
+ pp->pp_pflags |= PP_EMBEDDED;
pp->pp_pte.pte_ptp = ptp;
pp->pp_pte.pte_va = va;
+ mutex_spin_exit(&pp->pp_lock);
return pve;
}
KASSERT(pve != NULL);
pve->pve_pte.pte_ptp = ptp;
pve->pve_pte.pte_va = va;
- KASSERT(pmap_pvmap_lookup(pmap, va) != NULL);
- KASSERT(pmap_pvmap_lookup(pmap, va) == pve);
+ KASSERT(pmap_lookup_pv(pmap, ptp, pp, va) == NULL);
LIST_INSERT_HEAD(&pp->pp_pvlist, pve, pve_list);
+ mutex_spin_exit(&pp->pp_lock);
+
+ if (ptp != NULL) {
+ rb_tree_insert_node(&VM_PAGE_TO_PP(ptp)->pp_rb, pve);
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ rb_tree_insert_node(&pmap_kernel_rb, pve);
+ }
return NULL;
}
@@ -1931,7 +2002,7 @@ pmap_enter_pv(struct pmap *pmap, struct
*/
static void
pmap_remove_pv(struct pmap *pmap, struct pmap_page *pp, struct vm_page *ptp,
- vaddr_t va, struct pv_entry *pve)
+ vaddr_t va, struct pv_entry *pve, uint8_t oattrs)
{
KASSERT(mutex_owned(&pmap->pm_lock));
@@ -1939,19 +2010,30 @@ pmap_remove_pv(struct pmap *pmap, struct
KASSERT(ptp == NULL || ptp->uobject != NULL);
KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
- if ((pp->pp_flags & PP_EMBEDDED) != 0 &&
+ mutex_spin_enter(&pp->pp_lock);
+ pp->pp_attrs |= oattrs;
+ if ((pp->pp_pflags & PP_EMBEDDED) != 0 &&
pp->pp_pte.pte_ptp == ptp &&
pp->pp_pte.pte_va == va) {
KASSERT(pve == NULL);
- pp->pp_flags &= ~PP_EMBEDDED;
+ pp->pp_pflags &= ~PP_EMBEDDED;
pp->pp_pte.pte_ptp = NULL;
pp->pp_pte.pte_va = 0;
+ mutex_spin_exit(&pp->pp_lock);
} else {
KASSERT(pve != NULL);
- KASSERT(pve == pmap_pvmap_lookup(pmap, va));
+ KASSERT(pve == pmap_lookup_pv(pmap, ptp, pp, va));
KASSERT(pve->pve_pte.pte_ptp == ptp);
KASSERT(pve->pve_pte.pte_va == va);
LIST_REMOVE(pve, pve_list);
+ mutex_spin_exit(&pp->pp_lock);
+
+ if (ptp != NULL) {
+ rb_tree_remove_node(&VM_PAGE_TO_PP(ptp)->pp_rb, pve);
+ } else {
+ KASSERT(pmap == pmap_kernel());
+ rb_tree_remove_node(&pmap_kernel_rb, pve);
+ }
}
}
@@ -1960,38 +2042,32 @@ pmap_remove_pv(struct pmap *pmap, struct
*/
static struct vm_page *
-pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
+pmap_find_ptp(struct pmap *pmap, vaddr_t va, int level)
{
int lidx = level - 1;
+ off_t off = ptp_va2o(va, level);
struct vm_page *pg;
KASSERT(mutex_owned(&pmap->pm_lock));
- if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
- pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) {
- return (pmap->pm_ptphint[lidx]);
+ if (pmap->pm_ptphint[lidx] && off == pmap->pm_ptphint[lidx]->offset) {
+ KASSERT(pmap->pm_ptphint[lidx]->wire_count > 0);
+ return pmap->pm_ptphint[lidx];
}
PMAP_DUMMY_LOCK(pmap);
- pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
+ pg = uvm_pagelookup(&pmap->pm_obj[lidx], off);
PMAP_DUMMY_UNLOCK(pmap);
- if (pg != NULL) {
- if (__predict_false(pg->wire_count == 0)) {
- /* This page is queued to be freed - ignore. */
- KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
- PP_FREEING) != 0);
- pg = NULL;
- } else {
- KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
- PP_FREEING) == 0);
- }
+ if (pg != NULL && __predict_false(pg->wire_count == 0)) {
+ /* This page is queued to be freed - ignore. */
+ pg = NULL;
}
+ pmap->pm_ptphint[lidx] = pg;
return pg;
}
static inline void
pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
{
- struct pmap_page *pp;
int lidx;
KASSERT(ptp->wire_count == 1);
@@ -2007,12 +2083,9 @@ pmap_freepage(struct pmap *pmap, struct
* the page from the uvm_object, as that can take further locks
* (intolerable right now because the PTEs are likely mapped in).
* Instead mark the PTP as free and if we bump into it again, we'll
- * either ignore or reuse (depending on what's tolerable at the
- * time).
+ * either ignore or reuse (depending on what's useful at the time).
*/
- pp = VM_PAGE_TO_PP(ptp);
- KASSERT((pp->pp_flags & PP_FREEING) == 0);
- pp->pp_flags |= PP_FREEING;
+ KASSERT(RB_TREE_MIN(&VM_PAGE_TO_PP(ptp)->pp_rb) == NULL);
LIST_INSERT_HEAD(&pmap->pm_gc_ptp, ptp, mdpage.mp_pp.pp_link);
}
@@ -2059,7 +2132,7 @@ pmap_free_ptp(struct pmap *pmap, struct
pmap_freepage(pmap, ptp, level);
if (level < PTP_LEVELS - 1) {
- ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
+ ptp = pmap_find_ptp(pmap, va, level + 1);
ptp->wire_count--;
if (ptp->wire_count > 1)
break;
@@ -2104,15 +2177,15 @@ pmap_get_ptp(struct pmap *pmap, struct p
if (pt->pg[i] == NULL) {
pt->pg[i] = uvm_pagealloc(obj, off, NULL, aflags);
pt->alloced[i] = true;
+ if (pt->pg[i] != NULL) {
+ rb_tree_init(&VM_PAGE_TO_PP(pt->pg[i])->pp_rb,
+ &pmap_rbtree_ops);
+ }
} else if (pt->pg[i]->wire_count == 0) {
/* This page was queued to be freed; dequeue it. */
- KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
- PP_FREEING) != 0);
- VM_PAGE_TO_PP(pt->pg[i])->pp_flags &= ~PP_FREEING;
LIST_REMOVE(pt->pg[i], mdpage.mp_pp.pp_link);
- } else {
- KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
- PP_FREEING) == 0);
+ rb_tree_init(&VM_PAGE_TO_PP(pt->pg[i])->pp_rb,
+ &pmap_rbtree_ops);
}
PMAP_DUMMY_UNLOCK(pmap);
if (pt->pg[i] == NULL) {
@@ -2128,7 +2201,7 @@ pmap_get_ptp(struct pmap *pmap, struct p
}
/*
- * pmap_install_ptp: instal any freshly allocated PTPs
+ * pmap_install_ptp: install any freshly allocated PTPs
*
* => pmap should NOT be pmap_kernel()
* => pmap should be locked
@@ -2218,8 +2291,6 @@ pmap_unget_ptp(struct pmap *pmap, struct
if (!pt->alloced[i]) {
continue;
}
- KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
- PP_FREEING) == 0);
KASSERT(pt->pg[i]->wire_count == 0);
/* pmap zeros all pages before freeing. */
pt->pg[i]->flags |= PG_ZERO;
@@ -2411,7 +2482,6 @@ pmap_ctor(void *arg, void *obj, int flag
mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
rw_init(&pmap->pm_dummy_lock);
- radix_tree_init_tree(&pmap->pm_pvtree);
kcpuset_create(&pmap->pm_cpus, true);
kcpuset_create(&pmap->pm_kernel_cpus, true);
#ifdef XENPV
@@ -2457,7 +2527,6 @@ pmap_dtor(void *arg, void *obj)
pmap_pdp_fini(pmap->pm_pdir);
pool_put(&pmap_pdp_pool, pmap->pm_pdir);
- radix_tree_fini_tree(&pmap->pm_pvtree);
mutex_destroy(&pmap->pm_lock);
rw_destroy(&pmap->pm_dummy_lock);
kcpuset_destroy(pmap->pm_cpus);
@@ -2514,20 +2583,22 @@ pmap_create(void)
* pmap_check_ptps: verify that none of the pmap's page table objects
* have any pages allocated to them.
*/
-static inline void
+static void
pmap_check_ptps(struct pmap *pmap)
{
int i;
for (i = 0; i < PTP_LEVELS - 1; i++) {
- KASSERT(pmap->pm_obj[i].uo_npages == 0);
+ KASSERTMSG(pmap->pm_obj[i].uo_npages == 0,
+ "pmap %p level %d still has %d pages",
+ pmap, i, (int)pmap->pm_obj[i].uo_npages);
}
}
static void
pmap_check_inuse(struct pmap *pmap)
{
-#ifdef DIAGNOSTIC
+#ifdef DEBUG
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
@@ -2551,7 +2622,7 @@ pmap_check_inuse(struct pmap *pmap)
}
#endif
}
-#endif /* DIAGNOSTIC */
+#endif /* DEBUG */
}
/*
@@ -2616,7 +2687,6 @@ pmap_destroy(struct pmap *pmap)
kcpuset_zero(pmap->pm_xen_ptp_cpus);
#endif
- KASSERT(radix_tree_empty_tree_p(&pmap->pm_pvtree));
pmap_check_ptps(pmap);
if (__predict_false(pmap->pm_enter != NULL)) {
/* XXX make this a different cache */
@@ -3583,9 +3653,8 @@ pmap_remove_pte(struct pmap *pmap, struc
}
/* Sync R/M bits. */
- pve = pmap_pvmap_lookup(pmap, va);
- pp->pp_attrs |= pmap_pte_to_pp_attrs(opte);
- pmap_remove_pv(pmap, pp, ptp, va, pve);
+ pve = pmap_lookup_pv(pmap, ptp, pp, va);
+ pmap_remove_pv(pmap, pp, ptp, va, pve, pmap_pte_to_pp_attrs(opte));
if (pve) {
pve->pve_next = *pv_tofree;
@@ -3607,7 +3676,6 @@ pmap_remove(struct pmap *pmap, vaddr_t s
pd_entry_t * const *pdes;
struct pv_entry *pv_tofree = NULL;
bool result;
- paddr_t ptppa;
vaddr_t blkendva, va = sva;
struct vm_page *ptp;
struct pmap *pmap2;
@@ -3629,12 +3697,9 @@ pmap_remove(struct pmap *pmap, vaddr_t s
if (pmap_pdes_valid(va, pdes, &pde, &lvl)) {
KASSERT(lvl == 1);
- /* PA of the PTP */
- ptppa = pmap_pte2pa(pde);
-
/* Get PTP if non-kernel mapping. */
if (pmap != pmap_kernel()) {
- ptp = pmap_find_ptp(pmap, va, ptppa, 1);
+ ptp = pmap_find_ptp(pmap, va, 1);
KASSERTMSG(ptp != NULL,
"%s: unmanaged PTP detected", __func__);
} else {
@@ -3666,12 +3731,9 @@ pmap_remove(struct pmap *pmap, vaddr_t s
}
KASSERT(lvl == 1);
- /* PA of the PTP */
- ptppa = pmap_pte2pa(pde);
-
/* Get PTP if non-kernel mapping. */
if (pmap != pmap_kernel()) {
- ptp = pmap_find_ptp(pmap, va, ptppa, 1);
+ ptp = pmap_find_ptp(pmap, va, 1);
KASSERTMSG(ptp != NULL, "%s: unmanaged PTP detected",
__func__);
} else {
@@ -3827,11 +3889,13 @@ pmap_pp_remove(struct pmap_page *pp, pad
struct pv_pte *pvpte;
struct vm_page *ptp;
uint8_t oattrs;
+ bool locked;
int count;
count = SPINLOCK_BACKOFF_MIN;
kpreempt_disable();
startover:
+ mutex_spin_enter(&pp->pp_lock);
while ((pvpte = pv_pte_first(pp)) != NULL) {
struct pmap *pmap;
struct pv_entry *pve;
@@ -3848,7 +3912,26 @@ startover:
if (ptp != NULL) {
pmap_reference(pmap);
}
- mutex_enter(&pmap->pm_lock);
+
+ /*
+ * Now try to lock it. We need a direct handoff between
+ * pp_lock and pm_lock to know the pv_entry is kept intact
+ * and kept associated with this pmap. If that can't be
+ * had, wait for the pmap's lock to become free and then
+ * retry.
+ */
+ locked = mutex_tryenter(&pmap->pm_lock);
+ mutex_spin_exit(&pp->pp_lock);
+ if (!locked) {
+ mutex_enter(&pmap->pm_lock);
+ /* nothing, just wait for it */
+ mutex_exit(&pmap->pm_lock);
+ if (ptp != NULL) {
+ pmap_destroy(pmap);
+ }
+ goto startover;
+ }
+
error = pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte);
if (error == EAGAIN) {
int hold_count;
@@ -3863,9 +3946,8 @@ startover:
}
va = pvpte->pte_va;
- pve = pmap_pvmap_lookup(pmap, va);
- pp->pp_attrs |= oattrs;
- pmap_remove_pv(pmap, pp, ptp, va, pve);
+ pve = pmap_lookup_pv(pmap, ptp, pp, va);
+ pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs);
/* Update the PTP reference count. Free if last reference. */
if (ptp != NULL) {
@@ -3882,8 +3964,8 @@ startover:
}
if (pve != NULL) {
/*
- * Must free pve, and remove from pmap's radix tree
- * with the pmap's lock still held.
+ * Must free pve, and remove from PV tree with the
+ * pmap's lock still held.
*/
pve->pve_next = NULL;
pmap_free_pvs(pmap, pve);
@@ -3892,7 +3974,9 @@ startover:
if (ptp != NULL) {
pmap_destroy(pmap);
}
+ mutex_spin_enter(&pp->pp_lock);
}
+ mutex_spin_exit(&pp->pp_lock);
pmap_tlb_shootnow();
kpreempt_enable();
}
@@ -3952,7 +4036,7 @@ pmap_test_attrs(struct vm_page *pg, unsi
return true;
}
pa = VM_PAGE_TO_PHYS(pg);
- kpreempt_disable();
+ mutex_spin_enter(&pp->pp_lock);
for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
int error;
@@ -3965,7 +4049,7 @@ pmap_test_attrs(struct vm_page *pg, unsi
}
}
result = pp->pp_attrs & testbits;
- kpreempt_enable();
+ mutex_spin_exit(&pp->pp_lock);
/*
* note that we will exit the for loop with a non-null pve if
@@ -3984,7 +4068,7 @@ pmap_pp_clear_attrs(struct pmap_page *pp
int count;
count = SPINLOCK_BACKOFF_MIN;
- kpreempt_disable();
+ mutex_spin_enter(&pp->pp_lock);
startover:
for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
int error;
@@ -3992,6 +4076,7 @@ startover:
error = pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL);
if (error == EAGAIN) {
int hold_count;
+ mutex_spin_exit(&pp->pp_lock);
KERNEL_UNLOCK_ALL(curlwp, &hold_count);
SPINLOCK_BACKOFF(count);
KERNEL_LOCK(hold_count, curlwp);
@@ -4002,7 +4087,7 @@ startover:
result = pp->pp_attrs & clearbits;
pp->pp_attrs &= ~clearbits;
pmap_tlb_shootnow();
- kpreempt_enable();
+ mutex_spin_exit(&pp->pp_lock);
return result != 0;
}
@@ -4248,11 +4333,12 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
struct vm_page *ptp;
struct vm_page *new_pg, *old_pg;
struct pmap_page *new_pp, *old_pp;
- struct pv_entry *pve;
+ struct pv_entry *old_pve, *new_pve;
int error;
bool wired = (flags & PMAP_WIRED) != 0;
struct pmap *pmap2;
struct pmap_ptparray pt;
+ bool getptp;
KASSERT(pmap_initialized);
KASSERT(pmap->pm_remove_all == NULL);
@@ -4301,32 +4387,40 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
new_pp = NULL;
}
- /* Make sure we have PTPs allocated. */
+ /* Begin by locking the pmap. */
mutex_enter(&pmap->pm_lock);
+
+ /* Look up the PTP. Allocate if none present. */
ptp = NULL;
+ getptp = false;
if (pmap != pmap_kernel()) {
- error = pmap_get_ptp(pmap, &pt, va, flags, &ptp);
- if (error != 0) {
- if (flags & PMAP_CANFAIL) {
- mutex_exit(&pmap->pm_lock);
- return error;
+ ptp = pmap_find_ptp(pmap, va, 1);
+ if (ptp == NULL) {
+ getptp = true;
+ error = pmap_get_ptp(pmap, &pt, va, flags, &ptp);
+ if (error != 0) {
+ if (flags & PMAP_CANFAIL) {
+ mutex_exit(&pmap->pm_lock);
+ return error;
+ }
+ panic("%s: get ptp failed, error=%d", __func__,
+ error);
}
- panic("%s: get ptp failed, error=%d", __func__,
- error);
}
}
/*
* Now check to see if we need a pv entry for this VA. If we do,
- * allocate and install in the radix tree. In any case look up the
+ * allocate and install in the PV tree. In any case look up the
* pv entry in case the old mapping used it.
*/
- pve = pmap_pvmap_lookup(pmap, va);
- if (pve == NULL && pmap_pp_needs_pve(new_pp, ptp, va)) {
- pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
- if (pve == NULL) {
+ old_pve = NULL;
+ new_pve = NULL;
+ if (pmap_pp_needs_pve(new_pp, ptp, va)) {
+ new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
+ if (new_pve == NULL) {
if (flags & PMAP_CANFAIL) {
- if (ptp != NULL) {
+ if (getptp) {
pmap_unget_ptp(pmap, &pt);
}
mutex_exit(&pmap->pm_lock);
@@ -4334,26 +4428,13 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
}
panic("%s: alloc pve failed", __func__);
}
- error = pmap_pvmap_insert(pmap, va, pve);
- if (error != 0) {
- if (flags & PMAP_CANFAIL) {
- if (ptp != NULL) {
- pmap_unget_ptp(pmap, &pt);
- }
- pool_cache_put(&pmap_pv_cache, pve);
- mutex_exit(&pmap->pm_lock);
- return error;
- }
- panic("%s: radixtree insert failed, error=%d",
- __func__, error);
- }
}
/* Map PTEs into address space. */
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
/* Install any newly allocated PTPs. */
- if (ptp != NULL) {
+ if (getptp) {
pmap_install_ptp(pmap, &pt, va, pdes);
}
@@ -4412,11 +4493,6 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
*/
if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) {
KASSERT(((opte ^ npte) & PTE_PVLIST) == 0);
- if ((opte & PTE_PVLIST) != 0 && pve != NULL) {
- KASSERT(pve->pve_pte.pte_ptp == ptp);
- KASSERT(pve->pve_pte.pte_va == va);
- pve = NULL;
- }
goto same_pa;
}
@@ -4433,15 +4509,16 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
__func__, va, oldpa, atop(pa));
}
- pmap_remove_pv(pmap, old_pp, ptp, va, pve);
- old_pp->pp_attrs |= pmap_pte_to_pp_attrs(opte);
+ old_pve = pmap_lookup_pv(pmap, ptp, old_pp, va);
+ pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
+ pmap_pte_to_pp_attrs(opte));
}
/*
* If new page is pv-tracked, insert pv_entry into its list.
*/
if (new_pp) {
- pve = pmap_enter_pv(pmap, new_pp, pve, ptp, va);
+ new_pve = pmap_enter_pv(pmap, new_pp, new_pve, ptp, va);
}
same_pa:
@@ -4459,9 +4536,11 @@ same_pa:
out:
#endif
pmap_unmap_ptes(pmap, pmap2);
- if (pve != NULL) {
- pmap_pvmap_remove(pmap, va, pve);
- pool_cache_put(&pmap_pv_cache, pve);
+ if (old_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, old_pve);
+ }
+ if (new_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, new_pve);
}
mutex_exit(&pmap->pm_lock);
return error;
@@ -4805,13 +4884,13 @@ pmap_update(struct pmap *pmap)
if (!LIST_EMPTY(&pmap->pm_gc_ptp)) {
mutex_enter(&pmap->pm_lock);
while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
+ KASSERT(ptp->wire_count == 0);
LIST_REMOVE(ptp, mdpage.mp_pp.pp_link);
pp = VM_PAGE_TO_PP(ptp);
LIST_INIT(&pp->pp_pvlist);
- KASSERT((pp->pp_flags & PP_FREEING) != 0);
- KASSERT(ptp->wire_count == 0);
- pp->pp_flags &= ~PP_FREEING;
-
+ pp->pp_attrs = 0;
+ pp->pp_pflags = 0;
+
/*
* XXX Hack to avoid extra locking, and lock
* assertions in uvm_pagefree(). Despite uobject
@@ -5093,7 +5172,7 @@ pmap_ept_free_ptp(struct pmap *pmap, str
pmap_freepage(pmap, ptp, level);
if (level < PTP_LEVELS - 1) {
- ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
+ ptp = pmap_find_ptp(pmap, va, level + 1);
ptp->wire_count--;
if (ptp->wire_count > 1)
break;
@@ -5160,11 +5239,12 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
struct vm_page *ptp;
struct vm_page *new_pg, *old_pg;
struct pmap_page *new_pp, *old_pp;
- struct pv_entry *pve;
+ struct pv_entry *old_pve, *new_pve;
bool wired = (flags & PMAP_WIRED) != 0;
bool accessed;
struct pmap_ptparray pt;
int error;
+ bool getptp;
KASSERT(pmap_initialized);
KASSERT(pmap->pm_remove_all == NULL);
@@ -5194,18 +5274,25 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
new_pp = NULL;
}
- /* Make sure we have PTPs allocated. */
+ /* Begin by locking the pmap. */
mutex_enter(&pmap->pm_lock);
+
+ /* Look up the PTP. Allocate if none present. */
ptp = NULL;
+ getptp = false;
if (pmap != pmap_kernel()) {
- error = pmap_get_ptp(pmap, &pt, va, flags, &ptp);
- if (error != 0) {
- if (flags & PMAP_CANFAIL) {
- mutex_exit(&pmap->pm_lock);
- return error;
+ ptp = pmap_find_ptp(pmap, va, 1);
+ if (ptp == NULL) {
+ getptp = true;
+ error = pmap_get_ptp(pmap, &pt, va, flags, &ptp);
+ if (error != 0) {
+ if (flags & PMAP_CANFAIL) {
+ mutex_exit(&pmap->pm_lock);
+ return error;
+ }
+ panic("%s: get ptp failed, error=%d", __func__,
+ error);
}
- panic("%s: get ptp failed, error=%d", __func__,
- error);
}
}
@@ -5214,12 +5301,13 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
* allocate and install in the radix tree. In any case look up the
* pv entry in case the old mapping used it.
*/
- pve = pmap_pvmap_lookup(pmap, va);
- if (pve == NULL && pmap_pp_needs_pve(new_pp, ptp, va)) {
- pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
- if (pve == NULL) {
+ old_pve = NULL;
+ new_pve = NULL;
+ if (pmap_pp_needs_pve(new_pp, ptp, va)) {
+ new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
+ if (new_pve == NULL) {
if (flags & PMAP_CANFAIL) {
- if (ptp != NULL) {
+ if (getptp) {
pmap_unget_ptp(pmap, &pt);
}
mutex_exit(&pmap->pm_lock);
@@ -5227,26 +5315,13 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
}
panic("%s: alloc pve failed", __func__);
}
- error = pmap_pvmap_insert(pmap, va, pve);
- if (error != 0) {
- if (flags & PMAP_CANFAIL) {
- if (ptp != NULL) {
- pmap_unget_ptp(pmap, &pt);
- }
- pool_cache_put(&pmap_pv_cache, pve);
- mutex_exit(&pmap->pm_lock);
- return error;
- }
- panic("%s: radixtree insert failed, error=%d",
- __func__, error);
- }
}
/* Map PTEs into address space. */
kpreempt_disable();
/* Install any newly allocated PTPs. */
- if (ptp != NULL) {
+ if (getptp) {
pmap_ept_install_ptp(pmap, &pt, va);
}
@@ -5290,11 +5365,6 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
*/
if (((opte ^ npte) & (PTE_FRAME | EPT_R)) == 0) {
KASSERT(((opte ^ npte) & EPT_PVLIST) == 0);
- if ((opte & EPT_PVLIST) != 0 && pve != NULL) {
- KASSERT(pve->pve_pte.pte_ptp == ptp);
- KASSERT(pve->pve_pte.pte_va == va);
- pve = NULL;
- }
goto same_pa;
}
@@ -5311,15 +5381,16 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
__func__, va, oldpa, atop(pa));
}
- pmap_remove_pv(pmap, old_pp, ptp, va, pve);
- old_pp->pp_attrs |= pmap_ept_to_pp_attrs(opte);
+ old_pve = pmap_lookup_pv(pmap, ptp, old_pp, va);
+ pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
+ pmap_ept_to_pp_attrs(opte));
}
/*
* If new page is pv-tracked, insert pv_entry into its list.
*/
if (new_pp) {
- pve = pmap_enter_pv(pmap, new_pp, pve, ptp, va);
+ new_pve = pmap_enter_pv(pmap, new_pp, new_pve, ptp, va);
}
same_pa:
@@ -5334,9 +5405,11 @@ same_pa:
error = 0;
kpreempt_enable();
- if (pve != NULL) {
- pmap_pvmap_remove(pmap, va, pve);
- pool_cache_put(&pmap_pv_cache, pve);
+ if (old_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, old_pve);
+ }
+ if (new_pve != NULL) {
+ pool_cache_put(&pmap_pv_cache, new_pve);
}
mutex_exit(&pmap->pm_lock);
@@ -5477,9 +5550,8 @@ pmap_ept_remove_pte(struct pmap *pmap, s
}
/* Sync R/M bits. */
- pve = pmap_pvmap_lookup(pmap, va);
- pp->pp_attrs |= pmap_ept_to_pp_attrs(opte);
- pmap_remove_pv(pmap, pp, ptp, va, pve);
+ pve = pmap_lookup_pv(pmap, ptp, pp, va);
+ pmap_remove_pv(pmap, pp, ptp, va, pve, pmap_ept_to_pp_attrs(opte));
if (pve) {
pve->pve_next = *pv_tofree;
@@ -5544,7 +5616,7 @@ pmap_ept_remove(struct pmap *pmap, vaddr
/* PA of the PTP */
ptppa = pmap_pte2pa(pde);
- ptp = pmap_find_ptp(pmap, va, ptppa, 1);
+ ptp = pmap_find_ptp(pmap, va, 1);
KASSERTMSG(ptp != NULL, "%s: unmanaged PTP detected",
__func__);