Module Name:	src
Committed By:	ad
Date:		Sat Jan 4 22:49:20 UTC 2020
Modified Files:
	src/sys/arch/x86/include: pmap.h pmap_pv.h
	src/sys/arch/x86/x86: pmap.c
	src/sys/arch/xen/x86: xen_pmap.c

Log Message:
x86 pmap improvements, reducing system time during a build by about 15%
on my test machine:

- Replace the global pv_hash with a per-pmap record of dynamically
  allocated pv entries.  The data structure used for this can be changed
  easily, and has no special concurrency requirements.  For now go with
  radixtree.

- Change pmap_pdp_cache back into a pool; cache the page directory with
  the pmap, and avoid contention on pmaps_lock by adjusting the global
  list in the pool_cache ctor & dtor.  Align struct pmap and its lock,
  and update some comments.

- Simplify pv_entry lists slightly.  Allow both PP_EMBEDDED and
  dynamically allocated entries to co-exist on a single page.  This adds
  a pointer to struct vm_page on x86, but shrinks pv_entry to 32 bytes
  (which also gets it nicely aligned).

- More elegantly solve the chicken-and-egg problem introduced into the
  pmap with radixtree lookup for pages, where we need PTEs mapped and
  page allocations to happen under a single hold of the pmap's lock.
  While here undo some cut-n-paste.

- Don't adjust pmap_kernel's stats with atomics, because its mutex is
  now held in the places the stats are changed.


To generate a diff of this commit:
cvs rdiff -u -r1.107 -r1.108 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.8 -r1.9 src/sys/arch/x86/include/pmap_pv.h
cvs rdiff -u -r1.352 -r1.353 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.34 -r1.35 src/sys/arch/xen/x86/xen_pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
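The heart of the pv_hash replacement is that each pmap now carries its own
radixtree(9) of dynamically allocated pv entries, keyed by the page index of
the virtual address and protected by the pmap's own lock.  A minimal sketch
of the idea follows; the example_pv_track()/example_pv_untrack() names are
invented for illustration only, while the real helpers in the diff below are
pmap_pvmap_insert(), pmap_pvmap_lookup() and pmap_pvmap_remove():

	/*
	 * Illustrative only: tracking a non-embedded pv_entry in the
	 * per-pmap tree, keyed by the VA's page index.  Assumes the
	 * pmap is already locked, as the real code asserts, and that
	 * pve came from pmap_pv_cache.
	 */
	static int
	example_pv_track(struct pmap *pmap, vaddr_t va, struct pv_entry *pve)
	{

		KASSERT(mutex_owned(&pmap->pm_lock));

		/* One slot per user VA page; replaces the old global pv_hash. */
		return radix_tree_insert_node(&pmap->pm_pvtree,
		    va >> PAGE_SHIFT, pve);
	}

	static void
	example_pv_untrack(struct pmap *pmap, vaddr_t va)
	{
		struct pv_entry *pve;

		KASSERT(mutex_owned(&pmap->pm_lock));

		/* The same key finds the entry again on removal. */
		pve = radix_tree_lookup_node(&pmap->pm_pvtree, va >> PAGE_SHIFT);
		if (pve != NULL) {
			radix_tree_remove_node(&pmap->pm_pvtree, va >> PAGE_SHIFT);
			pool_cache_put(&pmap_pv_cache, pve);
		}
	}

Because the tree is private to the pmap and only touched with pm_lock held,
the old global pv_hash_heads[] array and its pv_hash_locks[] spin locks can
be removed outright, and no cross-pmap locking is needed for pv bookkeeping.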
Modified files: Index: src/sys/arch/x86/include/pmap.h diff -u src/sys/arch/x86/include/pmap.h:1.107 src/sys/arch/x86/include/pmap.h:1.108 --- src/sys/arch/x86/include/pmap.h:1.107 Sun Dec 15 19:24:11 2019 +++ src/sys/arch/x86/include/pmap.h Sat Jan 4 22:49:20 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.107 2019/12/15 19:24:11 ad Exp $ */ +/* $NetBSD: pmap.h,v 1.108 2020/01/04 22:49:20 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -67,6 +67,8 @@ #ifndef _X86_PMAP_H_ #define _X86_PMAP_H_ +#include <sys/radixtree.h> + /* * pl*_pi: index in the ptp page for a pde mapping a VA. * (pl*_i below is the index in the virtual array of all pdes per level) @@ -232,9 +234,9 @@ extern struct pmap_head pmaps; extern kmutex_t pmaps_lock; /* protects pmaps */ /* - * pool_cache(9) that PDPs are allocated from + * pool_cache(9) that pmaps are allocated from */ -extern struct pool_cache pmap_pdp_cache; +extern struct pool_cache pmap_cache; /* * the pmap structure @@ -248,14 +250,14 @@ extern struct pool_cache pmap_pdp_cache; */ struct pmap { - struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */ - kmutex_t pm_lock; /* locks for pm_objs */ - LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ - pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ + struct uvm_object pm_obj[PTP_LEVELS-1];/* objects for lvl >= 1) */ + LIST_ENTRY(pmap) pm_list; /* list of all pmaps */ + pd_entry_t *pm_pdir; /* VA of PD */ paddr_t pm_pdirpa[PDP_SIZE]; /* PA of PDs (read-only after create) */ struct vm_page *pm_ptphint[PTP_LEVELS-1]; /* pointer to a PTP in our pmap */ - struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ + struct radix_tree pm_pvtree; /* tree of non-embedded pv entries */ + struct pmap_statistics pm_stats; /* pmap stats */ #if !defined(__x86_64__) vaddr_t pm_hiexec; /* highest executable mapping */ @@ -286,6 +288,9 @@ struct pmap { void (*pm_tlb_flush)(struct pmap *); void *pm_data; + + kmutex_t pm_lock /* locks for pm_objs */ + __aligned(64); /* give lock own cache line */ }; /* macro to access pm_pdirpa slots */ @@ -374,7 +379,7 @@ void pmap_pv_untrack(paddr_t, psize_t); void pmap_map_ptes(struct pmap *, struct pmap **, pd_entry_t **, pd_entry_t * const **); -void pmap_unmap_ptes(struct pmap *, struct pmap *, struct vm_page *); +void pmap_unmap_ptes(struct pmap *, struct pmap *); bool pmap_pdes_valid(vaddr_t, pd_entry_t * const *, pd_entry_t *, int *lastlvl); Index: src/sys/arch/x86/include/pmap_pv.h diff -u src/sys/arch/x86/include/pmap_pv.h:1.8 src/sys/arch/x86/include/pmap_pv.h:1.9 --- src/sys/arch/x86/include/pmap_pv.h:1.8 Thu Jan 2 21:39:42 2020 +++ src/sys/arch/x86/include/pmap_pv.h Sat Jan 4 22:49:20 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_pv.h,v 1.8 2020/01/02 21:39:42 ad Exp $ */ +/* $NetBSD: pmap_pv.h,v 1.9 2020/01/04 22:49:20 ad Exp $ */ /*- * Copyright (c)2008 YAMAMOTO Takashi, @@ -55,8 +55,7 @@ struct pv_pte { struct pv_entry { struct pv_pte pve_pte; /* should be the first member */ - LIST_ENTRY(pv_entry) pve_list; /* on pv_head::pvh_list */ - SLIST_ENTRY(pv_entry) pve_hash; + LIST_ENTRY(pv_entry) pve_list; /* on pmap_page::pp_pvlist */ }; #define pve_next pve_list.le_next @@ -69,16 +68,11 @@ struct pmap_page { /* PP_EMBEDDED */ struct pv_pte u_pte; - /* !PP_EMBEDDED */ - struct pv_head { - LIST_HEAD(, pv_entry) pvh_list; - } u_head; - /* PTPs */ struct vm_page *u_link; } pp_u; + LIST_HEAD(, pv_entry) pp_pvlist; #define pp_pte pp_u.u_pte -#define pp_head pp_u.u_head #define pp_link pp_u.u_link uint8_t pp_flags; uint8_t 
pp_attrs; @@ -90,6 +84,6 @@ struct pmap_page { /* pp_flags */ #define PP_EMBEDDED 1 -#define PMAP_PAGE_INIT(pp) /* none */ +#define PMAP_PAGE_INIT(pp) LIST_INIT(&(pp)->pp_pvlist) #endif /* !_X86_PMAP_PV_H_ */ Index: src/sys/arch/x86/x86/pmap.c diff -u src/sys/arch/x86/x86/pmap.c:1.352 src/sys/arch/x86/x86/pmap.c:1.353 --- src/sys/arch/x86/x86/pmap.c:1.352 Thu Jan 2 21:39:42 2020 +++ src/sys/arch/x86/x86/pmap.c Sat Jan 4 22:49:20 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.352 2020/01/02 21:39:42 ad Exp $ */ +/* $NetBSD: pmap.c,v 1.353 2020/01/04 22:49:20 ad Exp $ */ /* * Copyright (c) 2008, 2010, 2016, 2017, 2019 The NetBSD Foundation, Inc. @@ -130,7 +130,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.352 2020/01/02 21:39:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.353 2020/01/04 22:49:20 ad Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -213,8 +213,8 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3 * - struct pmap_page: describes one pv-tracked page, without * necessarily a corresponding vm_page * - struct pv_entry: describes one <PMAP,VA> mapping of a PA - * - struct pv_head: there is one pv_head per pv-tracked page of - * physical memory. the pv_head points to a list of pv_entry + * - pmap_page::pp_pvlist: there is one list per pv-tracked page of + * physical memory. the pp_pvlist points to a list of pv_entry * structures which describe all the <PMAP,VA> pairs that this * page is mapped in. this is critical for page based operations * such as pmap_page_protect() [change protection on _all_ mappings @@ -224,16 +224,19 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3 /* * Locking * - * We have the following locks that we must contend with: + * We have the following locks that we must contend with, listed in the + * order that they must be acquired: * - * - pmap lock (per pmap, part of uvm_object) - * This lock protects the fields in the pmap structure including the - * non-kernel PDEs in the PDP, and the PTEs. + * - pg->uobject->vmobjlock, pg->uanon->an_lock + * These per-object locks are taken by the VM system before calling into + * the pmap module. Holding them prevents concurrent operations on the + * given page or set of pages. Asserted with uvm_page_owner_locked_p(). * - * - pvh_lock (per pv_head) - * This lock protects the pv_entry list which is chained off the pv_head - * structure for a specific pv-tracked PA. It is locked when traversing - * the list (e.g. adding/removing mappings, syncing R/M bits, etc). + * - pmap->pm_lock (per pmap) + * This lock protects the fields in the pmap structure including the + * non-kernel PDEs in the PDP, the PTEs, and the PVE radix tree. For + * modifying kernel PTEs it is not required as kernel PDEs are never + * freed, and the kernel is expected to be self consistent. * * - pmaps_lock * This lock protects the list of active pmaps (headed by "pmaps"). 
We @@ -254,7 +257,7 @@ pd_entry_t *normal_pdes[3]; long nkptp[] = NKPTP_INITIALIZER; struct pmap_head pmaps; -kmutex_t pmaps_lock; +kmutex_t pmaps_lock __cacheline_aligned; struct pcpu_area *pcpuarea __read_mostly; @@ -275,7 +278,7 @@ static bool cpu_pat_enabled __read_mostl * Global data structures */ -static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ +static struct pmap kernel_pmap_store __cacheline_aligned; /* kernel's pmap */ struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; struct bootspace bootspace __read_mostly; @@ -301,61 +304,6 @@ paddr_t pmap_pa_end; /* PA of last phy #define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) -#define PV_HASH_SIZE 32768 -#define PV_HASH_LOCK_CNT 32 - -struct pv_hash_lock { - kmutex_t lock; -} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] - __aligned(CACHE_LINE_SIZE); - -struct pv_hash_head { - SLIST_HEAD(, pv_entry) hh_list; -} pv_hash_heads[PV_HASH_SIZE]; - -static u_int -pvhash_hash(struct vm_page *ptp, vaddr_t va) -{ - - return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); -} - -static struct pv_hash_head * -pvhash_head(u_int hash) -{ - - return &pv_hash_heads[hash % PV_HASH_SIZE]; -} - -static kmutex_t * -pvhash_lock(u_int hash) -{ - - return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; -} - -static struct pv_entry * -pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) -{ - struct pv_entry *pve; - struct pv_entry *prev; - - prev = NULL; - SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { - if (pve->pve_pte.pte_ptp == ptp && - pve->pve_pte.pte_va == va) { - if (prev != NULL) { - SLIST_REMOVE_AFTER(prev, pve_hash); - } else { - SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); - } - break; - } - prev = pve; - } - return pve; -} - /* * Other data structures */ @@ -384,7 +332,9 @@ paddr_t local_apic_pa __read_mostly; /* * pool that pmap structures are allocated from */ -static struct pool_cache pmap_cache; +struct pool_cache pmap_cache; +static int pmap_ctor(void *, void *, int); +static void pmap_dtor(void *, void *); /* * pv_entry cache @@ -411,10 +361,10 @@ static char *early_zerop; int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); -/* PDP pool_cache(9) and its callbacks */ -struct pool_cache pmap_pdp_cache; -static int pmap_pdp_ctor(void *, void *, int); -static void pmap_pdp_dtor(void *, void *); +/* PDP pool and its callbacks */ +static struct pool pmap_pdp_pool; +static void pmap_pdp_init(pd_entry_t *); +static void pmap_pdp_fini(pd_entry_t *); #ifdef PAE /* need to allocate items of 4 pages */ @@ -439,6 +389,12 @@ extern paddr_t ldt_paddr; extern vaddr_t pentium_idt_vaddr; #endif +/* Array of freshly allocated PTPs, for pmap_get_ptp(). 
*/ +struct pmap_ptparray { + struct vm_page *pg[PTP_LEVELS + 1]; + bool alloced[PTP_LEVELS + 1]; +}; + /* * Local prototypes */ @@ -457,8 +413,11 @@ static void pmap_init_lapic(void); static void pmap_remap_largepages(void); #endif -static int pmap_get_ptp(struct pmap *, vaddr_t, - pd_entry_t * const *, int, struct vm_page **); +static int pmap_get_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, int, + struct vm_page **); +static void pmap_unget_ptp(struct pmap *, struct pmap_ptparray *); +static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, + pd_entry_t * const *); static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); static void pmap_freepages(struct pmap *, struct vm_page *); static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t, @@ -468,6 +427,10 @@ static bool pmap_remove_pte(struct pmap static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t, vaddr_t, struct pv_entry **); +static int pmap_pvmap_insert(struct pmap *, vaddr_t, struct pv_entry *); +static struct pv_entry *pmap_pvmap_lookup(struct pmap *, vaddr_t); +static void pmap_pvmap_remove(struct pmap *, vaddr_t, struct pv_entry *); + static void pmap_alloc_level(struct pmap *, vaddr_t, long *); static void pmap_load1(struct lwp *, struct pmap *, struct pmap *); @@ -477,21 +440,16 @@ static void pmap_reactivate(struct pmap * p m a p h e l p e r f u n c t i o n s */ -static void +static inline void pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) { - if (pmap == pmap_kernel()) { - atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); - atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); - } else { - KASSERT(mutex_owned(&pmap->pm_lock)); - pmap->pm_stats.resident_count += resid_diff; - pmap->pm_stats.wired_count += wired_diff; - } + KASSERT(cold || mutex_owned(&pmap->pm_lock)); + pmap->pm_stats.resident_count += resid_diff; + pmap->pm_stats.wired_count += wired_diff; } -static void +static inline void pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) { int resid_diff = ((npte & PTE_P) ? 1 : 0) - ((opte & PTE_P) ? 1 : 0); @@ -549,7 +507,7 @@ pv_pte_first(struct pmap_page *pp) if ((pp->pp_flags & PP_EMBEDDED) != 0) { return &pp->pp_pte; } - return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); + return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist)); } static struct pv_pte * @@ -559,9 +517,8 @@ pv_pte_next(struct pmap_page *pp, struct KASSERT(pvpte != NULL); if (pvpte == &pp->pp_pte) { KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); - return NULL; + return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist)); } - KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); } @@ -601,8 +558,9 @@ pmap_reference(struct pmap *pmap) * - vm_map_pmap(&curproc->p_vmspace->vm_map) * current process' pmap. * - * => we lock enough pmaps to keep things locked in + * => caller must lock pmap first (if not the kernel pmap) * => must be undone with pmap_unmap_ptes before returning + * => disables kernel preemption */ void pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, pd_entry_t **ptepp, @@ -612,6 +570,8 @@ pmap_map_ptes(struct pmap *pmap, struct struct cpu_info *ci; lwp_t *l; + kpreempt_disable(); + /* The kernel's pmap is always accessible. 
*/ if (pmap == pmap_kernel()) { *pmap2 = NULL; @@ -619,39 +579,35 @@ pmap_map_ptes(struct pmap *pmap, struct *pdeppp = normal_pdes; return; } - KASSERT(kpreempt_disabled()); + + KASSERT(mutex_owned(&pmap->pm_lock)); l = curlwp; - retry: - mutex_enter(&pmap->pm_lock); - ci = curcpu(); + ci = l->l_cpu; curpmap = ci->ci_pmap; - if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { - /* Our own pmap so just load it: easy. */ - if (__predict_false(ci->ci_want_pmapload)) { - mutex_exit(&pmap->pm_lock); - pmap_load(); - goto retry; - } - KASSERT(pmap == curpmap); - } else if (pmap == curpmap) { + if (pmap == curpmap) { /* * Already on the CPU: make it valid. This is very * often the case during exit(), when we have switched * to the kernel pmap in order to destroy a user pmap. */ pmap_reactivate(pmap); + *pmap2 = NULL; } else { /* * Toss current pmap from CPU and install new pmap, but keep - * a reference to the old one - dropping the reference can - * block, so we'll defer to pmap_unmap_ptes(). + * a reference to the old one. Dropping the reference can + * can block as it needs to take locks, so defer that to + * pmap_unmap_ptes(). */ pmap_reference(pmap); pmap_load1(l, pmap, curpmap); + *pmap2 = curpmap; } + KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); +#ifdef DIAGNOSTIC pmap->pm_ncsw = lwp_pctr(); - *pmap2 = curpmap; +#endif *ptepp = PTE_BASE; #if defined(XENPV) && defined(__x86_64__) @@ -665,68 +621,52 @@ pmap_map_ptes(struct pmap *pmap, struct /* * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + * + * => we cannot tolerate context switches while mapped in: assert this. + * => reenables kernel preemption. + * => does not unlock pmap. */ void -pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2, - struct vm_page *ptp_tofree) +pmap_unmap_ptes(struct pmap *pmap, struct pmap * pmap2) { struct cpu_info *ci; struct pmap *mypmap; + struct lwp *l; KASSERT(kpreempt_disabled()); /* The kernel's pmap is always accessible. */ if (pmap == pmap_kernel()) { - KASSERT(ptp_tofree == NULL); + kpreempt_enable(); return; } - ci = curcpu(); + l = curlwp; + ci = l->l_cpu; + + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(pmap->pm_ncsw == lwp_pctr()); #if defined(XENPV) && defined(__x86_64__) KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; #endif - /* - * We cannot tolerate context switches while mapped in. - * If it is our own pmap all we have to do is unlock. - */ - KASSERT(pmap->pm_ncsw == lwp_pctr()); - mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); - if (pmap == mypmap) { - /* Now safe to free PTPs, with the pmap still locked. */ - if (ptp_tofree != NULL) { - pmap_freepages(pmap, ptp_tofree); - } - mutex_exit(&pmap->pm_lock); - return; - } - - /* - * Mark whatever's on the CPU now as lazy and unlock. - * If the pmap was already installed, we are done. - */ - if (ci->ci_pmap != mypmap) { + /* If not our own pmap, mark whatever's on the CPU now as lazy. */ + KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); + mypmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); + if (ci->ci_pmap == vm_map_pmap(&l->l_proc->p_vmspace->vm_map)) { + ci->ci_want_pmapload = 0; + } else { ci->ci_want_pmapload = (mypmap != pmap_kernel()); - if (ci->ci_tlbstate == TLBSTATE_VALID) { - ci->ci_tlbstate = TLBSTATE_LAZY; - } else { - /* - * This can happen when undoing after pmap_get_ptp - * blocked. - */ - } + ci->ci_tlbstate = TLBSTATE_LAZY; } - /* Now safe to free PTPs, with the pmap still locked. 
*/ - if (ptp_tofree != NULL) { - pmap_freepages(pmap, ptp_tofree); - } - mutex_exit(&pmap->pm_lock); + /* Now safe to re-enable preemption. */ + kpreempt_enable(); - /* Toss the pmap we evicted earlier (can block). */ - if (pmap != pmap2) { + /* Toss reference to other pmap taken earlier. */ + if (pmap2 != NULL) { pmap_destroy(pmap2); } } @@ -1239,11 +1179,7 @@ pmap_bootstrap(vaddr_t kva_start) virtual_avail = reserve_dumppages(virtual_avail); /* - * Init the static-global locks and global lists. - * - * => pventry::pvh_lock (initialized elsewhere) must also be - * a spin lock, again at IPL_VM to prevent deadlock, and - * again is never taken from interrupt context. + * Init the global lock and global list. */ mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); LIST_INIT(&pmaps); @@ -1709,21 +1645,14 @@ pmap_remap_largepages(void) void pmap_init(void) { - int i, flags; - - for (i = 0; i < PV_HASH_SIZE; i++) { - SLIST_INIT(&pv_hash_heads[i].hh_list); - } - for (i = 0; i < PV_HASH_LOCK_CNT; i++) { - mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); - } + int flags; /* * initialize caches. */ - pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, - "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); + pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), COHERENCY_UNIT, + 0, 0, "pmappl", NULL, IPL_NONE, pmap_ctor, pmap_dtor, NULL); #ifdef XENPV /* @@ -1736,12 +1665,11 @@ pmap_init(void) #endif #ifdef PAE - pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, flags, - "pdppl", &pmap_pdp_allocator, IPL_NONE, - pmap_pdp_ctor, pmap_pdp_dtor, NULL); + pool_init(&pmap_pdp_pool, PAGE_SIZE * PDP_SIZE, 0, 0, flags, + "pdppl", &pmap_pdp_allocator, IPL_NONE); #else - pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, flags, - "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); + pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, flags, + "pdppl", NULL, IPL_NONE); #endif pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, PR_LARGECACHE, "pvpl", &pool_allocator_kmem, IPL_NONE, NULL, @@ -1757,6 +1685,8 @@ pmap_init(void) evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, NULL, "x86", "ldt sync"); + radix_tree_init_tree(&pmap_kernel()->pm_pvtree); + /* * done: pmap module is up (and ready for business) */ @@ -1838,99 +1768,131 @@ pmap_vpage_cpu_init(struct cpu_info *ci) * p v _ e n t r y f u n c t i o n s */ + +/* + * pmap_pp_needs_pve: return true if we need to allocate a pv entry and + * corresponding radix tree entry for the page. + */ static bool -pmap_pp_needs_pve(struct pmap_page *pp) +pmap_pp_needs_pve(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va) { /* * Adding a pv entry for this page only needs to allocate a pv_entry - * structure if the page already has at least one pv entry, - * since the first pv entry is stored in the pmap_page. + * structure if the page already has at least one pv entry, since + * the first pv entry is stored in the pmap_page. However, because + * of subsequent removal(s), PP_EMBEDDED can be false and there can + * still be pv entries on the list. */ - return pp && ((pp->pp_flags & PP_EMBEDDED) != 0 || - !LIST_EMPTY(&pp->pp_head.pvh_list)); + if (pp == NULL || (pp->pp_flags & PP_EMBEDDED) == 0) { + return false; + } + return pp->pp_pte.pte_ptp != ptp || pp->pp_pte.pte_va != va; } +/* + * pmap_free_pvs: free a linked list of pv entries. the pv entries have + * been removed from their respective pages, but are still entered into the + * map and we must undo that. + * + * => must be called with pmap locked. 
+ */ static void -pmap_free_pvs(struct pv_entry *pve) +pmap_free_pvs(struct pmap *pmap, struct pv_entry *pve) { struct pv_entry *next; + KASSERT(mutex_owned(&pmap->pm_lock)); + for ( /* null */ ; pve != NULL ; pve = next) { + pmap_pvmap_remove(pmap, pve->pve_pte.pte_va, pve); next = pve->pve_next; pool_cache_put(&pmap_pv_cache, pve); } } /* - * main pv_entry manipulation functions: - * pmap_enter_pv: enter a mapping onto a pv_head list - * pmap_remove_pv: remove a mapping from a pv_head list + * pmap_pvmap_lookup: look up a non-PP_EMBEDDED pv entry for the given pmap + * + * => pmap must be locked + */ + +static struct pv_entry * +pmap_pvmap_lookup(struct pmap *pmap, vaddr_t va) +{ + + KASSERT(mutex_owned(&pmap->pm_lock)); + + return radix_tree_lookup_node(&pmap->pm_pvtree, va >> PAGE_SHIFT); +} + +/* + * pmap_pvmap_insert: insert a non-PP_EMBEDDED pv entry for the given pmap * - * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock - * the pvh before calling + * => pmap must be locked + * => an error can be returned */ +static int +pmap_pvmap_insert(struct pmap *pmap, vaddr_t va, struct pv_entry *pve) +{ + + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(pmap_pvmap_lookup(pmap, va) == NULL); + + return radix_tree_insert_node(&pmap->pm_pvtree, va >> PAGE_SHIFT, pve); +} + /* - * insert_pv: a helper of pmap_enter_pv + * pmap_pvmap_remove: look up a non-PP_EMBEDDED pv entry for the given pmap + * + * => pmap must be locked */ + static void -insert_pv(struct pmap_page *pp, struct pv_entry *pve) +pmap_pvmap_remove(struct pmap *pmap, vaddr_t va, struct pv_entry *pve) { - struct pv_hash_head *hh; - kmutex_t *lock; - u_int hash; + struct pv_entry *pve2 __diagused; + + KASSERT(mutex_owned(&pmap->pm_lock)); - hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va); - lock = pvhash_lock(hash); - hh = pvhash_head(hash); - mutex_spin_enter(lock); - SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash); - mutex_spin_exit(lock); + pve2 = radix_tree_remove_node(&pmap->pm_pvtree, va >> PAGE_SHIFT); - LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list); + KASSERT(pve2 == pve); } /* - * pmap_enter_pv: enter a mapping onto a pv_head lst + * pmap_enter_pv: enter a mapping onto a pmap_page lst * * => caller should adjust ptp's wire_count before calling - * => caller has preallocated pve and *sparepve for us + * => caller has preallocated pve for us + * => if not embedded, tree node must be in place beforehand */ static struct pv_entry * -pmap_enter_pv(struct pmap_page *pp, struct pv_entry *pve, - struct pv_entry **sparepve, struct vm_page *ptp, vaddr_t va) +pmap_enter_pv(struct pmap *pmap, struct pmap_page *pp, struct pv_entry *pve, + struct vm_page *ptp, vaddr_t va) { + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(ptp_to_pmap(ptp) == pmap); KASSERT(ptp == NULL || ptp->wire_count >= 2); KASSERT(ptp == NULL || ptp->uobject != NULL); KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); if ((pp->pp_flags & PP_EMBEDDED) == 0) { - if (LIST_EMPTY(&pp->pp_head.pvh_list)) { - pp->pp_flags |= PP_EMBEDDED; - pp->pp_pte.pte_ptp = ptp; - pp->pp_pte.pte_va = va; - - return pve; - } - } else { - struct pv_entry *pve2; - - pve2 = *sparepve; - *sparepve = NULL; - - pve2->pve_pte = pp->pp_pte; - pp->pp_flags &= ~PP_EMBEDDED; - LIST_INIT(&pp->pp_head.pvh_list); - insert_pv(pp, pve2); + pp->pp_flags |= PP_EMBEDDED; + pp->pp_pte.pte_ptp = ptp; + pp->pp_pte.pte_va = va; + return pve; } + KASSERT(pve != NULL); pve->pve_pte.pte_ptp = ptp; pve->pve_pte.pte_va = va; - insert_pv(pp, pve); - + 
KASSERT(pmap_pvmap_lookup(pmap, va) != NULL); + KASSERT(pmap_pvmap_lookup(pmap, va) == pve); + LIST_INSERT_HEAD(&pp->pp_pvlist, pve, pve_list); return NULL; } @@ -1938,38 +1900,39 @@ pmap_enter_pv(struct pmap_page *pp, stru * pmap_remove_pv: try to remove a mapping from a pv_list * * => caller should adjust ptp's wire_count and free PTP if needed + * => we don't remove radix tree entry; defer till later (it could block) * => we return the removed pve + * => caller can optionally supply pve, if looked up already */ static struct pv_entry * -pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va) +pmap_remove_pv(struct pmap *pmap, struct pmap_page *pp, struct vm_page *ptp, + vaddr_t va, struct pv_entry *pve) { - struct pv_hash_head *hh; - struct pv_entry *pve; - kmutex_t *lock; - u_int hash; + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(ptp_to_pmap(ptp) == pmap); KASSERT(ptp == NULL || ptp->uobject != NULL); KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); - if ((pp->pp_flags & PP_EMBEDDED) != 0) { - KASSERT(pp->pp_pte.pte_ptp == ptp); - KASSERT(pp->pp_pte.pte_va == va); - + if ((pp->pp_flags & PP_EMBEDDED) != 0 && + pp->pp_pte.pte_ptp == ptp && + pp->pp_pte.pte_va == va) { + KASSERT(pve == NULL); pp->pp_flags &= ~PP_EMBEDDED; - LIST_INIT(&pp->pp_head.pvh_list); - + pp->pp_pte.pte_ptp = NULL; + pp->pp_pte.pte_va = 0; return NULL; } - hash = pvhash_hash(ptp, va); - lock = pvhash_lock(hash); - hh = pvhash_head(hash); - mutex_spin_enter(lock); - pve = pvhash_remove(hh, ptp, va); - mutex_spin_exit(lock); - + if (pve == NULL) { + pve = pmap_pvmap_lookup(pmap, va); + KASSERT(pve != NULL); + } else { + KASSERT(pve == pmap_pvmap_lookup(pmap, va)); + } + KASSERT(pve->pve_pte.pte_ptp == ptp); + KASSERT(pve->pve_pte.pte_va == va); LIST_REMOVE(pve, pve_list); - return pve; } @@ -2079,56 +2042,69 @@ pmap_free_ptp(struct pmap *pmap, struct * * => pmap should NOT be pmap_kernel() * => pmap should be locked - * => preemption should be disabled + * => we are not touching any PTEs yet, so they need not be mapped in */ static int -pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes, int flags, - struct vm_page **resultp) +pmap_get_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va, + int flags, struct vm_page **resultp) { struct vm_page *ptp; - struct { - struct vm_page *pg; - bool new; - } pt[PTP_LEVELS + 1]; - int i, aflags, error; - unsigned long index; - pd_entry_t *pva; - paddr_t pa; + int i, aflags; struct uvm_object *obj; voff_t off; - uint64_t ncsw; KASSERT(pmap != pmap_kernel()); KASSERT(mutex_owned(&pmap->pm_lock)); - KASSERT(kpreempt_disabled()); /* * Loop through all page table levels allocating a page * for any level where we don't already have one. */ - memset(pt, 0, sizeof(pt)); + memset(pt, 0, sizeof(*pt)); aflags = ((flags & PMAP_CANFAIL) ? 0 : UVM_PGA_USERESERVE) | UVM_PGA_ZERO; for (i = PTP_LEVELS; i > 1; i--) { obj = &pmap->pm_obj[i - 2]; off = ptp_va2o(va, i - 1); - pt[i].pg = uvm_pagelookup(obj, off); - if (pt[i].pg == NULL) { - ncsw = lwp_pctr(); - pt[i].pg = uvm_pagealloc(obj, off, NULL, aflags); - pt[i].new = true; - if (__predict_false(ncsw != lwp_pctr())) { - /* uvm_pagealloc can block. 
*/ - error = EAGAIN; - goto fail; - } + pt->pg[i] = uvm_pagelookup(obj, off); + if (pt->pg[i] == NULL) { + pt->pg[i] = uvm_pagealloc(obj, off, NULL, aflags); + pt->alloced[i] = true; } - if (pt[i].pg == NULL) { - error = ENOMEM; - goto fail; + if (pt->pg[i] == NULL) { + pmap_unget_ptp(pmap, pt); + return ENOMEM; } } + ptp = pt->pg[2]; + KASSERT(ptp != NULL); + *resultp = ptp; + pmap->pm_ptphint[0] = ptp; + return 0; +} + +/* + * pmap_install_ptp: instal any freshly allocated PTPs + * + * => pmap should NOT be pmap_kernel() + * => pmap should be locked + * => PTEs must be mapped + * => preemption must be disabled + */ +static void +pmap_install_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va, + pd_entry_t * const *pdes) +{ + struct vm_page *ptp; + unsigned long index; + pd_entry_t *pva; + paddr_t pa; + int i; + + KASSERT(pmap != pmap_kernel()); + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(kpreempt_disabled()); /* * Now that we have all the pages looked up or allocated, @@ -2139,11 +2115,11 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t pva = pdes[i - 2]; if (pmap_valid_entry(pva[index])) { - KASSERT(!pt[i].new); + KASSERT(!pt->alloced[i]); continue; } - ptp = pt[i].pg; + ptp = pt->pg[i]; ptp->flags &= ~PG_BUSY; /* never busy */ ptp->wire_count = 1; pmap->pm_ptphint[i - 2] = ptp; @@ -2173,31 +2149,36 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t * wire count of the parent page. */ if (i < PTP_LEVELS) { - pt[i + 1].pg->wire_count++; + pt->pg[i + 1]->wire_count++; } } - ptp = pt[2].pg; - KASSERT(ptp != NULL); - pmap->pm_ptphint[0] = ptp; - *resultp = ptp; - return 0; +} + +/* + * pmap_unget_ptp: free unusued PTPs + * + * => pmap should NOT be pmap_kernel() + * => pmap should be locked + */ +static void +pmap_unget_ptp(struct pmap *pmap, struct pmap_ptparray *pt) +{ + int i; + + KASSERT(pmap != pmap_kernel()); + KASSERT(mutex_owned(&pmap->pm_lock)); - /* - * Allocation of a PTP failed, free any others that we just allocated. - */ -fail: for (i = PTP_LEVELS; i > 1; i--) { - if (pt[i].pg == NULL) { + if (pt->pg[i] == NULL) { break; } - if (!pt[i].new) { + if (!pt->alloced[i]) { continue; } - uvm_pagefree(pt[i].pg); + uvm_pagefree(pt->pg[i]); + pt->pg[i] = NULL; + pmap->pm_ptphint[0] = NULL; } - /* XXX silence assertion in pmap_unmap_ptes */ - pmap->pm_ncsw = lwp_pctr(); - return error; } /* @@ -2205,12 +2186,11 @@ fail: */ /* - * pmap_pdp_ctor: constructor for the PDP cache. + * pmap_pdp_init: constructor a new PDP. */ -static int -pmap_pdp_ctor(void *arg, void *v, int flags) +static void +pmap_pdp_init(pd_entry_t *pdir) { - pd_entry_t *pdir = v; paddr_t pdirpa = 0; vaddr_t object; int i; @@ -2225,7 +2205,8 @@ pmap_pdp_ctor(void *arg, void *v, int fl memset(pdir, 0, PDP_SIZE * PAGE_SIZE); /* - * NOTE: The `pmaps_lock' is held when the PDP is allocated. + * NOTE: This is all done unlocked, but we will check afterwards + * if we have raced with pmap_growkernel(). 
*/ #if defined(XENPV) && defined(__x86_64__) @@ -2248,7 +2229,7 @@ pmap_pdp_ctor(void *arg, void *v, int fl pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] = (pd_entry_t)-1 & PTE_FRAME; #else /* XENPV && __x86_64__*/ - object = (vaddr_t)v; + object = (vaddr_t)pdir; for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { /* Fetch the physical address of the page directory */ (void)pmap_extract(pmap_kernel(), object, &pdirpa); @@ -2288,7 +2269,7 @@ pmap_pdp_ctor(void *arg, void *v, int fl #ifdef XENPV s = splvm(); - object = (vaddr_t)v; + object = (vaddr_t)pdir; pmap_protect(pmap_kernel(), object, object + (PAGE_SIZE * PDP_SIZE), VM_PROT_READ); pmap_update(pmap_kernel()); @@ -2316,19 +2297,17 @@ pmap_pdp_ctor(void *arg, void *v, int fl #endif splx(s); #endif /* XENPV */ - - return 0; } /* - * pmap_pdp_dtor: destructor for the PDP cache. + * pmap_pdp_fini: destructor for the PDPs. */ static void -pmap_pdp_dtor(void *arg, void *v) +pmap_pdp_fini(pd_entry_t *pdir) { #ifdef XENPV paddr_t pdirpa = 0; /* XXX: GCC */ - vaddr_t object = (vaddr_t)v; + vaddr_t object = (vaddr_t)pdir; int i; int s = splvm(); pt_entry_t *pte; @@ -2339,7 +2318,7 @@ pmap_pdp_dtor(void *arg, void *v) /* unpin page table */ xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa)); } - object = (vaddr_t)v; + object = (vaddr_t)pdir; for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { /* Set page RW again */ pte = kvtopte(object); @@ -2369,6 +2348,72 @@ pmap_pdp_free(struct pool *pp, void *v) #endif /* PAE */ /* + * pmap_ctor: constructor for the pmap cache. + */ +static int +pmap_ctor(void *arg, void *obj, int flags) +{ + struct pmap *pmap = obj; + pt_entry_t p; + int i; + + KASSERT((flags & PR_WAITOK) != 0); + + mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); + radix_tree_init_tree(&pmap->pm_pvtree); + kcpuset_create(&pmap->pm_cpus, true); + kcpuset_create(&pmap->pm_kernel_cpus, true); +#ifdef XENPV + kcpuset_create(&pmap->pm_xen_ptp_cpus, true); +#endif + + /* allocate and init PDP */ + pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK); + + for (;;) { + pmap_pdp_init(pmap->pm_pdir); + mutex_enter(&pmaps_lock); + p = pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1]; + if (__predict_true(p != 0)) { + break; + } + mutex_exit(&pmaps_lock); + } + + for (i = 0; i < PDP_SIZE; i++) + pmap->pm_pdirpa[i] = + pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]); + + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + mutex_exit(&pmaps_lock); + + return 0; +} + +/* + * pmap_ctor: destructor for the pmap cache. + */ +static void +pmap_dtor(void *arg, void *obj) +{ + struct pmap *pmap = obj; + + mutex_enter(&pmaps_lock); + LIST_REMOVE(pmap, pm_list); + mutex_exit(&pmaps_lock); + + pmap_pdp_fini(pmap->pm_pdir); + pool_put(&pmap_pdp_pool, pmap->pm_pdir); + radix_tree_fini_tree(&pmap->pm_pvtree); + mutex_destroy(&pmap->pm_lock); + kcpuset_destroy(pmap->pm_cpus); + kcpuset_destroy(pmap->pm_kernel_cpus); +#ifdef XENPV + kcpuset_destroy(pmap->pm_xen_ptp_cpus); +#endif +} + +/* * pmap_create: create a pmap object. 
*/ struct pmap * @@ -2380,7 +2425,6 @@ pmap_create(void) pmap = pool_cache_get(&pmap_cache, PR_WAITOK); /* init uvm_object */ - mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); for (i = 0; i < PTP_LEVELS - 1; i++) { uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1); uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_lock); @@ -2406,36 +2450,11 @@ pmap_create(void) pmap->pm_tlb_flush = NULL; pmap->pm_data = NULL; - kcpuset_create(&pmap->pm_cpus, true); - kcpuset_create(&pmap->pm_kernel_cpus, true); -#ifdef XENPV - kcpuset_create(&pmap->pm_xen_ptp_cpus, true); -#endif /* init the LDT */ pmap->pm_ldt = NULL; pmap->pm_ldt_len = 0; pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); - /* allocate PDP */ - try_again: - pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK); - - mutex_enter(&pmaps_lock); - - if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) { - mutex_exit(&pmaps_lock); - pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); - goto try_again; - } - - for (i = 0; i < PDP_SIZE; i++) - pmap->pm_pdirpa[i] = - pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]); - - LIST_INSERT_HEAD(&pmaps, pmap, pm_list); - - mutex_exit(&pmaps_lock); - return (pmap); } @@ -2451,7 +2470,7 @@ pmap_free_ptps(struct vm_page *empty_ptp while ((ptp = empty_ptps) != NULL) { pp = VM_PAGE_TO_PP(ptp); empty_ptps = pp->pp_link; - LIST_INIT(&pp->pp_head.pvh_list); + LIST_INIT(&pp->pp_pvlist); uvm_pagefree(ptp); } } @@ -2503,6 +2522,9 @@ pmap_check_inuse(struct pmap *pmap) /* * pmap_destroy: drop reference count on pmap. free pmap if * reference count goes to zero. + * + * => we can be called from pmap_unmap_ptes() with a different, unrelated + * pmap's lock held. be careful! */ void pmap_destroy(struct pmap *pmap) @@ -2536,13 +2558,8 @@ pmap_destroy(struct pmap *pmap) /* * Reference count is zero, free pmap resources and then free pmap. - * First, remove it from global list of pmaps. */ - mutex_enter(&pmaps_lock); - LIST_REMOVE(pmap, pm_list); - mutex_exit(&pmaps_lock); - /* * Process deferred PTP frees. No TLB shootdown required, as the * PTP pages are no longer visible to any CPU. @@ -2550,12 +2567,6 @@ pmap_destroy(struct pmap *pmap) pmap_free_ptps(pmap->pm_gc_ptp); - if (__predict_false(pmap->pm_enter != NULL)) { - pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); - } else { - pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); - } - #ifdef USER_LDT if (pmap->pm_ldt != NULL) { /* @@ -2576,15 +2587,20 @@ pmap_destroy(struct pmap *pmap) for (i = 0; i < PTP_LEVELS - 1; i++) { uvm_obj_destroy(&pmap->pm_obj[i], false); } - mutex_destroy(&pmap->pm_lock); - kcpuset_destroy(pmap->pm_cpus); - kcpuset_destroy(pmap->pm_kernel_cpus); + kcpuset_zero(pmap->pm_cpus); + kcpuset_zero(pmap->pm_kernel_cpus); #ifdef XENPV - kcpuset_destroy(pmap->pm_xen_ptp_cpus); + kcpuset_zero(pmap->pm_xen_ptp_cpus); #endif + KASSERT(radix_tree_empty_tree_p(&pmap->pm_pvtree)); pmap_check_ptps(pmap); - pool_cache_put(&pmap_cache, pmap); + if (__predict_false(pmap->pm_enter != NULL)) { + /* XXX make this a different cache */ + pool_cache_destruct_object(&pmap_cache, pmap); + } else { + pool_cache_put(&pmap_cache, pmap); + } } /* @@ -3114,7 +3130,6 @@ pmap_extract(struct pmap *pmap, vaddr_t pa = 0; l = curlwp; - kpreempt_disable(); ci = l->l_cpu; if (pmap == pmap_kernel() || __predict_true(!ci->ci_want_pmapload && ci->ci_pmap == pmap)) { @@ -3129,9 +3144,11 @@ pmap_extract(struct pmap *pmap, vaddr_t hard = false; ptes = PTE_BASE; pdes = normal_pdes; + kpreempt_disable(); } else { /* we lose, do it the hard way. 
*/ hard = true; + mutex_enter(&pmap->pm_lock); pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); } if (pmap_pdes_valid(va, pdes, &pde, &lvl)) { @@ -3148,9 +3165,11 @@ pmap_extract(struct pmap *pmap, vaddr_t } } if (__predict_false(hard)) { - pmap_unmap_ptes(pmap, pmap2, NULL); + pmap_unmap_ptes(pmap, pmap2); + mutex_exit(&pmap->pm_lock); + } else { + kpreempt_enable(); } - kpreempt_enable(); if (pap != NULL) { *pap = pa; } @@ -3423,7 +3442,7 @@ pmap_remove_ptes(struct pmap *pmap, stru { pt_entry_t *pte = (pt_entry_t *)ptpva; - KASSERT(pmap == pmap_kernel() || mutex_owned(&pmap->pm_lock)); + KASSERT(mutex_owned(&pmap->pm_lock)); KASSERT(kpreempt_disabled()); /* @@ -3485,7 +3504,7 @@ pmap_remove_pte(struct pmap *pmap, struc struct pmap_page *pp; pt_entry_t opte; - KASSERT(pmap == pmap_kernel() || mutex_owned(&pmap->pm_lock)); + KASSERT(mutex_owned(&pmap->pm_lock)); KASSERT(kpreempt_disabled()); if (!pmap_valid_entry(*pte)) { @@ -3517,7 +3536,7 @@ pmap_remove_pte(struct pmap *pmap, struc } /* - * If we are not on a pv_head list - we are done. + * If we are not on a pv list - we are done. */ if ((opte & PTE_PVLIST) == 0) { #ifndef DOM0OPS @@ -3541,7 +3560,7 @@ pmap_remove_pte(struct pmap *pmap, struc /* Sync R/M bits. */ pp->pp_attrs |= pmap_pte_to_pp_attrs(opte); - pve = pmap_remove_pv(pp, ptp, va); + pve = pmap_remove_pv(pmap, pp, ptp, va, NULL); if (pve) { pve->pve_next = *pv_tofree; @@ -3575,8 +3594,8 @@ pmap_remove(struct pmap *pmap, vaddr_t s return; } - kpreempt_disable(); - pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + mutex_enter(&pmap->pm_lock); + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* * removing one page? take shortcut function. @@ -3651,17 +3670,30 @@ pmap_remove(struct pmap *pmap, vaddr_t s } pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree); if (ptp_tofree != NULL) { - pmap_unmap_ptes(pmap, pmap2, ptp_tofree); + pmap_unmap_ptes(pmap, pmap2); + /* Now safe to free, with the pmap still locked. */ + pmap_freepages(pmap, ptp_tofree); ptp_tofree = NULL; + if (pv_tofree != NULL) { + pmap_free_pvs(pmap, pv_tofree); + pv_tofree = NULL; + } pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); } } - pmap_unmap_ptes(pmap, pmap2, ptp_tofree); /* unlock pmap */ - kpreempt_enable(); - - /* Now we free unused PVs */ - if (pv_tofree) - pmap_free_pvs(pv_tofree); + pmap_unmap_ptes(pmap, pmap2); + /* + * Now safe to free, as we no longer have the PTEs mapped and can + * block again. Radix tree nodes are removed here, so we need to + * continue holding the pmap locked until complete. + */ + if (ptp_tofree != NULL) { + pmap_freepages(pmap, ptp_tofree); + } + if (pv_tofree != NULL) { + pmap_free_pvs(pmap, pv_tofree); + } + mutex_exit(&pmap->pm_lock); } /* @@ -3776,20 +3808,25 @@ pmap_pp_remove_ent(struct pmap *pmap, st pt_entry_t *ptes; pd_entry_t * const *pdes; + KASSERT(mutex_owned(&pmap->pm_lock)); + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); pmap_stats_update_bypte(pmap, 0, opte); ptp->wire_count--; if (ptp->wire_count <= 1) { pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree); } - pmap_unmap_ptes(pmap, pmap2, ptp_tofree); + pmap_unmap_ptes(pmap, pmap2); + /* Now safe to free PTPs, with the pmap still locked. 
*/ + if (ptp_tofree != NULL) { + pmap_freepages(pmap, ptp_tofree); + } } static void pmap_pp_remove(struct pmap_page *pp, paddr_t pa) { struct pv_pte *pvpte; - struct pv_entry *killlist = NULL; struct vm_page *ptp; uint8_t oattrs; int count; @@ -3813,11 +3850,12 @@ startover: if (ptp != NULL) { pmap_reference(pmap); } - + mutex_enter(&pmap->pm_lock); error = pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte); if (error == EAGAIN) { int hold_count; KERNEL_UNLOCK_ALL(curlwp, &hold_count); + mutex_exit(&pmap->pm_lock); if (ptp != NULL) { pmap_destroy(pmap); } @@ -3828,7 +3866,7 @@ startover: pp->pp_attrs |= oattrs; va = pvpte->pte_va; - pve = pmap_remove_pv(pp, ptp, va); + pve = pmap_remove_pv(pmap, pp, ptp, va, NULL); /* Update the PTP reference count. Free if last reference. */ if (ptp != NULL) { @@ -3839,22 +3877,25 @@ startover: } else { pmap_pp_remove_ent(pmap, ptp, opte, va); } - pmap_destroy(pmap); } else { KASSERT(pmap == pmap_kernel()); pmap_stats_update_bypte(pmap, 0, opte); } - if (pve != NULL) { - pve->pve_next = killlist; /* mark it for death */ - killlist = pve; + /* + * Must free pve, and remove from pmap's radix tree + * with the pmap's lock still held. + */ + pve->pve_next = NULL; + pmap_free_pvs(pmap, pve); + } + mutex_exit(&pmap->pm_lock); + if (ptp != NULL) { + pmap_destroy(pmap); } } pmap_tlb_shootnow(); kpreempt_enable(); - - /* Now free unused pvs. */ - pmap_free_pvs(killlist); } /* @@ -4073,8 +4114,13 @@ pmap_write_protect(struct pmap *pmap, va sva &= ~PAGE_MASK; eva &= ~PAGE_MASK; - /* Acquire pmap. */ - kpreempt_disable(); + /* + * Acquire pmap. No need to lock the kernel pmap as we won't + * be touching the pvmap nor the stats. + */ + if (pmap != pmap_kernel()) { + mutex_enter(&pmap->pm_lock); + } pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); for (va = sva ; va < eva; va = blockend) { @@ -4115,8 +4161,10 @@ next:; } /* Release pmap. */ - pmap_unmap_ptes(pmap, pmap2, NULL); - kpreempt_enable(); + pmap_unmap_ptes(pmap, pmap2); + if (pmap != pmap_kernel()) { + mutex_exit(&pmap->pm_lock); + } } /* @@ -4137,8 +4185,11 @@ pmap_unwire(struct pmap *pmap, vaddr_t v return; } - /* Acquire pmap. */ - kpreempt_disable(); + /* + * Acquire pmap. Need to lock the kernel pmap only to protect the + * statistics. + */ + mutex_enter(&pmap->pm_lock); pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { @@ -4161,8 +4212,8 @@ pmap_unwire(struct pmap *pmap, vaddr_t v } /* Release pmap. */ - pmap_unmap_ptes(pmap, pmap2, NULL); - kpreempt_enable(); + pmap_unmap_ptes(pmap, pmap2); + mutex_exit(&pmap->pm_lock); } /* @@ -4193,7 +4244,6 @@ pmap_enter_default(pmap_t pmap, vaddr_t * pmap_enter: enter a mapping into a pmap * * => must be done "now" ... no lazy-evaluation - * => we set pmap => pv_head locking */ int pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, @@ -4205,12 +4255,11 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t struct vm_page *ptp; struct vm_page *new_pg, *old_pg; struct pmap_page *new_pp, *old_pp; - struct pv_entry *old_pve = NULL; - struct pv_entry *new_pve; - struct pv_entry *new_sparepve; + struct pv_entry *pve; int error; bool wired = (flags & PMAP_WIRED) != 0; struct pmap *pmap2; + struct pmap_ptparray pt; KASSERT(pmap_initialized); KASSERT(curlwp->l_md.md_gc_pmap != pmap); @@ -4259,62 +4308,68 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t new_pp = NULL; } - /* - * Try to get pves now if we might need them. 
- * Keep going even if we fail, since we will not actually need them - * if we are just changing the permissions on an existing mapping, - * but we won't know if that's the case until later. - */ - - bool needpves = pmap_pp_needs_pve(new_pp); - if (needpves) { - new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); - new_sparepve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); - } else { - new_pve = NULL; - new_sparepve = NULL; - } - - kpreempt_disable(); - retry: - pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + /* Make sure we have PTPs allocated. */ + mutex_enter(&pmap->pm_lock); ptp = NULL; if (pmap != pmap_kernel()) { - error = pmap_get_ptp(pmap, va, pdes, flags, &ptp); - if (error == EAGAIN) { - pmap_unmap_ptes(pmap, pmap2, NULL); - goto retry; - } - if (error == ENOMEM) { - pmap_unmap_ptes(pmap, pmap2, NULL); + error = pmap_get_ptp(pmap, &pt, va, flags, &ptp); + if (error != 0) { if (flags & PMAP_CANFAIL) { - goto out; + mutex_exit(&pmap->pm_lock); + return error; } - panic("%s: get ptp failed", __func__); + panic("%s: get ptp failed, error=%d", __func__, + error); } } /* - * Check if there is an existing mapping. If we are now sure that - * we need pves and we failed to allocate them earlier, handle that. - * Caching the value of oldpa here is safe because only the mod/ref - * bits can change while the pmap is locked. + * Now check to see if we need a pv entry for this VA. If we do, + * allocate and install in the radix tree. In any case look up the + * pv entry in case the old mapping used it. */ + pve = pmap_pvmap_lookup(pmap, va); + if (pve == NULL && pmap_pp_needs_pve(new_pp, ptp, va)) { + pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + if (ptp != NULL) { + pmap_unget_ptp(pmap, &pt); + } + mutex_exit(&pmap->pm_lock); + return error; + } + panic("%s: alloc pve failed", __func__); + } + error = pmap_pvmap_insert(pmap, va, pve); + if (error != 0) { + if (flags & PMAP_CANFAIL) { + if (ptp != NULL) { + pmap_unget_ptp(pmap, &pt); + } + pool_cache_put(&pmap_pv_cache, pve); + mutex_exit(&pmap->pm_lock); + return error; + } + panic("%s: radixtree insert failed, error=%d", + __func__, error); + } + } + + /* Map PTEs into address space. */ + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); + + /* Install any newly allocated PTPs. */ + if (ptp != NULL) { + pmap_install_ptp(pmap, &pt, va, pdes); + } + + /* Check if there is an existing mapping. */ ptep = &ptes[pl1_i(va)]; opte = *ptep; bool have_oldpa = pmap_valid_entry(opte); paddr_t oldpa = pmap_pte2pa(opte); - if (needpves && (!have_oldpa || oldpa != pa) && - (new_pve == NULL || new_sparepve == NULL)) { - pmap_unmap_ptes(pmap, pmap2, NULL); - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("%s: pve allocation failed", __func__); - } - /* * Update the pte. 
*/ @@ -4344,7 +4399,6 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree); } - pmap_unmap_ptes(pmap, pmap2, ptp_tofree); goto out; } break; @@ -4366,6 +4420,11 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t */ if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { KASSERT(((opte ^ npte) & PTE_PVLIST) == 0); + if ((opte & PTE_PVLIST) != 0 && pve != NULL) { + KASSERT(pve->pve_pte.pte_ptp == ptp); + KASSERT(pve->pve_pte.pte_va == va); + } + pve = NULL; goto same_pa; } @@ -4383,7 +4442,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t __func__, va, oldpa, atop(pa)); } - old_pve = pmap_remove_pv(old_pp, ptp, va); + (void)pmap_remove_pv(pmap, old_pp, ptp, va, pve); old_pp->pp_attrs |= pmap_pte_to_pp_attrs(opte); } @@ -4391,12 +4450,10 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t * If new page is pv-tracked, insert pv_entry into its list. */ if (new_pp) { - new_pve = pmap_enter_pv(new_pp, new_pve, &new_sparepve, ptp, va); + pve = pmap_enter_pv(pmap, new_pp, pve, ptp, va); } same_pa: - pmap_unmap_ptes(pmap, pmap2, NULL); - /* * shootdown tlb if necessary. */ @@ -4407,18 +4464,15 @@ same_pa: } error = 0; +#if defined(XENPV) out: - kpreempt_enable(); - if (old_pve != NULL) { - pool_cache_put(&pmap_pv_cache, old_pve); - } - if (new_pve != NULL) { - pool_cache_put(&pmap_pv_cache, new_pve); - } - if (new_sparepve != NULL) { - pool_cache_put(&pmap_pv_cache, new_sparepve); +#endif + pmap_unmap_ptes(pmap, pmap2); + if (pve != NULL) { + pmap_pvmap_remove(pmap, va, pve); + pool_cache_put(&pmap_pv_cache, pve); } - + mutex_exit(&pmap->pm_lock); return error; } @@ -4660,8 +4714,8 @@ pmap_growkernel(vaddr_t maxkvaddr) splx(s); if (invalidate && pmap_initialized) { - /* Invalidate the PDP cache. */ - pool_cache_invalidate(&pmap_pdp_cache); + /* Invalidate the pmap cache. */ + pool_cache_invalidate(&pmap_cache); } return maxkvaddr; @@ -4692,12 +4746,8 @@ pmap_dump(struct pmap *pmap, vaddr_t sva if (eva > VM_MAXUSER_ADDRESS || eva <= sva) eva = VM_MAXUSER_ADDRESS; - /* - * we lock in the pmap => pv_head direction - */ - - kpreempt_disable(); - pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + mutex_enter(&pmap->pm_lock); + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* * dumping a range of pages: we dump in PTP sized blocks (4MB) @@ -4724,8 +4774,8 @@ pmap_dump(struct pmap *pmap, vaddr_t sva sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); } } - pmap_unmap_ptes(pmap, pmap2, NULL); - kpreempt_enable(); + pmap_unmap_ptes(pmap, pmap2); + mutex_exit(&pmap->pm_lock); } #endif @@ -5044,47 +5094,19 @@ pmap_ept_free_ptp(struct pmap *pmap, str } /* Allocate L4->L3->L2. Return L2. */ -static struct vm_page * -pmap_ept_get_ptp(struct pmap *pmap, vaddr_t va, int flags) +static void +pmap_ept_install_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va) { struct vm_page *ptp; - struct { - struct vm_page *pg; - bool new; - } pt[PTP_LEVELS + 1]; - int i, aflags; unsigned long index; pd_entry_t *pteva; paddr_t ptepa; - struct uvm_object *obj; - voff_t off; + int i; KASSERT(pmap != pmap_kernel()); KASSERT(mutex_owned(&pmap->pm_lock)); KASSERT(kpreempt_disabled()); - memset(pt, 0, sizeof(pt)); - aflags = ((flags & PMAP_CANFAIL) ? 0 : UVM_PGA_USERESERVE) | - UVM_PGA_ZERO; - - /* - * Loop through all page table levels allocating a page - * for any level where we don't already have one. 
- */ - for (i = PTP_LEVELS; i > 1; i--) { - obj = &pmap->pm_obj[i - 2]; - off = ptp_va2o(va, i - 1); - - pt[i].pg = uvm_pagelookup(obj, off); - if (pt[i].pg == NULL) { - pt[i].pg = uvm_pagealloc(obj, off, NULL, aflags); - pt[i].new = true; - } - - if (pt[i].pg == NULL) - goto fail; - } - /* * Now that we have all the pages looked up or allocated, * loop through again installing any new ones into the tree. @@ -5095,12 +5117,12 @@ pmap_ept_get_ptp(struct pmap *pmap, vadd pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa); if (pmap_ept_valid_entry(pteva[index])) { - KASSERT(!pt[i].new); + KASSERT(!pt->alloced[i]); ptepa = pmap_pte2pa(pteva[index]); continue; } - ptp = pt[i].pg; + ptp = pt->pg[i]; ptp->flags &= ~PG_BUSY; /* never busy */ ptp->wire_count = 1; pmap->pm_ptphint[i - 2] = ptp; @@ -5115,29 +5137,9 @@ pmap_ept_get_ptp(struct pmap *pmap, vadd * wire count of the parent page. */ if (i < PTP_LEVELS) { - pt[i + 1].pg->wire_count++; + pt->pg[i + 1]->wire_count++; } } - ptp = pt[2].pg; - KASSERT(ptp != NULL); - pmap->pm_ptphint[0] = ptp; - return ptp; - - /* - * Allocation of a PTP failed, free any others that we just allocated. - */ -fail: - for (i = PTP_LEVELS; i > 1; i--) { - if (pt[i].pg == NULL) { - break; - } - if (!pt[i].new) { - continue; - } - obj = &pmap->pm_obj[i - 2]; - uvm_pagefree(pt[i].pg); - } - return NULL; } static int @@ -5149,11 +5151,10 @@ pmap_ept_enter(struct pmap *pmap, vaddr_ struct vm_page *ptp; struct vm_page *new_pg, *old_pg; struct pmap_page *new_pp, *old_pp; - struct pv_entry *old_pve = NULL; - struct pv_entry *new_pve; - struct pv_entry *new_sparepve; + struct pv_entry *pve; bool wired = (flags & PMAP_WIRED) != 0; bool accessed; + struct pmap_ptparray pt; int error; KASSERT(pmap_initialized); @@ -5184,33 +5185,60 @@ pmap_ept_enter(struct pmap *pmap, vaddr_ new_pp = NULL; } + /* Make sure we have PTPs allocated. */ + mutex_enter(&pmap->pm_lock); + ptp = NULL; + if (pmap != pmap_kernel()) { + error = pmap_get_ptp(pmap, &pt, va, flags, &ptp); + if (error != 0) { + if (flags & PMAP_CANFAIL) { + mutex_exit(&pmap->pm_lock); + return error; + } + panic("%s: get ptp failed, error=%d", __func__, + error); + } + } + /* - * Try to get pves now if we might need them. - * Keep going even if we fail, since we will not actually need them - * if we are just changing the permissions on an existing mapping, - * but we won't know if that's the case until later. + * Now check to see if we need a pv entry for this VA. If we do, + * allocate and install in the radix tree. In any case look up the + * pv entry in case the old mapping used it. */ - - bool needpves = pmap_pp_needs_pve(new_pp); - if (needpves) { - new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); - new_sparepve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); - } else { - new_pve = NULL; - new_sparepve = NULL; + pve = pmap_pvmap_lookup(pmap, va); + if (pve == NULL && pmap_pp_needs_pve(new_pp, ptp, va)) { + pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + if (ptp != NULL) { + pmap_unget_ptp(pmap, &pt); + } + mutex_exit(&pmap->pm_lock); + return error; + } + panic("%s: alloc pve failed", __func__); + } + error = pmap_pvmap_insert(pmap, va, pve); + if (error != 0) { + if (flags & PMAP_CANFAIL) { + if (ptp != NULL) { + pmap_unget_ptp(pmap, &pt); + } + pool_cache_put(&pmap_pv_cache, pve); + mutex_exit(&pmap->pm_lock); + return error; + } + panic("%s: radixtree insert failed, error=%d", + __func__, error); + } } + /* Map PTEs into address space. 
*/ kpreempt_disable(); - mutex_enter(&pmap->pm_lock); - ptp = pmap_ept_get_ptp(pmap, va, flags); - if (ptp == NULL) { - mutex_exit(&pmap->pm_lock); - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("%s: get ptp failed", __func__); + /* Install any newly allocated PTPs. */ + if (ptp != NULL) { + pmap_ept_install_ptp(pmap, &pt, va); } /* @@ -5225,16 +5253,6 @@ pmap_ept_enter(struct pmap *pmap, vaddr_ bool have_oldpa = pmap_ept_valid_entry(opte); paddr_t oldpa = pmap_pte2pa(opte); - if (needpves && (!have_oldpa || oldpa != pa) && - (new_pve == NULL || new_sparepve == NULL)) { - mutex_exit(&pmap->pm_lock); - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("%s: pve allocation failed", __func__); - } - /* * Update the pte. */ @@ -5263,11 +5281,16 @@ pmap_ept_enter(struct pmap *pmap, vaddr_ */ if (((opte ^ npte) & (PTE_FRAME | EPT_R)) == 0) { KASSERT(((opte ^ npte) & EPT_PVLIST) == 0); + if ((opte & PTE_PVLIST) != 0 && pve != NULL) { + KASSERT(pve->pve_pte.pte_ptp == ptp); + KASSERT(pve->pve_pte.pte_va == va); + } + pve = NULL; goto same_pa; } /* - * If old page is pv-tracked, remove pv_entry from its list. + * If old page is pv-tracked, replace pv_entry from its list. */ if ((~opte & (EPT_R | EPT_PVLIST)) == 0) { if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { @@ -5280,20 +5303,18 @@ pmap_ept_enter(struct pmap *pmap, vaddr_ __func__, va, oldpa, atop(pa)); } - old_pve = pmap_remove_pv(old_pp, ptp, va); - old_pp->pp_attrs |= pmap_ept_to_pp_attrs(opte); + (void)pmap_remove_pv(pmap, old_pp, ptp, va, pve); + old_pp->pp_attrs |= pmap_pte_to_pp_attrs(opte); } /* * If new page is pv-tracked, insert pv_entry into its list. */ if (new_pp) { - new_pve = pmap_enter_pv(new_pp, new_pve, &new_sparepve, ptp, va); + pve = pmap_enter_pv(pmap, new_pp, pve, ptp, va); } same_pa: - mutex_exit(&pmap->pm_lock); - if (pmap_ept_has_ad) { accessed = (~opte & (EPT_R | EPT_A)) == 0; } else { @@ -5304,17 +5325,12 @@ same_pa: } error = 0; -out: kpreempt_enable(); - if (old_pve != NULL) { - pool_cache_put(&pmap_pv_cache, old_pve); - } - if (new_pve != NULL) { - pool_cache_put(&pmap_pv_cache, new_pve); - } - if (new_sparepve != NULL) { - pool_cache_put(&pmap_pv_cache, new_sparepve); + if (pve != NULL) { + pmap_pvmap_remove(pmap, va, pve); + pool_cache_put(&pmap_pv_cache, pve); } + mutex_exit(&pmap->pm_lock); return error; } @@ -5364,8 +5380,8 @@ pmap_ept_extract(struct pmap *pmap, vadd rv = false; pa = 0; - kpreempt_disable(); mutex_enter(&pmap->pm_lock); + kpreempt_disable(); if (!pmap_ept_pdes_invalid(pmap, va, &pde)) { ptppa = pmap_pte2pa(pde); @@ -5377,8 +5393,8 @@ pmap_ept_extract(struct pmap *pmap, vadd } } - mutex_exit(&pmap->pm_lock); kpreempt_enable(); + mutex_exit(&pmap->pm_lock); if (pap != NULL) { *pap = pa; @@ -5433,7 +5449,7 @@ pmap_ept_remove_pte(struct pmap *pmap, s } /* - * If we are not on a pv_head list - we are done. + * If we are not on a pv list - we are done. */ if ((opte & EPT_PVLIST) == 0) { KASSERTMSG((PHYS_TO_VM_PAGE(pmap_pte2pa(opte)) == NULL), @@ -5455,7 +5471,7 @@ pmap_ept_remove_pte(struct pmap *pmap, s /* Sync R/M bits. 
*/ pp->pp_attrs |= pmap_ept_to_pp_attrs(opte); - pve = pmap_remove_pv(pp, ptp, va); + pve = pmap_remove_pv(pmap, pp, ptp, va, NULL); if (pve) { pve->pve_next = *pv_tofree; @@ -5499,8 +5515,8 @@ pmap_ept_remove(struct pmap *pmap, vaddr vaddr_t blkendva, va = sva; struct vm_page *ptp; - kpreempt_disable(); mutex_enter(&pmap->pm_lock); + kpreempt_disable(); for (/* null */ ; va < eva ; va = blkendva) { int lvl; @@ -5535,12 +5551,15 @@ pmap_ept_remove(struct pmap *pmap, vaddr } } - mutex_exit(&pmap->pm_lock); kpreempt_enable(); - - /* Now we free unused PVs */ - if (pv_tofree) - pmap_free_pvs(pv_tofree); + /* + * Radix tree nodes are removed here, so we need to continue holding + * the pmap locked until complete. + */ + if (pv_tofree != NULL) { + pmap_free_pvs(pmap, pv_tofree); + } + mutex_exit(&pmap->pm_lock); } static int @@ -5633,13 +5652,14 @@ static void pmap_ept_pp_remove_ent(struct pmap *pmap, struct vm_page *ptp, pt_entry_t opte, vaddr_t va) { - mutex_enter(&pmap->pm_lock); + + KASSERT(mutex_owned(&pmap->pm_lock)); + pmap_ept_stats_update_bypte(pmap, 0, opte); ptp->wire_count--; if (ptp->wire_count <= 1) { pmap_ept_free_ptp(pmap, ptp, va); } - mutex_exit(&pmap->pm_lock); } static void @@ -5661,9 +5681,8 @@ pmap_ept_write_protect(struct pmap *pmap eva &= PTE_FRAME; /* Acquire pmap. */ - kpreempt_disable(); - mutex_enter(&pmap->pm_lock); + kpreempt_disable(); for (va = sva; va < eva; va += PAGE_SIZE) { if (pmap_ept_pdes_invalid(pmap, va, &pde)) { @@ -5695,8 +5714,8 @@ pmap_ept_write_protect(struct pmap *pmap next:; } - mutex_exit(&pmap->pm_lock); kpreempt_enable(); + mutex_exit(&pmap->pm_lock); } static void @@ -5707,8 +5726,8 @@ pmap_ept_unwire(struct pmap *pmap, vaddr paddr_t ptppa; /* Acquire pmap. */ - kpreempt_disable(); mutex_enter(&pmap->pm_lock); + kpreempt_disable(); if (pmap_ept_pdes_invalid(pmap, va, &pde)) { panic("%s: invalid PDE va=%#" PRIxVADDR, __func__, va); @@ -5731,8 +5750,8 @@ pmap_ept_unwire(struct pmap *pmap, vaddr } /* Release pmap. */ - mutex_exit(&pmap->pm_lock); kpreempt_enable(); + mutex_exit(&pmap->pm_lock); } /* -------------------------------------------------------------------------- */ Index: src/sys/arch/xen/x86/xen_pmap.c diff -u src/sys/arch/xen/x86/xen_pmap.c:1.34 src/sys/arch/xen/x86/xen_pmap.c:1.35 --- src/sys/arch/xen/x86/xen_pmap.c:1.34 Sun Dec 15 19:24:11 2019 +++ src/sys/arch/xen/x86/xen_pmap.c Sat Jan 4 22:49:20 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xen_pmap.c,v 1.34 2019/12/15 19:24:11 ad Exp $ */ +/* $NetBSD: xen_pmap.c,v 1.35 2020/01/04 22:49:20 ad Exp $ */ /* * Copyright (c) 2007 Manuel Bouyer. @@ -101,7 +101,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.34 2019/12/15 19:24:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.35 2020/01/04 22:49:20 ad Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -214,18 +214,18 @@ pmap_extract_ma(struct pmap *pmap, vaddr struct pmap *pmap2; int lvl; - kpreempt_disable(); + mutex_enter(&pmap->pm_lock); pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { - pmap_unmap_ptes(pmap, pmap2, NULL); - kpreempt_enable(); + pmap_unmap_ptes(pmap, pmap2); + mutex_exit(&pmap->pm_lock); return false; } KASSERT(lvl == 1); pte = ptes[pl1_i(va)]; - pmap_unmap_ptes(pmap, pmap2, NULL); - kpreempt_enable(); + pmap_unmap_ptes(pmap, pmap2); + mutex_exit(&pmap->pm_lock); if (__predict_true((pte & PTE_P) != 0)) { if (pap != NULL) @@ -305,7 +305,7 @@ pmap_unmap_recursive_entries(void) * XXX jym@ : find a way to drain per-CPU caches to. 
pool_cache_inv * does not do that. */ - pool_cache_invalidate(&pmap_pdp_cache); + pool_cache_invalidate(&pmap_cache); mutex_enter(&pmaps_lock); LIST_FOREACH(pm, &pmaps, pm_list) {