Module Name: src Committed By: bouyer Date: Tue Apr 23 11:21:21 UTC 2019
Modified Files: src/sys/arch/arm/arm32: pmap.c Log Message: Fix a deadlock between the pool and pmap codes: - cpu0 grabs the kernel lock (e.g. from a non-MPSAFE interrupt) and calls pool_get(). - cpu1 does a pool_get() on the same pool from MPSAFE code, which needs a pool_page_alloc(), which ends up in pmap_extract_coherency(). So cpu0 holds the kernel_lock and wants the pool lock. cpu1 holds the pool lock and wants the kernel_lock in pmap_extract_coherency(). The pmap code should not rely on kernel_lock. Instead make the pmap_kernel()->pm_obj_lock an IPL_VM lock and use it as pmap lock (thus dropping the pmap_kernel() test in pmap_{acquire,release}_pmap_lock()). This needs to be an IPL_VM lock because, unlike user pmaps, this can be locked from interrupt context. Add an IPL_NONE lock for pmap_growkernel(). We can't use pmap_kernel()->pm_obj_lock here because pmap_grow_map() may sleep. Make pmap_lock (which may be locked with pm_obj_lock held) an IPL_VM lock in all cases. Reorder a few things to not call pool_get()/pool_put() (which may sleep) with pm_obj_lock held. Patch initially posted to port-arm@ on April 19, improved patch (per suggestions from Nick Hudson and Jason Thorpe) on April 21. To generate a diff of this commit: cvs rdiff -u -r1.372 -r1.373 src/sys/arch/arm/arm32/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/arm/arm32/pmap.c diff -u src/sys/arch/arm/arm32/pmap.c:1.372 src/sys/arch/arm/arm32/pmap.c:1.373 --- src/sys/arch/arm/arm32/pmap.c:1.372 Tue Apr 23 11:05:14 2019 +++ src/sys/arch/arm/arm32/pmap.c Tue Apr 23 11:21:21 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $ */ +/* $NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $ */ /* * Copyright 2003 Wasabi Systems, Inc. @@ -221,7 +221,7 @@ #include <arm/db_machdep.h> #endif -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $"); //#define PMAP_DEBUG #ifdef PMAP_DEBUG @@ -521,6 +521,7 @@ static size_t cnptes; vaddr_t memhook; /* used by mem.c & others */ kmutex_t memlock __cacheline_aligned; /* used by mem.c & others */ kmutex_t pmap_lock __cacheline_aligned; +kmutex_t kpm_lock __cacheline_aligned; extern void *msgbufaddr; int pmap_kmpages; /* @@ -543,33 +544,21 @@ static inline void pmap_acquire_pmap_lock(pmap_t pm) { #if defined(MULTIPROCESSOR) && defined(DDB) - if (db_onproc != NULL) + if (__predict_false(db_onproc != NULL)) return; #endif - if (pm == pmap_kernel()) { -#ifdef MULTIPROCESSOR - KERNEL_LOCK(1, NULL); -#endif - } else { - mutex_enter(pm->pm_lock); - } + mutex_enter(pm->pm_lock); } static inline void pmap_release_pmap_lock(pmap_t pm) { #if defined(MULTIPROCESSOR) && defined(DDB) - if (db_onproc != NULL) + if (__predict_false(db_onproc != NULL)) return; #endif - if (pm == pmap_kernel()) { -#ifdef MULTIPROCESSOR - KERNEL_UNLOCK_ONE(NULL); -#endif - } else { - mutex_exit(pm->pm_lock); - } + mutex_exit(pm->pm_lock); } static inline void @@ -3070,6 +3059,10 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ #else const bool vector_page_p = (va == vector_page); #endif + struct pmap_page *pp = pmap_pv_tracked(pa); + struct pv_entry *new_pv = NULL; + struct pv_entry *old_pv = NULL; + int error = 0; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 
@@ -3085,6 +3078,12 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ * test for a managed page by checking pg != NULL. */ pg = pmap_initialized ? PHYS_TO_VM_PAGE(pa) : NULL; + /* + * if we may need a new pv entry allocate if now, as we can't do it + * with the kernel_pmap locked + */ + if (pg || pp) + new_pv = pool_get(&pmap_pv_pool, PR_NOWAIT); nflags = 0; if (prot & VM_PROT_WRITE) @@ -3108,7 +3107,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ if (l2b == NULL) { if (flags & PMAP_CANFAIL) { pmap_release_pmap_lock(pm); - return (ENOMEM); + error = ENOMEM; + goto free_pv; } panic("pmap_enter: failed to allocate L2 bucket"); } @@ -3131,8 +3131,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ } else opg = NULL; - struct pmap_page *pp = pmap_pv_tracked(pa); - if (pg || pp) { KASSERT((pg != NULL) != (pp != NULL)); struct vm_page_md *md = (pg != NULL) ? VM_PAGE_TO_MD(pg) : @@ -3241,9 +3239,10 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ } #endif } else { - pmap_release_page_lock(md); - pv = pool_get(&pmap_pv_pool, PR_NOWAIT); + pv = new_pv; + new_pv = NULL; if (pv == NULL) { + pmap_release_page_lock(md); pmap_release_pmap_lock(pm); if ((flags & PMAP_CANFAIL) == 0) panic("pmap_enter: " @@ -3254,7 +3253,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ 0, 0, 0, 0); return (ENOMEM); } - pmap_acquire_page_lock(md); } pmap_enter_pv(md, pa, pv, pm, va, nflags); @@ -3291,9 +3289,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ paddr_t opa = VM_PAGE_TO_PHYS(opg); pmap_acquire_page_lock(omd); - struct pv_entry *pv = pmap_remove_pv(omd, opa, pm, va); + old_pv = pmap_remove_pv(omd, opa, pm, va); pmap_vac_me_harder(omd, opa, pm, 0); - oflags = pv->pv_flags; + oflags = old_pv->pv_flags; pmap_release_page_lock(omd); #ifdef PMAP_CACHE_VIVT @@ -3301,7 +3299,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ pmap_cache_wbinv_page(pm, va, true, oflags); } #endif - pool_put(&pmap_pv_pool, pv); } } @@ -3403,7 +3400,13 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ pmap_release_pmap_lock(pm); - return (0); + + if 
(old_pv) + pool_put(&pmap_pv_pool, old_pv); +free_pv: + if (new_pv) + pool_put(&pmap_pv_pool, new_pv); + return (error); } /* @@ -3431,10 +3434,13 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_ void pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva) { + SLIST_HEAD(,pv_entry) opv_list; + struct pv_entry *pv, *npv; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (pm=%#jx, sva=%#jx, eva=%#jx)", (uintptr_t)pm, sva, eva, 0); + SLIST_INIT(&opv_list); /* * we lock in the pmap => pv_head direction */ @@ -3493,7 +3499,6 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd */ if (pg != NULL) { struct vm_page_md *md = VM_PAGE_TO_MD(pg); - struct pv_entry *pv; pmap_acquire_page_lock(md); pv = pmap_remove_pv(md, pa, pm, sva); @@ -3503,7 +3508,8 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd if (pm->pm_remove_all == false) { flags = pv->pv_flags; } - pool_put(&pmap_pv_pool, pv); + SLIST_INSERT_HEAD(&opv_list, + pv, pv_link); } } mappings += PAGE_SIZE / L2_S_SIZE; @@ -3605,6 +3611,9 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd } pmap_release_pmap_lock(pm); + SLIST_FOREACH_SAFE(pv, &opv_list, pv_link, npv) { + pool_put(&pmap_pv_pool, pv); + } } #if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED) @@ -3828,15 +3837,15 @@ pmap_kremove(vaddr_t va, vsize_t len) UVMHIST_LOG(maphist, " (va=%#jx, len=%#jx)", va, len, 0, 0); const vaddr_t eva = va + len; + pmap_t kpm = pmap_kernel(); - pmap_acquire_pmap_lock(pmap_kernel()); + pmap_acquire_pmap_lock(kpm); while (va < eva) { vaddr_t next_bucket = L2_NEXT_BUCKET_VA(va); if (next_bucket > eva) next_bucket = eva; - pmap_t kpm = pmap_kernel(); struct l2_bucket * const l2b = pmap_get_l2_bucket(kpm, va); KDASSERT(l2b != NULL); @@ -3892,7 +3901,7 @@ pmap_kremove(vaddr_t va, vsize_t len) total_mappings += mappings; #endif } - pmap_release_pmap_lock(pmap_kernel()); + pmap_release_pmap_lock(kpm); cpu_cpwait(); UVMHIST_LOG(maphist, " <--- done (%ju mappings removed)", total_mappings, 0, 0, 0); @@ -5917,8 +5926,8 @@ pmap_growkernel(vaddr_t 
maxkvaddr) * whoops! we need to add kernel PTPs */ - s = splhigh(); /* to be safe */ - mutex_enter(kpm->pm_lock); + s = splvm(); /* to be safe */ + mutex_enter(&kpm_lock); /* Map 1MB at a time */ size_t l1slot = l1pte_index(pmap_curmaxkvaddr); @@ -5962,7 +5971,7 @@ pmap_growkernel(vaddr_t maxkvaddr) cpu_cpwait(); #endif - mutex_exit(kpm->pm_lock); + mutex_exit(&kpm_lock); splx(s); out: @@ -6160,16 +6169,13 @@ pmap_bootstrap(vaddr_t vstart, vaddr_t v #endif VPRINTF("locks "); -#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED) - if (arm_cache_prefer_mask != 0) { - mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM); - } else { -#endif - mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE); -#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED) - } -#endif - mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE); + /* + * pmap_kenter_pa() and pmap_kremove() may be called from interrupt + * context, so its locks have to be at IPL_VM + */ + mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM); + mutex_init(&kpm_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_VM); uvm_obj_init(&pm->pm_obj, NULL, false, 1); uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock);