Module Name:    src
Committed By:   bouyer
Date:           Tue Apr 23 11:21:21 UTC 2019

Modified Files:
        src/sys/arch/arm/arm32: pmap.c

Log Message:
Fix a deadlock between the pool and pmap code:
- cpu0 grabs the kernel lock (e.g. from a non-MPSAFE interrupt) and
  calls pool_get().
- cpu1 does a pool_get() on the same pool from MPSAFE code, which needs a
  pool_page_alloc(), which ends up in pmap_extract_coherency().

So cpu0 holds the kernel_lock and wants the pool lock. cpu1 holds the pool
lock and wants the kernel_lock in pmap_extract_coherency().
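
To make the inversion concrete, here is a minimal sketch of the two paths
(example_pool and the function names are placeholders, not the real call
chain; the actual paths are pool_get() from a non-MPSAFE handler and
pool_page_alloc() -> pmap_extract_coherency()):

#include <sys/pool.h>

extern struct pool example_pool;	/* placeholder pool */

/* cpu0: non-MPSAFE interrupt handler; dispatch already holds kernel_lock */
static int
cpu0_intr_handler(void *arg)
{
	/* blocks on the pool's internal lock, currently held by cpu1 */
	void *p = pool_get(&example_pool, PR_NOWAIT);

	if (p != NULL)
		pool_put(&example_pool, p);
	return 1;
}

/* cpu1: MPSAFE context; pool_get() holds the pool lock while refilling */
static void
cpu1_mpsafe_path(void)
{
	/*
	 * pool_get() -> pool_page_alloc() -> pmap_extract_coherency(),
	 * which (before this fix) took kernel_lock for pmap_kernel(),
	 * so cpu1 holds the pool lock and waits for kernel_lock.
	 */
	void *p = pool_get(&example_pool, PR_WAITOK);

	pool_put(&example_pool, p);
}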

The pmap code should not rely on kernel_lock. Instead make
pmap_kernel()->pm_obj_lock an IPL_VM lock and use it as the pmap lock
(thus dropping the pmap_kernel() special case in
pmap_{acquire,release}_pmap_lock()).
This needs to be an IPL_VM lock because, unlike user pmaps, the kernel
pmap can be locked from interrupt context.
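
For context, with mutex(9) an IPL other than IPL_NONE makes the mutex a
spin mutex that raises the SPL while held, which is what makes it legal to
take the lock from an interrupt handler. A generic sketch (names are
placeholders, not the pmap code):

#include <sys/mutex.h>

static kmutex_t example_lock;	/* placeholder, initialized once at boot */

void
example_init(void)
{
	/* spin mutex, usable from handlers running at or below IPL_VM */
	mutex_init(&example_lock, MUTEX_DEFAULT, IPL_VM);
}

int
example_intr(void *arg)
{
	mutex_enter(&example_lock);	/* interrupts <= IPL_VM are blocked */
	/* ... touch the shared state ... */
	mutex_exit(&example_lock);
	return 1;
}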

Add an IPL_NONE lock (kpm_lock) for pmap_growkernel(). We can't use
pmap_kernel()->pm_obj_lock here because pmap_grow_map() may sleep.
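
The trade-off is that a spin mutex must never be held across a sleep,
whereas the separate IPL_NONE lock may be held while pmap_grow_map()
sleeps (at the cost of not being usable from interrupt context). A
condensed sketch of the new pmap_growkernel() locking, following the diff
below:

	s = splvm();			/* to be safe */
	mutex_enter(&kpm_lock);		/* pmap_grow_map() may sleep below */
	/* ... allocate and map new kernel L2 tables ... */
	mutex_exit(&kpm_lock);
	splx(s);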

Make pmap_lock (which may be locked with pm_obj_lock held) an IPL_VM
lock in all cases.

Reorder a few things so that pool_get()/pool_put() (which may sleep) are
not called with pm_obj_lock held.
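
The reordering follows the usual pattern of allocating before the critical
section and freeing after it; a condensed sketch of the idea (the real
pmap_enter()/pmap_remove() changes are in the diff below):

	/* pmap_enter(): pre-allocate the pv entry that may be needed */
	struct pv_entry *new_pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
	struct pv_entry *old_pv = NULL;

	pmap_acquire_pmap_lock(pm);
	/* ... consume new_pv if needed, save any replaced entry in old_pv ... */
	pmap_release_pmap_lock(pm);

	if (old_pv != NULL)
		pool_put(&pmap_pv_pool, old_pv);
	if (new_pv != NULL)		/* not consumed after all */
		pool_put(&pmap_pv_pool, new_pv);

	/* pmap_remove(): collect freed pv entries, return them after unlock */
	SLIST_HEAD(, pv_entry) opv_list;
	struct pv_entry *pv, *npv;

	SLIST_INIT(&opv_list);
	pmap_acquire_pmap_lock(pm);
	/* ... SLIST_INSERT_HEAD(&opv_list, pv, pv_link) for each removed pv ... */
	pmap_release_pmap_lock(pm);

	SLIST_FOREACH_SAFE(pv, &opv_list, pv_link, npv)
		pool_put(&pmap_pv_pool, pv);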

Patch initially posted to port-arm@ on April 19; improved patch (per
suggestions from Nick Hudson and Jason Thorpe) posted on April 21.


To generate a diff of this commit:
cvs rdiff -u -r1.372 -r1.373 src/sys/arch/arm/arm32/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/arm/arm32/pmap.c
diff -u src/sys/arch/arm/arm32/pmap.c:1.372 src/sys/arch/arm/arm32/pmap.c:1.373
--- src/sys/arch/arm/arm32/pmap.c:1.372	Tue Apr 23 11:05:14 2019
+++ src/sys/arch/arm/arm32/pmap.c	Tue Apr 23 11:21:21 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $	*/
+/*	$NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $	*/
 
 /*
  * Copyright 2003 Wasabi Systems, Inc.
@@ -221,7 +221,7 @@
 #include <arm/db_machdep.h>
 #endif
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $");
 
 //#define PMAP_DEBUG
 #ifdef PMAP_DEBUG
@@ -521,6 +521,7 @@ static size_t cnptes;
 vaddr_t memhook;			/* used by mem.c & others */
 kmutex_t memlock __cacheline_aligned;	/* used by mem.c & others */
 kmutex_t pmap_lock __cacheline_aligned;
+kmutex_t kpm_lock __cacheline_aligned;
 extern void *msgbufaddr;
 int pmap_kmpages;
 /*
@@ -543,33 +544,21 @@ static inline void
 pmap_acquire_pmap_lock(pmap_t pm)
 {
 #if defined(MULTIPROCESSOR) && defined(DDB)
-	if (db_onproc != NULL)
+	if (__predict_false(db_onproc != NULL))
 		return;
 #endif
 	
-	if (pm == pmap_kernel()) {
-#ifdef MULTIPROCESSOR
-		KERNEL_LOCK(1, NULL);
-#endif
-	} else {
-		mutex_enter(pm->pm_lock);
-	}
+	mutex_enter(pm->pm_lock);
 }
 
 static inline void
 pmap_release_pmap_lock(pmap_t pm)
 {
 #if defined(MULTIPROCESSOR) && defined(DDB)
-	if (db_onproc != NULL)
+	if (__predict_false(db_onproc != NULL))
 		return;
 #endif
-	if (pm == pmap_kernel()) {
-#ifdef MULTIPROCESSOR
-		KERNEL_UNLOCK_ONE(NULL);
-#endif
-	} else {
-		mutex_exit(pm->pm_lock);
-	}
+	mutex_exit(pm->pm_lock);
 }
 
 static inline void
@@ -3070,6 +3059,10 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 #else
 	const bool vector_page_p = (va == vector_page);
 #endif
+	struct pmap_page *pp = pmap_pv_tracked(pa);
+	struct pv_entry *new_pv = NULL;
+	struct pv_entry *old_pv = NULL;
+	int error = 0;
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
 
@@ -3085,6 +3078,12 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	 * test for a managed page by checking pg != NULL.
 	 */
 	pg = pmap_initialized ? PHYS_TO_VM_PAGE(pa) : NULL;
+	/*
+	 * if we may need a new pv entry allocate if now, as we can't do it
+	 * with the kernel_pmap locked
+	 */
+	if (pg || pp)
+		new_pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
 
 	nflags = 0;
 	if (prot & VM_PROT_WRITE)
@@ -3108,7 +3107,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	if (l2b == NULL) {
 		if (flags & PMAP_CANFAIL) {
 			pmap_release_pmap_lock(pm);
-			return (ENOMEM);
+			error = ENOMEM;
+			goto free_pv;
 		}
 		panic("pmap_enter: failed to allocate L2 bucket");
 	}
@@ -3131,8 +3131,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	} else
 		opg = NULL;
 
-	struct pmap_page *pp = pmap_pv_tracked(pa);
-
 	if (pg || pp) {
 		KASSERT((pg != NULL) != (pp != NULL));
 		struct vm_page_md *md = (pg != NULL) ? VM_PAGE_TO_MD(pg) :
@@ -3241,9 +3239,10 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 				}
 #endif
 			} else {
-				pmap_release_page_lock(md);
-				pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
+				pv = new_pv;
+				new_pv = NULL;
 				if (pv == NULL) {
+					pmap_release_page_lock(md);
 					pmap_release_pmap_lock(pm);
 					if ((flags & PMAP_CANFAIL) == 0)
 						panic("pmap_enter: "
@@ -3254,7 +3253,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 					    0, 0, 0, 0);
 					return (ENOMEM);
 				}
-				pmap_acquire_page_lock(md);
 			}
 
 			pmap_enter_pv(md, pa, pv, pm, va, nflags);
@@ -3291,9 +3289,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 			paddr_t opa = VM_PAGE_TO_PHYS(opg);
 
 			pmap_acquire_page_lock(omd);
-			struct pv_entry *pv = pmap_remove_pv(omd, opa, pm, va);
+			old_pv = pmap_remove_pv(omd, opa, pm, va);
 			pmap_vac_me_harder(omd, opa, pm, 0);
-			oflags = pv->pv_flags;
+			oflags = old_pv->pv_flags;
 			pmap_release_page_lock(omd);
 
 #ifdef PMAP_CACHE_VIVT
@@ -3301,7 +3299,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 				pmap_cache_wbinv_page(pm, va, true, oflags);
 			}
 #endif
-			pool_put(&pmap_pv_pool, pv);
 		}
 	}
 
@@ -3403,7 +3400,13 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 
 	pmap_release_pmap_lock(pm);
 
-	return (0);
+
+	if (old_pv)
+		pool_put(&pmap_pv_pool, old_pv);
+free_pv:
+	if (new_pv)
+		pool_put(&pmap_pv_pool, new_pv);
+	return (error);
 }
 
 /*
@@ -3431,10 +3434,13 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 void
 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
 {
+	SLIST_HEAD(,pv_entry) opv_list;
+	struct pv_entry *pv, *npv;
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
 	UVMHIST_LOG(maphist, " (pm=%#jx, sva=%#jx, eva=%#jx)",
 	    (uintptr_t)pm, sva, eva, 0);
 
+	SLIST_INIT(&opv_list);
 	/*
 	 * we lock in the pmap => pv_head direction
 	 */
@@ -3493,7 +3499,6 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
 			 */
 			if (pg != NULL) {
 				struct vm_page_md *md = VM_PAGE_TO_MD(pg);
-				struct pv_entry *pv;
 
 				pmap_acquire_page_lock(md);
 				pv = pmap_remove_pv(md, pa, pm, sva);
@@ -3503,7 +3508,8 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
 					if (pm->pm_remove_all == false) {
 						flags = pv->pv_flags;
 					}
-					pool_put(&pmap_pv_pool, pv);
+					SLIST_INSERT_HEAD(&opv_list,
+					    pv, pv_link);
 				}
 			}
 			mappings += PAGE_SIZE / L2_S_SIZE;
@@ -3605,6 +3611,9 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
 	}
 
 	pmap_release_pmap_lock(pm);
+	SLIST_FOREACH_SAFE(pv, &opv_list, pv_link, npv) {
+		pool_put(&pmap_pv_pool, pv);
+	}
 }
 
 #if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
@@ -3828,15 +3837,15 @@ pmap_kremove(vaddr_t va, vsize_t len)
 	UVMHIST_LOG(maphist, " (va=%#jx, len=%#jx)", va, len, 0, 0);
 
 	const vaddr_t eva = va + len;
+	pmap_t kpm = pmap_kernel();
 
-	pmap_acquire_pmap_lock(pmap_kernel());
+	pmap_acquire_pmap_lock(kpm);
 
 	while (va < eva) {
 		vaddr_t next_bucket = L2_NEXT_BUCKET_VA(va);
 		if (next_bucket > eva)
 			next_bucket = eva;
 
-		pmap_t kpm = pmap_kernel();
 		struct l2_bucket * const l2b = pmap_get_l2_bucket(kpm, va);
 		KDASSERT(l2b != NULL);
 
@@ -3892,7 +3901,7 @@ pmap_kremove(vaddr_t va, vsize_t len)
 		total_mappings += mappings;
 #endif
 	}
-	pmap_release_pmap_lock(pmap_kernel());
+	pmap_release_pmap_lock(kpm);
 	cpu_cpwait();
 	UVMHIST_LOG(maphist, "  <--- done (%ju mappings removed)",
 	    total_mappings, 0, 0, 0);
@@ -5917,8 +5926,8 @@ pmap_growkernel(vaddr_t maxkvaddr)
 	 * whoops!   we need to add kernel PTPs
 	 */
 
-	s = splhigh();	/* to be safe */
-	mutex_enter(kpm->pm_lock);
+	s = splvm();	/* to be safe */
+	mutex_enter(&kpm_lock);
 
 	/* Map 1MB at a time */
 	size_t l1slot = l1pte_index(pmap_curmaxkvaddr);
@@ -5962,7 +5971,7 @@ pmap_growkernel(vaddr_t maxkvaddr)
 	cpu_cpwait();
 #endif
 
-	mutex_exit(kpm->pm_lock);
+	mutex_exit(&kpm_lock);
 	splx(s);
 
 out:
@@ -6160,16 +6169,13 @@ pmap_bootstrap(vaddr_t vstart, vaddr_t v
 #endif
 
 	VPRINTF("locks ");
-#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
-	if (arm_cache_prefer_mask != 0) {
-		mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM);
-	} else {
-#endif
-		mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
-#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
-	}
-#endif
-	mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE);
+	/*
+	 * pmap_kenter_pa() and pmap_kremove() may be called from interrupt
+	 * context, so its locks have to be at IPL_VM
+	 */
+	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM);
+	mutex_init(&kpm_lock, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_VM);
 	uvm_obj_init(&pm->pm_obj, NULL, false, 1);
 	uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock);
 
