Module Name:    src
Committed By:   ad
Date:           Sun Mar 22 18:32:42 UTC 2020

Modified Files:
        src/sys/miscfs/genfs: genfs_io.c
        src/sys/nfs: nfs_bio.c
        src/sys/sys: cpu_data.h vnode.h
        src/sys/uvm: uvm_anon.c uvm_aobj.c uvm_extern.h uvm_fault.c uvm_loan.c
            uvm_map.c uvm_meter.c uvm_pager.h uvm_vnode.c
        src/usr.bin/vmstat: vmstat.c

Log Message:
Process concurrent page faults on individual uvm_objects / vm_amaps in
parallel, where the relevant pages are already in-core.  Proposed on
tech-kern.

Temporarily disabled on MP architectures with __HAVE_UNLOCKED_PMAP until
adjustments are made to their pmaps.
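
As a hedged illustration of the locking strategy the diffs below implement
(cf. uvm_fault_upper_upgrade() and uvm_fault_lower_upgrade() in uvm_fault.c):
the fault path now starts with the amap / uvm_object lock held as reader and
upgrades to writer only when it must modify state (page allocation, COW
promotion, loan breaking, wiring), restarting the fault if the upgrade cannot
be obtained.  The sketch uses the rwlock(9) primitives seen in the diff; the
fault_ctx structure and fault_upgrade_lock() name are invented here for
illustration and are not part of the commit.

/*
 * Sketch only, not committed code.  Locks are taken as RW_READER by
 * default; rw_tryupgrade(9) promotes the hold in place.  On failure the
 * lock is dropped and the caller restarts the fault, which then begins
 * with a writer lock because lock_type has already been bumped.
 */
#include <sys/rwlock.h>
#include <sys/errno.h>

struct fault_ctx {
	krw_t	lock_type;		/* RW_READER initially */
};

static int
fault_upgrade_lock(struct fault_ctx *ctx, krwlock_t *lock)
{

	/* fast path: we already hold a writer lock. */
	if (ctx->lock_type == RW_WRITER)
		return 0;

	/*
	 * note that a writer lock is wanted before trying the upgrade,
	 * so a restarted fault acquires RW_WRITER up front instead of
	 * looping on the upgrade.
	 */
	ctx->lock_type = RW_WRITER;
	if (!rw_tryupgrade(lock)) {
		rw_exit(lock);
		return ERESTART;	/* caller restarts the fault */
	}
	return 0;
}

In the committed code the two outcomes are counted via the new
CPU_COUNT_FLTUP / CPU_COUNT_FLTNOUP counters and appear in "vmstat -s" as
"faults upgraded lock" and "faults couldn't upgrade lock".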


To generate a diff of this commit:
cvs rdiff -u -r1.94 -r1.95 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.194 -r1.195 src/sys/nfs/nfs_bio.c
cvs rdiff -u -r1.49 -r1.50 src/sys/sys/cpu_data.h
cvs rdiff -u -r1.293 -r1.294 src/sys/sys/vnode.h
cvs rdiff -u -r1.76 -r1.77 src/sys/uvm/uvm_anon.c
cvs rdiff -u -r1.138 -r1.139 src/sys/uvm/uvm_aobj.c
cvs rdiff -u -r1.221 -r1.222 src/sys/uvm/uvm_extern.h src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.99 -r1.100 src/sys/uvm/uvm_loan.c
cvs rdiff -u -r1.375 -r1.376 src/sys/uvm/uvm_map.c
cvs rdiff -u -r1.75 -r1.76 src/sys/uvm/uvm_meter.c
cvs rdiff -u -r1.46 -r1.47 src/sys/uvm/uvm_pager.h
cvs rdiff -u -r1.110 -r1.111 src/sys/uvm/uvm_vnode.c
cvs rdiff -u -r1.237 -r1.238 src/usr.bin/vmstat/vmstat.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.94 src/sys/miscfs/genfs/genfs_io.c:1.95
--- src/sys/miscfs/genfs/genfs_io.c:1.94	Tue Mar 17 18:31:38 2020
+++ src/sys/miscfs/genfs/genfs_io.c	Sun Mar 22 18:32:41 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_io.c,v 1.94 2020/03/17 18:31:38 ad Exp $	*/
+/*	$NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.94 2020/03/17 18:31:38 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v
 #include <sys/kauth.h>
 #include <sys/fstrans.h>
 #include <sys/buf.h>
+#include <sys/atomic.h>
 
 #include <miscfs/genfs/genfs.h>
 #include <miscfs/genfs/genfs_node.h>
@@ -103,7 +104,7 @@ genfs_getpages(void *v)
 	} */ * const ap = v;
 
 	off_t diskeof, memeof;
-	int i, error, npages;
+	int i, error, npages, iflag;
 	const int flags = ap->a_flags;
 	struct vnode * const vp = ap->a_vp;
 	struct uvm_object * const uobj = &vp->v_uobj;
@@ -125,18 +126,35 @@ genfs_getpages(void *v)
 	KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
 	    vp->v_type == VLNK || vp->v_type == VBLK);
 
+	/*
+	 * the object must be locked.  it can only be a read lock when
+	 * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+	 */
+
+	KASSERT(rw_lock_held(uobj->vmobjlock));
+	KASSERT(rw_write_held(uobj->vmobjlock) ||
+	   ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && !memwrite));
+
 #ifdef DIAGNOSTIC
 	if ((flags & PGO_JOURNALLOCKED) && vp->v_mount->mnt_wapbl)
                 WAPBL_JLOCK_ASSERT(vp->v_mount);
 #endif
 
-	mutex_enter(vp->v_interlock);
-	error = vdead_check(vp, VDEAD_NOWAIT);
-	mutex_exit(vp->v_interlock);
-	if (error) {
-		if ((flags & PGO_LOCKED) == 0)
-			rw_exit(uobj->vmobjlock);
-		return error;
+	/*
+	 * check for reclaimed vnode.  v_interlock is not held here, but
+	 * VI_DEADCHECK is set with vmobjlock held.
+	 */
+
+	iflag = atomic_load_relaxed(&vp->v_iflag);
+	if (__predict_false((iflag & VI_DEADCHECK) != 0)) {
+		mutex_enter(vp->v_interlock);
+		error = vdead_check(vp, VDEAD_NOWAIT);
+		mutex_exit(vp->v_interlock);
+		if (error) {
+			if ((flags & PGO_LOCKED) == 0)
+				rw_exit(uobj->vmobjlock);
+			return error;
+		}
 	}
 
 startover:
@@ -217,9 +235,11 @@ startover:
 			KASSERT(pg == NULL || pg == PGO_DONTCARE);
 		}
 #endif /* defined(DEBUG) */
-		nfound = uvn_findpages(uobj, origoffset, &npages,
+ 		nfound = uvn_findpages(uobj, origoffset, &npages,
 		    ap->a_m, NULL,
-		    UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
+		    UFP_NOWAIT | UFP_NOALLOC |
+		    (memwrite ? UFP_NORDONLY : 0) |
+		    ((flags & PGO_NOBUSY) != 0 ? UFP_NOBUSY : 0));
 		KASSERT(npages == *ap->a_count);
 		if (nfound == 0) {
 			error = EBUSY;
@@ -230,7 +250,9 @@ startover:
 		 * the file behind us.
 		 */
 		if (!genfs_node_rdtrylock(vp)) {
-			genfs_rel_pages(ap->a_m, npages);
+			if ((flags & PGO_NOBUSY) == 0) {
+				genfs_rel_pages(ap->a_m, npages);
+			}
 
 			/*
 			 * restore the array.

Index: src/sys/nfs/nfs_bio.c
diff -u src/sys/nfs/nfs_bio.c:1.194 src/sys/nfs/nfs_bio.c:1.195
--- src/sys/nfs/nfs_bio.c:1.194	Sun Feb 23 15:46:41 2020
+++ src/sys/nfs/nfs_bio.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: nfs_bio.c,v 1.194 2020/02/23 15:46:41 ad Exp $	*/
+/*	$NetBSD: nfs_bio.c,v 1.195 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.194 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.195 2020/03/22 18:32:42 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_nfs.h"
@@ -1260,6 +1260,19 @@ nfs_getpages(void *v)
 	bool v3 = NFS_ISV3(vp);
 	bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
 	bool locked = (ap->a_flags & PGO_LOCKED) != 0;
+	bool nobusy = (ap->a_flags & PGO_NOBUSY);
+
+	/*
+	 * XXX NFS wants to modify the pages below and that can't be done
+	 * with a read lock.  We can't upgrade the lock here because it
+	 * would screw up UVM fault processing.  Have NFS take the I/O
+	 * path.
+	 */
+	if (locked && rw_lock_op(uobj->vmobjlock) == RW_READER) {
+		*ap->a_count = 0;
+		ap->a_m[ap->a_centeridx] = NULL;
+		return EBUSY;
+	}
 
 	/*
 	 * If we are not locked we are not really using opgs,
@@ -1341,7 +1354,8 @@ nfs_getpages(void *v)
 				 * available and put back original pgs array.
 				 */
 
-				uvm_page_unbusy(pgs, npages);
+				if (nobusy == false)
+					uvm_page_unbusy(pgs, npages);
 				*ap->a_count = 0;
 				memcpy(pgs, opgs,
 				    npages * sizeof(struct vm_pages *));

Index: src/sys/sys/cpu_data.h
diff -u src/sys/sys/cpu_data.h:1.49 src/sys/sys/cpu_data.h:1.50
--- src/sys/sys/cpu_data.h:1.49	Wed Jan 15 17:55:44 2020
+++ src/sys/sys/cpu_data.h	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_data.h,v 1.49 2020/01/15 17:55:44 ad Exp $	*/
+/*	$NetBSD: cpu_data.h,v 1.50 2020/03/22 18:32:42 ad Exp $	*/
 
 /*-
  * Copyright (c) 2004, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc.
@@ -93,8 +93,8 @@ enum cpu_count {
 	CPU_COUNT_FILEUNKNOWN,
 	CPU_COUNT_FILECLEAN,
 	CPU_COUNT_FILEDIRTY,
-	CPU_COUNT__UNUSED1,
-	CPU_COUNT__UNUSED2,
+	CPU_COUNT_FLTUP,
+	CPU_COUNT_FLTNOUP,
 	CPU_COUNT_MAX			/* 48 */
 };
 

Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.293 src/sys/sys/vnode.h:1.294
--- src/sys/sys/vnode.h:1.293	Sat Mar 14 20:45:23 2020
+++ src/sys/sys/vnode.h	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode.h,v 1.293 2020/03/14 20:45:23 ad Exp $	*/
+/*	$NetBSD: vnode.h,v 1.294 2020/03/22 18:32:42 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -212,6 +212,7 @@ typedef struct vnode vnode_t;
 #define	VI_WRMAP	0x00000400	/* might have PROT_WRITE u. mappings */
 #define	VI_PAGES	0x00000800	/* UVM object has >0 pages */
 #define	VI_ONWORKLST	0x00004000	/* On syncer work-list */
+#define	VI_DEADCHECK	0x00008000	/* UVM: need to call vdead_check() */
 
 /*
  * The third set are locked by the underlying file system.
@@ -220,7 +221,7 @@ typedef struct vnode vnode_t;
 
 #define	VNODE_FLAGBITS \
     "\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \
-    "\13WRMAP\14PAGES\17ONWORKLST\31DIROP"
+    "\13WRMAP\14PAGES\17ONWORKLST\18DEADCHECK\31DIROP"
 
 #define	VSIZENOTSET	((voff_t)-1)
 

Index: src/sys/uvm/uvm_anon.c
diff -u src/sys/uvm/uvm_anon.c:1.76 src/sys/uvm/uvm_anon.c:1.77
--- src/sys/uvm/uvm_anon.c:1.76	Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_anon.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_anon.c,v 1.76 2020/03/20 19:08:54 ad Exp $	*/
+/*	$NetBSD: uvm_anon.c,v 1.77 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.76 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.77 2020/03/22 18:32:42 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -296,6 +296,8 @@ uvm_anon_pagein(struct vm_amap *amap, st
 		 * anon was freed.
 		 */
 		return false;
+	case ENOLCK:
+		panic("uvm_anon_pagein");
 	default:
 		return true;
 	}

Index: src/sys/uvm/uvm_aobj.c
diff -u src/sys/uvm/uvm_aobj.c:1.138 src/sys/uvm/uvm_aobj.c:1.139
--- src/sys/uvm/uvm_aobj.c:1.138	Tue Mar 17 18:31:39 2020
+++ src/sys/uvm/uvm_aobj.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_aobj.c,v 1.138 2020/03/17 18:31:39 ad Exp $	*/
+/*	$NetBSD: uvm_aobj.c,v 1.139 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.138 2020/03/17 18:31:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.139 2020/03/22 18:32:42 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_uvmhist.h"
@@ -807,6 +807,16 @@ uao_get(struct uvm_object *uobj, voff_t 
 		    (uintptr_t)uobj, offset, flags,0);
 
 	/*
+	 * the object must be locked.  it can only be a read lock when
+	 * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+	 */
+
+	KASSERT(rw_lock_held(uobj->vmobjlock));
+	KASSERT(rw_write_held(uobj->vmobjlock) ||
+	   ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 &&
+	   (access_type & VM_PROT_WRITE) == 0));
+
+	/*
  	 * get number of pages
  	 */
 
@@ -835,10 +845,12 @@ uao_get(struct uvm_object *uobj, voff_t 
 
 			/*
  			 * if page is new, attempt to allocate the page,
-			 * zero-fill'd.
+			 * zero-fill'd.  we can only do this if busying
+			 * pages, as otherwise the object is read locked.
  			 */
 
-			if (ptmp == NULL && uao_find_swslot(uobj,
+			if ((flags & PGO_NOBUSY) == 0 && ptmp == NULL &&
+			    uao_find_swslot(uobj,
 			    current_offset >> PAGE_SHIFT) == 0) {
 				ptmp = uao_pagealloc(uobj, current_offset,
 				    UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
@@ -870,9 +882,11 @@ uao_get(struct uvm_object *uobj, voff_t 
 			KASSERT(uvm_pagegetdirty(ptmp) !=
 			    UVM_PAGE_STATUS_CLEAN);
 
-			/* caller must un-busy this page */
-			ptmp->flags |= PG_BUSY;
-			UVM_PAGE_OWN(ptmp, "uao_get1");
+			if ((flags & PGO_NOBUSY) == 0) {
+				/* caller must un-busy this page */
+				ptmp->flags |= PG_BUSY;
+				UVM_PAGE_OWN(ptmp, "uao_get1");
+			}
 gotpage:
 			pps[lcv] = ptmp;
 			gotpages++;

Index: src/sys/uvm/uvm_extern.h
diff -u src/sys/uvm/uvm_extern.h:1.221 src/sys/uvm/uvm_extern.h:1.222
--- src/sys/uvm/uvm_extern.h:1.221	Sun Feb 23 15:46:43 2020
+++ src/sys/uvm/uvm_extern.h	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_extern.h,v 1.221 2020/02/23 15:46:43 ad Exp $	*/
+/*	$NetBSD: uvm_extern.h,v 1.222 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -249,6 +249,7 @@ b\32UNMAP\0\
 #define UFP_NORDONLY	0x08
 #define UFP_DIRTYONLY	0x10
 #define UFP_BACKWARD	0x20
+#define UFP_NOBUSY	0x40
 
 /*
  * lockflags that control the locking behavior of various functions.
@@ -506,6 +507,8 @@ struct uvmexp_sysctl {
 	int64_t fileunknown;
 	int64_t fileclean;
 	int64_t filedirty;
+	int64_t fltup;
+	int64_t fltnoup;
 };
 
 #ifdef _KERNEL
Index: src/sys/uvm/uvm_fault.c
diff -u src/sys/uvm/uvm_fault.c:1.221 src/sys/uvm/uvm_fault.c:1.222
--- src/sys/uvm/uvm_fault.c:1.221	Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_fault.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_fault.c,v 1.221 2020/03/20 19:08:54 ad Exp $	*/
+/*	$NetBSD: uvm_fault.c,v 1.222 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.221 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.222 2020/03/22 18:32:42 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -194,7 +194,7 @@ uvmfault_anonflush(struct vm_anon **anon
 	for (lcv = 0; lcv < n; lcv++) {
 		if (anons[lcv] == NULL)
 			continue;
-		KASSERT(rw_write_held(anons[lcv]->an_lock));
+		KASSERT(rw_lock_held(anons[lcv]->an_lock));
 		pg = anons[lcv]->an_page;
 		if (pg && (pg->flags & PG_BUSY) == 0) {
 			uvm_pagelock(pg);
@@ -276,10 +276,11 @@ uvmfault_anonget(struct uvm_faultinfo *u
     struct vm_anon *anon)
 {
 	struct vm_page *pg;
+	krw_t lock_type;
 	int error;
 
 	UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);
-	KASSERT(rw_write_held(anon->an_lock));
+	KASSERT(rw_lock_held(anon->an_lock));
 	KASSERT(anon->an_lock == amap->am_lock);
 
 	/* Increment the counters.*/
@@ -316,6 +317,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 		 * Is page resident?  Make sure it is not busy/released.
 		 */
 
+		lock_type = rw_lock_op(anon->an_lock);
 		if (pg) {
 
 			/*
@@ -352,9 +354,14 @@ uvmfault_anonget(struct uvm_faultinfo *u
 		} else {
 #if defined(VMSWAP)
 			/*
-			 * No page, therefore allocate one.
+			 * No page, therefore allocate one.  A write lock is
+			 * required for this.  If the caller didn't supply
+			 * one, fail now and have them retry.
 			 */
 
+			if (lock_type == RW_READER) {
+				return ENOLCK;
+			}
 			pg = uvm_pagealloc(NULL,
 			    ufi != NULL ? ufi->orig_rvaddr : 0,
 			    anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0);
@@ -400,7 +407,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 
 		locked = uvmfault_relock(ufi);
 		if (locked || we_own) {
-			rw_enter(anon->an_lock, RW_WRITER);
+			rw_enter(anon->an_lock, lock_type);
 		}
 
 		/*
@@ -415,6 +422,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 		 */
 
 		if (we_own) {
+			KASSERT(lock_type == RW_WRITER);
 #if defined(VMSWAP)
 			if (error) {
 
@@ -561,6 +569,11 @@ uvmfault_promote(struct uvm_faultinfo *u
 	} else if (uobjpage != PGO_DONTCARE) {
 		/* object-backed COW */
 		opg = uobjpage;
+		if ((uobjpage->flags & PG_BUSY) != 0) {
+			KASSERT(rw_write_held(opg->uobject->vmobjlock));
+		} else {
+			KASSERT(rw_read_held(opg->uobject->vmobjlock));
+		}
 	} else {
 		/* ZFOD */
 		opg = NULL;
@@ -573,10 +586,9 @@ uvmfault_promote(struct uvm_faultinfo *u
 
 	KASSERT(amap != NULL);
 	KASSERT(uobjpage != NULL);
-	KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0);
 	KASSERT(rw_write_held(amap->am_lock));
 	KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock);
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+	KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
 
 	if (*spare != NULL) {
 		anon = *spare;
@@ -615,7 +627,10 @@ uvmfault_promote(struct uvm_faultinfo *u
 		}
 
 		/* unlock and fail ... */
-		uvm_page_unbusy(&uobjpage, 1);
+		if (uobjpage != PGO_DONTCARE &&
+		    (uobjpage->flags & PG_BUSY) != 0) {
+			uvm_page_unbusy(&uobjpage, 1);
+		}
 		uvmfault_unlockall(ufi, amap, uobj);
 		if (!uvm_reclaimable()) {
 			UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
@@ -738,6 +753,12 @@ struct uvm_faultctx {
 	 * (or due to the mechanical separation of the function?)
 	 */
 	bool promote;
+
+	/*
+	 * type of lock to acquire on objects in both layers.
+	 */
+	krw_t lower_lock_type;
+	krw_t upper_lock_type;
 };
 
 static inline int	uvm_fault_check(
@@ -780,7 +801,7 @@ static inline void	uvm_fault_lower_neigh
 			    struct uvm_faultinfo *, const struct uvm_faultctx *,
 			    vaddr_t, struct vm_page *);
 static inline int	uvm_fault_lower_io(
-			    struct uvm_faultinfo *, const struct uvm_faultctx *,
+			    struct uvm_faultinfo *, struct uvm_faultctx *,
 			    struct uvm_object **, struct vm_page **);
 static inline int	uvm_fault_lower_direct(
 			    struct uvm_faultinfo *, struct uvm_faultctx *,
@@ -814,6 +835,20 @@ uvm_fault_internal(struct vm_map *orig_m
 		/* "wire" fault causes wiring of both mapping and paging */
 		.wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0,
 		.wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0,
+
+		/*
+		 * default lock type to acquire on upper & lower layer
+		 * objects: reader.  this can be upgraded at any point
+		 * during the fault from read -> write and uvm_faultctx
+		 * changed to match, but is never downgraded write -> read.
+		 */
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
+		.upper_lock_type = RW_WRITER,
+		.lower_lock_type = RW_WRITER,
+#else
+		.upper_lock_type = RW_READER,
+		.lower_lock_type = RW_READER,
+#endif
 	};
 	const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0;
 	struct vm_anon *anons_store[UVM_MAXRANGE], **anons;
@@ -998,6 +1033,12 @@ uvm_fault_check(
 		flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0;
 	}
 
+	if (flt->wire_paging) {
+		/* wiring pages requires a write lock. */
+		flt->upper_lock_type = RW_WRITER;
+		flt->lower_lock_type = RW_WRITER;
+	}
+
 	flt->promote = false;
 
 	/*
@@ -1093,18 +1134,42 @@ uvm_fault_check(
 	    (uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0);
 
 	/*
-	 * if we've got an amap, lock it and extract current anons.
+	 * guess at the most suitable lock types to acquire.
+	 * if we've got an amap then lock it and extract current anons.
 	 */
 
 	if (amap) {
-		amap_lock(amap, RW_WRITER);
+		if ((amap_flags(amap) & AMAP_SHARED) == 0) {
+			/*
+			 * the amap isn't shared.  get a writer lock to
+			 * avoid the cost of upgrading the lock later if
+			 * needed.
+			 *
+			 * XXX nice for PostgreSQL, but consider threads.
+			 */
+			flt->upper_lock_type = RW_WRITER;
+		} else if ((flt->access_type & VM_PROT_WRITE) != 0) {
+			/*
+			 * assume we're about to COW.
+			 */
+			flt->upper_lock_type = RW_WRITER;
+		}
+		amap_lock(amap, flt->upper_lock_type);
 		amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages);
 	} else {
+		if ((flt->access_type & VM_PROT_WRITE) != 0) {
+			/*
+			 * we are about to dirty the object and that
+			 * requires a write lock.
+			 */
+			flt->lower_lock_type = RW_WRITER;
+		}
 		*ranons = NULL;	/* to be safe */
 	}
 
 	/* locked: maps(read), amap(if there) */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 
 	/*
 	 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
@@ -1147,6 +1212,44 @@ uvm_fault_check(
 }
 
 /*
+ * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer
+ */
+
+static inline int
+uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
+    struct vm_amap *amap, struct uvm_object *uobj)
+{
+
+	KASSERT(amap != NULL);
+	KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
+
+	/*
+	 * fast path.
+	 */
+	
+	if (__predict_true(flt->upper_lock_type == RW_WRITER)) {
+		return 0;
+	}
+
+	/*
+	 * otherwise try for the upgrade.  if we don't get it, unlock
+	 * everything, restart the fault and next time around get a writer
+	 * lock.
+	 */
+
+	flt->upper_lock_type = RW_WRITER;
+	if (__predict_false(!rw_tryupgrade(amap->am_lock))) {
+		uvmfault_unlockall(ufi, amap, uobj);
+		cpu_count(CPU_COUNT_FLTNOUP, 1);
+		UVMHIST_LOG(maphist, "  !upgrade upper", 0, 0,0,0);
+		return ERESTART;
+	}
+	cpu_count(CPU_COUNT_FLTUP, 1);
+	KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
+	return 0;
+}
+
+/*
  * uvm_fault_upper_lookup: look up existing h/w mapping and amap.
  *
  * iterate range of interest:
@@ -1170,7 +1273,8 @@ uvm_fault_upper_lookup(
 	UVMHIST_FUNC("uvm_fault_upper_lookup"); UVMHIST_CALLED(maphist);
 
 	/* locked: maps(read), amap(if there) */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 
 	/*
 	 * map in the backpages and frontpages we found in the amap in hopes
@@ -1191,7 +1295,7 @@ uvm_fault_upper_lookup(
 		}
 
 		/*
-		 * check for present page and map if possible.   re-activate it.
+		 * check for present page and map if possible.
 		 */
 
 		pages[lcv] = PGO_DONTCARE;
@@ -1222,7 +1326,8 @@ uvm_fault_upper_lookup(
 	}
 
 	/* locked: maps(read), amap(if there) */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 	/* (shadowed == true) if there is an anon at the faulting address */
 	UVMHIST_LOG(maphist, "  shadowed=%jd, will_get=%jd", shadowed,
 	    (ufi->entry->object.uvm_obj && shadowed != false),0,0);
@@ -1255,12 +1360,21 @@ uvm_fault_upper_neighbor(
 
 	KASSERT(pg->uobject == NULL);
 	KASSERT(pg->uanon != NULL);
-	KASSERT(rw_write_held(pg->uanon->an_lock));
+	KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type);
 	KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
 
-	uvm_pagelock(pg);
-	uvm_pageenqueue(pg);
-	uvm_pageunlock(pg);
+	/*
+	 * in the read-locked case, it's not possible for this to be a new
+	 * page, therefore it's enqueued already.  there wasn't a direct
+	 * fault on the page, so avoid the cost of re-enqueuing it unless
+	 * write-locked.
+	 */
+
+	if (flt->upper_lock_type == RW_WRITER) {
+		uvm_pagelock(pg);
+		uvm_pageenqueue(pg);
+		uvm_pageunlock(pg);
+	}
 	UVMHIST_LOG(maphist,
 	    "  MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
 	    (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1300,7 +1414,7 @@ uvm_fault_upper(
 	UVMHIST_FUNC("uvm_fault_upper"); UVMHIST_CALLED(maphist);
 
 	/* locked: maps(read), amap, anon */
-	KASSERT(rw_write_held(amap->am_lock));
+	KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 	KASSERT(anon->an_lock == amap->am_lock);
 
 	/*
@@ -1323,7 +1437,7 @@ uvm_fault_upper(
 	 * if the page is on loan from a uvm_object, then anonget will
 	 * lock that object for us if it does not fail.
 	 */
-
+ retry:
 	error = uvmfault_anonget(ufi, amap, anon);
 	switch (error) {
 	case 0:
@@ -1336,6 +1450,15 @@ uvm_fault_upper(
 		kpause("fltagain1", false, hz/2, NULL);
 		return ERESTART;
 
+	case ENOLCK:
+		/* it needs a write lock: retry */
+		error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+		if (error != 0) {
+			return error;
+		}
+		KASSERT(rw_write_held(amap->am_lock));
+		goto retry;
+
 	default:
 		return error;
 	}
@@ -1347,9 +1470,10 @@ uvm_fault_upper(
 	uobj = anon->an_page->uobject;	/* locked by anonget if !NULL */
 
 	/* locked: maps(read), amap, anon, uobj(if one) */
-	KASSERT(rw_write_held(amap->am_lock));
+	KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 	KASSERT(anon->an_lock == amap->am_lock);
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+	KASSERT(uobj == NULL ||
+	    rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
 
 	/*
 	 * special handling for loaned pages
@@ -1424,6 +1548,13 @@ uvm_fault_upper_loan(
 
 		/* >1 case is already ok */
 		if (anon->an_ref == 1) {
+			/* breaking loan requires a write lock. */
+			error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+			if (error != 0) {
+				return error;
+			}
+			KASSERT(rw_write_held(amap->am_lock));
+
 			error = uvm_loanbreak_anon(anon, *ruobj);
 			if (error != 0) {
 				uvmfault_unlockall(ufi, amap, *ruobj);
@@ -1452,6 +1583,7 @@ uvm_fault_upper_promote(
 	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
 	struct uvm_object *uobj, struct vm_anon *anon)
 {
+	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
 	struct vm_anon * const oanon = anon;
 	struct vm_page *pg;
 	int error;
@@ -1460,6 +1592,13 @@ uvm_fault_upper_promote(
 	UVMHIST_LOG(maphist, "  case 1B: COW fault",0,0,0,0);
 	cpu_count(CPU_COUNT_FLT_ACOW, 1);
 
+	/* promoting requires a write lock. */
+	error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+	if (error != 0) {
+		return error;
+	}
+	KASSERT(rw_write_held(amap->am_lock));
+
 	error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon,
 	    &flt->anon_spare);
 	switch (error) {
@@ -1471,13 +1610,10 @@ uvm_fault_upper_promote(
 		return error;
 	}
 
-	KASSERT(anon == NULL || anon->an_lock == oanon->an_lock);
+	KASSERT(anon->an_lock == oanon->an_lock);
 
+	/* uvm_fault_upper_done will activate or enqueue the page */
 	pg = anon->an_page;
-	/* uvm_fault_upper_done will activate the page */
-	uvm_pagelock(pg);
-	uvm_pageenqueue(pg);
-	uvm_pageunlock(pg);
 	pg->flags &= ~(PG_BUSY|PG_FAKE);
 	UVM_PAGE_OWN(pg, NULL);
 
@@ -1531,10 +1667,11 @@ uvm_fault_upper_enter(
 	UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist);
 
 	/* locked: maps(read), amap, oanon, anon(if different from oanon) */
-	KASSERT(rw_write_held(amap->am_lock));
+	KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 	KASSERT(anon->an_lock == amap->am_lock);
 	KASSERT(oanon->an_lock == amap->am_lock);
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+	KASSERT(uobj == NULL ||
+	    rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
 	KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
 
 	/*
@@ -1563,6 +1700,17 @@ uvm_fault_upper_enter(
 		KASSERT(!pmap_extract(pmap, va, NULL));
 
 		/*
+		 * ensure that the page is queued in the case that
+		 * we just promoted.
+		 */
+
+		if (flt->upper_lock_type == RW_WRITER) {
+			uvm_pagelock(pg);
+			uvm_pageenqueue(pg);
+			uvm_pageunlock(pg);
+		}
+
+		/*
 		 * No need to undo what we did; we can simply think of
 		 * this as the pmap throwing away the mapping information.
 		 *
@@ -1632,6 +1780,57 @@ uvm_fault_upper_done(
 }
 
 /*
+ * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer
+ */
+
+static inline int
+uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
+    struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage)
+{
+
+	KASSERT(uobj != NULL);
+	KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
+
+	/*
+	 * fast path.
+	 */
+	
+	if (__predict_true(flt->lower_lock_type == RW_WRITER)) {
+		KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
+		return 0;
+	}
+
+	/*
+	 * otherwise try for the upgrade.  if we don't get it, unlock
+	 * everything, restart the fault and next time around get a writer
+	 * lock.
+	 */
+
+	flt->lower_lock_type = RW_WRITER;
+	if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) {
+		uvmfault_unlockall(ufi, amap, uobj);
+		cpu_count(CPU_COUNT_FLTNOUP, 1);
+		UVMHIST_LOG(maphist, "  !upgrade lower", 0, 0,0,0);
+		return ERESTART;
+	}
+	cpu_count(CPU_COUNT_FLTUP, 1);
+	KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
+
+	/*
+	 * finally, if a page was supplied, assert that it's not busy
+	 * (can't be with a reader lock) and then mark it busy now that
+	 * we have a writer lock.
+	 */
+
+	if (uobjpage != NULL) {
+		KASSERT((uobjpage->flags & PG_BUSY) == 0);
+		uobjpage->flags |= PG_BUSY;
+		UVM_PAGE_OWN(uobjpage, "upgrdlwr");
+	}
+	return 0;
+}
+
+/*
  * uvm_fault_lower: handle lower fault.
  *
  *	1. check uobj
@@ -1686,9 +1885,19 @@ uvm_fault_lower(
 	 * locked:
 	 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
 	 */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
-	KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
+	if (flt->lower_lock_type == RW_WRITER) {
+		KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+		KASSERTMSG(uobjpage == NULL ||
+		    (uobjpage->flags & PG_BUSY) != 0,
+		    "page %p should be busy", uobjpage);
+	} else {
+		KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+		KASSERTMSG(uobjpage == NULL ||
+		    (uobjpage->flags & PG_BUSY) == 0,
+		    "page %p should not be busy", uobjpage);
+	}
 
 	/*
 	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
@@ -1729,9 +1938,15 @@ uvm_fault_lower(
 	 * locked:
 	 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
 	 */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
-	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
+	if (flt->lower_lock_type == RW_WRITER) {
+		KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	} else {
+		KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+	}
 
 	/*
 	 * notes:
@@ -1772,17 +1987,25 @@ uvm_fault_lower_lookup(
 	vaddr_t currva;
 	UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist);
 
-	rw_enter(uobj->vmobjlock, RW_WRITER);
-	/* Locked: maps(read), amap(if there), uobj */
+	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
+
+	/*
+	 * Locked: maps(read), amap(if there), uobj
+	 *
+	 * if we have a read lock on the object, do a PGO_NOBUSY get, which
+	 * will return us pages with PG_BUSY clear.  if a write lock is held
+	 * pages will be returned with PG_BUSY set.
+	 */
 
 	cpu_count(CPU_COUNT_FLTLGET, 1);
 	gotpages = flt->npages;
 	(void) uobj->pgops->pgo_get(uobj,
 	    ufi->entry->offset + flt->startva - ufi->entry->start,
 	    pages, &gotpages, flt->centeridx,
-	    flt->access_type & MASK(ufi->entry), ufi->entry->advice, PGO_LOCKED);
+	    flt->access_type & MASK(ufi->entry), ufi->entry->advice,
+	    PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 0 : PGO_NOBUSY));
 
-	KASSERT(rw_write_held(uobj->vmobjlock));
+	KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
 
 	/*
 	 * check for pages to map, if we got any
@@ -1803,10 +2026,20 @@ uvm_fault_lower_lookup(
 		}
 		KASSERT(curpg->uobject == uobj);
 
+		if (flt->lower_lock_type == RW_WRITER) {
+			KASSERT(rw_write_held(uobj->vmobjlock));
+			KASSERTMSG((curpg->flags & PG_BUSY) != 0,
+			    "page %p should be busy", curpg);
+		} else {
+			KASSERT(rw_read_held(uobj->vmobjlock));
+			KASSERTMSG((curpg->flags & PG_BUSY) == 0,
+			    "page %p should not be busy", curpg);
+		}
+
 		/*
 		 * if center page is resident and not PG_BUSY|PG_RELEASED
-		 * then pgo_get made it PG_BUSY for us and gave us a handle
-		 * to it.
+		 * and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and
+		 * gave us a handle to it.
 		 */
 
 		if (lcv == flt->centeridx) {
@@ -1839,9 +2072,18 @@ uvm_fault_lower_neighbor(
 	 * for this.  we can just directly enter the pages.
 	 */
 
-	uvm_pagelock(pg);
-	uvm_pageenqueue(pg);
-	uvm_pageunlock(pg);
+	/*
+	 * in the read-locked case, it's not possible for this to be a new
+	 * page.  it must be cached with the object and enqueued already. 
+	 * there wasn't a direct fault on the page, so avoid the cost of
+	 * re-enqueuing it.
+	 */
+
+	if (flt->lower_lock_type == RW_WRITER) {
+		uvm_pagelock(pg);
+		uvm_pageenqueue(pg);
+		uvm_pageunlock(pg);
+	}
 	UVMHIST_LOG(maphist,
 	    "  MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
 	    (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1858,10 +2100,21 @@ uvm_fault_lower_neighbor(
 	KASSERT((pg->flags & PG_RELEASED) == 0);
 	KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
 	    uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
-	pg->flags &= ~(PG_BUSY);
-	UVM_PAGE_OWN(pg, NULL);
 
-	KASSERT(rw_write_held(pg->uobject->vmobjlock));
+	/*
+	 * if a write lock was held on the object, the pages have been
+	 * busied.  unbusy them now, as we are about to enter and then
+	 * forget about them.
+	 */
+
+	if (flt->lower_lock_type == RW_WRITER) {
+		KASSERT((pg->flags & PG_BUSY) != 0);
+		pg->flags &= ~(PG_BUSY);
+		UVM_PAGE_OWN(pg, NULL);
+	} else {
+		KASSERT((pg->flags & PG_BUSY) == 0);
+	}
+	KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);
 
 	const vm_prot_t mapprot = 
 	    readonly ? (flt->enter_prot & ~VM_PROT_WRITE) :
@@ -1883,7 +2136,7 @@ uvm_fault_lower_neighbor(
 
 static int
 uvm_fault_lower_io(
-	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
+	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
 	struct uvm_object **ruobj, struct vm_page **ruobjpage)
 {
 	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
@@ -1906,10 +2159,17 @@ uvm_fault_lower_io(
 	advice = ufi->entry->advice;
 
 	/* Locked: maps(read), amap(if there), uobj */
+	KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
+
+	/* Upgrade to a write lock if needed. */
+	error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL);
+	if (error != 0) {
+		return error;
+	}
 	uvmfault_unlockall(ufi, amap, NULL);
 
-	/* Locked: uobj */
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+	/* Locked: uobj(write) */
+	KASSERT(rw_write_held(uobj->vmobjlock));
 
 	cpu_count(CPU_COUNT_FLTGET, 1);
 	gotpages = 1;
@@ -1950,13 +2210,14 @@ uvm_fault_lower_io(
 
 	locked = uvmfault_relock(ufi);
 	if (locked && amap)
-		amap_lock(amap, RW_WRITER);
+		amap_lock(amap, flt->upper_lock_type);
 
 	/* might be changed */
 	uobj = pg->uobject;
 
-	rw_enter(uobj->vmobjlock, RW_WRITER);
+	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
 	KASSERT((pg->flags & PG_BUSY) != 0);
+	KASSERT(flt->lower_lock_type == RW_WRITER);
 
 	uvm_pagelock(pg);
 	uvm_pageactivate(pg);
@@ -2056,7 +2317,11 @@ uvm_fault_lower_direct(
 	}
 	KASSERT(pg == uobjpage);
 
-	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	if (flt->lower_lock_type == RW_READER) {
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+	} else {
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	}
 	return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
 }
 
@@ -2076,6 +2341,7 @@ uvm_fault_lower_direct_loan(
 	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
 	struct vm_page *pg;
 	struct vm_page *uobjpage = *ruobjpage;
+	int error;
 	UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist);
 
 	if (!flt->cow_now) {
@@ -2083,7 +2349,16 @@ uvm_fault_lower_direct_loan(
 		/* cap! */
 		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
 	} else {
-		/* write fault: must break the loan here */
+		/*
+		 * write fault: must break the loan here.  to do this
+		 * we need a write lock on the object.
+		 */
+
+		error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage);
+		if (error != 0) {
+			return error;
+		}
+		KASSERT(rw_write_held(uobj->vmobjlock));
 
 		pg = uvm_loanbreak(uobjpage);
 		if (pg == NULL) {
@@ -2133,12 +2408,18 @@ uvm_fault_lower_promote(
 
 	KASSERT(amap != NULL);
 
+	/* promoting requires a write lock. */
+	error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj);
+	if (error != 0) {
+		return error;
+	}
+	KASSERT(rw_write_held(amap->am_lock));
+
 	/*
 	 * If we are going to promote the data to an anon we
 	 * allocate a blank anon here and plug it into our amap.
 	 */
-	error = uvmfault_promote(ufi, NULL, uobjpage,
-	    &anon, &flt->anon_spare);
+	error = uvmfault_promote(ufi, NULL, uobjpage, &anon, &flt->anon_spare);
 	switch (error) {
 	case 0:
 		break;
@@ -2153,7 +2434,11 @@ uvm_fault_lower_promote(
 	/*
 	 * Fill in the data.
 	 */
-	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	if (flt->lower_lock_type == RW_READER) {
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+	} else {
+		KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	}
 
 	if (uobjpage != PGO_DONTCARE) {
 		cpu_count(CPU_COUNT_FLT_PRCOPY, 1);
@@ -2175,11 +2460,13 @@ uvm_fault_lower_promote(
 		 * since we still hold the object lock.
 		 */
 
-		uobjpage->flags &= ~PG_BUSY;
-		uvm_pagelock(uobjpage);
-		uvm_pagewakeup(uobjpage);
-		uvm_pageunlock(uobjpage);
-		UVM_PAGE_OWN(uobjpage, NULL);
+		if ((uobjpage->flags & PG_BUSY) != 0) {
+			uobjpage->flags &= ~PG_BUSY;
+			uvm_pagelock(uobjpage);
+			uvm_pagewakeup(uobjpage);
+			uvm_pageunlock(uobjpage);
+			UVM_PAGE_OWN(uobjpage, NULL);
+		}
 
 		UVMHIST_LOG(maphist,
 		    "  promote uobjpage %#jx to anon/page %#jx/%#jx",
@@ -2222,12 +2509,22 @@ uvm_fault_lower_enter(
 	 *	maps(read), amap(if !null), uobj(if !null),
 	 *	anon(if !null), pg(if anon), unlock_uobj(if !null)
 	 *
+	 * anon must be write locked (promotion).  uobj can be either.
+	 *
 	 * Note: pg is either the uobjpage or the new page in the new anon.
 	 */
-	KASSERT(amap == NULL || rw_write_held(amap->am_lock));
-	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+	KASSERT(amap == NULL ||
+	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
 	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
-	KASSERT((pg->flags & PG_BUSY) != 0);
+	if (flt->lower_lock_type == RW_WRITER) {
+		KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+		KASSERTMSG((pg->flags & PG_BUSY) != 0,
+		    "page %p should be busy", pg);
+	} else {
+		KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+		KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0,
+		    "page %p should not be busy", pg);
+	}
 
 	/*
 	 * all resources are present.   we can now map it in and free our
@@ -2264,18 +2561,24 @@ uvm_fault_lower_enter(
 		 * we just promoted the page.
 		 */
 
-		uvm_pagelock(pg);
-		uvm_pageenqueue(pg);
-		uvm_pagewakeup(pg);
-		uvm_pageunlock(pg);
+		if (anon != NULL || flt->lower_lock_type == RW_WRITER) {
+			uvm_pagelock(pg);
+			uvm_pageenqueue(pg);
+			uvm_pagewakeup(pg);
+			uvm_pageunlock(pg);
+		} else {
+			KASSERT((pg->flags & PG_BUSY) == 0);
+		}
 
 		/*
 		 * note that pg can't be PG_RELEASED since we did not drop
 		 * the object lock since the last time we checked.
 		 */
 		KASSERT((pg->flags & PG_RELEASED) == 0);
-		pg->flags &= ~(PG_BUSY|PG_FAKE);
-		UVM_PAGE_OWN(pg, NULL);
+		if ((pg->flags & PG_BUSY) != 0) {
+			pg->flags &= ~(PG_BUSY|PG_FAKE);
+			UVM_PAGE_OWN(pg, NULL);
+		}
 
 		uvmfault_unlockall(ufi, amap, uobj);
 		if (!uvm_reclaimable()) {
@@ -2297,11 +2600,13 @@ uvm_fault_lower_enter(
 	 * lock since the last time we checked.
 	 */
 	KASSERT((pg->flags & PG_RELEASED) == 0);
-	uvm_pagelock(pg);
-	uvm_pagewakeup(pg);
-	uvm_pageunlock(pg);
-	pg->flags &= ~(PG_BUSY|PG_FAKE);
-	UVM_PAGE_OWN(pg, NULL);
+	if ((pg->flags & PG_BUSY) != 0) {
+		uvm_pagelock(pg);
+		uvm_pagewakeup(pg);
+		uvm_pageunlock(pg);
+		pg->flags &= ~(PG_BUSY|PG_FAKE);
+		UVM_PAGE_OWN(pg, NULL);
+	}
 
 	pmap_update(ufi->orig_map->pmap);
 	uvmfault_unlockall(ufi, amap, uobj);

Index: src/sys/uvm/uvm_loan.c
diff -u src/sys/uvm/uvm_loan.c:1.99 src/sys/uvm/uvm_loan.c:1.100
--- src/sys/uvm/uvm_loan.c:1.99	Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_loan.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_loan.c,v 1.99 2020/03/20 19:08:54 ad Exp $	*/
+/*	$NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.99 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -391,6 +391,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, 
 
 	if (error) {
 		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
+		KASSERT(error != ENOLCK);
 
 		/* need to refault (i.e. refresh our lookup) ? */
 		if (error == ERESTART) {

Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.375 src/sys/uvm/uvm_map.c:1.376
--- src/sys/uvm/uvm_map.c:1.375	Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_map.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.375 2020/03/20 19:08:54 ad Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.375 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_pax.h"
@@ -2256,7 +2256,11 @@ uvm_unmap_remove(struct vm_map *map, vad
 			 * change while in pmap_remove().
 			 */
 
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
 			uvm_map_lock_entry(entry, RW_WRITER);
+#else
+			uvm_map_lock_entry(entry, RW_READER);
+#endif
 			pmap_remove(map->pmap, entry->start, entry->end);
 
 			/*
@@ -2831,7 +2835,11 @@ uvm_map_extract(struct vm_map *srcmap, v
 
 			/* we advance "entry" in the following if statement */
 			if (flags & UVM_EXTRACT_REMOVE) {
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
 				uvm_map_lock_entry(entry, RW_WRITER);
+#else
+				uvm_map_lock_entry(entry, RW_READER);
+#endif
 				pmap_remove(srcmap->pmap, entry->start,
 						entry->end);
 				uvm_map_unlock_entry(entry);
@@ -3063,7 +3071,11 @@ uvm_map_protect(struct vm_map *map, vadd
 
 		if (current->protection != old_prot) {
 			/* update pmap! */
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
 			uvm_map_lock_entry(current, RW_WRITER);
+#else
+			uvm_map_lock_entry(current, RW_READER);
+#endif
 			pmap_protect(map->pmap, current->start, current->end,
 			    current->protection & MASK(current));
 			uvm_map_unlock_entry(current);
@@ -4404,7 +4416,11 @@ uvm_mapent_forkcopy(struct vm_map *new_m
 		if (old_entry->aref.ar_amap &&
 		    !UVM_ET_ISNEEDSCOPY(old_entry)) {
 			if (old_entry->max_protection & VM_PROT_WRITE) {
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
 				uvm_map_lock_entry(old_entry, RW_WRITER);
+#else
+				uvm_map_lock_entry(old_entry, RW_READER);
+#endif
 				pmap_protect(old_map->pmap,
 				    old_entry->start, old_entry->end,
 				    old_entry->protection & ~VM_PROT_WRITE);

Index: src/sys/uvm/uvm_meter.c
diff -u src/sys/uvm/uvm_meter.c:1.75 src/sys/uvm/uvm_meter.c:1.76
--- src/sys/uvm/uvm_meter.c:1.75	Thu Mar 19 20:23:19 2020
+++ src/sys/uvm/uvm_meter.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_meter.c,v 1.75 2020/03/19 20:23:19 ad Exp $	*/
+/*	$NetBSD: uvm_meter.c,v 1.76 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.75 2020/03/19 20:23:19 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.76 2020/03/22 18:32:42 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -186,6 +186,8 @@ sysctl_vm_uvmexp2(SYSCTLFN_ARGS)
 	u.fileunknown = cpu_count_get(CPU_COUNT_FILEUNKNOWN);
 	u.fileclean = cpu_count_get(CPU_COUNT_FILECLEAN);
 	u.filedirty = cpu_count_get(CPU_COUNT_FILEDIRTY);
+	u.fltup = cpu_count_get(CPU_COUNT_FLTUP);
+	u.fltnoup = cpu_count_get(CPU_COUNT_FLTNOUP);
 
 	node = *rnode;
 	node.sysctl_data = &u;

Index: src/sys/uvm/uvm_pager.h
diff -u src/sys/uvm/uvm_pager.h:1.46 src/sys/uvm/uvm_pager.h:1.47
--- src/sys/uvm/uvm_pager.h:1.46	Sat Mar 14 20:45:23 2020
+++ src/sys/uvm/uvm_pager.h	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pager.h,v 1.46 2020/03/14 20:45:23 ad Exp $	*/
+/*	$NetBSD: uvm_pager.h,v 1.47 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -152,6 +152,7 @@ struct uvm_pagerops {
 #define PGO_JOURNALLOCKED 0x020	/* journal is already locked [get/put] */
 #define PGO_LOCKED	0x040	/* fault data structures are locked [get] */
 #define PGO_BUSYFAIL	0x080	/* fail if a page is busy [put] */
+#define PGO_NOBUSY	0x100	/* don't busy returned pages (read locked) */
 #define PGO_OVERWRITE	0x200	/* pages will be overwritten before unlocked */
 #define PGO_PASTEOF	0x400	/* allow allocation of pages past EOF */
 #define PGO_NOBLOCKALLOC 0x800	/* backing block allocation is not needed */

Index: src/sys/uvm/uvm_vnode.c
diff -u src/sys/uvm/uvm_vnode.c:1.110 src/sys/uvm/uvm_vnode.c:1.111
--- src/sys/uvm/uvm_vnode.c:1.110	Sat Mar 14 20:45:23 2020
+++ src/sys/uvm/uvm_vnode.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_vnode.c,v 1.110 2020/03/14 20:45:23 ad Exp $	*/
+/*	$NetBSD: uvm_vnode.c,v 1.111 2020/03/22 18:32:42 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -45,7 +45,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.110 2020/03/14 20:45:23 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.111 2020/03/22 18:32:42 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_uvmhist.h"
@@ -287,7 +287,15 @@ uvn_findpage(struct uvm_object *uobj, vo
 	UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx", (uintptr_t)uobj, offset,
 	    0, 0);
 
-	KASSERT(rw_write_held(uobj->vmobjlock));
+	/*
+	 * NOBUSY must come with NOWAIT and NOALLOC.  if NOBUSY is
+	 * specified, this may be called with a reader lock.
+	 */
+
+	KASSERT(rw_lock_held(uobj->vmobjlock));
+	KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOWAIT) != 0);
+	KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOALLOC) != 0);
+	KASSERT((flags & UFP_NOBUSY) != 0 || rw_write_held(uobj->vmobjlock));
 
 	if (*pgp != NULL) {
 		UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
@@ -380,8 +388,10 @@ uvn_findpage(struct uvm_object *uobj, vo
 		}
 
 		/* mark the page BUSY and we're done. */
-		pg->flags |= PG_BUSY;
-		UVM_PAGE_OWN(pg, "uvn_findpage");
+		if ((flags & UFP_NOBUSY) == 0) {
+			pg->flags |= PG_BUSY;
+			UVM_PAGE_OWN(pg, "uvn_findpage");
+		}
 		UVMHIST_LOG(ubchist, "found %#jx (color %ju)",
 		    (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0);
 		uvm_page_array_advance(a);

Index: src/usr.bin/vmstat/vmstat.c
diff -u src/usr.bin/vmstat/vmstat.c:1.237 src/usr.bin/vmstat/vmstat.c:1.238
--- src/usr.bin/vmstat/vmstat.c:1.237	Sun Mar 22 14:39:28 2020
+++ src/usr.bin/vmstat/vmstat.c	Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vmstat.c,v 1.237 2020/03/22 14:39:28 ad Exp $ */
+/* $NetBSD: vmstat.c,v 1.238 2020/03/22 18:32:42 ad Exp $ */
 
 /*-
  * Copyright (c) 1998, 2000, 2001, 2007, 2019, 2020
@@ -71,7 +71,7 @@ __COPYRIGHT("@(#) Copyright (c) 1980, 19
 #if 0
 static char sccsid[] = "@(#)vmstat.c	8.2 (Berkeley) 3/1/95";
 #else
-__RCSID("$NetBSD: vmstat.c,v 1.237 2020/03/22 14:39:28 ad Exp $");
+__RCSID("$NetBSD: vmstat.c,v 1.238 2020/03/22 18:32:42 ad Exp $");
 #endif
 #endif /* not lint */
 
@@ -1074,6 +1074,10 @@ dosum(void)
 	(void)printf("%9" PRIu64 " object faults\n", uvmexp.flt_obj);
 	(void)printf("%9" PRIu64 " promote copy faults\n", uvmexp.flt_prcopy);
 	(void)printf("%9" PRIu64 " promote zero fill faults\n", uvmexp.flt_przero);
+	(void)printf("%9" PRIu64 " faults upgraded lock\n",
+	    uvmexp.fltup);
+	(void)printf("%9" PRIu64 " faults couldn't upgrade lock\n",
+	    uvmexp.fltnoup);
 
 	(void)printf("%9" PRIu64 " times daemon wokeup\n",uvmexp.pdwoke);
 	(void)printf("%9" PRIu64 " revolutions of the clock hand\n", uvmexp.pdrevs);
