Module Name: src
Committed By: ad
Date: Sun Mar 22 18:32:42 UTC 2020
Modified Files:
src/sys/miscfs/genfs: genfs_io.c
src/sys/nfs: nfs_bio.c
src/sys/sys: cpu_data.h vnode.h
src/sys/uvm: uvm_anon.c uvm_aobj.c uvm_extern.h uvm_fault.c uvm_loan.c
uvm_map.c uvm_meter.c uvm_pager.h uvm_vnode.c
src/usr.bin/vmstat: vmstat.c
Log Message:
Process concurrent page faults on individual uvm_objects / vm_amaps in
parallel, where the relevant pages are already in-core. Proposed on
tech-kern.
Temporarily disabled on MP architectures with __HAVE_UNLOCKED_PMAP until
adjustments are made to their pmaps.
To generate a diff of this commit:
cvs rdiff -u -r1.94 -r1.95 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.194 -r1.195 src/sys/nfs/nfs_bio.c
cvs rdiff -u -r1.49 -r1.50 src/sys/sys/cpu_data.h
cvs rdiff -u -r1.293 -r1.294 src/sys/sys/vnode.h
cvs rdiff -u -r1.76 -r1.77 src/sys/uvm/uvm_anon.c
cvs rdiff -u -r1.138 -r1.139 src/sys/uvm/uvm_aobj.c
cvs rdiff -u -r1.221 -r1.222 src/sys/uvm/uvm_extern.h src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.99 -r1.100 src/sys/uvm/uvm_loan.c
cvs rdiff -u -r1.375 -r1.376 src/sys/uvm/uvm_map.c
cvs rdiff -u -r1.75 -r1.76 src/sys/uvm/uvm_meter.c
cvs rdiff -u -r1.46 -r1.47 src/sys/uvm/uvm_pager.h
cvs rdiff -u -r1.110 -r1.111 src/sys/uvm/uvm_vnode.c
cvs rdiff -u -r1.237 -r1.238 src/usr.bin/vmstat/vmstat.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.94 src/sys/miscfs/genfs/genfs_io.c:1.95
--- src/sys/miscfs/genfs/genfs_io.c:1.94 Tue Mar 17 18:31:38 2020
+++ src/sys/miscfs/genfs/genfs_io.c Sun Mar 22 18:32:41 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: genfs_io.c,v 1.94 2020/03/17 18:31:38 ad Exp $ */
+/* $NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.94 2020/03/17 18:31:38 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/buf.h>
+#include <sys/atomic.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
@@ -103,7 +104,7 @@ genfs_getpages(void *v)
} */ * const ap = v;
off_t diskeof, memeof;
- int i, error, npages;
+ int i, error, npages, iflag;
const int flags = ap->a_flags;
struct vnode * const vp = ap->a_vp;
struct uvm_object * const uobj = &vp->v_uobj;
@@ -125,18 +126,35 @@ genfs_getpages(void *v)
KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
vp->v_type == VLNK || vp->v_type == VBLK);
+ /*
+ * the object must be locked. it can only be a read lock when
+ * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+ */
+
+ KASSERT(rw_lock_held(uobj->vmobjlock));
+ KASSERT(rw_write_held(uobj->vmobjlock) ||
+ ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && !memwrite));
+
#ifdef DIAGNOSTIC
if ((flags & PGO_JOURNALLOCKED) && vp->v_mount->mnt_wapbl)
WAPBL_JLOCK_ASSERT(vp->v_mount);
#endif
- mutex_enter(vp->v_interlock);
- error = vdead_check(vp, VDEAD_NOWAIT);
- mutex_exit(vp->v_interlock);
- if (error) {
- if ((flags & PGO_LOCKED) == 0)
- rw_exit(uobj->vmobjlock);
- return error;
+ /*
+ * check for reclaimed vnode. v_interlock is not held here, but
+ * VI_DEADCHECK is set with vmobjlock held.
+ */
+
+ iflag = atomic_load_relaxed(&vp->v_iflag);
+ if (__predict_false((iflag & VI_DEADCHECK) != 0)) {
+ mutex_enter(vp->v_interlock);
+ error = vdead_check(vp, VDEAD_NOWAIT);
+ mutex_exit(vp->v_interlock);
+ if (error) {
+ if ((flags & PGO_LOCKED) == 0)
+ rw_exit(uobj->vmobjlock);
+ return error;
+ }
}
startover:
@@ -217,9 +235,11 @@ startover:
KASSERT(pg == NULL || pg == PGO_DONTCARE);
}
#endif /* defined(DEBUG) */
- nfound = uvn_findpages(uobj, origoffset, &npages,
+ nfound = uvn_findpages(uobj, origoffset, &npages,
ap->a_m, NULL,
- UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
+ UFP_NOWAIT | UFP_NOALLOC |
+ (memwrite ? UFP_NORDONLY : 0) |
+ ((flags & PGO_NOBUSY) != 0 ? UFP_NOBUSY : 0));
KASSERT(npages == *ap->a_count);
if (nfound == 0) {
error = EBUSY;
@@ -230,7 +250,9 @@ startover:
* the file behind us.
*/
if (!genfs_node_rdtrylock(vp)) {
- genfs_rel_pages(ap->a_m, npages);
+ if ((flags & PGO_NOBUSY) == 0) {
+ genfs_rel_pages(ap->a_m, npages);
+ }
/*
* restore the array.
Index: src/sys/nfs/nfs_bio.c
diff -u src/sys/nfs/nfs_bio.c:1.194 src/sys/nfs/nfs_bio.c:1.195
--- src/sys/nfs/nfs_bio.c:1.194 Sun Feb 23 15:46:41 2020
+++ src/sys/nfs/nfs_bio.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: nfs_bio.c,v 1.194 2020/02/23 15:46:41 ad Exp $ */
+/* $NetBSD: nfs_bio.c,v 1.195 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1989, 1993
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.194 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.195 2020/03/22 18:32:42 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_nfs.h"
@@ -1260,6 +1260,19 @@ nfs_getpages(void *v)
bool v3 = NFS_ISV3(vp);
bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
bool locked = (ap->a_flags & PGO_LOCKED) != 0;
+ bool nobusy = (ap->a_flags & PGO_NOBUSY);
+
+ /*
+ * XXX NFS wants to modify the pages below and that can't be done
+ * with a read lock. We can't upgrade the lock here because it
+ * would screw up UVM fault processing. Have NFS take the I/O
+ * path.
+ */
+ if (locked && rw_lock_op(uobj->vmobjlock) == RW_READER) {
+ *ap->a_count = 0;
+ ap->a_m[ap->a_centeridx] = NULL;
+ return EBUSY;
+ }
/*
* If we are not locked we are not really using opgs,
@@ -1341,7 +1354,8 @@ nfs_getpages(void *v)
* available and put back original pgs array.
*/
- uvm_page_unbusy(pgs, npages);
+ if (nobusy == false)
+ uvm_page_unbusy(pgs, npages);
*ap->a_count = 0;
memcpy(pgs, opgs,
npages * sizeof(struct vm_pages *));
Index: src/sys/sys/cpu_data.h
diff -u src/sys/sys/cpu_data.h:1.49 src/sys/sys/cpu_data.h:1.50
--- src/sys/sys/cpu_data.h:1.49 Wed Jan 15 17:55:44 2020
+++ src/sys/sys/cpu_data.h Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_data.h,v 1.49 2020/01/15 17:55:44 ad Exp $ */
+/* $NetBSD: cpu_data.h,v 1.50 2020/03/22 18:32:42 ad Exp $ */
/*-
* Copyright (c) 2004, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc.
@@ -93,8 +93,8 @@ enum cpu_count {
CPU_COUNT_FILEUNKNOWN,
CPU_COUNT_FILECLEAN,
CPU_COUNT_FILEDIRTY,
- CPU_COUNT__UNUSED1,
- CPU_COUNT__UNUSED2,
+ CPU_COUNT_FLTUP,
+ CPU_COUNT_FLTNOUP,
CPU_COUNT_MAX /* 48 */
};
Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.293 src/sys/sys/vnode.h:1.294
--- src/sys/sys/vnode.h:1.293 Sat Mar 14 20:45:23 2020
+++ src/sys/sys/vnode.h Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vnode.h,v 1.293 2020/03/14 20:45:23 ad Exp $ */
+/* $NetBSD: vnode.h,v 1.294 2020/03/22 18:32:42 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -212,6 +212,7 @@ typedef struct vnode vnode_t;
#define VI_WRMAP 0x00000400 /* might have PROT_WRITE u. mappings */
#define VI_PAGES 0x00000800 /* UVM object has >0 pages */
#define VI_ONWORKLST 0x00004000 /* On syncer work-list */
+#define VI_DEADCHECK 0x00008000 /* UVM: need to call vdead_check() */
/*
* The third set are locked by the underlying file system.
@@ -220,7 +221,7 @@ typedef struct vnode vnode_t;
#define VNODE_FLAGBITS \
"\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \
- "\13WRMAP\14PAGES\17ONWORKLST\31DIROP"
+ "\13WRMAP\14PAGES\17ONWORKLST\20DEADCHECK\31DIROP"
#define VSIZENOTSET ((voff_t)-1)
Index: src/sys/uvm/uvm_anon.c
diff -u src/sys/uvm/uvm_anon.c:1.76 src/sys/uvm/uvm_anon.c:1.77
--- src/sys/uvm/uvm_anon.c:1.76 Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_anon.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_anon.c,v 1.76 2020/03/20 19:08:54 ad Exp $ */
+/* $NetBSD: uvm_anon.c,v 1.77 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.76 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.77 2020/03/22 18:32:42 ad Exp $");
#include "opt_uvmhist.h"
@@ -296,6 +296,8 @@ uvm_anon_pagein(struct vm_amap *amap, st
* anon was freed.
*/
return false;
+ case ENOLCK:
+ panic("uvm_anon_pagein");
default:
return true;
}
Index: src/sys/uvm/uvm_aobj.c
diff -u src/sys/uvm/uvm_aobj.c:1.138 src/sys/uvm/uvm_aobj.c:1.139
--- src/sys/uvm/uvm_aobj.c:1.138 Tue Mar 17 18:31:39 2020
+++ src/sys/uvm/uvm_aobj.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_aobj.c,v 1.138 2020/03/17 18:31:39 ad Exp $ */
+/* $NetBSD: uvm_aobj.c,v 1.139 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.138 2020/03/17 18:31:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.139 2020/03/22 18:32:42 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
@@ -807,6 +807,16 @@ uao_get(struct uvm_object *uobj, voff_t
(uintptr_t)uobj, offset, flags,0);
/*
+ * the object must be locked. it can only be a read lock when
+ * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+ */
+
+ KASSERT(rw_lock_held(uobj->vmobjlock));
+ KASSERT(rw_write_held(uobj->vmobjlock) ||
+ ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 &&
+ (access_type & VM_PROT_WRITE) == 0));
+
+ /*
* get number of pages
*/
@@ -835,10 +845,12 @@ uao_get(struct uvm_object *uobj, voff_t
/*
* if page is new, attempt to allocate the page,
- * zero-fill'd.
+ * zero-fill'd. we can only do this if busying
+ * pages, as otherwise the object is read locked.
*/
- if (ptmp == NULL && uao_find_swslot(uobj,
+ if ((flags & PGO_NOBUSY) == 0 && ptmp == NULL &&
+ uao_find_swslot(uobj,
current_offset >> PAGE_SHIFT) == 0) {
ptmp = uao_pagealloc(uobj, current_offset,
UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
@@ -870,9 +882,11 @@ uao_get(struct uvm_object *uobj, voff_t
KASSERT(uvm_pagegetdirty(ptmp) !=
UVM_PAGE_STATUS_CLEAN);
- /* caller must un-busy this page */
- ptmp->flags |= PG_BUSY;
- UVM_PAGE_OWN(ptmp, "uao_get1");
+ if ((flags & PGO_NOBUSY) == 0) {
+ /* caller must un-busy this page */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uao_get1");
+ }
gotpage:
pps[lcv] = ptmp;
gotpages++;
Index: src/sys/uvm/uvm_extern.h
diff -u src/sys/uvm/uvm_extern.h:1.221 src/sys/uvm/uvm_extern.h:1.222
--- src/sys/uvm/uvm_extern.h:1.221 Sun Feb 23 15:46:43 2020
+++ src/sys/uvm/uvm_extern.h Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_extern.h,v 1.221 2020/02/23 15:46:43 ad Exp $ */
+/* $NetBSD: uvm_extern.h,v 1.222 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -249,6 +249,7 @@ b\32UNMAP\0\
#define UFP_NORDONLY 0x08
#define UFP_DIRTYONLY 0x10
#define UFP_BACKWARD 0x20
+#define UFP_NOBUSY 0x40
/*
* lockflags that control the locking behavior of various functions.
@@ -506,6 +507,8 @@ struct uvmexp_sysctl {
int64_t fileunknown;
int64_t fileclean;
int64_t filedirty;
+ int64_t fltup;
+ int64_t fltnoup;
};
#ifdef _KERNEL
Index: src/sys/uvm/uvm_fault.c
diff -u src/sys/uvm/uvm_fault.c:1.221 src/sys/uvm/uvm_fault.c:1.222
--- src/sys/uvm/uvm_fault.c:1.221 Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_fault.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_fault.c,v 1.221 2020/03/20 19:08:54 ad Exp $ */
+/* $NetBSD: uvm_fault.c,v 1.222 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.221 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.222 2020/03/22 18:32:42 ad Exp $");
#include "opt_uvmhist.h"
@@ -194,7 +194,7 @@ uvmfault_anonflush(struct vm_anon **anon
for (lcv = 0; lcv < n; lcv++) {
if (anons[lcv] == NULL)
continue;
- KASSERT(rw_write_held(anons[lcv]->an_lock));
+ KASSERT(rw_lock_held(anons[lcv]->an_lock));
pg = anons[lcv]->an_page;
if (pg && (pg->flags & PG_BUSY) == 0) {
uvm_pagelock(pg);
@@ -276,10 +276,11 @@ uvmfault_anonget(struct uvm_faultinfo *u
struct vm_anon *anon)
{
struct vm_page *pg;
+ krw_t lock_type;
int error;
UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);
- KASSERT(rw_write_held(anon->an_lock));
+ KASSERT(rw_lock_held(anon->an_lock));
KASSERT(anon->an_lock == amap->am_lock);
/* Increment the counters.*/
@@ -316,6 +317,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
* Is page resident? Make sure it is not busy/released.
*/
+ lock_type = rw_lock_op(anon->an_lock);
if (pg) {
/*
@@ -352,9 +354,14 @@ uvmfault_anonget(struct uvm_faultinfo *u
} else {
#if defined(VMSWAP)
/*
- * No page, therefore allocate one.
+ * No page, therefore allocate one. A write lock is
+ * required for this. If the caller didn't supply
+ * one, fail now and have them retry.
*/
+ if (lock_type == RW_READER) {
+ return ENOLCK;
+ }
pg = uvm_pagealloc(NULL,
ufi != NULL ? ufi->orig_rvaddr : 0,
anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0);
@@ -400,7 +407,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
locked = uvmfault_relock(ufi);
if (locked || we_own) {
- rw_enter(anon->an_lock, RW_WRITER);
+ rw_enter(anon->an_lock, lock_type);
}
/*
@@ -415,6 +422,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
*/
if (we_own) {
+ KASSERT(lock_type == RW_WRITER);
#if defined(VMSWAP)
if (error) {
@@ -561,6 +569,11 @@ uvmfault_promote(struct uvm_faultinfo *u
} else if (uobjpage != PGO_DONTCARE) {
/* object-backed COW */
opg = uobjpage;
+ if ((uobjpage->flags & PG_BUSY) != 0) {
+ KASSERT(rw_write_held(opg->uobject->vmobjlock));
+ } else {
+ KASSERT(rw_read_held(opg->uobject->vmobjlock));
+ }
} else {
/* ZFOD */
opg = NULL;
@@ -573,10 +586,9 @@ uvmfault_promote(struct uvm_faultinfo *u
KASSERT(amap != NULL);
KASSERT(uobjpage != NULL);
- KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0);
KASSERT(rw_write_held(amap->am_lock));
KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock);
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
if (*spare != NULL) {
anon = *spare;
@@ -615,7 +627,10 @@ uvmfault_promote(struct uvm_faultinfo *u
}
/* unlock and fail ... */
- uvm_page_unbusy(&uobjpage, 1);
+ if (uobjpage != PGO_DONTCARE &&
+ (uobjpage->flags & PG_BUSY) != 0) {
+ uvm_page_unbusy(&uobjpage, 1);
+ }
uvmfault_unlockall(ufi, amap, uobj);
if (!uvm_reclaimable()) {
UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
@@ -738,6 +753,12 @@ struct uvm_faultctx {
* (or due to the mechanical separation of the function?)
*/
bool promote;
+
+ /*
+ * type of lock to acquire on objects in both layers.
+ */
+ krw_t lower_lock_type;
+ krw_t upper_lock_type;
};
static inline int uvm_fault_check(
@@ -780,7 +801,7 @@ static inline void uvm_fault_lower_neigh
struct uvm_faultinfo *, const struct uvm_faultctx *,
vaddr_t, struct vm_page *);
static inline int uvm_fault_lower_io(
- struct uvm_faultinfo *, const struct uvm_faultctx *,
+ struct uvm_faultinfo *, struct uvm_faultctx *,
struct uvm_object **, struct vm_page **);
static inline int uvm_fault_lower_direct(
struct uvm_faultinfo *, struct uvm_faultctx *,
@@ -814,6 +835,20 @@ uvm_fault_internal(struct vm_map *orig_m
/* "wire" fault causes wiring of both mapping and paging */
.wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0,
.wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0,
+
+ /*
+ * default lock type to acquire on upper & lower layer
+ * objects: reader. this can be upgraded at any point
+ * during the fault from read -> write and uvm_faultctx
+ * changed to match, but is never downgraded write -> read.
+ */
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
+ .upper_lock_type = RW_WRITER,
+ .lower_lock_type = RW_WRITER,
+#else
+ .upper_lock_type = RW_READER,
+ .lower_lock_type = RW_READER,
+#endif
};
const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0;
struct vm_anon *anons_store[UVM_MAXRANGE], **anons;
@@ -998,6 +1033,12 @@ uvm_fault_check(
flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0;
}
+ if (flt->wire_paging) {
+ /* wiring pages requires a write lock. */
+ flt->upper_lock_type = RW_WRITER;
+ flt->lower_lock_type = RW_WRITER;
+ }
+
flt->promote = false;
/*
@@ -1093,18 +1134,42 @@ uvm_fault_check(
(uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0);
/*
- * if we've got an amap, lock it and extract current anons.
+ * guess at the most suitable lock types to acquire.
+ * if we've got an amap then lock it and extract current anons.
*/
if (amap) {
- amap_lock(amap, RW_WRITER);
+ if ((amap_flags(amap) & AMAP_SHARED) == 0) {
+ /*
+ * the amap isn't shared. get a writer lock to
+ * avoid the cost of upgrading the lock later if
+ * needed.
+ *
+ * XXX nice for PostgreSQL, but consider threads.
+ */
+ flt->upper_lock_type = RW_WRITER;
+ } else if ((flt->access_type & VM_PROT_WRITE) != 0) {
+ /*
+ * assume we're about to COW.
+ */
+ flt->upper_lock_type = RW_WRITER;
+ }
+ amap_lock(amap, flt->upper_lock_type);
amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages);
} else {
+ if ((flt->access_type & VM_PROT_WRITE) != 0) {
+ /*
+ * we are about to dirty the object and that
+ * requires a write lock.
+ */
+ flt->lower_lock_type = RW_WRITER;
+ }
*ranons = NULL; /* to be safe */
}
/* locked: maps(read), amap(if there) */
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
/*
* for MADV_SEQUENTIAL mappings we want to deactivate the back pages
@@ -1147,6 +1212,44 @@ uvm_fault_check(
}
/*
+ * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer
+ */
+
+static inline int
+uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
+ struct vm_amap *amap, struct uvm_object *uobj)
+{
+
+ KASSERT(amap != NULL);
+ KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
+
+ /*
+ * fast path.
+ */
+
+ if (__predict_true(flt->upper_lock_type == RW_WRITER)) {
+ return 0;
+ }
+
+ /*
+ * otherwise try for the upgrade. if we don't get it, unlock
+ * everything, restart the fault and next time around get a writer
+ * lock.
+ */
+
+ flt->upper_lock_type = RW_WRITER;
+ if (__predict_false(!rw_tryupgrade(amap->am_lock))) {
+ uvmfault_unlockall(ufi, amap, uobj);
+ cpu_count(CPU_COUNT_FLTNOUP, 1);
+ UVMHIST_LOG(maphist, " !upgrade upper", 0, 0,0,0);
+ return ERESTART;
+ }
+ cpu_count(CPU_COUNT_FLTUP, 1);
+ KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
+ return 0;
+}
+
+/*
* uvm_fault_upper_lookup: look up existing h/w mapping and amap.
*
* iterate range of interest:
@@ -1170,7 +1273,8 @@ uvm_fault_upper_lookup(
UVMHIST_FUNC("uvm_fault_upper_lookup"); UVMHIST_CALLED(maphist);
/* locked: maps(read), amap(if there) */
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
/*
* map in the backpages and frontpages we found in the amap in hopes
@@ -1191,7 +1295,7 @@ uvm_fault_upper_lookup(
}
/*
- * check for present page and map if possible. re-activate it.
+ * check for present page and map if possible.
*/
pages[lcv] = PGO_DONTCARE;
@@ -1222,7 +1326,8 @@ uvm_fault_upper_lookup(
}
/* locked: maps(read), amap(if there) */
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
/* (shadowed == true) if there is an anon at the faulting address */
UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed,
(ufi->entry->object.uvm_obj && shadowed != false),0,0);
@@ -1255,12 +1360,21 @@ uvm_fault_upper_neighbor(
KASSERT(pg->uobject == NULL);
KASSERT(pg->uanon != NULL);
- KASSERT(rw_write_held(pg->uanon->an_lock));
+ KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type);
KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
- uvm_pagelock(pg);
- uvm_pageenqueue(pg);
- uvm_pageunlock(pg);
+ /*
+ * in the read-locked case, it's not possible for this to be a new
+ * page, therefore it's enqueued already. there wasn't a direct
+ * fault on the page, so avoid the cost of re-enqueuing it unless
+ * write-locked.
+ */
+
+ if (flt->upper_lock_type == RW_WRITER) {
+ uvm_pagelock(pg);
+ uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
+ }
UVMHIST_LOG(maphist,
" MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1300,7 +1414,7 @@ uvm_fault_upper(
UVMHIST_FUNC("uvm_fault_upper"); UVMHIST_CALLED(maphist);
/* locked: maps(read), amap, anon */
- KASSERT(rw_write_held(amap->am_lock));
+ KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon->an_lock == amap->am_lock);
/*
@@ -1323,7 +1437,7 @@ uvm_fault_upper(
* if the page is on loan from a uvm_object, then anonget will
* lock that object for us if it does not fail.
*/
-
+ retry:
error = uvmfault_anonget(ufi, amap, anon);
switch (error) {
case 0:
@@ -1336,6 +1450,15 @@ uvm_fault_upper(
kpause("fltagain1", false, hz/2, NULL);
return ERESTART;
+ case ENOLCK:
+ /* it needs a write lock: retry */
+ error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+ if (error != 0) {
+ return error;
+ }
+ KASSERT(rw_write_held(amap->am_lock));
+ goto retry;
+
default:
return error;
}
@@ -1347,9 +1470,10 @@ uvm_fault_upper(
uobj = anon->an_page->uobject; /* locked by anonget if !NULL */
/* locked: maps(read), amap, anon, uobj(if one) */
- KASSERT(rw_write_held(amap->am_lock));
+ KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon->an_lock == amap->am_lock);
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERT(uobj == NULL ||
+ rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* special handling for loaned pages
@@ -1424,6 +1548,13 @@ uvm_fault_upper_loan(
/* >1 case is already ok */
if (anon->an_ref == 1) {
+ /* breaking loan requires a write lock. */
+ error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+ if (error != 0) {
+ return error;
+ }
+ KASSERT(rw_write_held(amap->am_lock));
+
error = uvm_loanbreak_anon(anon, *ruobj);
if (error != 0) {
uvmfault_unlockall(ufi, amap, *ruobj);
@@ -1452,6 +1583,7 @@ uvm_fault_upper_promote(
struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct uvm_object *uobj, struct vm_anon *anon)
{
+ struct vm_amap * const amap = ufi->entry->aref.ar_amap;
struct vm_anon * const oanon = anon;
struct vm_page *pg;
int error;
@@ -1460,6 +1592,13 @@ uvm_fault_upper_promote(
UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0);
cpu_count(CPU_COUNT_FLT_ACOW, 1);
+ /* promoting requires a write lock. */
+ error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
+ if (error != 0) {
+ return error;
+ }
+ KASSERT(rw_write_held(amap->am_lock));
+
error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon,
&flt->anon_spare);
switch (error) {
@@ -1471,13 +1610,10 @@ uvm_fault_upper_promote(
return error;
}
- KASSERT(anon == NULL || anon->an_lock == oanon->an_lock);
+ KASSERT(anon->an_lock == oanon->an_lock);
+ /* uvm_fault_upper_done will activate or enqueue the page */
pg = anon->an_page;
- /* uvm_fault_upper_done will activate the page */
- uvm_pagelock(pg);
- uvm_pageenqueue(pg);
- uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
@@ -1531,10 +1667,11 @@ uvm_fault_upper_enter(
UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist);
/* locked: maps(read), amap, oanon, anon(if different from oanon) */
- KASSERT(rw_write_held(amap->am_lock));
+ KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon->an_lock == amap->am_lock);
KASSERT(oanon->an_lock == amap->am_lock);
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERT(uobj == NULL ||
+ rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
/*
@@ -1563,6 +1700,17 @@ uvm_fault_upper_enter(
KASSERT(!pmap_extract(pmap, va, NULL));
/*
+ * ensure that the page is queued in the case that
+ * we just promoted.
+ */
+
+ if (flt->upper_lock_type == RW_WRITER) {
+ uvm_pagelock(pg);
+ uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
+ }
+
+ /*
* No need to undo what we did; we can simply think of
* this as the pmap throwing away the mapping information.
*
@@ -1632,6 +1780,57 @@ uvm_fault_upper_done(
}
/*
+ * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer
+ */
+
+static inline int
+uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
+ struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage)
+{
+
+ KASSERT(uobj != NULL);
+ KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
+
+ /*
+ * fast path.
+ */
+
+ if (__predict_true(flt->lower_lock_type == RW_WRITER)) {
+ KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ return 0;
+ }
+
+ /*
+ * otherwise try for the upgrade. if we don't get it, unlock
+ * everything, restart the fault and next time around get a writer
+ * lock.
+ */
+
+ flt->lower_lock_type = RW_WRITER;
+ if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) {
+ uvmfault_unlockall(ufi, amap, uobj);
+ cpu_count(CPU_COUNT_FLTNOUP, 1);
+ UVMHIST_LOG(maphist, " !upgrade lower", 0, 0,0,0);
+ return ERESTART;
+ }
+ cpu_count(CPU_COUNT_FLTUP, 1);
+ KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
+
+ /*
+ * finally, if a page was supplied, assert that it's not busy
+ * (can't be with a reader lock) and then mark it busy now that
+ * we have a writer lock.
+ */
+
+ if (uobjpage != NULL) {
+ KASSERT((uobjpage->flags & PG_BUSY) == 0);
+ uobjpage->flags |= PG_BUSY;
+ UVM_PAGE_OWN(uobjpage, "upgrdlwr");
+ }
+ return 0;
+}
+
+/*
* uvm_fault_lower: handle lower fault.
*
* 1. check uobj
@@ -1686,9 +1885,19 @@ uvm_fault_lower(
* locked:
* maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
*/
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
- KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
+ if (flt->lower_lock_type == RW_WRITER) {
+ KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERTMSG(uobjpage == NULL ||
+ (uobjpage->flags & PG_BUSY) != 0,
+ "page %p should be busy", uobjpage);
+ } else {
+ KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+ KASSERTMSG(uobjpage == NULL ||
+ (uobjpage->flags & PG_BUSY) == 0,
+ "page %p should not be busy", uobjpage);
+ }
/*
* note that uobjpage can not be PGO_DONTCARE at this point. we now
@@ -1729,9 +1938,15 @@ uvm_fault_lower(
* locked:
* maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
*/
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
- KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
+ if (flt->lower_lock_type == RW_WRITER) {
+ KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ } else {
+ KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+ }
/*
* notes:
@@ -1772,17 +1987,25 @@ uvm_fault_lower_lookup(
vaddr_t currva;
UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist);
- rw_enter(uobj->vmobjlock, RW_WRITER);
- /* Locked: maps(read), amap(if there), uobj */
+ rw_enter(uobj->vmobjlock, flt->lower_lock_type);
+
+ /*
+ * Locked: maps(read), amap(if there), uobj
+ *
+ * if we have a read lock on the object, do a PGO_NOBUSY get, which
+ * will return us pages with PG_BUSY clear. if a write lock is held
+ * pages will be returned with PG_BUSY set.
+ */
cpu_count(CPU_COUNT_FLTLGET, 1);
gotpages = flt->npages;
(void) uobj->pgops->pgo_get(uobj,
ufi->entry->offset + flt->startva - ufi->entry->start,
pages, &gotpages, flt->centeridx,
- flt->access_type & MASK(ufi->entry), ufi->entry->advice, PGO_LOCKED);
+ flt->access_type & MASK(ufi->entry), ufi->entry->advice,
+ PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 0 : PGO_NOBUSY));
- KASSERT(rw_write_held(uobj->vmobjlock));
+ KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* check for pages to map, if we got any
@@ -1803,10 +2026,20 @@ uvm_fault_lower_lookup(
}
KASSERT(curpg->uobject == uobj);
+ if (flt->lower_lock_type == RW_WRITER) {
+ KASSERT(rw_write_held(uobj->vmobjlock));
+ KASSERTMSG((curpg->flags & PG_BUSY) != 0,
+ "page %p should be busy", curpg);
+ } else {
+ KASSERT(rw_read_held(uobj->vmobjlock));
+ KASSERTMSG((curpg->flags & PG_BUSY) == 0,
+ "page %p should not be busy", curpg);
+ }
+
/*
* if center page is resident and not PG_BUSY|PG_RELEASED
- * then pgo_get made it PG_BUSY for us and gave us a handle
- * to it.
+ * and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and
+ * gave us a handle to it.
*/
if (lcv == flt->centeridx) {
@@ -1839,9 +2072,18 @@ uvm_fault_lower_neighbor(
* for this. we can just directly enter the pages.
*/
- uvm_pagelock(pg);
- uvm_pageenqueue(pg);
- uvm_pageunlock(pg);
+ /*
+ * in the read-locked case, it's not possible for this to be a new
+ * page. it must be cached with the object and enqueued already.
+ * there wasn't a direct fault on the page, so avoid the cost of
+ * re-enqueuing it.
+ */
+
+ if (flt->lower_lock_type == RW_WRITER) {
+ uvm_pagelock(pg);
+ uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
+ }
UVMHIST_LOG(maphist,
" MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1858,10 +2100,21 @@ uvm_fault_lower_neighbor(
KASSERT((pg->flags & PG_RELEASED) == 0);
KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
- pg->flags &= ~(PG_BUSY);
- UVM_PAGE_OWN(pg, NULL);
- KASSERT(rw_write_held(pg->uobject->vmobjlock));
+ /*
+ * if a write lock was held on the object, the pages have been
+ * busied. unbusy them now, as we are about to enter and then
+ * forget about them.
+ */
+
+ if (flt->lower_lock_type == RW_WRITER) {
+ KASSERT((pg->flags & PG_BUSY) != 0);
+ pg->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ } else {
+ KASSERT((pg->flags & PG_BUSY) == 0);
+ }
+ KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);
const vm_prot_t mapprot =
readonly ? (flt->enter_prot & ~VM_PROT_WRITE) :
@@ -1883,7 +2136,7 @@ uvm_fault_lower_neighbor(
static int
uvm_fault_lower_io(
- struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
+ struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct uvm_object **ruobj, struct vm_page **ruobjpage)
{
struct vm_amap * const amap = ufi->entry->aref.ar_amap;
@@ -1906,10 +2159,17 @@ uvm_fault_lower_io(
advice = ufi->entry->advice;
/* Locked: maps(read), amap(if there), uobj */
+ KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
+
+ /* Upgrade to a write lock if needed. */
+ error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL);
+ if (error != 0) {
+ return error;
+ }
uvmfault_unlockall(ufi, amap, NULL);
- /* Locked: uobj */
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ /* Locked: uobj(write) */
+ KASSERT(rw_write_held(uobj->vmobjlock));
cpu_count(CPU_COUNT_FLTGET, 1);
gotpages = 1;
@@ -1950,13 +2210,14 @@ uvm_fault_lower_io(
locked = uvmfault_relock(ufi);
if (locked && amap)
- amap_lock(amap, RW_WRITER);
+ amap_lock(amap, flt->upper_lock_type);
/* might be changed */
uobj = pg->uobject;
- rw_enter(uobj->vmobjlock, RW_WRITER);
+ rw_enter(uobj->vmobjlock, flt->lower_lock_type);
KASSERT((pg->flags & PG_BUSY) != 0);
+ KASSERT(flt->lower_lock_type == RW_WRITER);
uvm_pagelock(pg);
uvm_pageactivate(pg);
@@ -2056,7 +2317,11 @@ uvm_fault_lower_direct(
}
KASSERT(pg == uobjpage);
- KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ if (flt->lower_lock_type == RW_READER) {
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+ } else {
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ }
return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
}
@@ -2076,6 +2341,7 @@ uvm_fault_lower_direct_loan(
struct vm_amap * const amap = ufi->entry->aref.ar_amap;
struct vm_page *pg;
struct vm_page *uobjpage = *ruobjpage;
+ int error;
UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist);
if (!flt->cow_now) {
@@ -2083,7 +2349,16 @@ uvm_fault_lower_direct_loan(
/* cap! */
flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
} else {
- /* write fault: must break the loan here */
+ /*
+ * write fault: must break the loan here. to do this
+ * we need a write lock on the object.
+ */
+
+ error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage);
+ if (error != 0) {
+ return error;
+ }
+ KASSERT(rw_write_held(uobj->vmobjlock));
pg = uvm_loanbreak(uobjpage);
if (pg == NULL) {
@@ -2133,12 +2408,18 @@ uvm_fault_lower_promote(
KASSERT(amap != NULL);
+ /* promoting requires a write lock. */
+ error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj);
+ if (error != 0) {
+ return error;
+ }
+ KASSERT(rw_write_held(amap->am_lock));
+
/*
* If we are going to promote the data to an anon we
* allocate a blank anon here and plug it into our amap.
*/
- error = uvmfault_promote(ufi, NULL, uobjpage,
- &anon, &flt->anon_spare);
+ error = uvmfault_promote(ufi, NULL, uobjpage, &anon, &flt->anon_spare);
switch (error) {
case 0:
break;
@@ -2153,7 +2434,11 @@ uvm_fault_lower_promote(
/*
* Fill in the data.
*/
- KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ if (flt->lower_lock_type == RW_READER) {
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
+ } else {
+ KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+ }
if (uobjpage != PGO_DONTCARE) {
cpu_count(CPU_COUNT_FLT_PRCOPY, 1);
@@ -2175,11 +2460,13 @@ uvm_fault_lower_promote(
* since we still hold the object lock.
*/
- uobjpage->flags &= ~PG_BUSY;
- uvm_pagelock(uobjpage);
- uvm_pagewakeup(uobjpage);
- uvm_pageunlock(uobjpage);
- UVM_PAGE_OWN(uobjpage, NULL);
+ if ((uobjpage->flags & PG_BUSY) != 0) {
+ uobjpage->flags &= ~PG_BUSY;
+ uvm_pagelock(uobjpage);
+ uvm_pagewakeup(uobjpage);
+ uvm_pageunlock(uobjpage);
+ UVM_PAGE_OWN(uobjpage, NULL);
+ }
UVMHIST_LOG(maphist,
" promote uobjpage %#jx to anon/page %#jx/%#jx",
@@ -2222,12 +2509,22 @@ uvm_fault_lower_enter(
* maps(read), amap(if !null), uobj(if !null),
* anon(if !null), pg(if anon), unlock_uobj(if !null)
*
+ * anon must be write locked (promotion). uobj can be either.
+ *
* Note: pg is either the uobjpage or the new page in the new anon.
*/
- KASSERT(amap == NULL || rw_write_held(amap->am_lock));
- KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERT(amap == NULL ||
+ rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
- KASSERT((pg->flags & PG_BUSY) != 0);
+ if (flt->lower_lock_type == RW_WRITER) {
+ KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+ KASSERTMSG((pg->flags & PG_BUSY) != 0,
+ "page %p should be busy", pg);
+ } else {
+ KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
+ KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0,
+ "page %p should not be busy", pg);
+ }
/*
* all resources are present. we can now map it in and free our
@@ -2264,18 +2561,24 @@ uvm_fault_lower_enter(
* we just promoted the page.
*/
- uvm_pagelock(pg);
- uvm_pageenqueue(pg);
- uvm_pagewakeup(pg);
- uvm_pageunlock(pg);
+ if (anon != NULL || flt->lower_lock_type == RW_WRITER) {
+ uvm_pagelock(pg);
+ uvm_pageenqueue(pg);
+ uvm_pagewakeup(pg);
+ uvm_pageunlock(pg);
+ } else {
+ KASSERT((pg->flags & PG_BUSY) == 0);
+ }
/*
* note that pg can't be PG_RELEASED since we did not drop
* the object lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
- pg->flags &= ~(PG_BUSY|PG_FAKE);
- UVM_PAGE_OWN(pg, NULL);
+ if ((pg->flags & PG_BUSY) != 0) {
+ pg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+ }
uvmfault_unlockall(ufi, amap, uobj);
if (!uvm_reclaimable()) {
@@ -2297,11 +2600,13 @@ uvm_fault_lower_enter(
* lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
- uvm_pagelock(pg);
- uvm_pagewakeup(pg);
- uvm_pageunlock(pg);
- pg->flags &= ~(PG_BUSY|PG_FAKE);
- UVM_PAGE_OWN(pg, NULL);
+ if ((pg->flags & PG_BUSY) != 0) {
+ uvm_pagelock(pg);
+ uvm_pagewakeup(pg);
+ uvm_pageunlock(pg);
+ pg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+ }
pmap_update(ufi->orig_map->pmap);
uvmfault_unlockall(ufi, amap, uobj);
Index: src/sys/uvm/uvm_loan.c
diff -u src/sys/uvm/uvm_loan.c:1.99 src/sys/uvm/uvm_loan.c:1.100
--- src/sys/uvm/uvm_loan.c:1.99 Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_loan.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_loan.c,v 1.99 2020/03/20 19:08:54 ad Exp $ */
+/* $NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.99 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -391,6 +391,7 @@ uvm_loananon(struct uvm_faultinfo *ufi,
if (error) {
UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
+ KASSERT(error != ENOLCK);
/* need to refault (i.e. refresh our lookup) ? */
if (error == ERESTART) {
Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.375 src/sys/uvm/uvm_map.c:1.376
--- src/sys/uvm/uvm_map.c:1.375 Fri Mar 20 19:08:54 2020
+++ src/sys/uvm/uvm_map.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_map.c,v 1.375 2020/03/20 19:08:54 ad Exp $ */
+/* $NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.375 2020/03/20 19:08:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $");
#include "opt_ddb.h"
#include "opt_pax.h"
@@ -2256,7 +2256,11 @@ uvm_unmap_remove(struct vm_map *map, vad
* change while in pmap_remove().
*/
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
uvm_map_lock_entry(entry, RW_WRITER);
+#else
+ uvm_map_lock_entry(entry, RW_READER);
+#endif
pmap_remove(map->pmap, entry->start, entry->end);
/*
@@ -2831,7 +2835,11 @@ uvm_map_extract(struct vm_map *srcmap, v
/* we advance "entry" in the following if statement */
if (flags & UVM_EXTRACT_REMOVE) {
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
uvm_map_lock_entry(entry, RW_WRITER);
+#else
+ uvm_map_lock_entry(entry, RW_READER);
+#endif
pmap_remove(srcmap->pmap, entry->start,
entry->end);
uvm_map_unlock_entry(entry);
@@ -3063,7 +3071,11 @@ uvm_map_protect(struct vm_map *map, vadd
if (current->protection != old_prot) {
/* update pmap! */
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
uvm_map_lock_entry(current, RW_WRITER);
+#else
+ uvm_map_lock_entry(current, RW_READER);
+#endif
pmap_protect(map->pmap, current->start, current->end,
current->protection & MASK(current));
uvm_map_unlock_entry(current);
@@ -4404,7 +4416,11 @@ uvm_mapent_forkcopy(struct vm_map *new_m
if (old_entry->aref.ar_amap &&
!UVM_ET_ISNEEDSCOPY(old_entry)) {
if (old_entry->max_protection & VM_PROT_WRITE) {
+#ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
uvm_map_lock_entry(old_entry, RW_WRITER);
+#else
+ uvm_map_lock_entry(old_entry, RW_READER);
+#endif
pmap_protect(old_map->pmap,
old_entry->start, old_entry->end,
old_entry->protection & ~VM_PROT_WRITE);
Index: src/sys/uvm/uvm_meter.c
diff -u src/sys/uvm/uvm_meter.c:1.75 src/sys/uvm/uvm_meter.c:1.76
--- src/sys/uvm/uvm_meter.c:1.75 Thu Mar 19 20:23:19 2020
+++ src/sys/uvm/uvm_meter.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_meter.c,v 1.75 2020/03/19 20:23:19 ad Exp $ */
+/* $NetBSD: uvm_meter.c,v 1.76 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.75 2020/03/19 20:23:19 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.76 2020/03/22 18:32:42 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -186,6 +186,8 @@ sysctl_vm_uvmexp2(SYSCTLFN_ARGS)
u.fileunknown = cpu_count_get(CPU_COUNT_FILEUNKNOWN);
u.fileclean = cpu_count_get(CPU_COUNT_FILECLEAN);
u.filedirty = cpu_count_get(CPU_COUNT_FILEDIRTY);
+ u.fltup = cpu_count_get(CPU_COUNT_FLTUP);
+ u.fltnoup = cpu_count_get(CPU_COUNT_FLTNOUP);
node = *rnode;
node.sysctl_data = &u;
Index: src/sys/uvm/uvm_pager.h
diff -u src/sys/uvm/uvm_pager.h:1.46 src/sys/uvm/uvm_pager.h:1.47
--- src/sys/uvm/uvm_pager.h:1.46 Sat Mar 14 20:45:23 2020
+++ src/sys/uvm/uvm_pager.h Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pager.h,v 1.46 2020/03/14 20:45:23 ad Exp $ */
+/* $NetBSD: uvm_pager.h,v 1.47 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -152,6 +152,7 @@ struct uvm_pagerops {
#define PGO_JOURNALLOCKED 0x020 /* journal is already locked [get/put] */
#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
#define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */
+#define PGO_NOBUSY 0x100 /* don't busy returned pages (read locked) */
#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */
#define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */
#define PGO_NOBLOCKALLOC 0x800 /* backing block allocation is not needed */
Index: src/sys/uvm/uvm_vnode.c
diff -u src/sys/uvm/uvm_vnode.c:1.110 src/sys/uvm/uvm_vnode.c:1.111
--- src/sys/uvm/uvm_vnode.c:1.110 Sat Mar 14 20:45:23 2020
+++ src/sys/uvm/uvm_vnode.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_vnode.c,v 1.110 2020/03/14 20:45:23 ad Exp $ */
+/* $NetBSD: uvm_vnode.c,v 1.111 2020/03/22 18:32:42 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -45,7 +45,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.110 2020/03/14 20:45:23 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.111 2020/03/22 18:32:42 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
@@ -287,7 +287,15 @@ uvn_findpage(struct uvm_object *uobj, vo
UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx", (uintptr_t)uobj, offset,
0, 0);
- KASSERT(rw_write_held(uobj->vmobjlock));
+ /*
+ * NOBUSY must come with NOWAIT and NOALLOC. if NOBUSY is
+ * specified, this may be called with a reader lock.
+ */
+
+ KASSERT(rw_lock_held(uobj->vmobjlock));
+ KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOWAIT) != 0);
+ KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOALLOC) != 0);
+ KASSERT((flags & UFP_NOBUSY) != 0 || rw_write_held(uobj->vmobjlock));
if (*pgp != NULL) {
UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
@@ -380,8 +388,10 @@ uvn_findpage(struct uvm_object *uobj, vo
}
/* mark the page BUSY and we're done. */
- pg->flags |= PG_BUSY;
- UVM_PAGE_OWN(pg, "uvn_findpage");
+ if ((flags & UFP_NOBUSY) == 0) {
+ pg->flags |= PG_BUSY;
+ UVM_PAGE_OWN(pg, "uvn_findpage");
+ }
UVMHIST_LOG(ubchist, "found %#jx (color %ju)",
(uintptr_t)pg, VM_PGCOLOR(pg), 0, 0);
uvm_page_array_advance(a);
Index: src/usr.bin/vmstat/vmstat.c
diff -u src/usr.bin/vmstat/vmstat.c:1.237 src/usr.bin/vmstat/vmstat.c:1.238
--- src/usr.bin/vmstat/vmstat.c:1.237 Sun Mar 22 14:39:28 2020
+++ src/usr.bin/vmstat/vmstat.c Sun Mar 22 18:32:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vmstat.c,v 1.237 2020/03/22 14:39:28 ad Exp $ */
+/* $NetBSD: vmstat.c,v 1.238 2020/03/22 18:32:42 ad Exp $ */
/*-
* Copyright (c) 1998, 2000, 2001, 2007, 2019, 2020
@@ -71,7 +71,7 @@ __COPYRIGHT("@(#) Copyright (c) 1980, 19
#if 0
static char sccsid[] = "@(#)vmstat.c 8.2 (Berkeley) 3/1/95";
#else
-__RCSID("$NetBSD: vmstat.c,v 1.237 2020/03/22 14:39:28 ad Exp $");
+__RCSID("$NetBSD: vmstat.c,v 1.238 2020/03/22 18:32:42 ad Exp $");
#endif
#endif /* not lint */
@@ -1074,6 +1074,10 @@ dosum(void)
(void)printf("%9" PRIu64 " object faults\n", uvmexp.flt_obj);
(void)printf("%9" PRIu64 " promote copy faults\n", uvmexp.flt_prcopy);
(void)printf("%9" PRIu64 " promote zero fill faults\n", uvmexp.flt_przero);
+ (void)printf("%9" PRIu64 " faults upgraded lock\n",
+ uvmexp.fltup);
+ (void)printf("%9" PRIu64 " faults couldn't upgrade lock\n",
+ uvmexp.fltnoup);
(void)printf("%9" PRIu64 " times daemon wokeup\n",uvmexp.pdwoke);
(void)printf("%9" PRIu64 " revolutions of the clock hand\n", uvmexp.pdrevs);